// Files
// trueref-legacy/src/lib/server/search/search.service.test.ts
// 2026-03-27 01:25:46 +01:00
//
// 841 lines
// 24 KiB
// TypeScript

/**
 * Unit tests for SearchService (TRUEREF-0006) and its helper modules.
 *
 * Snippet search (BM25) and library search run against an in-memory SQLite
 * database seeded with known data. Query preprocessing, trust-score
 * computation, and response formatting are exercised directly, without a
 * database.
 */
import { describe, it, expect, beforeEach } from 'vitest';
import Database from 'better-sqlite3';
import { readFileSync } from 'node:fs';
import { join } from 'node:path';
import { SearchService } from './search.service';
import { preprocessQuery } from './query-preprocessor';
import { computeTrustScore } from './trust-score';
import { formatLibraryResults, formatSnippetResults } from './formatters';
// ---------------------------------------------------------------------------
// In-memory test DB factory
// ---------------------------------------------------------------------------
/**
 * Builds a fresh in-memory SQLite database for one test.
 *
 * Foreign keys are enabled, the initial migration file is executed one
 * statement at a time, and finally the FTS script (`../db/fts.sql`) is run.
 *
 * @returns The open better-sqlite3 handle with the schema loaded.
 */
function createTestDb(): Database.Database {
  const db = new Database(':memory:');
  db.pragma('foreign_keys = ON');

  // The migration file holds several statements separated by the drizzle
  // breakpoint marker; execute each non-empty chunk on its own.
  const migrationPath = join(
    join(import.meta.dirname, '../db/migrations'),
    '0000_large_master_chief.sql'
  );
  readFileSync(migrationPath, 'utf-8')
    .split('--> statement-breakpoint')
    .map((chunk) => chunk.trim())
    .filter((chunk) => chunk.length > 0)
    .forEach((sql) => {
      db.exec(sql);
    });

  // FTS5 virtual table + triggers are applied as a single script.
  db.exec(readFileSync(join(import.meta.dirname, '../db/fts.sql'), 'utf-8'));
  return db;
}
// ---------------------------------------------------------------------------
// Seed helpers
// ---------------------------------------------------------------------------
// Whole seconds since the Unix epoch, captured once at module load and used
// for every timestamp column written by the seed helpers.
const NOW_S = Math.trunc(Date.now() / 1000);
/**
 * Inserts one row into `repositories` and returns its id.
 *
 * Fields missing from `overrides` fall back to: id `/test/repo`, title
 * `Test Repo`, description `null`, source `github`, state `indexed`,
 * total_snippets `0`, trust_score `0`, stars `null`. `source_url` is always
 * `https://github.com` followed by the id, and both timestamps are `NOW_S`.
 *
 * @param client    Open database handle to insert into.
 * @param overrides Column values that replace the defaults above.
 * @returns The id of the inserted repository.
 */
function seedRepo(
  client: Database.Database,
  overrides: {
    id?: string;
    title?: string;
    description?: string | null;
    source?: string;
    state?: string;
    total_snippets?: number;
    trust_score?: number;
    stars?: number | null;
  } = {}
) {
  const id = overrides.id ?? '/test/repo';
  const insert = client.prepare(
    `INSERT INTO repositories
(id, title, description, source, source_url, state, total_snippets, trust_score, stars, created_at, updated_at)
VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)`
  );

  // Bind values listed in the same order as the column list in the statement.
  const row = [
    id,
    overrides.title ?? 'Test Repo',
    overrides.description ?? null,
    overrides.source ?? 'github',
    `https://github.com${id}`,
    overrides.state ?? 'indexed',
    overrides.total_snippets ?? 0,
    overrides.trust_score ?? 0,
    overrides.stars ?? null,
    NOW_S,
    NOW_S
  ];
  insert.run(...row);
  return id;
}
/**
 * Inserts a `documents` row for the given repository and returns its id.
 *
 * The row always uses file path `README.md`, checksum `abc`, and `NOW_S` as
 * the indexed timestamp; only the random id and the repository vary.
 *
 * @param client       Open database handle to insert into.
 * @param repositoryId Id of the repository the document belongs to.
 * @returns The generated document id.
 */
function seedDocument(client: Database.Database, repositoryId: string): string {
  const documentId = crypto.randomUUID();
  const insert = client.prepare(
    `INSERT INTO documents (id, repository_id, file_path, checksum, indexed_at)
VALUES (?, ?, ?, ?, ?)`
  );
  insert.run(documentId, repositoryId, 'README.md', 'abc', NOW_S);
  return documentId;
}
/**
 * Inserts one row into `snippets` and returns its generated id.
 *
 * Optional fields default to: version `null`, type `info`, title `null`,
 * language `null`, breadcrumb `null`. `created_at` is always `NOW_S`.
 *
 * @param client Open database handle to insert into.
 * @param opts   Snippet fields; repository, document and content are required.
 * @returns The generated snippet id.
 */
function seedSnippet(
  client: Database.Database,
  opts: {
    repositoryId: string;
    documentId: string;
    content: string;
    title?: string | null;
    breadcrumb?: string | null;
    type?: 'code' | 'info';
    language?: string | null;
    versionId?: string | null;
  }
): string {
  const snippetId = crypto.randomUUID();
  const insert = client.prepare(
    `INSERT INTO snippets
(id, document_id, repository_id, version_id, type, title, content, language, breadcrumb, created_at)
VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?)`
  );

  // Bind values listed in the same order as the column list in the statement.
  const row = [
    snippetId,
    opts.documentId,
    opts.repositoryId,
    opts.versionId ?? null,
    opts.type ?? 'info',
    opts.title ?? null,
    opts.content,
    opts.language ?? null,
    opts.breadcrumb ?? null,
    NOW_S
  ];
  insert.run(...row);
  return snippetId;
}
/**
 * Inserts a `repository_versions` row in state `indexed` and returns its id.
 *
 * The id is the repository id and the tag joined by a slash.
 *
 * @param client       Open database handle to insert into.
 * @param repositoryId Id of the repository the version belongs to.
 * @param tag          Version tag, e.g. `v1.0.0`.
 * @returns The composed version id.
 */
function seedVersion(client: Database.Database, repositoryId: string, tag: string): string {
  const versionId = [repositoryId, tag].join('/');
  const insert = client.prepare(
    `INSERT INTO repository_versions (id, repository_id, tag, state, created_at)
VALUES (?, ?, ?, ?, ?)`
  );
  insert.run(versionId, repositoryId, tag, 'indexed', NOW_S);
  return versionId;
}
// ---------------------------------------------------------------------------
// preprocessQuery
// ---------------------------------------------------------------------------
// Pins the query normalisation rules: whitespace and punctuation cleanup,
// AND/OR/NOT preservation, and the trailing wildcard that is only appended
// when the last token has at least 3 characters.
describe('preprocessQuery', () => {
  /** Asserts that each of `terms` appears in the preprocessed form of `query`. */
  const expectTerms = (query: string, terms: readonly string[]): void => {
    const processed = preprocessQuery(query);
    for (const term of terms) {
      expect(processed).toContain(term);
    }
  };

  it('trims and collapses whitespace', () => {
    // Leading, trailing AND repeated inner whitespace, so that collapsing is
    // actually exercised and not just trimming.
    expect(preprocessQuery('  hello   world  ')).toBe('hello world*');
  });

  it('removes parentheses', () => {
    expect(preprocessQuery('(hello)')).toBe('hello*');
  });

  it('appends wildcard to last token when >= 3 chars', () => {
    expect(preprocessQuery('foo bar baz')).toBe('foo bar baz*');
  });

  it('does not append wildcard when last token is < 3 chars', () => {
    expect(preprocessQuery('foo ba')).toBe('foo ba');
  });

  it('does not double-append wildcard', () => {
    expect(preprocessQuery('hello*')).toBe('hello*');
  });

  it('preserves AND / OR / NOT operators', () => {
    expect(preprocessQuery('hello AND world')).toBe('hello AND world*');
  });

  it('returns empty string for blank input', () => {
    expect(preprocessQuery(' ')).toBe('');
  });

  it('handles single short token without wildcard', () => {
    expect(preprocessQuery('ab')).toBe('ab');
  });

  // Punctuation-heavy and code-like queries

  it('normalizes code-like queries with slashes', () => {
    // "foo/bar/baz" should extract searchable terms
    expectTerms('foo/bar/baz', ['foo', 'bar', 'baz']);
  });

  it('extracts terms from dot-notation queries', () => {
    // "object.method.name" should extract searchable parts
    expectTerms('object.method.name', ['object', 'method', 'name']);
  });

  it('handles snake_case identifiers', () => {
    // "my_function_name" should be preserved
    expectTerms('my_function_name', ['my_function_name']);
  });

  it('removes punctuation from parenthesized expressions', () => {
    // "(hello world)" → "hello world*"
    expectTerms('(hello world)', ['hello', 'world']);
  });

  it('handles bracket-enclosed content', () => {
    // "[foo bar]" → "foo bar*"
    expectTerms('[foo bar]', ['foo', 'bar']);
  });

  it('returns empty string for pure punctuation', () => {
    expect(preprocessQuery('!@#$%^&*()')).toBe('');
  });

  it('returns empty string for punctuation with operators only', () => {
    expect(preprocessQuery('!!! AND *** OR ((()))')).toBe('');
  });

  it('normalizes C++ style template syntax', () => {
    // "vector<int>" → "vector int*"
    expectTerms('vector<int>', ['vector', 'int']);
  });

  it('handles colons and semicolons in code snippets', () => {
    // "http://example.com; function()" → extracts searchable terms
    expectTerms('http://example.com; function()', ['http', 'example', 'com', 'function']);
  });

  it('normalizes arithmetic operators', () => {
    // The exact output for single-character operands is not pinned here; the
    // test only requires that at least one real search term survives.
    const booleanOperators = ['AND', 'OR', 'NOT'];
    const terms = preprocessQuery('a + b * c')
      .split(/\s+/)
      // ''.split(/\s+/) yields [''], so empty strings must be dropped or an
      // empty result would still be counted as one "term".
      .filter((t) => t !== '' && !booleanOperators.includes(t));
    expect(terms.length).toBeGreaterThan(0);
  });

  it('returns single searchable term with wildcard when >=3 chars', () => {
    expect(preprocessQuery('!!!hello!!!')).toBe('hello*');
  });

  it('returns single short term without wildcard', () => {
    expect(preprocessQuery('!!!ab!!!')).toBe('ab');
  });
});
// ---------------------------------------------------------------------------
// computeTrustScore
// ---------------------------------------------------------------------------
// Checks each scoring component of computeTrustScore in isolation by starting
// from an attribute set that scores 0 and switching on one attribute at a time.
describe('computeTrustScore', () => {
  const now = new Date();

  // Attribute set that the first test below pins to a score of 0.
  const noPoints = { source: 'local', state: 'pending', description: null, stars: null };

  /** Builds a repository record; `overrides` replace the defaults field by field. */
  function buildRepo(overrides: Record<string, unknown> = {}) {
    const defaults = {
      id: '/test/repo',
      title: 'Test',
      description: null,
      source: 'github' as const,
      sourceUrl: 'https://github.com/test/repo',
      branch: 'main',
      state: 'indexed' as const,
      totalSnippets: 0,
      totalTokens: 0,
      trustScore: 0,
      benchmarkScore: 0,
      stars: null,
      githubToken: null,
      lastIndexedAt: null,
      createdAt: now,
      updatedAt: now
    };
    return { ...defaults, ...overrides };
  }

  it('returns 0 for a repo with no qualifying attributes', () => {
    expect(computeTrustScore(buildRepo({ ...noPoints }))).toBe(0);
  });

  it('awards 1 point for github source', () => {
    expect(computeTrustScore(buildRepo({ ...noPoints, source: 'github' }))).toBe(1);
  });

  it('awards 1 point for indexed state', () => {
    expect(computeTrustScore(buildRepo({ ...noPoints, state: 'indexed' }))).toBe(1);
  });

  it('awards 1 point for having a description', () => {
    expect(computeTrustScore(buildRepo({ ...noPoints, description: 'A library' }))).toBe(1);
  });

  it('caps score at 10', () => {
    const maxedOut = buildRepo({
      source: 'github',
      state: 'indexed',
      description: 'A great library',
      stars: 1_000_000,
      totalSnippets: 10_000
    });
    expect(computeTrustScore(maxedOut)).toBeLessThanOrEqual(10);
  });

  it('computes star score on log10 scale', () => {
    // 9999 stars: log10(10000) = 4 → min(4, 4) = 4
    const starsOnly = buildRepo({ ...noPoints, stars: 9999 });
    const expected = Math.min(4, Math.log10(10000));
    expect(computeTrustScore(starsOnly)).toBeCloseTo(expected, 1);
  });

  it('awards documentation coverage proportionally (500 snippets = 1 pt, 1500 = 3 pts)', () => {
    // 500 snippets → min(3, 500/500) = 1.0
    const with500 = buildRepo({ ...noPoints, totalSnippets: 500 });
    expect(computeTrustScore(with500)).toBeCloseTo(1, 1);

    // 1500 snippets → min(3, 1500/500) = 3.0
    const with1500 = buildRepo({ ...noPoints, totalSnippets: 1500 });
    expect(computeTrustScore(with1500)).toBeCloseTo(3, 1);
  });
});
// ---------------------------------------------------------------------------
// SearchService.searchSnippets
// ---------------------------------------------------------------------------
// Exercises SearchService.searchSnippets against a freshly seeded in-memory
// database: keyword matching, repository/type/version filters, pagination,
// score sign, repository metadata, stemming and prefix matching.
describe('SearchService.searchSnippets', () => {
  let client: Database.Database;
  let service: SearchService;
  let repoId: string;
  let docId: string;

  beforeEach(() => {
    const db = createTestDb();
    client = db;
    service = new SearchService(db);
    repoId = seedRepo(db);
    docId = seedDocument(db, repoId);
    // Vitest runs a function returned from beforeEach as per-test teardown;
    // close the handle so every test releases its own in-memory database.
    return () => {
      db.close();
    };
  });

  it('returns results matching a simple keyword', () => {
    seedSnippet(client, {
      repositoryId: repoId,
      documentId: docId,
      content: 'The quick brown fox jumps over the lazy dog',
      title: 'Fox story'
    });
    const results = service.searchSnippets('fox', { repositoryId: repoId });
    expect(results.length).toBeGreaterThan(0);
    expect(results[0].snippet.title).toBe('Fox story');
  });

  it('returns empty array for a blank query', () => {
    const results = service.searchSnippets(' ', { repositoryId: repoId });
    expect(results).toHaveLength(0);
  });

  it('returns empty array when no snippets match', () => {
    seedSnippet(client, {
      repositoryId: repoId,
      documentId: docId,
      content: 'Hello world'
    });
    const results = service.searchSnippets('zzznomatch', { repositoryId: repoId });
    expect(results).toHaveLength(0);
  });

  it('filters by repositoryId — does not return snippets from other repos', () => {
    const otherRepoId = seedRepo(client, { id: '/other/repo', title: 'Other Repo' });
    const otherDocId = seedDocument(client, otherRepoId);
    seedSnippet(client, {
      repositoryId: repoId,
      documentId: docId,
      content: 'TypeScript generics tutorial'
    });
    seedSnippet(client, {
      repositoryId: otherRepoId,
      documentId: otherDocId,
      content: 'TypeScript generics advanced'
    });
    const results = service.searchSnippets('TypeScript generics', { repositoryId: repoId });
    // every() is vacuously true on an empty array, so require a hit first.
    expect(results.length).toBeGreaterThan(0);
    expect(results.every((r) => r.snippet.repositoryId === repoId)).toBe(true);
  });

  it('filters by type when provided', () => {
    seedSnippet(client, {
      repositoryId: repoId,
      documentId: docId,
      content: 'TypeScript interface definition',
      type: 'info'
    });
    seedSnippet(client, {
      repositoryId: repoId,
      documentId: docId,
      content: 'TypeScript interface example',
      type: 'code',
      language: 'typescript'
    });

    const codeResults = service.searchSnippets('TypeScript interface', {
      repositoryId: repoId,
      type: 'code'
    });
    // Guard against the vacuous every() on an empty result set.
    expect(codeResults.length).toBeGreaterThan(0);
    expect(codeResults.every((r) => r.snippet.type === 'code')).toBe(true);

    const infoResults = service.searchSnippets('TypeScript interface', {
      repositoryId: repoId,
      type: 'info'
    });
    expect(infoResults.length).toBeGreaterThan(0);
    expect(infoResults.every((r) => r.snippet.type === 'info')).toBe(true);
  });

  it('filters by versionId when provided', () => {
    const versionId = seedVersion(client, repoId, 'v1.0.0');
    seedSnippet(client, {
      repositoryId: repoId,
      documentId: docId,
      content: 'Versioned React hooks documentation',
      versionId
    });
    seedSnippet(client, {
      repositoryId: repoId,
      documentId: docId,
      content: 'React hooks documentation (unversioned)',
      versionId: null
    });
    const results = service.searchSnippets('React hooks', {
      repositoryId: repoId,
      versionId
    });
    // Guard against the vacuous every() on an empty result set.
    expect(results.length).toBeGreaterThan(0);
    expect(results.every((r) => r.snippet.versionId === versionId)).toBe(true);
  });

  it('respects limit and offset', () => {
    for (let i = 0; i < 5; i++) {
      seedSnippet(client, {
        repositoryId: repoId,
        documentId: docId,
        content: `pagination content item number ${i} relevant`
      });
    }
    const page1 = service.searchSnippets('pagination content', {
      repositoryId: repoId,
      limit: 2,
      offset: 0
    });
    const page2 = service.searchSnippets('pagination content', {
      repositoryId: repoId,
      limit: 2,
      offset: 2
    });
    // Five matching snippets are seeded, so both pages must hold something;
    // asserting that keeps the overlap check below from being skipped.
    expect(page1.length).toBeGreaterThan(0);
    expect(page2.length).toBeGreaterThan(0);
    expect(page1.length).toBeLessThanOrEqual(2);
    expect(page2.length).toBeLessThanOrEqual(2);
    // Pages must not overlap.
    const ids1 = new Set(page1.map((r) => r.snippet.id));
    expect(page2.some((r) => ids1.has(r.snippet.id))).toBe(false);
  });

  it('returns scores (negative BM25 values)', () => {
    seedSnippet(client, {
      repositoryId: repoId,
      documentId: docId,
      content: 'SQLite full text search tutorial'
    });
    const results = service.searchSnippets('SQLite full text search', { repositoryId: repoId });
    expect(results.length).toBeGreaterThan(0);
    // BM25 returns negative values for matched documents.
    expect(results[0].score).toBeLessThan(0);
  });

  it('includes repository metadata in results', () => {
    seedSnippet(client, {
      repositoryId: repoId,
      documentId: docId,
      content: 'repository metadata check'
    });
    const results = service.searchSnippets('metadata check', { repositoryId: repoId });
    expect(results.length).toBeGreaterThan(0);
    expect(results[0].repository.id).toBe(repoId);
    expect(results[0].repository.title).toBe('Test Repo');
  });

  it('uses porter stemmer — matches stemmed forms', () => {
    seedSnippet(client, {
      repositoryId: repoId,
      documentId: docId,
      content: 'running tests efficiently'
    });
    // "run" should match "running" via porter stemmer.
    const results = service.searchSnippets('run', { repositoryId: repoId });
    expect(results.length).toBeGreaterThan(0);
  });

  it('uses prefix wildcard — partial word matches', () => {
    seedSnippet(client, {
      repositoryId: repoId,
      documentId: docId,
      content: 'authentication middleware pattern'
    });
    // preprocessQuery appends '*' to tokens >= 3 chars.
    const results = service.searchSnippets('authen', { repositoryId: repoId });
    expect(results.length).toBeGreaterThan(0);
  });
});
// ---------------------------------------------------------------------------
// SearchService.searchRepositories
// ---------------------------------------------------------------------------
// Exercises SearchService.searchRepositories against a fresh in-memory
// database: title/description matching, ranking, state filtering, attached
// versions, the limit option and the composite score.
describe('SearchService.searchRepositories', () => {
  let client: Database.Database;
  let service: SearchService;

  beforeEach(() => {
    const db = createTestDb();
    client = db;
    service = new SearchService(db);
    // Vitest runs a function returned from beforeEach as per-test teardown;
    // close the handle so every test releases its own in-memory database.
    return () => {
      db.close();
    };
  });

  it('returns empty array when no indexed repos match', () => {
    seedRepo(client, { id: '/unrelated/lib', title: 'Unrelated Library' });
    const results = service.searchRepositories({ libraryName: 'react' });
    expect(results).toHaveLength(0);
  });

  it('finds a repo by title', () => {
    seedRepo(client, { id: '/facebook/react', title: 'React', state: 'indexed' });
    const results = service.searchRepositories({ libraryName: 'react' });
    expect(results.length).toBeGreaterThan(0);
    expect(results[0].repository.id).toBe('/facebook/react');
  });

  it('exact match ranks above prefix match', () => {
    seedRepo(client, { id: '/facebook/react', title: 'React', state: 'indexed' });
    seedRepo(client, { id: '/some/reactive', title: 'Reactive Lib', state: 'indexed' });
    const results = service.searchRepositories({ libraryName: 'React' });
    expect(results[0].repository.title).toBe('React');
  });

  it('excludes non-indexed repositories', () => {
    seedRepo(client, { id: '/facebook/react', title: 'React', state: 'pending' });
    const results = service.searchRepositories({ libraryName: 'react' });
    expect(results).toHaveLength(0);
  });

  it('includes versions in results', () => {
    const repoId = seedRepo(client, { id: '/facebook/react', title: 'React', state: 'indexed' });
    seedVersion(client, repoId, 'v18.0.0');
    seedVersion(client, repoId, 'v17.0.0');
    const results = service.searchRepositories({ libraryName: 'react' });
    expect(results.length).toBeGreaterThan(0);
    expect(results[0].versions.length).toBe(2);
  });

  it('respects the limit option', () => {
    for (let i = 0; i < 5; i++) {
      seedRepo(client, {
        id: `/test/lib${i}`,
        title: `Test Library ${i}`,
        state: 'indexed'
      });
    }
    const results = service.searchRepositories({ libraryName: 'library', limit: 2 });
    expect(results.length).toBeLessThanOrEqual(2);
  });

  it('returns a composite score for each result', () => {
    seedRepo(client, { id: '/facebook/react', title: 'React', state: 'indexed' });
    const results = service.searchRepositories({ libraryName: 'react' });
    expect(results.length).toBeGreaterThan(0);
    expect(typeof results[0].score).toBe('number');
    expect(results[0].score).toBeGreaterThan(0);
  });

  it('matches on repository description', () => {
    seedRepo(client, {
      id: '/some/lib',
      title: 'Some Library',
      description: 'A react-compatible UI toolkit',
      state: 'indexed'
    });
    const results = service.searchRepositories({ libraryName: 'react-compatible' });
    expect(results.length).toBeGreaterThan(0);
  });
});
// ---------------------------------------------------------------------------
// formatLibraryResults
// ---------------------------------------------------------------------------
// Checks the text rendering of library search results: the empty-result
// message, the per-library fields, and the fallback shown without versions.
describe('formatLibraryResults', () => {
  type LibraryResults = Parameters<typeof formatLibraryResults>[0];

  it('returns no-match message for empty results', () => {
    const text = formatLibraryResults([]);
    expect(text).toBe('No libraries found matching your search.');
  });

  it('formats a single result with versions', () => {
    const timestamp = new Date();
    const libraries: LibraryResults = [
      {
        repository: {
          id: '/facebook/react',
          title: 'React',
          description: 'A JavaScript library for building user interfaces',
          source: 'github',
          sourceUrl: 'https://github.com/facebook/react',
          branch: 'main',
          state: 'indexed',
          totalSnippets: 1000,
          totalTokens: 50000,
          trustScore: 8.5,
          benchmarkScore: 0,
          stars: 200000,
          githubToken: null,
          lastIndexedAt: null,
          createdAt: timestamp,
          updatedAt: timestamp
        },
        versions: [
          {
            id: '/facebook/react/v18',
            repositoryId: '/facebook/react',
            tag: 'v18',
            title: 'React 18',
            commitHash: null,
            state: 'indexed',
            totalSnippets: 1000,
            indexedAt: null,
            createdAt: timestamp
          }
        ],
        score: 150
      }
    ];

    const text = formatLibraryResults(libraries);
    expect(text).toContain('1. React');
    expect(text).toContain('Library ID: /facebook/react');
    expect(text).toContain('Snippets: 1000');
    expect(text).toContain('Trust Score: 8.5/10');
    expect(text).toContain('v18');
  });

  it('shows "default branch" when no versions are present', () => {
    const timestamp = new Date();
    const libraries: LibraryResults = [
      {
        repository: {
          id: '/test/lib',
          title: 'Test Lib',
          description: null,
          source: 'local',
          sourceUrl: '/path/to/lib',
          branch: 'main',
          state: 'indexed',
          totalSnippets: 0,
          totalTokens: 0,
          trustScore: 0,
          benchmarkScore: 0,
          stars: null,
          githubToken: null,
          lastIndexedAt: null,
          createdAt: timestamp,
          updatedAt: timestamp
        },
        versions: [],
        score: 50
      }
    ];

    expect(formatLibraryResults(libraries)).toContain('default branch');
  });
});
// ---------------------------------------------------------------------------
// formatSnippetResults
// ---------------------------------------------------------------------------
// Checks the text rendering of snippet search results: the empty case,
// library rules, title/breadcrumb lines, fenced code blocks and separators.
describe('formatSnippetResults', () => {
  type SnippetResult = Parameters<typeof formatSnippetResults>[0][number];

  const now = new Date();

  /**
   * Builds one search result with an untitled-by-default-free `info` snippet
   * ('My Title' / 'Some content here.'); top-level fields given in
   * `overrides` replace the defaults wholesale.
   */
  function makeSnippetResult(overrides: Partial<SnippetResult> = {}): SnippetResult {
    return {
      snippet: {
        id: crypto.randomUUID(),
        documentId: crypto.randomUUID(),
        repositoryId: '/test/repo',
        versionId: null,
        type: 'info',
        title: 'My Title',
        content: 'Some content here.',
        language: null,
        breadcrumb: null,
        tokenCount: 10,
        createdAt: now
      },
      score: -1.5,
      repository: { id: '/test/repo', title: 'Test Repo' },
      ...overrides
    };
  }

  it('returns empty string for no results and no rules', () => {
    expect(formatSnippetResults([])).toBe('');
  });

  it('prepends library rules when provided', () => {
    const output = formatSnippetResults([], ['Use TypeScript', 'Prefer const']);
    expect(output).toContain('## Library Rules');
    expect(output).toContain('- Use TypeScript');
    expect(output).toContain('- Prefer const');
  });

  it('formats an info snippet with title and breadcrumb', () => {
    const result = makeSnippetResult({
      snippet: {
        id: crypto.randomUUID(),
        documentId: crypto.randomUUID(),
        repositoryId: '/test/repo',
        versionId: null,
        type: 'info',
        title: 'Getting Started',
        content: 'Install the package using npm.',
        language: null,
        breadcrumb: 'Docs > Intro',
        tokenCount: 5,
        createdAt: now
      }
    });
    const output = formatSnippetResults([result]);
    expect(output).toContain('### Getting Started');
    expect(output).toContain('*Docs > Intro*');
    expect(output).toContain('Install the package using npm.');
  });

  it('formats a code snippet with fenced code block', () => {
    const result = makeSnippetResult({
      snippet: {
        id: crypto.randomUUID(),
        documentId: crypto.randomUUID(),
        repositoryId: '/test/repo',
        versionId: null,
        type: 'code',
        title: 'Example',
        content: 'const x = 1;',
        language: 'typescript',
        breadcrumb: null,
        tokenCount: 5,
        createdAt: now
      }
    });
    const output = formatSnippetResults([result]);
    expect(output).toContain('```typescript');
    expect(output).toContain('const x = 1;');
    // '```typescript' already contains '```', so a plain toContain('```')
    // cannot detect a missing closing fence. Splitting on the fence yields at
    // least three parts only when two or more fences are present.
    expect(output.split('```').length).toBeGreaterThanOrEqual(3);
  });

  it('separates multiple results with horizontal rules', () => {
    const r1 = makeSnippetResult();
    const r2 = makeSnippetResult();
    const output = formatSnippetResults([r1, r2]);
    expect(output).toContain('---');
  });

  it('omits title/breadcrumb lines when they are null', () => {
    const result = makeSnippetResult({
      snippet: {
        id: crypto.randomUUID(),
        documentId: crypto.randomUUID(),
        repositoryId: '/test/repo',
        versionId: null,
        type: 'info',
        title: null,
        content: 'Bare content.',
        language: null,
        breadcrumb: null,
        tokenCount: 3,
        createdAt: now
      }
    });
    const output = formatSnippetResults([result]);
    expect(output).not.toContain('###');
    expect(output).toContain('Bare content.');
  });
});