/**
 * Unit tests for SearchService (TRUEREF-0006).
 *
 * Uses an in-memory SQLite database seeded with known data to verify
 * BM25 snippet search, library search, query preprocessing, and
 * response formatting.
 */
import { describe, it, expect, beforeEach } from 'vitest';
import Database from 'better-sqlite3';
import { readFileSync } from 'node:fs';
import { join } from 'node:path';
import { SearchService } from './search.service';
import { preprocessQuery } from './query-preprocessor';
import { computeTrustScore } from './trust-score';
import { formatLibraryResults, formatSnippetResults } from './formatters';

// ---------------------------------------------------------------------------
// In-memory test DB factory
// ---------------------------------------------------------------------------

/**
 * Creates a fresh in-memory SQLite database for a single test.
 *
 * Foreign keys are switched on, the initial migration file is executed
 * statement by statement, and `fts.sql` is applied afterwards.
 *
 * @returns The open better-sqlite3 connection.
 */
function createTestDb(): Database.Database {
  const client = new Database(':memory:');
  client.pragma('foreign_keys = ON');

  // Run the migration SQL (split on the drizzle separator).
  const migrationsFolder = join(import.meta.dirname, '../db/migrations');
  const migrationSql = readFileSync(join(migrationsFolder, '0000_large_master_chief.sql'), 'utf-8');
  const statements = migrationSql
    .split('--> statement-breakpoint')
    .map((s) => s.trim())
    .filter(Boolean);

  for (const stmt of statements) {
    client.exec(stmt);
  }

  // Apply FTS5 virtual table + triggers.
  const ftsSql = readFileSync(join(import.meta.dirname, '../db/fts.sql'), 'utf-8');
  client.exec(ftsSql);

  return client;
}

// ---------------------------------------------------------------------------
// Seed helpers
// ---------------------------------------------------------------------------

/** Current time in whole seconds, used for every seeded timestamp column. */
const NOW_S = Math.floor(Date.now() / 1000);

/**
 * Inserts one row into `repositories`.
 *
 * @param client    Database to insert into.
 * @param overrides Column values to use instead of the defaults
 *                  (id `/test/repo`, title `Test Repo`, source `github`,
 *                  state `indexed`, zero counts, null description/stars).
 * @returns The id of the inserted repository.
 */
function seedRepo(
  client: Database.Database,
  overrides: {
    id?: string;
    title?: string;
    description?: string | null;
    source?: string;
    state?: string;
    total_snippets?: number;
    trust_score?: number;
    stars?: number | null;
  } = {}
) {
  const id = overrides.id ?? '/test/repo';
  client
    .prepare(
      `INSERT INTO repositories (id, title, description, source, source_url, state, total_snippets, trust_score, stars, created_at, updated_at) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)`
    )
    .run(
      id,
      overrides.title ?? 'Test Repo',
      overrides.description ?? null,
      overrides.source ?? 'github',
      // The id already starts with '/', so it is appended directly to the host.
      `https://github.com${id}`,
      overrides.state ?? 'indexed',
      overrides.total_snippets ?? 0,
      overrides.trust_score ?? 0,
      overrides.stars ?? null,
      NOW_S,
      NOW_S
    );
  return id;
}

/**
 * Inserts a `documents` row (`README.md`, checksum `abc`) for the given repository.
 *
 * @returns The randomly generated document id.
 */
function seedDocument(client: Database.Database, repositoryId: string): string {
  const docId = crypto.randomUUID();
  client
    .prepare(
      `INSERT INTO documents (id, repository_id, file_path, checksum, indexed_at) VALUES (?, ?, ?, ?, ?)`
    )
    .run(docId, repositoryId, 'README.md', 'abc', NOW_S);
  return docId;
}

/**
 * Inserts one row into `snippets`.
 *
 * Optional fields default to `null`, except `type`, which defaults to `'info'`.
 *
 * @returns The randomly generated snippet id.
 */
function seedSnippet(
  client: Database.Database,
  opts: {
    repositoryId: string;
    documentId: string;
    content: string;
    title?: string | null;
    breadcrumb?: string | null;
    type?: 'code' | 'info';
    language?: string | null;
    versionId?: string | null;
  }
): string {
  const id = crypto.randomUUID();
  client
    .prepare(
      `INSERT INTO snippets (id, document_id, repository_id, version_id, type, title, content, language, breadcrumb, created_at) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?)`
    )
    .run(
      id,
      opts.documentId,
      opts.repositoryId,
      opts.versionId ?? null,
      opts.type ?? 'info',
      opts.title ?? null,
      opts.content,
      opts.language ?? null,
      opts.breadcrumb ?? null,
      NOW_S
    );
  return id;
}

/**
 * Inserts a `repository_versions` row in state `indexed`.
 *
 * @returns The version id, built as `${repositoryId}/${tag}`.
 */
function seedVersion(client: Database.Database, repositoryId: string, tag: string): string {
  const id = `${repositoryId}/${tag}`;
  client
    .prepare(
      `INSERT INTO repository_versions (id, repository_id, tag, state, created_at) VALUES (?, ?, ?, ?, ?)`
    )
    .run(id, repositoryId, tag, 'indexed', NOW_S);
  return id;
}

// ---------------------------------------------------------------------------
// preprocessQuery
// ---------------------------------------------------------------------------

describe('preprocessQuery', () => {
  it('trims and collapses whitespace', () => {
    // Leading, trailing and repeated inner spaces so that both trimming and
    // collapsing are actually exercised.
    expect(preprocessQuery('  hello   world  ')).toBe('hello world*');
  });

  it('removes parentheses', () => {
    expect(preprocessQuery('(hello)')).toBe('hello*');
  });

  it('appends wildcard to last token when >= 3 chars', () => {
    expect(preprocessQuery('foo bar baz')).toBe('foo bar baz*');
  });

  it('does not append wildcard when last token is < 3 chars', () => {
    expect(preprocessQuery('foo ba')).toBe('foo ba');
  });

  it('does not double-append wildcard', () => {
    expect(preprocessQuery('hello*')).toBe('hello*');
  });

  it('preserves AND / OR / NOT operators', () => {
    const result = preprocessQuery('hello AND world');
    expect(result).toBe('hello AND world*');
  });

  it('returns empty string for blank input', () => {
    expect(preprocessQuery(' ')).toBe('');
  });

  it('handles single short token without wildcard', () => {
    expect(preprocessQuery('ab')).toBe('ab');
  });

  // Punctuation-heavy and code-like queries

  it('normalizes code-like queries with slashes', () => {
    // "foo/bar/baz" should extract searchable terms
    const result = preprocessQuery('foo/bar/baz');
    expect(result).toContain('foo');
    expect(result).toContain('bar');
    expect(result).toContain('baz');
  });

  it('extracts terms from dot-notation queries', () => {
    // "object.method.name" should extract searchable parts
    const result = preprocessQuery('object.method.name');
    expect(result).toContain('object');
    expect(result).toContain('method');
    expect(result).toContain('name');
  });

  it('handles snake_case identifiers', () => {
    // "my_function_name" should be preserved
    const result = preprocessQuery('my_function_name');
    expect(result).toContain('my_function_name');
  });

  it('removes punctuation from parenthesized expressions', () => {
    // "(hello world)" → "hello world*"
    const result = preprocessQuery('(hello world)');
    expect(result).toContain('hello');
    expect(result).toContain('world');
  });

  it('handles bracket-enclosed content', () => {
    // "[foo bar]" → "foo bar*"
    const result = preprocessQuery('[foo bar]');
    expect(result).toContain('foo');
    expect(result).toContain('bar');
  });

  it('returns empty string for pure punctuation', () => {
    expect(preprocessQuery('!@#$%^&*()')).toBe('');
  });

  it('returns empty string for punctuation with operators only', () => {
    expect(preprocessQuery('!!! AND *** OR ((()))')).toBe('');
  });

  it('normalizes C++ style template syntax', () => {
    // "vector<int>" → "vector int*"
    const result = preprocessQuery('vector<int>');
    expect(result).toContain('vector');
    expect(result).toContain('int');
  });

  it('handles colons and semicolons in code snippets', () => {
    // "http://example.com; function()" → extracts searchable terms
    const result = preprocessQuery('http://example.com; function()');
    expect(result).toContain('http');
    expect(result).toContain('example');
    expect(result).toContain('com');
    expect(result).toContain('function');
  });

  it('normalizes arithmetic operators', () => {
    // "a + b * c" → "a b c*"
    const result = preprocessQuery('a + b * c');
    // Should extract terms, but skip operators
    const terms = result.split(/\s+/).filter((t) => !['AND', 'OR', 'NOT'].includes(t));
    expect(terms.length).toBeGreaterThan(0);
  });

  it('returns single searchable term with wildcard when >=3 chars', () => {
    const result = preprocessQuery('!!!hello!!!');
    expect(result).toBe('hello*');
  });

  it('returns single short term without wildcard', () => {
    const result = preprocessQuery('!!!ab!!!');
    expect(result).toBe('ab');
  });
});
// ---------------------------------------------------------------------------
// computeTrustScore
// ---------------------------------------------------------------------------

describe('computeTrustScore', () => {
  const now = new Date();

  /**
   * Builds a repository-shaped object with neutral defaults.
   * `overrides` is spread last, so any field can be replaced per test.
   */
  function makeRepo(overrides: Record<string, unknown> = {}) {
    return {
      id: '/test/repo',
      title: 'Test',
      description: null,
      source: 'github' as const,
      sourceUrl: 'https://github.com/test/repo',
      branch: 'main',
      state: 'indexed' as const,
      totalSnippets: 0,
      totalTokens: 0,
      trustScore: 0,
      benchmarkScore: 0,
      stars: null,
      githubToken: null,
      lastIndexedAt: null,
      createdAt: now,
      updatedAt: now,
      ...overrides
    };
  }

  it('returns 0 for a repo with no qualifying attributes', () => {
    const repo = makeRepo({ source: 'local', state: 'pending', description: null, stars: null });
    expect(computeTrustScore(repo)).toBe(0);
  });

  it('awards 1 point for github source', () => {
    const repo = makeRepo({ source: 'github', state: 'pending', description: null, stars: null });
    expect(computeTrustScore(repo)).toBe(1);
  });

  it('awards 1 point for indexed state', () => {
    const repo = makeRepo({ source: 'local', state: 'indexed', description: null, stars: null });
    expect(computeTrustScore(repo)).toBe(1);
  });

  it('awards 1 point for having a description', () => {
    const repo = makeRepo({
      source: 'local',
      state: 'pending',
      description: 'A library',
      stars: null
    });
    expect(computeTrustScore(repo)).toBe(1);
  });

  it('caps score at 10', () => {
    const repo = makeRepo({
      source: 'github',
      state: 'indexed',
      description: 'A great library',
      stars: 1_000_000,
      totalSnippets: 10_000
    });
    expect(computeTrustScore(repo)).toBeLessThanOrEqual(10);
  });

  it('computes star score on log10 scale', () => {
    // 9999 stars: log10(10000) = 4 → min(4, 4) = 4
    const repo = makeRepo({ source: 'local', state: 'pending', description: null, stars: 9999 });
    const score = computeTrustScore(repo);
    expect(score).toBeCloseTo(Math.min(4, Math.log10(10000)), 1);
  });

  it('awards documentation coverage proportionally (500 snippets = 1 pt, 1500 = 3 pts)', () => {
    // 500 snippets → min(3, 500/500) = 1.0
    const repo500 = makeRepo({
      source: 'local',
      state: 'pending',
      description: null,
      stars: null,
      totalSnippets: 500
    });
    expect(computeTrustScore(repo500)).toBeCloseTo(1, 1);

    // 1500 snippets → min(3, 1500/500) = 3.0
    const repo1500 = makeRepo({
      source: 'local',
      state: 'pending',
      description: null,
      stars: null,
      totalSnippets: 1500
    });
    expect(computeTrustScore(repo1500)).toBeCloseTo(3, 1);
  });
});

// ---------------------------------------------------------------------------
// SearchService.searchSnippets
// ---------------------------------------------------------------------------

describe('SearchService.searchSnippets', () => {
  let client: Database.Database;
  let service: SearchService;
  let repoId: string;
  let docId: string;

  // Every test gets its own database with one default repo and one document.
  beforeEach(() => {
    client = createTestDb();
    service = new SearchService(client);
    repoId = seedRepo(client);
    docId = seedDocument(client, repoId);
  });

  it('returns results matching a simple keyword', () => {
    seedSnippet(client, {
      repositoryId: repoId,
      documentId: docId,
      content: 'The quick brown fox jumps over the lazy dog',
      title: 'Fox story'
    });

    const results = service.searchSnippets('fox', { repositoryId: repoId });
    expect(results.length).toBeGreaterThan(0);
    expect(results[0].snippet.title).toBe('Fox story');
  });

  it('returns empty array for a blank query', () => {
    const results = service.searchSnippets(' ', { repositoryId: repoId });
    expect(results).toHaveLength(0);
  });

  it('returns empty array when no snippets match', () => {
    seedSnippet(client, { repositoryId: repoId, documentId: docId, content: 'Hello world' });

    const results = service.searchSnippets('zzznomatch', { repositoryId: repoId });
    expect(results).toHaveLength(0);
  });

  it('filters by repositoryId — does not return snippets from other repos', () => {
    const otherRepoId = seedRepo(client, { id: '/other/repo', title: 'Other Repo' });
    const otherDocId = seedDocument(client, otherRepoId);

    seedSnippet(client, {
      repositoryId: repoId,
      documentId: docId,
      content: 'TypeScript generics tutorial'
    });
    seedSnippet(client, {
      repositoryId: otherRepoId,
      documentId: otherDocId,
      content: 'TypeScript generics advanced'
    });

    const results = service.searchSnippets('TypeScript generics', { repositoryId: repoId });
    expect(results.every((r) => r.snippet.repositoryId === repoId)).toBe(true);
  });

  it('filters by type when provided', () => {
    seedSnippet(client, {
      repositoryId: repoId,
      documentId: docId,
      content: 'TypeScript interface definition',
      type: 'info'
    });
    seedSnippet(client, {
      repositoryId: repoId,
      documentId: docId,
      content: 'TypeScript interface example',
      type: 'code',
      language: 'typescript'
    });

    const codeResults = service.searchSnippets('TypeScript interface', {
      repositoryId: repoId,
      type: 'code'
    });
    expect(codeResults.every((r) => r.snippet.type === 'code')).toBe(true);

    const infoResults = service.searchSnippets('TypeScript interface', {
      repositoryId: repoId,
      type: 'info'
    });
    expect(infoResults.every((r) => r.snippet.type === 'info')).toBe(true);
  });

  it('filters by versionId when provided', () => {
    const versionId = seedVersion(client, repoId, 'v1.0.0');

    seedSnippet(client, {
      repositoryId: repoId,
      documentId: docId,
      content: 'Versioned React hooks documentation',
      versionId
    });
    seedSnippet(client, {
      repositoryId: repoId,
      documentId: docId,
      content: 'React hooks documentation (unversioned)',
      versionId: null
    });

    const results = service.searchSnippets('React hooks', { repositoryId: repoId, versionId });
    expect(results.every((r) => r.snippet.versionId === versionId)).toBe(true);
  });

  it('respects limit and offset', () => {
    for (let i = 0; i < 5; i++) {
      seedSnippet(client, {
        repositoryId: repoId,
        documentId: docId,
        content: `pagination content item number ${i} relevant`
      });
    }

    const page1 = service.searchSnippets('pagination content', {
      repositoryId: repoId,
      limit: 2,
      offset: 0
    });
    const page2 = service.searchSnippets('pagination content', {
      repositoryId: repoId,
      limit: 2,
      offset: 2
    });

    expect(page1.length).toBeLessThanOrEqual(2);
    expect(page2.length).toBeLessThanOrEqual(2);

    if (page1.length > 0 && page2.length > 0) {
      // Pages must not overlap.
      const ids1 = new Set(page1.map((r) => r.snippet.id));
      expect(page2.some((r) => ids1.has(r.snippet.id))).toBe(false);
    }
  });

  it('returns scores (negative BM25 values)', () => {
    seedSnippet(client, {
      repositoryId: repoId,
      documentId: docId,
      content: 'SQLite full text search tutorial'
    });

    const results = service.searchSnippets('SQLite full text search', { repositoryId: repoId });
    expect(results.length).toBeGreaterThan(0);
    // BM25 returns negative values for matched documents.
    expect(results[0].score).toBeLessThan(0);
  });

  it('includes repository metadata in results', () => {
    seedSnippet(client, {
      repositoryId: repoId,
      documentId: docId,
      content: 'repository metadata check'
    });

    const results = service.searchSnippets('metadata check', { repositoryId: repoId });
    expect(results.length).toBeGreaterThan(0);
    expect(results[0].repository.id).toBe(repoId);
    expect(results[0].repository.title).toBe('Test Repo');
  });

  it('uses porter stemmer — matches stemmed forms', () => {
    seedSnippet(client, {
      repositoryId: repoId,
      documentId: docId,
      content: 'running tests efficiently'
    });

    // "run" should match "running" via porter stemmer.
    const results = service.searchSnippets('run', { repositoryId: repoId });
    expect(results.length).toBeGreaterThan(0);
  });

  it('uses prefix wildcard — partial word matches', () => {
    seedSnippet(client, {
      repositoryId: repoId,
      documentId: docId,
      content: 'authentication middleware pattern'
    });

    // preprocessQuery appends '*' to tokens >= 3 chars.
    const results = service.searchSnippets('authen', { repositoryId: repoId });
    expect(results.length).toBeGreaterThan(0);
  });
});

// ---------------------------------------------------------------------------
// SearchService.searchRepositories
// ---------------------------------------------------------------------------

describe('SearchService.searchRepositories', () => {
  let client: Database.Database;
  let service: SearchService;

  // Each test starts from an empty database and seeds its own repositories.
  beforeEach(() => {
    client = createTestDb();
    service = new SearchService(client);
  });

  it('returns empty array when no indexed repos match', () => {
    seedRepo(client, { id: '/unrelated/lib', title: 'Unrelated Library' });

    const results = service.searchRepositories({ libraryName: 'react' });
    expect(results).toHaveLength(0);
  });

  it('finds a repo by title', () => {
    seedRepo(client, { id: '/facebook/react', title: 'React', state: 'indexed' });

    const results = service.searchRepositories({ libraryName: 'react' });
    expect(results.length).toBeGreaterThan(0);
    expect(results[0].repository.id).toBe('/facebook/react');
  });

  it('exact match ranks above prefix match', () => {
    seedRepo(client, { id: '/facebook/react', title: 'React', state: 'indexed' });
    seedRepo(client, { id: '/some/reactive', title: 'Reactive Lib', state: 'indexed' });

    const results = service.searchRepositories({ libraryName: 'React' });
    expect(results[0].repository.title).toBe('React');
  });

  it('excludes non-indexed repositories', () => {
    seedRepo(client, { id: '/facebook/react', title: 'React', state: 'pending' });

    const results = service.searchRepositories({ libraryName: 'react' });
    expect(results).toHaveLength(0);
  });

  it('includes versions in results', () => {
    const repoId = seedRepo(client, { id: '/facebook/react', title: 'React', state: 'indexed' });
    seedVersion(client, repoId, 'v18.0.0');
    seedVersion(client, repoId, 'v17.0.0');

    const results = service.searchRepositories({ libraryName: 'react' });
    expect(results.length).toBeGreaterThan(0);
    expect(results[0].versions.length).toBe(2);
  });

  it('respects the limit option', () => {
    for (let i = 0; i < 5; i++) {
      seedRepo(client, { id: `/test/lib${i}`, title: `Test Library ${i}`, state: 'indexed' });
    }

    const results = service.searchRepositories({ libraryName: 'library', limit: 2 });
    expect(results.length).toBeLessThanOrEqual(2);
  });

  it('returns a composite score for each result', () => {
    seedRepo(client, { id: '/facebook/react', title: 'React', state: 'indexed' });

    const results = service.searchRepositories({ libraryName: 'react' });
    expect(results.length).toBeGreaterThan(0);
    expect(typeof results[0].score).toBe('number');
    expect(results[0].score).toBeGreaterThan(0);
  });

  it('matches on repository description', () => {
    seedRepo(client, {
      id: '/some/lib',
      title: 'Some Library',
      description: 'A react-compatible UI toolkit',
      state: 'indexed'
    });

    const results = service.searchRepositories({ libraryName: 'react-compatible' });
    expect(results.length).toBeGreaterThan(0);
  });
});

// ---------------------------------------------------------------------------
// formatLibraryResults
// ---------------------------------------------------------------------------

describe('formatLibraryResults', () => {
  it('returns no-match message for empty results', () => {
    expect(formatLibraryResults([])).toBe('No libraries found matching your search.');
  });

  it('formats a single result with versions', () => {
    const now = new Date();
    // The fixture type is derived from the formatter's own signature so it
    // cannot drift from what the function accepts.
    const results: Parameters<typeof formatLibraryResults>[0] = [
      {
        repository: {
          id: '/facebook/react',
          title: 'React',
          description: 'A JavaScript library for building user interfaces',
          source: 'github',
          sourceUrl: 'https://github.com/facebook/react',
          branch: 'main',
          state: 'indexed',
          totalSnippets: 1000,
          totalTokens: 50000,
          trustScore: 8.5,
          benchmarkScore: 0,
          stars: 200000,
          githubToken: null,
          lastIndexedAt: null,
          createdAt: now,
          updatedAt: now
        },
        versions: [
          {
            id: '/facebook/react/v18',
            repositoryId: '/facebook/react',
            tag: 'v18',
            title: 'React 18',
            commitHash: null,
            state: 'indexed',
            totalSnippets: 1000,
            indexedAt: null,
            createdAt: now
          }
        ],
        score: 150
      }
    ];

    const output = formatLibraryResults(results);

    expect(output).toContain('1. React');
    expect(output).toContain('Library ID: /facebook/react');
    expect(output).toContain('Snippets: 1000');
    expect(output).toContain('Trust Score: 8.5/10');
    expect(output).toContain('v18');
  });

  it('shows "default branch" when no versions are present', () => {
    const now = new Date();
    const results: Parameters<typeof formatLibraryResults>[0] = [
      {
        repository: {
          id: '/test/lib',
          title: 'Test Lib',
          description: null,
          source: 'local',
          sourceUrl: '/path/to/lib',
          branch: 'main',
          state: 'indexed',
          totalSnippets: 0,
          totalTokens: 0,
          trustScore: 0,
          benchmarkScore: 0,
          stars: null,
          githubToken: null,
          lastIndexedAt: null,
          createdAt: now,
          updatedAt: now
        },
        versions: [],
        score: 50
      }
    ];

    const output = formatLibraryResults(results);
    expect(output).toContain('default branch');
  });
});

// ---------------------------------------------------------------------------
// formatSnippetResults
// ---------------------------------------------------------------------------

describe('formatSnippetResults', () => {
  const now = new Date();

  /**
   * Builds one search result accepted by `formatSnippetResults`.
   *
   * `overrides` is shallow-spread over the defaults, so passing `snippet`
   * replaces the whole default snippet object rather than merging into it.
   */
  function makeSnippetResult(
    overrides: Partial<Parameters<typeof formatSnippetResults>[0][number]> = {}
  ): Parameters<typeof formatSnippetResults>[0][number] {
    return {
      snippet: {
        id: crypto.randomUUID(),
        documentId: crypto.randomUUID(),
        repositoryId: '/test/repo',
        versionId: null,
        type: 'info',
        title: 'My Title',
        content: 'Some content here.',
        language: null,
        breadcrumb: null,
        tokenCount: 10,
        createdAt: now
      },
      score: -1.5,
      repository: { id: '/test/repo', title: 'Test Repo' },
      ...overrides
    };
  }

  it('returns empty string for no results and no rules', () => {
    expect(formatSnippetResults([])).toBe('');
  });

  it('prepends library rules when provided', () => {
    const output = formatSnippetResults([], ['Use TypeScript', 'Prefer const']);
    expect(output).toContain('## Library Rules');
    expect(output).toContain('- Use TypeScript');
    expect(output).toContain('- Prefer const');
  });

  it('formats an info snippet with title and breadcrumb', () => {
    const result = makeSnippetResult({
      snippet: {
        id: crypto.randomUUID(),
        documentId: crypto.randomUUID(),
        repositoryId: '/test/repo',
        versionId: null,
        type: 'info',
        title: 'Getting Started',
        content: 'Install the package using npm.',
        language: null,
        breadcrumb: 'Docs > Intro',
        tokenCount: 5,
        createdAt: now
      }
    });

    const output = formatSnippetResults([result]);
    expect(output).toContain('### Getting Started');
    expect(output).toContain('*Docs > Intro*');
    expect(output).toContain('Install the package using npm.');
  });

  it('formats a code snippet with fenced code block', () => {
    const result = makeSnippetResult({
      snippet: {
        id: crypto.randomUUID(),
        documentId: crypto.randomUUID(),
        repositoryId: '/test/repo',
        versionId: null,
        type: 'code',
        title: 'Example',
        content: 'const x = 1;',
        language: 'typescript',
        breadcrumb: null,
        tokenCount: 5,
        createdAt: now
      }
    });

    const output = formatSnippetResults([result]);
    expect(output).toContain('```typescript');
    expect(output).toContain('const x = 1;');
    expect(output).toContain('```');
  });

  it('separates multiple results with horizontal rules', () => {
    const r1 = makeSnippetResult();
    const r2 = makeSnippetResult();

    const output = formatSnippetResults([r1, r2]);
    expect(output).toContain('---');
  });

  it('omits title/breadcrumb lines when they are null', () => {
    const result = makeSnippetResult({
      snippet: {
        id: crypto.randomUUID(),
        documentId: crypto.randomUUID(),
        repositoryId: '/test/repo',
        versionId: null,
        type: 'info',
        title: null,
        content: 'Bare content.',
        language: null,
        breadcrumb: null,
        tokenCount: 3,
        createdAt: now
      }
    });

    const output = formatSnippetResults([result]);
    expect(output).not.toContain('###');
    expect(output).toContain('Bare content.');
  });
});