/**
 * Unit tests for HybridSearchService, VectorSearch, and RRF (TRUEREF-0008).
 *
 * Uses an in-memory SQLite database and a mock EmbeddingProvider.
 * No real network calls are made.
 */
import { describe, it, expect, beforeEach } from 'vitest';
import Database from 'better-sqlite3';
import { readFileSync } from 'node:fs';
import { join } from 'node:path';
import { SearchService } from './search.service.js';
import { HybridSearchService } from './hybrid.search.service.js';
import { VectorSearch, cosineSimilarity } from './vector.search.js';
import { reciprocalRankFusion } from './rrf.js';
import type { EmbeddingProvider, EmbeddingVector } from '../embeddings/provider.js';
import { loadSqliteVec } from '../db/sqlite-vec.js';
import { SqliteVecStore } from './sqlite-vec.store.js';

// ---------------------------------------------------------------------------
// In-memory DB factory
// ---------------------------------------------------------------------------

/**
 * Creates a fresh in-memory SQLite database for a single test.
 *
 * Enables foreign keys, calls `loadSqliteVec` on the connection, executes every
 * listed migration file (split on the `--> statement-breakpoint` marker), and
 * finally executes `../db/fts.sql`.
 *
 * @returns The open database connection.
 */
function createTestDb(): Database.Database {
  const client = new Database(':memory:');
  client.pragma('foreign_keys = ON');
  loadSqliteVec(client);

  const migrationsFolder = join(import.meta.dirname, '../db/migrations');

  // Run all migrations in order
  const migrations = [
    '0000_large_master_chief.sql',
    '0001_quick_nighthawk.sql',
    '0002_silky_stellaris.sql',
    '0003_multiversion_config.sql',
    '0004_complete_sentry.sql',
    '0005_fix_stage_defaults.sql',
    '0006_yielding_centennial.sql'
  ];

  for (const migrationFile of migrations) {
    const migrationSql = readFileSync(join(migrationsFolder, migrationFile), 'utf-8');
    const statements = migrationSql
      .split('--> statement-breakpoint')
      .map((s) => s.trim())
      .filter(Boolean);

    for (const stmt of statements) {
      client.exec(stmt);
    }
  }

  const ftsSql = readFileSync(join(import.meta.dirname, '../db/fts.sql'), 'utf-8');
  client.exec(ftsSql);

  return client;
}

// ---------------------------------------------------------------------------
// Seed helpers
// ---------------------------------------------------------------------------

/** Current time in whole seconds, used for every timestamp column seeded below. */
const NOW_S = Math.floor(Date.now() / 1000);

/**
 * Inserts a repository row (ignored if the id already exists).
 *
 * @param client - Database to seed.
 * @param id - Repository id; also appended to `https://github.com` for `source_url`.
 * @returns The repository id that was passed in.
 */
function seedRepo(client: Database.Database, id = '/test/repo'): string {
  client
    .prepare(
      `INSERT OR IGNORE INTO repositories (id, title, source, source_url, state, created_at, updated_at) VALUES (?, ?, ?, ?, ?, ?, ?)`
    )
    .run(id, 'Test Repo', 'github', `https://github.com${id}`, 'indexed', NOW_S, NOW_S);
  return id;
}

/**
 * Inserts a `README.md` document row for the given repository.
 *
 * @param client - Database to seed.
 * @param repositoryId - Owning repository id.
 * @returns The generated document id.
 */
function seedDocument(client: Database.Database, repositoryId: string): string {
  const docId = crypto.randomUUID();
  client
    .prepare(
      `INSERT INTO documents (id, repository_id, file_path, checksum, indexed_at) VALUES (?, ?, ?, ?, ?)`
    )
    .run(docId, repositoryId, 'README.md', 'abc', NOW_S);
  return docId;
}

/**
 * Inserts a snippet row.
 *
 * @param client - Database to seed.
 * @param opts - Snippet fields; `type` defaults to `'info'` and `title` to `null`.
 * @returns The generated snippet id.
 */
function seedSnippet(
  client: Database.Database,
  opts: {
    repositoryId: string;
    documentId: string;
    content: string;
    title?: string | null;
    type?: 'code' | 'info';
  }
): string {
  const id = crypto.randomUUID();
  client
    .prepare(
      `INSERT INTO snippets (id, document_id, repository_id, type, title, content, created_at) VALUES (?, ?, ?, ?, ?, ?, ?)`
    )
    .run(
      id,
      opts.documentId,
      opts.repositoryId,
      opts.type ?? 'info',
      opts.title ?? null,
      opts.content,
      NOW_S
    );
  return id;
}

/**
 * Stores an embedding for a snippet in two places: the `snippet_embeddings`
 * table (insert-or-replace) and the vector store via
 * `SqliteVecStore.upsertEmbedding`.
 *
 * @param client - Database to seed.
 * @param snippetId - Snippet the embedding belongs to.
 * @param values - Embedding components; converted to a `Float32Array`.
 * @param profileId - Embedding profile id.
 * @param model - Model name recorded on the row.
 */
function seedEmbedding(
  client: Database.Database,
  snippetId: string,
  values: number[],
  profileId = 'local-default',
  model = 'test-model'
): void {
  const f32 = new Float32Array(values);
  client
    .prepare(
      `INSERT OR REPLACE INTO snippet_embeddings (snippet_id, profile_id, model, dimensions, embedding, created_at) VALUES (?, ?, ?, ?, ?, ?)`
    )
    .run(snippetId, profileId, model, values.length, Buffer.from(f32.buffer), NOW_S);
  new SqliteVecStore(client).upsertEmbedding(profileId, snippetId, f32);
}

// ---------------------------------------------------------------------------
// Mock EmbeddingProvider
// ---------------------------------------------------------------------------

/**
 * Builds an always-available provider that returns canned vectors.
 *
 * The i-th input text receives `returnValues[i % returnValues.length]`.
 *
 * @param returnValues - Vectors to cycle through.
 * @returns A mock `EmbeddingProvider`.
 */
function makeMockProvider(returnValues: number[][] = [[1, 0, 0, 0]]): EmbeddingProvider {
  return {
    name: 'mock',
    dimensions: returnValues[0]?.length ?? 4,
    model: 'test-model',
    async embed(texts: string[]): Promise<EmbeddingVector[]> {
      return texts.map((_, i) => {
        const vals = returnValues[i % returnValues.length];
        return {
          values: new Float32Array(vals),
          dimensions: vals.length,
          model: 'test-model'
        };
      });
    },
    async isAvailable(): Promise<boolean> {
      return true;
    }
  };
}

/**
 * Builds a provider that reports itself unavailable and returns no embeddings.
 *
 * @returns A no-op `EmbeddingProvider`.
 */
function makeNoopProvider(): EmbeddingProvider {
  return {
    name: 'noop',
    dimensions: 0,
    model: 'none',
    async embed(): Promise<EmbeddingVector[]> {
      return [];
    },
    async isAvailable(): Promise<boolean> {
      return false;
    }
  };
}

// ===========================================================================
// cosineSimilarity
// ===========================================================================

describe('cosineSimilarity', () => {
  it('returns 1.0 for identical vectors', () => {
    const v = new Float32Array([1, 2, 3]);
    expect(cosineSimilarity(v, v)).toBeCloseTo(1.0, 5);
  });

  it('returns 0.0 for orthogonal vectors', () => {
    const a = new Float32Array([1, 0]);
    const b = new Float32Array([0, 1]);
    expect(cosineSimilarity(a, b)).toBeCloseTo(0.0, 5);
  });

  it('returns -1.0 for opposite vectors', () => {
    const a = new Float32Array([1, 0]);
    const b = new Float32Array([-1, 0]);
    expect(cosineSimilarity(a, b)).toBeCloseTo(-1.0, 5);
  });

  it('returns 0 for zero-magnitude vector', () => {
    const a = new Float32Array([0, 0]);
    const b = new Float32Array([1, 2]);
    expect(cosineSimilarity(a, b)).toBe(0);
  });

  it('throws when dimensions do not match', () => {
    const a = new Float32Array([1, 2]);
    const b = new Float32Array([1, 2, 3]);
    expect(() => cosineSimilarity(a, b)).toThrow('dimension mismatch');
  });

  it('computes correct similarity for non-trivial vectors', () => {
    // [1,1] · [1,0] = 1; |[1,1]| = sqrt(2); |[1,0]| = 1 → 1/sqrt(2) ≈ 0.7071
    const a = new Float32Array([1, 1]);
    const b = new Float32Array([1, 0]);
    expect(cosineSimilarity(a, b)).toBeCloseTo(1 / Math.sqrt(2), 4);
  });
});

// ===========================================================================
// reciprocalRankFusion
// ===========================================================================

describe('reciprocalRankFusion', () => {
  it('returns empty array for empty inputs', () => {
    expect(reciprocalRankFusion([], [])).toHaveLength(0);
  });

  it('fuses a single list preserving order', () => {
    const ranking = [
      { id: 'a', score: 10 },
      { id: 'b', score: 5 },
      { id: 'c', score: 1 }
    ];
    const result = reciprocalRankFusion(ranking);
    expect(result.map((r) => r.id)).toEqual(['a', 'b', 'c']);
  });

  it('deduplicates items appearing in multiple lists', () => {
    const r1 = [{ id: 'a', score: 1 }];
    const r2 = [{ id: 'a', score: 1 }];
    const result = reciprocalRankFusion(r1, r2);
    expect(result.filter((r) => r.id === 'a')).toHaveLength(1);
  });

  it('boosts items appearing in multiple lists', () => {
    // 'a' appears in both rankings at rank 0.
    // 'b' appears only in r1 at rank 1.
    // 'a' should outscore 'b'.
    const r1 = [
      { id: 'a', score: 1 },
      { id: 'b', score: 0.5 }
    ];
    const r2 = [{ id: 'a', score: 1 }];
    const result = reciprocalRankFusion(r1, r2);
    const aScore = result.find((r) => r.id === 'a')!.rrfScore;
    const bScore = result.find((r) => r.id === 'b')!.rrfScore;
    expect(aScore).toBeGreaterThan(bScore);
  });

  it('assigns higher rrfScore to higher-ranked items', () => {
    const ranking = [
      { id: 'first', score: 100 },
      { id: 'second', score: 50 }
    ];
    const result = reciprocalRankFusion(ranking);
    expect(result[0].id).toBe('first');
    expect(result[0].rrfScore).toBeGreaterThan(result[1].rrfScore);
  });

  it('handles three lists correctly', () => {
    const r1 = [
      { id: 'a', score: 1 },
      { id: 'b', score: 0 }
    ];
    const r2 = [
      { id: 'b', score: 1 },
      { id: 'c', score: 0 }
    ];
    const r3 = [
      { id: 'a', score: 1 },
      { id: 'c', score: 0 }
    ];
    const result = reciprocalRankFusion(r1, r2, r3);
    // 'a' appears first in r1 and r3 → higher combined score than 'b' or 'c'.
    expect(result[0].id).toBe('a');
    expect(result.map((r) => r.id)).toContain('b');
    expect(result.map((r) => r.id)).toContain('c');
  });

  it('produces positive rrfScores', () => {
    const ranking = [{ id: 'x', score: 0 }];
    const result = reciprocalRankFusion(ranking);
    expect(result[0].rrfScore).toBeGreaterThan(0);
  });
});

// ===========================================================================
// VectorSearch
// ===========================================================================

describe('VectorSearch', () => {
  let client: Database.Database;
  let repoId: string;
  let docId: string;

  beforeEach(() => {
    client = createTestDb();
    repoId = seedRepo(client);
    docId = seedDocument(client, repoId);
  });

  it('returns empty array when no embeddings exist', () => {
    const vs = new VectorSearch(client);
    const results = vs.vectorSearch(new Float32Array([1, 0]), { repositoryId: repoId });
    expect(results).toHaveLength(0);
  });

  it('returns results sorted by descending cosine similarity', () => {
    const s1 = seedSnippet(client, { repositoryId: repoId, documentId: docId, content: 'alpha' });
    const s2 = seedSnippet(client, { repositoryId: repoId, documentId: docId, content: 'beta' });
    const s3 = seedSnippet(client, { repositoryId: repoId, documentId: docId, content: 'gamma' });

    // Query: [1, 0, 0, 0]
    // s1: [1, 0, 0, 0] → similarity 1.0 (most similar)
    // s2: [0, 1, 0, 0] → similarity 0.0
    // s3: [0, 0, 1, 0] → similarity 0.0
    seedEmbedding(client, s1, [1, 0, 0, 0]);
    seedEmbedding(client, s2, [0, 1, 0, 0]);
    seedEmbedding(client, s3, [0, 0, 1, 0]);

    const vs = new VectorSearch(client);
    const results = vs.vectorSearch(new Float32Array([1, 0, 0, 0]), { repositoryId: repoId });

    expect(results[0].snippetId).toBe(s1);
    expect(results[0].score).toBeCloseTo(1.0, 4);
    expect(results.length).toBe(3);
  });

  it('respects the limit parameter', () => {
    for (let i = 0; i < 5; i++) {
      const id = seedSnippet(client, {
        repositoryId: repoId,
        documentId: docId,
        content: `item ${i}`
      });
      seedEmbedding(client, id, [i * 0.1, 1 - i * 0.1]);
    }

    const vs = new VectorSearch(client);
    const results = vs.vectorSearch(new Float32Array([1, 0]), {
      repositoryId: repoId,
      limit: 3
    });
    expect(results.length).toBeLessThanOrEqual(3);
  });

  it('only returns snippets from the specified repository', () => {
    const otherRepoId = seedRepo(client, '/other/repo');
    const otherDocId = seedDocument(client, otherRepoId);

    const s1 = seedSnippet(client, { repositoryId: repoId, documentId: docId, content: 'mine' });
    const s2 = seedSnippet(client, {
      repositoryId: otherRepoId,
      documentId: otherDocId,
      content: 'theirs'
    });

    seedEmbedding(client, s1, [1, 0]);
    seedEmbedding(client, s2, [1, 0]);

    const vs = new VectorSearch(client);
    const results = vs.vectorSearch(new Float32Array([1, 0]), { repositoryId: repoId });

    expect(results).toHaveLength(1);
    expect(results[0].snippetId).toBe(s1);
  });

  it('handles embeddings with negative values', () => {
    const s1 = seedSnippet(client, { repositoryId: repoId, documentId: docId, content: 'neg' });
    seedEmbedding(client, s1, [-0.5, 0.5]);

    const vs = new VectorSearch(client);
    const results = vs.vectorSearch(new Float32Array([-0.5, 0.5]), { repositoryId: repoId });

    expect(results[0].score).toBeCloseTo(1.0, 4);
  });

  it('filters by profileId using per-profile vec tables', () => {
    client
      .prepare(
        `INSERT INTO embedding_profiles (id, provider_kind, title, enabled, is_default, model, dimensions, config, created_at, updated_at) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?)`
      )
      .run(
        'secondary-profile',
        'local-transformers',
        'Secondary',
        1,
        0,
        'test-model',
        2,
        '{}',
        NOW_S,
        NOW_S
      );

    const defaultSnippet = seedSnippet(client, {
      repositoryId: repoId,
      documentId: docId,
      content: 'default profile snippet'
    });
    const secondarySnippet = seedSnippet(client, {
      repositoryId: repoId,
      documentId: docId,
      content: 'secondary profile snippet'
    });

    seedEmbedding(client, defaultSnippet, [1, 0], 'local-default');
    seedEmbedding(client, secondarySnippet, [1, 0], 'secondary-profile');

    const vs = new VectorSearch(client);
    const defaultResults = vs.vectorSearch(new Float32Array([1, 0]), {
      repositoryId: repoId,
      profileId: 'local-default'
    });
    const secondaryResults = vs.vectorSearch(new Float32Array([1, 0]), {
      repositoryId: repoId,
      profileId: 'secondary-profile'
    });

    expect(defaultResults.map((result) => result.snippetId)).toEqual([defaultSnippet]);
    expect(secondaryResults.map((result) => result.snippetId)).toEqual([secondarySnippet]);
  });
});

// ===========================================================================
// HybridSearchService
// ===========================================================================

describe('HybridSearchService', () => {
  let client: Database.Database;
  let searchService: SearchService;
  let repoId: string;
  let docId: string;

  beforeEach(() => {
    client = createTestDb();
    searchService = new SearchService(client);
    repoId = seedRepo(client);
    docId = seedDocument(client, repoId);
  });

  // -------------------------------------------------------------------------
  // FTS5-only mode (no provider / alpha = 0)
  // -------------------------------------------------------------------------

  it('returns FTS5 results when embeddingProvider is null', async () => {
    seedSnippet(client, { repositoryId: repoId, documentId: docId, content: 'hello world' });

    const svc = new HybridSearchService(client, searchService, null);
    const { results } = await svc.search('hello', { repositoryId: repoId });

    expect(results.length).toBeGreaterThan(0);
    expect(results[0].snippet.content).toBe('hello world');
  });

  it('returns FTS5 results when alpha = 0', async () => {
    seedSnippet(client, {
      repositoryId: repoId,
      documentId: docId,
      content: 'alpha zero test'
    });

    const provider = makeMockProvider([[1, 0]]);
    const svc = new HybridSearchService(client, searchService, provider);
    const { results } = await svc.search('alpha zero', { repositoryId: repoId, alpha: 0 });

    expect(results.length).toBeGreaterThan(0);
  });

  it('returns empty array when FTS5 query is blank and no provider', async () => {
    const svc = new HybridSearchService(client, searchService, null);
    const { results } = await svc.search(' ', { repositoryId: repoId });
    expect(results).toHaveLength(0);
  });

  it('falls back to FTS5 when noop provider returns empty embeddings', async () => {
    seedSnippet(client, {
      repositoryId: repoId,
      documentId: docId,
      content: 'noop fallback test'
    });

    const svc = new HybridSearchService(client, searchService, makeNoopProvider());
    const { results } = await svc.search('noop fallback', { repositoryId: repoId });

    expect(results.length).toBeGreaterThan(0);
  });

  // -------------------------------------------------------------------------
  // Hybrid mode
  // -------------------------------------------------------------------------

  it('returns results when hybrid mode is active (alpha = 0.5)', async () => {
    const s1 = seedSnippet(client, {
      repositoryId: repoId,
      documentId: docId,
      content: 'hybrid search keyword match'
    });
    seedEmbedding(client, s1, [1, 0, 0, 0]);

    const provider = makeMockProvider([[1, 0, 0, 0]]);
    const svc = new HybridSearchService(client, searchService, provider);
    const { results } = await svc.search('hybrid search', {
      repositoryId: repoId,
      alpha: 0.5
    });

    expect(results.length).toBeGreaterThan(0);
  });

  it('deduplicates snippets appearing in both FTS5 and vector results', async () => {
    const s1 = seedSnippet(client, {
      repositoryId: repoId,
      documentId: docId,
      content: 'deduplicate this snippet carefully'
    });
    seedEmbedding(client, s1, [1, 0]);

    const provider = makeMockProvider([[1, 0]]);
    const svc = new HybridSearchService(client, searchService, provider);
    const { results } = await svc.search('deduplicate snippet', {
      repositoryId: repoId,
      alpha: 0.5
    });

    // No duplicate IDs.
    const ids = results.map((r) => r.snippet.id);
    expect(ids.length).toBe(new Set(ids).size);
  });

  it('respects the limit option', async () => {
    for (let i = 0; i < 10; i++) {
      const id = seedSnippet(client, {
        repositoryId: repoId,
        documentId: docId,
        content: `pagination test item number ${i} relevant content here`
      });
      seedEmbedding(client, id, [1, i * 0.1]);
    }

    const provider = makeMockProvider([[1, 0]]);
    const svc = new HybridSearchService(client, searchService, provider);
    const { results } = await svc.search('pagination test', {
      repositoryId: repoId,
      limit: 3,
      alpha: 0.5
    });

    expect(results.length).toBeLessThanOrEqual(3);
  });

  // -------------------------------------------------------------------------
  // Pure vector mode
  // -------------------------------------------------------------------------

  it('returns vector-ranked results when alpha = 1', async () => {
    const s1 = seedSnippet(client, {
      repositoryId: repoId,
      documentId: docId,
      content: 'vector only mode'
    });
    const s2 = seedSnippet(client, {
      repositoryId: repoId,
      documentId: docId,
      content: 'unrelated content'
    });

    // s1 is aligned with the query; s2 is orthogonal.
    seedEmbedding(client, s1, [1, 0]);
    seedEmbedding(client, s2, [0, 1]);

    const provider = makeMockProvider([[1, 0]]);
    const svc = new HybridSearchService(client, searchService, provider);
    const { results } = await svc.search('anything', {
      repositoryId: repoId,
      alpha: 1
    });

    expect(results[0].snippet.id).toBe(s1);
  });

  // -------------------------------------------------------------------------
  // Result structure
  // -------------------------------------------------------------------------

  it('results include snippet and repository metadata', async () => {
    const s1 = seedSnippet(client, {
      repositoryId: repoId,
      documentId: docId,
      content: 'metadata check snippet content',
      title: 'My Snippet Title'
    });
    seedEmbedding(client, s1, [1, 0]);

    const provider = makeMockProvider([[1, 0]]);
    const svc = new HybridSearchService(client, searchService, provider);
    const { results } = await svc.search('metadata check', {
      repositoryId: repoId,
      alpha: 0.5
    });

    expect(results.length).toBeGreaterThan(0);
    const first = results[0];
    expect(first.snippet.id).toBeDefined();
    expect(first.snippet.content).toBeDefined();
    expect(first.repository.id).toBe(repoId);
    expect(first.repository.title).toBe('Test Repo');
  });

  it('all results belong to the requested repository', async () => {
    const otherRepoId = seedRepo(client, '/other/repo');
    const otherDocId = seedDocument(client, otherRepoId);

    for (let i = 0; i < 3; i++) {
      const id = seedSnippet(client, {
        repositoryId: repoId,
        documentId: docId,
        content: `target repository keyword item ${i}`
      });
      seedEmbedding(client, id, [1, i * 0.1]);
    }

    for (let i = 0; i < 3; i++) {
      const id = seedSnippet(client, {
        repositoryId: otherRepoId,
        documentId: otherDocId,
        content: `other repository keyword item ${i}`
      });
      seedEmbedding(client, id, [1, i * 0.1]);
    }

    const provider = makeMockProvider([[1, 0]]);
    const svc = new HybridSearchService(client, searchService, provider);
    const { results } = await svc.search('repository keyword', {
      repositoryId: repoId,
      alpha: 0.5
    });

    expect(results.every((r) => r.snippet.repositoryId === repoId)).toBe(true);
  });

  it('filters by snippet type when provided', async () => {
    const code = seedSnippet(client, {
      repositoryId: repoId,
      documentId: docId,
      content: 'function example code snippet',
      type: 'code'
    });
    const info = seedSnippet(client, {
      repositoryId: repoId,
      documentId: docId,
      content: 'function example info snippet',
      type: 'info'
    });
    seedEmbedding(client, code, [1, 0]);
    seedEmbedding(client, info, [1, 0]);

    const provider = makeMockProvider([[1, 0]]);
    const svc = new HybridSearchService(client, searchService, provider);
    const { results: codeResults } = await svc.search('function example', {
      repositoryId: repoId,
      type: 'code',
      alpha: 0.5
    });

    expect(codeResults.every((r) => r.snippet.type === 'code')).toBe(true);
  });

  // -------------------------------------------------------------------------
  // Default alpha
  // -------------------------------------------------------------------------

  it('uses alpha = 0.5 when not specified', async () => {
    const s1 = seedSnippet(client, {
      repositoryId: repoId,
      documentId: docId,
      content: 'default alpha hybrid test content'
    });
    seedEmbedding(client, s1, [1, 0]);

    const provider = makeMockProvider([[1, 0]]);
    const svc = new HybridSearchService(client, searchService, provider);

    // Should not throw and should return results.
    const { results } = await svc.search('default alpha hybrid', { repositoryId: repoId });
    expect(Array.isArray(results)).toBe(true);
  });

  it('filters by versionId — excludes snippets from other versions', async () => {
    const client = createTestDb();
    const repoId = seedRepo(client);
    const docId = seedDocument(client, repoId);

    // Create two versions
    client
      .prepare(
        `INSERT INTO repository_versions (id, repository_id, tag, state, total_snippets, created_at) VALUES (?, ?, ?, ?, ?, ?)`
      )
      .run('/test/repo/v1.0', repoId, 'v1.0', 'indexed', 0, NOW_S);

    client
      .prepare(
        `INSERT INTO repository_versions (id, repository_id, tag, state, total_snippets, created_at) VALUES (?, ?, ?, ?, ?, ?)`
      )
      .run('/test/repo/v2.0', repoId, 'v2.0', 'indexed', 0, NOW_S);

    // Create embedding profile
    client
      .prepare(
        `INSERT INTO embedding_profiles (id, provider_kind, title, enabled, is_default, model, dimensions, config, created_at, updated_at) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?)`
      )
      .run('test-profile', 'local-transformers', 'Test', 1, 1, 'test-model', 4, '{}', NOW_S, NOW_S);

    // Snippet A in version 1.0
    const snippetA = seedSnippet(client, {
      repositoryId: repoId,
      documentId: docId,
      content: 'version 1 text'
    });
    client
      .prepare('UPDATE snippets SET version_id = ? WHERE id = ?')
      .run('/test/repo/v1.0', snippetA);

    // Seed embedding for snippetA
    const embedA = [0.1, 0.2, 0.3, 0.4];
    const f32A = new Float32Array(embedA);
    client
      .prepare(
        `INSERT INTO snippet_embeddings (snippet_id, profile_id, model, dimensions, embedding, created_at) VALUES (?, ?, ?, ?, ?, ?)`
      )
      .run(snippetA, 'test-profile', 'test-model', 4, Buffer.from(f32A.buffer), NOW_S);

    // Snippet B in version 2.0
    const snippetB = seedSnippet(client, {
      repositoryId: repoId,
      documentId: docId,
      content: 'version 2 text'
    });
    client
      .prepare('UPDATE snippets SET version_id = ? WHERE id = ?')
      .run('/test/repo/v2.0', snippetB);

    // Seed embedding for snippetB
    const embedB = [0.2, 0.3, 0.4, 0.5];
    const f32B = new Float32Array(embedB);
    client
      .prepare(
        `INSERT INTO snippet_embeddings (snippet_id, profile_id, model, dimensions, embedding, created_at) VALUES (?, ?, ?, ?, ?, ?)`
      )
      .run(snippetB, 'test-profile', 'test-model', 4, Buffer.from(f32B.buffer), NOW_S);

    const vs = new VectorSearch(client);
    const query = new Float32Array([0.1, 0.2, 0.3, 0.4]);

    // Query with versionId v1.0 should only return snippetA
    const resultsV1 = vs.vectorSearch(query, {
      repositoryId: repoId,
      versionId: '/test/repo/v1.0',
      profileId: 'test-profile'
    });
    expect(resultsV1.map((r) => r.snippetId)).toContain(snippetA);
    expect(resultsV1.map((r) => r.snippetId)).not.toContain(snippetB);

    // Query with versionId v2.0 should only return snippetB
    const resultsV2 = vs.vectorSearch(query, {
      repositoryId: repoId,
      versionId: '/test/repo/v2.0',
      profileId: 'test-profile'
    });
    expect(resultsV2.map((r) => r.snippetId)).not.toContain(snippetA);
    expect(resultsV2.map((r) => r.snippetId)).toContain(snippetB);

    // Query without versionId should return both
    const resultsAll = vs.vectorSearch(query, {
      repositoryId: repoId,
      profileId: 'test-profile'
    });
    expect(resultsAll.map((r) => r.snippetId)).toContain(snippetA);
    expect(resultsAll.map((r) => r.snippetId)).toContain(snippetB);
  });

  it('searchMode=keyword never calls provider.embed()', async () => {
    const client = createTestDb();
    const repoId = seedRepo(client);
    const docId = seedDocument(client, repoId);

    seedSnippet(client, {
      repositoryId: repoId,
      documentId: docId,
      content: 'keyword only test'
    });

    let embedCalled = false;
    const mockProvider: EmbeddingProvider = {
      name: 'mock',
      dimensions: 4,
      model: 'test-model',
      async embed() {
        embedCalled = true;
        return [];
      },
      async isAvailable() {
        return true;
      }
    };

    const searchService = new SearchService(client);
    const hybridService = new HybridSearchService(client, searchService, mockProvider);

    const { results } = await hybridService.search('keyword', {
      repositoryId: repoId,
      searchMode: 'keyword'
    });

    expect(embedCalled).toBe(false);
    expect(results.length).toBeGreaterThan(0);
  });

  it('searchMode=semantic uses only vector search', async () => {
    const client = createTestDb();
    const repoId = seedRepo(client);
    const docId = seedDocument(client, repoId);

    // Create profile
    client
      .prepare(
        `INSERT INTO embedding_profiles (id, provider_kind, title, enabled, is_default, model, dimensions, config, created_at, updated_at) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?)`
      )
      .run('test-profile', 'local-transformers', 'Test', 1, 1, 'test-model', 4, '{}', NOW_S, NOW_S);

    const snippetId = seedSnippet(client, {
      repositoryId: repoId,
      documentId: docId,
      content: 'semantic test'
    });

    // Seed embedding
    const embed = [0.5, 0.5, 0.5, 0.5];
    const f32 = new Float32Array(embed);
    client
      .prepare(
        `INSERT INTO snippet_embeddings (snippet_id, profile_id, model, dimensions, embedding, created_at) VALUES (?, ?, ?, ?, ?, ?)`
      )
      .run(snippetId, 'test-profile', 'test-model', 4, Buffer.from(f32.buffer), NOW_S);

    const mockProvider: EmbeddingProvider = {
      name: 'mock',
      dimensions: 4,
      model: 'test-model',
      async embed() {
        return [
          {
            values: new Float32Array([0.5, 0.5, 0.5, 0.5]),
            dimensions: 4,
            model: 'test-model'
          }
        ];
      },
      async isAvailable() {
        return true;
      }
    };

    const searchService = new SearchService(client);
    const hybridService = new HybridSearchService(client, searchService, mockProvider);

    const { results } = await hybridService.search('semantic', {
      repositoryId: repoId,
      searchMode: 'semantic',
      profileId: 'test-profile'
    });

    // Should return results (alpha=1 pure vector mode)
    expect(results.length).toBeGreaterThan(0);
  });

  // -------------------------------------------------------------------------
  // Semantic-only mode (searchMode=semantic)
  // -------------------------------------------------------------------------

  it('searchMode=semantic returns empty array when provider is null', async () => {
    const client = createTestDb();
    const repoId = seedRepo(client);
    const docId = seedDocument(client, repoId);

    seedSnippet(client, {
      repositoryId: repoId,
      documentId: docId,
      content: 'semantic null provider test'
    });

    const searchService = new SearchService(client);
    const hybridService = new HybridSearchService(client, searchService, null);

    const { results } = await hybridService.search('test query', {
      repositoryId: repoId,
      searchMode: 'semantic'
    });

    // No provider: semantic mode should return empty.
    expect(results).toHaveLength(0);
  });

  it('searchMode=semantic returns empty array for blank query', async () => {
    const client = createTestDb();
    const repoId = seedRepo(client);
    seedDocument(client, repoId);

    const mockProvider = makeMockProvider([[1, 0, 0, 0]]);
    const searchService = new SearchService(client);
    const hybridService = new HybridSearchService(client, searchService, mockProvider);

    const { results } = await hybridService.search(' ', {
      repositoryId: repoId,
      searchMode: 'semantic'
    });

    // Blank query: should return empty.
    expect(results).toHaveLength(0);
  });

  it('searchMode=semantic falls back to empty when provider fails', async () => {
    const client = createTestDb();
    const repoId = seedRepo(client);
    seedDocument(client, repoId);

    const noopProvider = makeNoopProvider();
    const searchService = new SearchService(client);
    const hybridService = new HybridSearchService(client, searchService, noopProvider);

    const { results } = await hybridService.search('test query', {
      repositoryId: repoId,
      searchMode: 'semantic'
    });

    // Provider fails: should return empty (not fall back to FTS).
    expect(results).toHaveLength(0);
  });

  // -------------------------------------------------------------------------
  // Fallback behavior in auto/hybrid modes
  // -------------------------------------------------------------------------

  it('searchMode=auto falls back to vector when FTS has no results and provider available', async () => {
    const client = createTestDb();
    const repoId = seedRepo(client);
    const docId = seedDocument(client, repoId);

    // Create profile
    client
      .prepare(
        `INSERT INTO embedding_profiles (id, provider_kind, title, enabled, is_default, model, dimensions, config, created_at, updated_at) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?)`
      )
      .run('test-profile', 'local-transformers', 'Test', 1, 1, 'test-model', 4, '{}', NOW_S, NOW_S);

    // Seed a snippet that won't match punctuation-heavy query through FTS.
    const snippetId = seedSnippet(client, {
      repositoryId: repoId,
      documentId: docId,
      content: 'example content'
    });

    // Seed embedding for the snippet.
    const embed = [0.5, 0.5, 0.5, 0.5];
    const f32 = new Float32Array(embed);
    client
      .prepare(
        `INSERT INTO snippet_embeddings (snippet_id, profile_id, model, dimensions, embedding, created_at) VALUES (?, ?, ?, ?, ?, ?)`
      )
      .run(snippetId, 'test-profile', 'test-model', 4, Buffer.from(f32.buffer), NOW_S);

    // Mock provider that always returns a matching embedding.
    const mockProvider: EmbeddingProvider = {
      name: 'mock',
      dimensions: 4,
      model: 'test-model',
      async embed() {
        return [
          {
            values: new Float32Array([0.5, 0.5, 0.5, 0.5]),
            dimensions: 4,
            model: 'test-model'
          }
        ];
      },
      async isAvailable() {
        return true;
      }
    };

    const searchService = new SearchService(client);
    const hybridService = new HybridSearchService(client, searchService, mockProvider);

    // Query with heavy punctuation that preprocesses to nothing.
    const { results } = await hybridService.search('!!!@@@###', {
      repositoryId: repoId,
      searchMode: 'auto',
      profileId: 'test-profile'
    });

    // Should have fallen back to vector search and found the snippet.
    expect(results.length).toBeGreaterThan(0);
    expect(results[0].snippet.id).toBe(snippetId);
  });

  it('searchMode=auto continues with FTS results when available', async () => {
    const client = createTestDb();
    const repoId = seedRepo(client);
    const docId = seedDocument(client, repoId);

    // Seed FTS-matchable snippet.
    seedSnippet(client, {
      repositoryId: repoId,
      documentId: docId,
      content: 'hello world example'
    });

    const mockProvider = makeMockProvider([[1, 0]]);
    const searchService = new SearchService(client);
    const hybridService = new HybridSearchService(client, searchService, mockProvider);

    const { results } = await hybridService.search('hello', {
      repositoryId: repoId,
      searchMode: 'auto'
    });

    // Should find results through FTS (not fallback to vector).
    expect(results.length).toBeGreaterThan(0);
  });

  it('searchMode=hybrid falls back to vector on no FTS results', async () => {
    const client = createTestDb();
    const repoId = seedRepo(client);
    const docId = seedDocument(client, repoId);

    // Create profile
    client
      .prepare(
        `INSERT INTO embedding_profiles (id, provider_kind, title, enabled, is_default, model, dimensions, config, created_at, updated_at) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?)`
      )
      .run('test-profile', 'local-transformers', 'Test', 1, 1, 'test-model', 4, '{}', NOW_S, NOW_S);

    // Seed snippet with vector embedding only.
    const snippetId = seedSnippet(client, {
      repositoryId: repoId,
      documentId: docId,
      content: 'vector search test'
    });

    const embed = [0.7, 0.3, 0.2, 0.1];
    const f32 = new Float32Array(embed);
    client
      .prepare(
        `INSERT INTO snippet_embeddings (snippet_id, profile_id, model, dimensions, embedding, created_at) VALUES (?, ?, ?, ?, ?, ?)`
      )
      .run(snippetId, 'test-profile', 'test-model', 4, Buffer.from(f32.buffer), NOW_S);

    const mockProvider: EmbeddingProvider = {
      name: 'mock',
      dimensions: 4,
      model: 'test-model',
      async embed() {
        return [
          {
            values: new Float32Array([0.7, 0.3, 0.2, 0.1]),
            dimensions: 4,
            model: 'test-model'
          }
        ];
      },
      async isAvailable() {
        return true;
      }
    };

    const searchService = new SearchService(client);
    const hybridService = new HybridSearchService(client, searchService, mockProvider);

    // Query that won't match through FTS after punctuation normalization.
    const { results } = await hybridService.search('%%%vector%%%', {
      repositoryId: repoId,
      searchMode: 'hybrid',
      alpha: 0.5,
      profileId: 'test-profile'
    });

    // Should fall back to vector and find the snippet.
    expect(results.length).toBeGreaterThan(0);
  });

  it('punctuation-heavy query returns empty when no vector provider and FTS preprocesses to nothing', async () => {
    const client = createTestDb();
    const repoId = seedRepo(client);
    const docId = seedDocument(client, repoId);

    // No embeddings or provider.
    seedSnippet(client, {
      repositoryId: repoId,
      documentId: docId,
      content: 'example content'
    });

    const searchService = new SearchService(client);
    const hybridService = new HybridSearchService(client, searchService, null);

    const { results } = await hybridService.search('!!!@@@###$$$', {
      repositoryId: repoId
    });

    // No provider and FTS preprocesses to empty: should return empty.
    expect(results).toHaveLength(0);
  });
});