fix(FEEDBACK-0001): complete iteration 0 - harden context search
This commit is contained in:
@@ -818,4 +818,246 @@ describe('HybridSearchService', () => {
|
||||
// Should return results (alpha=1 pure vector mode)
|
||||
expect(results.length).toBeGreaterThan(0);
|
||||
});
|
||||
|
||||
// -------------------------------------------------------------------------
|
||||
// Semantic-only mode (searchMode=semantic)
|
||||
// -------------------------------------------------------------------------
|
||||
|
||||
it('searchMode=semantic returns empty array when provider is null', async () => {
  // Arrange: one repository containing one document with a single snippet.
  const db = createTestDb();
  const repositoryId = seedRepo(db);
  const documentId = seedDocument(db, repositoryId);
  seedSnippet(db, {
    repositoryId,
    documentId,
    content: 'semantic null provider test'
  });

  // The provider argument is deliberately null for this scenario.
  const service = new HybridSearchService(db, new SearchService(db), null);

  // Act: request semantic-only search.
  const found = await service.search('test query', {
    repositoryId,
    searchMode: 'semantic'
  });

  // Assert: with no provider, semantic mode is expected to yield nothing.
  expect(found).toHaveLength(0);
});
|
||||
|
||||
it('searchMode=semantic returns empty array for blank query', async () => {
  const client = createTestDb();
  const repoId = seedRepo(client);
  // Seed the document for its side effect only: the returned id was never
  // read in this test, so it is no longer bound to an unused local.
  seedDocument(client, repoId);

  // A provider is supplied so that the empty result cannot be explained by a
  // missing provider.
  const mockProvider = makeMockProvider([[1, 0, 0, 0]]);

  const searchService = new SearchService(client);
  const hybridService = new HybridSearchService(client, searchService, mockProvider);

  // The query consists of whitespace only.
  const results = await hybridService.search(' ', {
    repositoryId: repoId,
    searchMode: 'semantic'
  });

  // Blank query: should return empty.
  expect(results).toHaveLength(0);
});
|
||||
|
||||
it('searchMode=semantic falls back to empty when provider fails', async () => {
  const client = createTestDb();
  const repoId = seedRepo(client);
  const docId = seedDocument(client, repoId);

  // Seed a snippet whose text contains both query terms. Without it the
  // repository is empty and the assertion below would also pass if the
  // service wrongly fell back to FTS, so the "no FTS fallback" claim would
  // not actually be checked.
  seedSnippet(client, {
    repositoryId: repoId,
    documentId: docId,
    content: 'test query fallback content'
  });

  // NOTE(review): makeNoopProvider is defined outside this block; this test
  // relies on it behaving as a provider that produces no usable embedding —
  // confirm against its definition.
  const noopProvider = makeNoopProvider();
  const searchService = new SearchService(client);
  const hybridService = new HybridSearchService(client, searchService, noopProvider);

  const results = await hybridService.search('test query', {
    repositoryId: repoId,
    searchMode: 'semantic'
  });

  // Provider fails: should return empty (not fall back to FTS), even though
  // an FTS-matchable snippet exists.
  expect(results).toHaveLength(0);
});
|
||||
|
||||
// -------------------------------------------------------------------------
|
||||
// Fallback behavior in auto/hybrid modes
|
||||
// -------------------------------------------------------------------------
|
||||
|
||||
it('searchMode=auto falls back to vector when FTS has no results and provider available', async () => {
  const db = createTestDb();
  const repositoryId = seedRepo(db);
  const documentId = seedDocument(db, repositoryId);

  // Register the embedding profile that the search call selects via profileId.
  const insertProfile = db.prepare(
    `INSERT INTO embedding_profiles (id, provider_kind, title, enabled, is_default, model, dimensions, config, created_at, updated_at)
     VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?)`
  );
  insertProfile.run('test-profile', 'local-transformers', 'Test', 1, 1, 'test-model', 4, '{}', NOW_S, NOW_S);

  // The snippet text shares nothing with the punctuation-only query used below.
  const expectedSnippetId = seedSnippet(db, {
    repositoryId,
    documentId,
    content: 'example content'
  });

  // Store a 4-dimensional embedding for that snippet under the test profile.
  const storedVector = new Float32Array([0.5, 0.5, 0.5, 0.5]);
  const insertEmbedding = db.prepare(
    `INSERT INTO snippet_embeddings (snippet_id, profile_id, model, dimensions, embedding, created_at)
     VALUES (?, ?, ?, ?, ?, ?)`
  );
  insertEmbedding.run(
    expectedSnippetId,
    'test-profile',
    'test-model',
    4,
    Buffer.from(storedVector.buffer),
    NOW_S
  );

  // Provider stub: every embed() call returns the same values as the stored
  // embedding, and the provider always reports itself as available.
  const provider: EmbeddingProvider = {
    name: 'mock',
    dimensions: 4,
    model: 'test-model',
    embed: async () => [
      {
        values: new Float32Array([0.5, 0.5, 0.5, 0.5]),
        dimensions: 4,
        model: 'test-model'
      }
    ],
    isAvailable: async () => true
  };

  const service = new HybridSearchService(db, new SearchService(db), provider);

  // A query made only of punctuation, intended to leave FTS with nothing to match.
  const found = await service.search('!!!@@@###', {
    repositoryId,
    searchMode: 'auto',
    profileId: 'test-profile'
  });

  // The seeded snippet must be found and must rank first.
  expect(found.length).toBeGreaterThan(0);
  expect(found[0].snippet.id).toBe(expectedSnippetId);
});
|
||||
|
||||
it('searchMode=auto continues with FTS results when available', async () => {
  const db = createTestDb();
  const repositoryId = seedRepo(db);
  const documentId = seedDocument(db, repositoryId);

  // The snippet text contains the literal query term "hello".
  seedSnippet(db, {
    repositoryId,
    documentId,
    content: 'hello world example'
  });

  // A provider is present, but no embeddings are seeded in this test.
  const provider = makeMockProvider([[1, 0]]);
  const service = new HybridSearchService(db, new SearchService(db), provider);

  const found = await service.search('hello', {
    repositoryId,
    searchMode: 'auto'
  });

  // At least one hit is expected for the keyword query.
  expect(found.length).toBeGreaterThan(0);
});
|
||||
|
||||
it('searchMode=hybrid falls back to vector on no FTS results', async () => {
  const client = createTestDb();
  const repoId = seedRepo(client);
  const docId = seedDocument(client, repoId);

  // Create the embedding profile selected below via profileId.
  client
    .prepare(
      `INSERT INTO embedding_profiles (id, provider_kind, title, enabled, is_default, model, dimensions, config, created_at, updated_at)
       VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?)`
    )
    .run('test-profile', 'local-transformers', 'Test', 1, 1, 'test-model', 4, '{}', NOW_S, NOW_S);

  // Seed a snippet that is reachable only through its embedding. The content
  // deliberately shares no term with the query: the previous content
  // ('vector search test') contained the word "vector", so if punctuation
  // normalization reduces '%%%vector%%%' to 'vector' the snippet could be
  // found by FTS directly and the fallback path would never be exercised.
  const snippetId = seedSnippet(client, {
    repositoryId: repoId,
    documentId: docId,
    content: 'embedding only snippet'
  });

  // Store the snippet's 4-dimensional embedding under the test profile.
  const embed = [0.7, 0.3, 0.2, 0.1];
  const f32 = new Float32Array(embed);
  client
    .prepare(
      `INSERT INTO snippet_embeddings (snippet_id, profile_id, model, dimensions, embedding, created_at)
       VALUES (?, ?, ?, ?, ?, ?)`
    )
    .run(snippetId, 'test-profile', 'test-model', 4, Buffer.from(f32.buffer), NOW_S);

  // Provider stub whose query embedding has the same values as the stored one.
  const mockProvider: EmbeddingProvider = {
    name: 'mock',
    dimensions: 4,
    model: 'test-model',
    async embed() {
      return [
        {
          values: new Float32Array(embed),
          dimensions: 4,
          model: 'test-model'
        }
      ];
    },
    async isAvailable() {
      return true;
    }
  };

  const searchService = new SearchService(client);
  const hybridService = new HybridSearchService(client, searchService, mockProvider);

  // Query that cannot match the seeded content through FTS, whether the
  // punctuation is stripped (leaving "vector") or the query is emptied.
  const results = await hybridService.search('%%%vector%%%', {
    repositoryId: repoId,
    searchMode: 'hybrid',
    alpha: 0.5,
    profileId: 'test-profile'
  });

  // Should fall back to vector and find the snippet; the id check mirrors the
  // equivalent searchMode=auto fallback test.
  expect(results.length).toBeGreaterThan(0);
  expect(results[0].snippet.id).toBe(snippetId);
});
|
||||
|
||||
it('punctuation-heavy query returns empty when no vector provider and FTS preprocesses to nothing', async () => {
  const db = createTestDb();
  const repositoryId = seedRepo(db);
  const documentId = seedDocument(db, repositoryId);

  // Only a plain snippet is seeded: no embeddings are stored and the service
  // is constructed with a null provider.
  seedSnippet(db, {
    repositoryId,
    documentId,
    content: 'example content'
  });

  const service = new HybridSearchService(db, new SearchService(db), null);

  // No searchMode is passed, so the service's default mode applies.
  const found = await service.search('!!!@@@###$$$', { repositoryId });

  // Punctuation-only query with no provider: an empty result is expected.
  expect(found).toHaveLength(0);
});
|
||||
});
|
||||
|
||||
Reference in New Issue
Block a user