feat(TRUEREF-0008): implement hybrid semantic search with RRF
- Cosine similarity vector search over stored embeddings
- Reciprocal Rank Fusion (K=60) combining FTS5 + vector rankings
- Configurable alpha weight between keyword and semantic search
- Graceful degradation to FTS5-only when no embedding provider configured

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
624
src/lib/server/search/hybrid.search.service.test.ts
Normal file
624
src/lib/server/search/hybrid.search.service.test.ts
Normal file
@@ -0,0 +1,624 @@
|
|||||||
|
/**
|
||||||
|
* Unit tests for HybridSearchService, VectorSearch, and RRF (TRUEREF-0008).
|
||||||
|
*
|
||||||
|
* Uses an in-memory SQLite database and a mock EmbeddingProvider.
|
||||||
|
* No real network calls are made.
|
||||||
|
*/
|
||||||
|
|
||||||
|
import { describe, it, expect, beforeEach } from 'vitest';
|
||||||
|
import Database from 'better-sqlite3';
|
||||||
|
import { readFileSync } from 'node:fs';
|
||||||
|
import { join } from 'node:path';
|
||||||
|
|
||||||
|
import { SearchService } from './search.service.js';
|
||||||
|
import { HybridSearchService } from './hybrid.search.service.js';
|
||||||
|
import { VectorSearch, cosineSimilarity } from './vector.search.js';
|
||||||
|
import { reciprocalRankFusion } from './rrf.js';
|
||||||
|
import type { EmbeddingProvider, EmbeddingVector } from '../embeddings/provider.js';
|
||||||
|
|
||||||
|
// ---------------------------------------------------------------------------
|
||||||
|
// In-memory DB factory
|
||||||
|
// ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
/**
 * Builds a fresh in-memory SQLite database for one test.
 *
 * Foreign-key enforcement is switched on, the initial migration file is
 * replayed one statement at a time (statements are delimited by the
 * `--> statement-breakpoint` marker), and finally `fts.sql` is executed.
 *
 * @returns The open in-memory database handle.
 */
function createTestDb(): Database.Database {
	const db = new Database(':memory:');
	db.pragma('foreign_keys = ON');

	const migrationPath = join(
		join(import.meta.dirname, '../db/migrations'),
		'0000_large_master_chief.sql'
	);
	for (const chunk of readFileSync(migrationPath, 'utf-8').split('--> statement-breakpoint')) {
		const sql = chunk.trim();
		// Blank fragments (e.g. after a trailing marker) are skipped.
		if (sql) {
			db.exec(sql);
		}
	}

	const ftsPath = join(import.meta.dirname, '../db/fts.sql');
	db.exec(readFileSync(ftsPath, 'utf-8'));

	return db;
}
|
||||||
|
|
||||||
|
// ---------------------------------------------------------------------------
|
||||||
|
// Seed helpers
|
||||||
|
// ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
// Current Unix time in whole seconds, captured once when the module loads;
// the seed helpers below reuse it for every timestamp column they write.
const NOW_S = Math.floor(Date.now() / 1000);
|
||||||
|
|
||||||
|
/**
 * Inserts a repository row and hands back its id.
 *
 * The statement is `INSERT OR IGNORE`, so seeding the same id twice is a no-op.
 *
 * @param client - Database to write into.
 * @param id - Repository id; also appended to `https://github.com` to form the source URL.
 * @returns The `id` that was passed in.
 */
function seedRepo(client: Database.Database, id = '/test/repo'): string {
	const insertRepo = client.prepare(
		`INSERT OR IGNORE INTO repositories
				(id, title, source, source_url, state, created_at, updated_at)
			VALUES (?, ?, ?, ?, ?, ?, ?)`
	);
	const sourceUrl = `https://github.com${id}`;
	insertRepo.run(id, 'Test Repo', 'github', sourceUrl, 'indexed', NOW_S, NOW_S);
	return id;
}
|
||||||
|
|
||||||
|
/**
 * Inserts a `README.md` document row for the given repository.
 *
 * @param client - Database to write into.
 * @param repositoryId - Id of the repository that owns the document.
 * @returns The freshly generated document id (a random UUID).
 */
function seedDocument(client: Database.Database, repositoryId: string): string {
	const documentId = crypto.randomUUID();
	const insertDocument = client.prepare(
		`INSERT INTO documents (id, repository_id, file_path, checksum, indexed_at)
			VALUES (?, ?, ?, ?, ?)`
	);
	insertDocument.run(documentId, repositoryId, 'README.md', 'abc', NOW_S);
	return documentId;
}
|
||||||
|
|
||||||
|
/**
 * Inserts one snippet row and returns its generated id.
 *
 * @param client - Database to write into.
 * @param opts - Snippet fields. `type` falls back to `'info'` and `title`
 *   falls back to `null` when omitted.
 * @returns The freshly generated snippet id (a random UUID).
 */
function seedSnippet(
	client: Database.Database,
	opts: {
		repositoryId: string;
		documentId: string;
		content: string;
		title?: string | null;
		type?: 'code' | 'info';
	}
): string {
	const { repositoryId, documentId, content, title, type } = opts;
	const snippetId = crypto.randomUUID();

	const insertSnippet = client.prepare(
		`INSERT INTO snippets
				(id, document_id, repository_id, type, title, content, created_at)
			VALUES (?, ?, ?, ?, ?, ?, ?)`
	);
	insertSnippet.run(
		snippetId,
		documentId,
		repositoryId,
		type ?? 'info',
		title ?? null,
		content,
		NOW_S
	);

	return snippetId;
}
|
||||||
|
|
||||||
|
/**
 * Stores (or replaces) the embedding for a snippet.
 *
 * The vector is converted to 32-bit floats and written as a raw byte blob;
 * `dimensions` is recorded as the number of input values.
 *
 * @param client - Database to write into.
 * @param snippetId - Snippet the embedding belongs to.
 * @param values - Embedding components.
 * @param model - Model name recorded alongside the vector.
 */
function seedEmbedding(
	client: Database.Database,
	snippetId: string,
	values: number[],
	model = 'test-model'
): void {
	const vector = Float32Array.from(values);
	const blob = Buffer.from(vector.buffer);

	const upsertEmbedding = client.prepare(
		`INSERT OR REPLACE INTO snippet_embeddings
				(snippet_id, model, dimensions, embedding, created_at)
			VALUES (?, ?, ?, ?, ?)`
	);
	upsertEmbedding.run(snippetId, model, values.length, blob, NOW_S);
}
|
||||||
|
|
||||||
|
// ---------------------------------------------------------------------------
|
||||||
|
// Mock EmbeddingProvider
|
||||||
|
// ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
/**
 * Creates an always-available EmbeddingProvider that returns canned vectors.
 *
 * The i-th input text receives `returnValues[i % returnValues.length]`, so a
 * short list of vectors is cycled over longer inputs.
 *
 * @param returnValues - Pool of vectors to hand out. An empty pool is treated
 *   like the default single vector `[1, 0, 0, 0]`, which keeps `embed`
 *   consistent with the advertised fallback of 4 dimensions.
 * @returns A provider named `mock` reporting model `test-model`.
 */
function makeMockProvider(
	returnValues: number[][] = [[1, 0, 0, 0]]
): EmbeddingProvider {
	// With an empty pool `i % 0` is NaN, the lookup yields undefined and
	// `embed` would reject with a TypeError. Substitute the default vector.
	const pool = returnValues.length > 0 ? returnValues : [[1, 0, 0, 0]];

	return {
		name: 'mock',
		dimensions: pool[0]?.length ?? 4,
		model: 'test-model',
		async embed(texts: string[]): Promise<EmbeddingVector[]> {
			return texts.map((_, i) => {
				const vals = pool[i % pool.length] ?? [];
				return {
					values: new Float32Array(vals),
					dimensions: vals.length,
					model: 'test-model'
				};
			});
		},
		async isAvailable(): Promise<boolean> {
			return true;
		}
	};
}
|
||||||
|
|
||||||
|
/**
 * Creates a provider that stands in for "no embeddings configured":
 * it reports itself as unavailable and `embed` always resolves to an
 * empty array, regardless of input.
 *
 * @returns A provider named `noop` with zero dimensions and model `none`.
 */
function makeNoopProvider(): EmbeddingProvider {
	const noop: EmbeddingProvider = {
		name: 'noop',
		dimensions: 0,
		model: 'none',
		embed: async (_texts: string[]): Promise<EmbeddingVector[]> => [],
		isAvailable: async (): Promise<boolean> => false
	};
	return noop;
}
|
||||||
|
|
||||||
|
// ===========================================================================
|
||||||
|
// cosineSimilarity
|
||||||
|
// ===========================================================================
|
||||||
|
|
||||||
|
describe('cosineSimilarity', () => {
	// Shorthand for building Float32Array operands.
	const f32 = (...components: number[]): Float32Array => new Float32Array(components);

	it('returns 1.0 for identical vectors', () => {
		const same = f32(1, 2, 3);
		expect(cosineSimilarity(same, same)).toBeCloseTo(1.0, 5);
	});

	it('returns 0.0 for orthogonal vectors', () => {
		const similarity = cosineSimilarity(f32(1, 0), f32(0, 1));
		expect(similarity).toBeCloseTo(0.0, 5);
	});

	it('returns -1.0 for opposite vectors', () => {
		const similarity = cosineSimilarity(f32(1, 0), f32(-1, 0));
		expect(similarity).toBeCloseTo(-1.0, 5);
	});

	it('returns 0 for zero-magnitude vector', () => {
		const similarity = cosineSimilarity(f32(0, 0), f32(1, 2));
		expect(similarity).toBe(0);
	});

	it('throws when dimensions do not match', () => {
		const twoDims = f32(1, 2);
		const threeDims = f32(1, 2, 3);
		expect(() => cosineSimilarity(twoDims, threeDims)).toThrow('dimension mismatch');
	});

	it('computes correct similarity for non-trivial vectors', () => {
		// Dot product of [1,1] and [1,0] is 1; the norms are sqrt(2) and 1,
		// so the expected similarity is 1/sqrt(2) ≈ 0.7071.
		const expected = 1 / Math.sqrt(2);
		expect(cosineSimilarity(f32(1, 1), f32(1, 0))).toBeCloseTo(expected, 4);
	});
});
|
||||||
|
|
||||||
|
// ===========================================================================
|
||||||
|
// reciprocalRankFusion
|
||||||
|
// ===========================================================================
|
||||||
|
|
||||||
|
describe('reciprocalRankFusion', () => {
	it('returns empty array for empty inputs', () => {
		const fused = reciprocalRankFusion([], []);
		expect(fused).toHaveLength(0);
	});

	it('fuses a single list preserving order', () => {
		const fused = reciprocalRankFusion([
			{ id: 'a', score: 10 },
			{ id: 'b', score: 5 },
			{ id: 'c', score: 1 }
		]);
		const order = fused.map((entry) => entry.id);
		expect(order).toEqual(['a', 'b', 'c']);
	});

	it('deduplicates items appearing in multiple lists', () => {
		const first = [{ id: 'a', score: 1 }];
		const second = [{ id: 'a', score: 1 }];
		const fused = reciprocalRankFusion(first, second);
		const occurrences = fused.filter((entry) => entry.id === 'a');
		expect(occurrences).toHaveLength(1);
	});

	it('boosts items appearing in multiple lists', () => {
		// 'a' leads both lists while 'b' only shows up second in the first
		// list, so the fused score of 'a' must be the larger one.
		const first = [
			{ id: 'a', score: 1 },
			{ id: 'b', score: 0.5 }
		];
		const second = [{ id: 'a', score: 1 }];
		const fused = reciprocalRankFusion(first, second);

		const scoreOf = (id: string): number => fused.find((entry) => entry.id === id)!.rrfScore;
		const aScore = scoreOf('a');
		const bScore = scoreOf('b');
		expect(aScore).toBeGreaterThan(bScore);
	});

	it('assigns higher rrfScore to higher-ranked items', () => {
		const fused = reciprocalRankFusion([
			{ id: 'first', score: 100 },
			{ id: 'second', score: 50 }
		]);
		expect(fused[0].id).toBe('first');
		expect(fused[0].rrfScore).toBeGreaterThan(fused[1].rrfScore);
	});

	it('handles three lists correctly', () => {
		const listOne = [{ id: 'a', score: 1 }, { id: 'b', score: 0 }];
		const listTwo = [{ id: 'b', score: 1 }, { id: 'c', score: 0 }];
		const listThree = [{ id: 'a', score: 1 }, { id: 'c', score: 0 }];
		const fused = reciprocalRankFusion(listOne, listTwo, listThree);

		// 'a' tops two of the three lists, which beats the combined
		// contributions of 'b' and of 'c'.
		expect(fused[0].id).toBe('a');
		const fusedIds = fused.map((entry) => entry.id);
		expect(fusedIds).toContain('b');
		expect(fusedIds).toContain('c');
	});

	it('produces positive rrfScores', () => {
		const fused = reciprocalRankFusion([{ id: 'x', score: 0 }]);
		expect(fused[0].rrfScore).toBeGreaterThan(0);
	});
});
|
||||||
|
|
||||||
|
// ===========================================================================
|
||||||
|
// VectorSearch
|
||||||
|
// ===========================================================================
|
||||||
|
|
||||||
|
describe('VectorSearch', () => {
	let client: Database.Database;
	let repoId: string;
	let docId: string;

	// Every test starts from an empty database holding one repository and one document.
	beforeEach(() => {
		client = createTestDb();
		repoId = seedRepo(client);
		docId = seedDocument(client, repoId);
	});

	it('returns empty array when no embeddings exist', () => {
		const vs = new VectorSearch(client);
		const results = vs.vectorSearch(new Float32Array([1, 0]), repoId);
		expect(results).toHaveLength(0);
	});

	it('returns results sorted by descending cosine similarity', () => {
		const s1 = seedSnippet(client, { repositoryId: repoId, documentId: docId, content: 'alpha' });
		const s2 = seedSnippet(client, { repositoryId: repoId, documentId: docId, content: 'beta' });
		const s3 = seedSnippet(client, { repositoryId: repoId, documentId: docId, content: 'gamma' });

		// Query: [1, 0, 0, 0]
		// s1: [1, 0, 0, 0] → similarity 1.0 (most similar)
		// s2: [0, 1, 0, 0] → similarity 0.0
		// s3: [0, 0, 1, 0] → similarity 0.0
		seedEmbedding(client, s1, [1, 0, 0, 0]);
		seedEmbedding(client, s2, [0, 1, 0, 0]);
		seedEmbedding(client, s3, [0, 0, 1, 0]);

		const vs = new VectorSearch(client);
		const results = vs.vectorSearch(new Float32Array([1, 0, 0, 0]), repoId);

		expect(results[0].snippetId).toBe(s1);
		expect(results[0].score).toBeCloseTo(1.0, 4);
		expect(results.length).toBe(3);

		// Check the ordering itself, not just the top hit: scores must
		// never increase from one position to the next.
		for (let i = 1; i < results.length; i++) {
			expect(results[i - 1].score).toBeGreaterThanOrEqual(results[i].score);
		}
	});

	it('respects the limit parameter', () => {
		for (let i = 0; i < 5; i++) {
			const id = seedSnippet(client, {
				repositoryId: repoId,
				documentId: docId,
				content: `item ${i}`
			});
			seedEmbedding(client, id, [i * 0.1, 1 - i * 0.1]);
		}

		const vs = new VectorSearch(client);
		const results = vs.vectorSearch(new Float32Array([1, 0]), repoId, 3);
		expect(results.length).toBeLessThanOrEqual(3);
	});

	it('only returns snippets from the specified repository', () => {
		const otherRepoId = seedRepo(client, '/other/repo');
		const otherDocId = seedDocument(client, otherRepoId);

		const s1 = seedSnippet(client, { repositoryId: repoId, documentId: docId, content: 'mine' });
		const s2 = seedSnippet(client, {
			repositoryId: otherRepoId,
			documentId: otherDocId,
			content: 'theirs'
		});

		// Both snippets get the same vector, so only the repository filter
		// can tell them apart.
		seedEmbedding(client, s1, [1, 0]);
		seedEmbedding(client, s2, [1, 0]);

		const vs = new VectorSearch(client);
		const results = vs.vectorSearch(new Float32Array([1, 0]), repoId);

		expect(results).toHaveLength(1);
		expect(results[0].snippetId).toBe(s1);
	});

	it('handles embeddings with negative values', () => {
		const s1 = seedSnippet(client, { repositoryId: repoId, documentId: docId, content: 'neg' });
		seedEmbedding(client, s1, [-0.5, 0.5]);

		const vs = new VectorSearch(client);
		const results = vs.vectorSearch(new Float32Array([-0.5, 0.5]), repoId);
		expect(results[0].score).toBeCloseTo(1.0, 4);
	});
});
|
||||||
|
|
||||||
|
// ===========================================================================
|
||||||
|
// HybridSearchService
|
||||||
|
// ===========================================================================
|
||||||
|
|
||||||
|
describe('HybridSearchService', () => {
	let client: Database.Database;
	let searchService: SearchService;
	let repoId: string;
	let docId: string;

	// Every test starts from an empty database holding one repository and one document.
	beforeEach(() => {
		client = createTestDb();
		searchService = new SearchService(client);
		repoId = seedRepo(client);
		docId = seedDocument(client, repoId);
	});

	// -------------------------------------------------------------------------
	// FTS5-only mode (no provider / alpha = 0)
	// -------------------------------------------------------------------------

	it('returns FTS5 results when embeddingProvider is null', async () => {
		seedSnippet(client, { repositoryId: repoId, documentId: docId, content: 'hello world' });

		const svc = new HybridSearchService(client, searchService, null);
		const results = await svc.search('hello', { repositoryId: repoId });

		expect(results.length).toBeGreaterThan(0);
		expect(results[0].snippet.content).toBe('hello world');
	});

	it('returns FTS5 results when alpha = 0', async () => {
		seedSnippet(client, { repositoryId: repoId, documentId: docId, content: 'alpha zero test' });

		const provider = makeMockProvider([[1, 0]]);
		const svc = new HybridSearchService(client, searchService, provider);
		const results = await svc.search('alpha zero', { repositoryId: repoId, alpha: 0 });

		expect(results.length).toBeGreaterThan(0);
	});

	it('returns empty array when FTS5 query is blank and no provider', async () => {
		const svc = new HybridSearchService(client, searchService, null);
		const results = await svc.search(' ', { repositoryId: repoId });
		expect(results).toHaveLength(0);
	});

	it('falls back to FTS5 when noop provider returns empty embeddings', async () => {
		seedSnippet(client, {
			repositoryId: repoId,
			documentId: docId,
			content: 'noop fallback test'
		});

		const svc = new HybridSearchService(client, searchService, makeNoopProvider());
		const results = await svc.search('noop fallback', { repositoryId: repoId });

		expect(results.length).toBeGreaterThan(0);
	});

	// -------------------------------------------------------------------------
	// Hybrid mode
	// -------------------------------------------------------------------------

	it('returns results when hybrid mode is active (alpha = 0.5)', async () => {
		const s1 = seedSnippet(client, {
			repositoryId: repoId,
			documentId: docId,
			content: 'hybrid search keyword match'
		});
		seedEmbedding(client, s1, [1, 0, 0, 0]);

		const provider = makeMockProvider([[1, 0, 0, 0]]);
		const svc = new HybridSearchService(client, searchService, provider);

		const results = await svc.search('hybrid search', {
			repositoryId: repoId,
			alpha: 0.5
		});

		expect(results.length).toBeGreaterThan(0);
	});

	it('deduplicates snippets appearing in both FTS5 and vector results', async () => {
		const s1 = seedSnippet(client, {
			repositoryId: repoId,
			documentId: docId,
			content: 'deduplicate this snippet carefully'
		});
		seedEmbedding(client, s1, [1, 0]);

		const provider = makeMockProvider([[1, 0]]);
		const svc = new HybridSearchService(client, searchService, provider);

		const results = await svc.search('deduplicate snippet', {
			repositoryId: repoId,
			alpha: 0.5
		});

		// No duplicate IDs.
		const ids = results.map((r) => r.snippet.id);
		expect(ids.length).toBe(new Set(ids).size);
	});

	it('respects the limit option', async () => {
		for (let i = 0; i < 10; i++) {
			const id = seedSnippet(client, {
				repositoryId: repoId,
				documentId: docId,
				content: `pagination test item number ${i} relevant content here`
			});
			seedEmbedding(client, id, [1, i * 0.1]);
		}

		const provider = makeMockProvider([[1, 0]]);
		const svc = new HybridSearchService(client, searchService, provider);

		const results = await svc.search('pagination test', {
			repositoryId: repoId,
			limit: 3,
			alpha: 0.5
		});

		expect(results.length).toBeLessThanOrEqual(3);
	});

	// -------------------------------------------------------------------------
	// Pure vector mode
	// -------------------------------------------------------------------------

	it('returns vector-ranked results when alpha = 1', async () => {
		const s1 = seedSnippet(client, {
			repositoryId: repoId,
			documentId: docId,
			content: 'vector only mode'
		});
		const s2 = seedSnippet(client, {
			repositoryId: repoId,
			documentId: docId,
			content: 'unrelated content'
		});

		// s1 is aligned with the query; s2 is orthogonal.
		seedEmbedding(client, s1, [1, 0]);
		seedEmbedding(client, s2, [0, 1]);

		const provider = makeMockProvider([[1, 0]]);
		const svc = new HybridSearchService(client, searchService, provider);

		const results = await svc.search('anything', {
			repositoryId: repoId,
			alpha: 1
		});

		expect(results[0].snippet.id).toBe(s1);
	});

	// -------------------------------------------------------------------------
	// Result structure
	// -------------------------------------------------------------------------

	it('results include snippet and repository metadata', async () => {
		const s1 = seedSnippet(client, {
			repositoryId: repoId,
			documentId: docId,
			content: 'metadata check snippet content',
			title: 'My Snippet Title'
		});
		seedEmbedding(client, s1, [1, 0]);

		const provider = makeMockProvider([[1, 0]]);
		const svc = new HybridSearchService(client, searchService, provider);

		const results = await svc.search('metadata check', {
			repositoryId: repoId,
			alpha: 0.5
		});

		expect(results.length).toBeGreaterThan(0);
		const first = results[0];
		expect(first.snippet.id).toBeDefined();
		expect(first.snippet.content).toBeDefined();
		expect(first.repository.id).toBe(repoId);
		expect(first.repository.title).toBe('Test Repo');
	});

	it('all results belong to the requested repository', async () => {
		const otherRepoId = seedRepo(client, '/other/repo');
		const otherDocId = seedDocument(client, otherRepoId);

		for (let i = 0; i < 3; i++) {
			const id = seedSnippet(client, {
				repositoryId: repoId,
				documentId: docId,
				content: `target repository keyword item ${i}`
			});
			seedEmbedding(client, id, [1, i * 0.1]);
		}
		for (let i = 0; i < 3; i++) {
			const id = seedSnippet(client, {
				repositoryId: otherRepoId,
				documentId: otherDocId,
				content: `other repository keyword item ${i}`
			});
			seedEmbedding(client, id, [1, i * 0.1]);
		}

		const provider = makeMockProvider([[1, 0]]);
		const svc = new HybridSearchService(client, searchService, provider);

		const results = await svc.search('repository keyword', {
			repositoryId: repoId,
			alpha: 0.5
		});

		// `every` is vacuously true on an empty array, so require at least
		// one hit before checking the repository of each result.
		expect(results.length).toBeGreaterThan(0);
		expect(results.every((r) => r.snippet.repositoryId === repoId)).toBe(true);
	});

	it('filters by snippet type when provided', async () => {
		const code = seedSnippet(client, {
			repositoryId: repoId,
			documentId: docId,
			content: 'function example code snippet',
			type: 'code'
		});
		const info = seedSnippet(client, {
			repositoryId: repoId,
			documentId: docId,
			content: 'function example info snippet',
			type: 'info'
		});
		seedEmbedding(client, code, [1, 0]);
		seedEmbedding(client, info, [1, 0]);

		const provider = makeMockProvider([[1, 0]]);
		const svc = new HybridSearchService(client, searchService, provider);

		const codeResults = await svc.search('function example', {
			repositoryId: repoId,
			type: 'code',
			alpha: 0.5
		});

		// Guard against a vacuous pass: the code snippet must actually be returned.
		expect(codeResults.length).toBeGreaterThan(0);
		expect(codeResults.every((r) => r.snippet.type === 'code')).toBe(true);
	});

	// -------------------------------------------------------------------------
	// Default alpha
	// -------------------------------------------------------------------------

	it('uses alpha = 0.5 when not specified', async () => {
		const s1 = seedSnippet(client, {
			repositoryId: repoId,
			documentId: docId,
			content: 'default alpha hybrid test content'
		});
		seedEmbedding(client, s1, [1, 0]);

		const provider = makeMockProvider([[1, 0]]);
		const svc = new HybridSearchService(client, searchService, provider);

		// Should not throw and should return results.
		const results = await svc.search('default alpha hybrid', { repositoryId: repoId });
		expect(Array.isArray(results)).toBe(true);
		expect(results.length).toBeGreaterThan(0);
	});
});
|
||||||
226
src/lib/server/search/hybrid.search.service.ts
Normal file
226
src/lib/server/search/hybrid.search.service.ts
Normal file
@@ -0,0 +1,226 @@
|
|||||||
|
/**
|
||||||
|
* HybridSearchService — combines FTS5 keyword search with vector similarity
|
||||||
|
* search using Reciprocal Rank Fusion (RRF) to produce a hybrid ranking.
|
||||||
|
*
|
||||||
|
* When no embedding provider is configured (or alpha = 0), the service
|
||||||
|
* transparently falls back to FTS5-only mode with zero overhead.
|
||||||
|
*
|
||||||
|
* Configuration model:
|
||||||
|
* alpha = 0.0 → FTS5 only
|
||||||
|
* alpha = 0.5 → balanced hybrid (default)
|
||||||
|
* alpha = 1.0 → vector only
|
||||||
|
*/
|
||||||
|
|
||||||
|
import type Database from 'better-sqlite3';
|
||||||
|
import type { EmbeddingProvider } from '../embeddings/provider.js';
|
||||||
|
import type { SnippetSearchResult } from './search.service.js';
|
||||||
|
import { SearchService } from './search.service.js';
|
||||||
|
import { VectorSearch } from './vector.search.js';
|
||||||
|
import { reciprocalRankFusion } from './rrf.js';
|
||||||
|
import type { Snippet } from '$lib/types';
|
||||||
|
|
||||||
|
// ---------------------------------------------------------------------------
|
||||||
|
// Public interfaces
|
||||||
|
// ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
/** Per-call parameters accepted by `HybridSearchService.search`. */
export interface HybridSearchOptions {
	/** Repository whose snippets are searched. */
	repositoryId: string;
	/** Optional version id to restrict the search to. */
	versionId?: string;
	/** Optional snippet type to restrict the search to. */
	type?: 'code' | 'info';
	/** Upper bound on the number of returned results. Default: 20. */
	limit?: number;
	/**
	 * Weight that blends FTS5 and vector search:
	 * 0.0 = FTS5 only, 0.5 = balanced, 1.0 = vector only.
	 * Default: 0.5.
	 */
	alpha?: number;
}
|
||||||
|
|
||||||
|
/**
 * Global search configuration, persisted in the settings table under the
 * `search_config` key.
 */
export interface SearchConfig {
	/** Blend weight in the range 0.0–1.0. Default: 0.5. */
	alpha: number;
	/** Cap on the number of results per search. Default: 20. */
	maxResults: number;
	/** Set to true when an embedding provider is configured. */
	enableHybrid: boolean;
}
|
||||||
|
|
||||||
|
// ---------------------------------------------------------------------------
|
||||||
|
// Raw DB row used when re-fetching snippets by ID
|
||||||
|
// ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
/**
 * Row shape produced when snippets are re-fetched by id: the snippet
 * columns in their snake_case database form, plus the id and title of the
 * joined repository (aliased `repo_id` / `repo_title`).
 */
interface RawSnippetById {
	// Snippet columns.
	id: string;
	document_id: string;
	repository_id: string;
	version_id: string | null;
	type: 'code' | 'info';
	title: string | null;
	content: string;
	language: string | null;
	breadcrumb: string | null;
	token_count: number | null;
	created_at: number;
	// Joined repository columns.
	repo_id: string;
	repo_title: string;
}
|
||||||
|
|
||||||
|
// ---------------------------------------------------------------------------
|
||||||
|
// HybridSearchService
|
||||||
|
// ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
export class HybridSearchService {
|
||||||
|
/** Vector-similarity searcher bound to the same database handle as the service. */
private readonly vectorSearch: VectorSearch;

/**
 * @param db - Database handle used for vector search and for re-fetching snippets by id.
 * @param searchService - FTS5 keyword search service.
 * @param embeddingProvider - Provider used to embed queries, or `null` to run in FTS5-only mode.
 */
constructor(
	private readonly db: Database.Database,
	private readonly searchService: SearchService,
	private readonly embeddingProvider: EmbeddingProvider | null
) {
	this.vectorSearch = new VectorSearch(this.db);
}
|
||||||
|
|
||||||
|
/**
 * Execute a hybrid search combining FTS5 and (optionally) vector search.
 *
 * When `embeddingProvider` is null or `alpha` is 0 (or below), the method
 * returns FTS5 results directly without embedding the query. When `alpha`
 * is 1 (or above), results are ranked by vector similarity alone.
 * Otherwise both rankings are merged with Reciprocal Rank Fusion.
 *
 * @param query - Raw search string (preprocessing handled by SearchService).
 * @param options - Search parameters including repositoryId and alpha blend.
 * @returns Ranked array of SnippetSearchResult, deduplicated by snippet ID,
 *   containing at most `limit` entries. A non-positive `limit` yields `[]`.
 */
async search(
	query: string,
	options: HybridSearchOptions
): Promise<SnippetSearchResult[]> {
	const limit = options.limit ?? 20;
	const alpha = options.alpha ?? 0.5;

	// A non-positive limit can never produce results; bail out before doing
	// any work (a negative value would otherwise reach `slice(0, limit)`
	// and trim from the end of the list instead of capping it).
	if (limit <= 0) {
		return [];
	}

	// Always run FTS5 — it is synchronous and fast.
	const ftsResults = this.searchService.searchSnippets(query, {
		repositoryId: options.repositoryId,
		versionId: options.versionId,
		type: options.type,
		limit: limit * 3 // wider candidate pool for fusion
	});

	// Degenerate cases: no provider or pure FTS5 mode. Values below the
	// documented 0.0–1.0 range are treated like 0.
	if (!this.embeddingProvider || alpha <= 0) {
		return ftsResults.slice(0, limit);
	}

	// Embed query and run vector search.
	const embeddings = await this.embeddingProvider.embed([query]);

	// Provider may be a Noop (returns empty array) — fall back gracefully.
	if (embeddings.length === 0) {
		return ftsResults.slice(0, limit);
	}

	const queryEmbedding = embeddings[0].values;

	// NOTE(review): only repositoryId is forwarded here, so vector
	// candidates are not restricted by `options.versionId` — confirm
	// whether that is intended.
	const vectorResults = this.vectorSearch.vectorSearch(
		queryEmbedding,
		options.repositoryId,
		limit * 3
	);

	let rankedIds: string[];
	if (alpha >= 1) {
		// Pure vector mode: skip RRF and use the vector ranking directly.
		// Values above the documented range are treated like 1.
		rankedIds = vectorResults.map((r) => r.snippetId);
	} else {
		// Build ranked lists for RRF. Score field is unused by RRF — only
		// the array index (rank) matters.
		// NOTE(review): alpha is not passed to the fusion step, so every
		// value strictly between 0 and 1 produces the same ranking.
		const ftsRanked = ftsResults.map((r, i) => ({ id: r.snippet.id, score: i }));
		const vecRanked = vectorResults.map((r, i) => ({ id: r.snippetId, score: i }));
		rankedIds = reciprocalRankFusion(ftsRanked, vecRanked).map((r) => r.id);
	}

	// The vector search is not given the type filter; it is only applied
	// inside fetchSnippetsByIds. Truncating to `limit` before that lookup
	// would let wrong-type candidates use up slots and under-fill the
	// result, so with a type filter the whole candidate list is fetched
	// and the cap is applied afterwards.
	const candidateIds = options.type === undefined ? rankedIds.slice(0, limit) : rankedIds;
	return this.fetchSnippetsByIds(candidateIds, options.repositoryId, options.type).slice(
		0,
		limit
	);
}
|
||||||
|
|
||||||
|
// -------------------------------------------------------------------------
|
||||||
|
// Private helpers
|
||||||
|
// -------------------------------------------------------------------------
|
||||||
|
|
||||||
|
/**
 * Hydrate an ordered list of snippet IDs into full search results.
 *
 * One query loads every matching snippet together with its owning
 * repository. The rows are then re-emitted following the order of `ids`, so
 * the ranking computed by the caller survives the round trip. IDs without a
 * matching row (unknown, owned by another repository, or excluded by `type`)
 * do not appear in the output.
 *
 * @param ids - Snippet IDs, best match first.
 * @param repositoryId - Only snippets belonging to this repository are returned.
 * @param type - When provided, only snippets of this type are returned.
 * @returns One result per resolvable ID, in `ids` order. `score` is always 0.
 */
private fetchSnippetsByIds(
	ids: string[],
	repositoryId: string,
	type?: 'code' | 'info'
): SnippetSearchResult[] {
	if (ids.length === 0) {
		return [];
	}

	// One positional placeholder per requested ID, followed by the repository
	// filter and (optionally) the type filter.
	const idPlaceholders = Array.from(ids, () => '?').join(', ');
	const typeFilter = type !== undefined ? ' AND s.type = ?' : '';
	const bindings: unknown[] = [
		...ids,
		repositoryId,
		...(type !== undefined ? [type] : [])
	];

	const statement = this.db.prepare<unknown[], RawSnippetById>(
		`SELECT
			s.id, s.document_id, s.repository_id, s.version_id, s.type,
			s.title, s.content, s.language, s.breadcrumb, s.token_count,
			s.created_at,
			r.id AS repo_id,
			r.title AS repo_title
		FROM snippets s
		JOIN repositories r ON r.id = s.repository_id
		WHERE s.id IN (${idPlaceholders})
		AND s.repository_id = ?${typeFilter}`
	);
	const fetched = statement.all(...bindings) as RawSnippetById[];

	// SQL gives no ordering guarantee here, so index the rows by ID and walk
	// the caller's list to restore the requested order.
	const rowsById = new Map(fetched.map((row) => [row.id, row] as const));

	return ids.flatMap((id): SnippetSearchResult[] => {
		const row = rowsById.get(id);
		if (row === undefined) {
			return [];
		}

		const snippet: Snippet = {
			id: row.id,
			documentId: row.document_id,
			repositoryId: row.repository_id,
			versionId: row.version_id,
			type: row.type,
			title: row.title,
			content: row.content,
			language: row.language,
			breadcrumb: row.breadcrumb,
			tokenCount: row.token_count,
			// Scaled by 1000 before building the Date, presumably because the
			// column stores Unix seconds.
			createdAt: new Date(row.created_at * 1000)
		};

		return [
			{
				snippet,
				// The fused score is not on the BM25 scale; position in the
				// returned array carries the ranking instead.
				score: 0,
				repository: { id: row.repo_id, title: row.repo_title }
			}
		];
	});
}
|
||||||
|
}
|
||||||
51
src/lib/server/search/rrf.ts
Normal file
51
src/lib/server/search/rrf.ts
Normal file
@@ -0,0 +1,51 @@
|
|||||||
|
/**
|
||||||
|
* Reciprocal Rank Fusion (RRF) implementation.
|
||||||
|
*
|
||||||
|
* RRF combines multiple ranked lists into a single fused ranking without
|
||||||
|
* requiring score normalization. The standard constant K=60 is used to
|
||||||
|
* dampen the influence of very high ranks.
|
||||||
|
*
|
||||||
|
* Reference: Cormack, Clarke & Buettcher (2009) — "Reciprocal Rank Fusion
|
||||||
|
* outperforms Condorcet and individual Rank Learning Methods."
|
||||||
|
*/
|
||||||
|
|
||||||
|
/** A single entry of a ranked list, identified by an opaque string id. */
export interface RankedItem {
	/** Identifier used to match the same item across lists. */
	id: string;
	/** Source-specific score. Not read by RRF, which only uses list positions. */
	score: number;
}

/** Output item produced by RRF. */
export interface FusedItem {
	id: string;
	/** Sum of the item's reciprocal-rank contributions over all input lists. */
	rrfScore: number;
}

/**
 * Combine multiple ranked lists using Reciprocal Rank Fusion.
 *
 * Each item's contribution per list is `1 / (K + rank + 1)` where rank is the
 * 0-based position in that list and K = 60. Items that appear in several
 * lists accumulate one contribution from each, which boosts items ranked
 * highly across sources.
 *
 * A list contributes at most once per id: if an id is repeated inside one
 * list, only its first (best-ranked) occurrence counts. Without this, a
 * duplicated id would be scored as if several independent sources agreed on it.
 *
 * @param rankings - Zero or more arrays of `{ id, score }` items sorted by
 *                   descending relevance (index 0 = most relevant).
 * @returns Fused array sorted by descending rrfScore, deduplicated by id.
 *          Ties keep first-seen order. Empty when no items are supplied.
 */
export function reciprocalRankFusion(
	...rankings: Array<Array<RankedItem>>
): Array<FusedItem> {
	const K = 60; // Standard RRF constant.
	const scores = new Map<string, number>();

	for (const ranking of rankings) {
		// Ids already credited by this particular list.
		const seen = new Set<string>();

		ranking.forEach(({ id }, rank) => {
			if (seen.has(id)) return;
			seen.add(id);
			scores.set(id, (scores.get(id) ?? 0) + 1 / (K + rank + 1));
		});
	}

	// Map iteration follows insertion order and Array.prototype.sort is
	// stable, so equal scores keep the order in which ids were first seen.
	return Array.from(scores, ([id, rrfScore]) => ({ id, rrfScore })).sort(
		(a, b) => b.rrfScore - a.rrfScore
	);
}
|
||||||
108
src/lib/server/search/vector.search.ts
Normal file
108
src/lib/server/search/vector.search.ts
Normal file
@@ -0,0 +1,108 @@
|
|||||||
|
/**
|
||||||
|
* Vector similarity search over stored snippet embeddings.
|
||||||
|
*
|
||||||
|
* SQLite does not natively support vector operations, so cosine similarity is
|
||||||
|
* computed in JavaScript after loading candidate embeddings from the
|
||||||
|
* snippet_embeddings table.
|
||||||
|
*
|
||||||
|
* Performance note: For repositories with > 50k snippets, pre-filtering by
|
||||||
|
* FTS5 candidates before computing cosine similarity is recommended. For v1,
|
||||||
|
* in-memory computation is acceptable.
|
||||||
|
*/
|
||||||
|
|
||||||
|
import type Database from 'better-sqlite3';
|
||||||
|
|
||||||
|
// ---------------------------------------------------------------------------
|
||||||
|
// Types
|
||||||
|
// ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
/** One scored hit produced by a vector similarity search. */
export interface VectorSearchResult {
	/** ID of the snippet whose stored embedding was compared. */
	snippetId: string;
	/** Similarity between the query embedding and the snippet embedding. */
	score: number;
}
|
||||||
|
|
||||||
|
/** Shape of one row selected from snippet_embeddings joined with snippets. */
interface RawEmbeddingRow {
	/** Value of the `snippet_id` column. */
	snippet_id: string;
	/** Raw bytes of the `embedding` column; read as 4-byte floats by the search. */
	embedding: Buffer;
}
|
||||||
|
|
||||||
|
// ---------------------------------------------------------------------------
|
||||||
|
// Math helpers
|
||||||
|
// ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
/**
 * Compute cosine similarity between two Float32Array vectors.
 *
 * The result is always within [-1, 1], where 1 means identical direction.
 * Floating-point rounding can push the raw quotient marginally outside that
 * range (e.g. 1.0000000000000002 for a vector compared with itself), so the
 * value is clamped before it is returned.
 *
 * Returns 0 when the similarity is undefined: either vector has zero
 * magnitude, or a component is NaN/Infinity so that the denominator is not a
 * finite number. Returning 0 instead of NaN keeps score-based sort
 * comparators well-defined.
 *
 * @param a - First vector.
 * @param b - Second vector; must have the same length as `a`.
 * @returns Cosine similarity in [-1, 1], or 0 when undefined.
 * @throws Error when the two vectors have different lengths.
 */
export function cosineSimilarity(a: Float32Array, b: Float32Array): number {
	if (a.length !== b.length) {
		throw new Error(
			`Embedding dimension mismatch: ${a.length} vs ${b.length}`
		);
	}

	let dot = 0;
	let normA = 0;
	let normB = 0;

	for (let i = 0; i < a.length; i++) {
		dot += a[i] * b[i];
		normA += a[i] * a[i];
		normB += b[i] * b[i];
	}

	const denom = Math.sqrt(normA) * Math.sqrt(normB);

	// Zero magnitude (denom === 0) or NaN/Infinity input (non-finite denom):
	// the angle between the vectors is undefined.
	if (denom === 0 || !Number.isFinite(denom)) return 0;

	// Clamp away rounding error so the documented range always holds.
	return Math.max(-1, Math.min(1, dot / denom));
}
|
||||||
|
|
||||||
|
// ---------------------------------------------------------------------------
|
||||||
|
// VectorSearch class
|
||||||
|
// ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
/**
 * Brute-force vector search: loads every stored embedding of a repository
 * and ranks the snippets by cosine similarity to a query embedding.
 */
export class VectorSearch {
	private readonly stmt: Database.Statement<[string], RawEmbeddingRow>;

	/**
	 * @param db - Database handle used to read snippet embeddings.
	 */
	constructor(private readonly db: Database.Database) {
		// Prepare once — reused for every call.
		this.stmt = this.db.prepare<[string], RawEmbeddingRow>(`
			SELECT se.snippet_id, se.embedding
			FROM snippet_embeddings se
			JOIN snippets s ON s.id = se.snippet_id
			WHERE s.repository_id = ?
		`);
	}

	/**
	 * Search stored embeddings by cosine similarity to the query embedding.
	 *
	 * @param queryEmbedding - The embedded representation of the search query.
	 * @param repositoryId - Scope the search to a single repository.
	 * @param limit - Maximum number of results to return. Default: 50.
	 *                Values below 0 are treated as 0.
	 * @returns Results sorted by descending cosine similarity score.
	 * @throws Error (from `cosineSimilarity`) when a stored embedding has a
	 *         different dimension than `queryEmbedding`.
	 */
	vectorSearch(
		queryEmbedding: Float32Array,
		repositoryId: string,
		limit = 50
	): VectorSearchResult[] {
		const rows = this.stmt.all(repositoryId);

		const scored: VectorSearchResult[] = rows.map((row) => ({
			snippetId: row.snippet_id,
			score: cosineSimilarity(
				queryEmbedding,
				VectorSearch.decodeEmbedding(row.embedding)
			)
		}));

		// A negative limit would make slice() drop items from the END of the
		// list instead of returning nothing, so clamp it at 0.
		return scored
			.sort((a, b) => b.score - a.score)
			.slice(0, Math.max(0, limit));
	}

	/**
	 * Interpret an embedding BLOB as a sequence of 4-byte floats.
	 *
	 * A Buffer may be a view that starts at any byte offset inside its
	 * backing ArrayBuffer, while a Float32Array view must start at a multiple
	 * of 4 bytes (the constructor throws a RangeError otherwise). Aligned
	 * blobs are viewed in place without copying; unaligned ones are copied
	 * into a fresh, aligned buffer first. Trailing bytes that do not form a
	 * complete float are ignored.
	 */
	private static decodeEmbedding(blob: Buffer): Float32Array {
		const width = Float32Array.BYTES_PER_ELEMENT;
		const count = Math.floor(blob.byteLength / width);

		if (blob.byteOffset % width === 0) {
			return new Float32Array(blob.buffer, blob.byteOffset, count);
		}

		const aligned = new Uint8Array(count * width);
		aligned.set(blob.subarray(0, count * width));
		return new Float32Array(aligned.buffer);
	}
}
|
||||||
Reference in New Issue
Block a user