feat(TRUEREF-0023): add sqlite-vec search pipeline
This commit is contained in:
@@ -1,16 +1,12 @@
|
||||
/**
 * Vector similarity search over stored snippet embeddings.
 *
 * Uses sqlite-vec vector_top_k() for ANN search instead of in-memory cosine
 * similarity computation over all embeddings.
 *
 * Historical note: because SQLite does not natively support vector
 * operations, earlier revisions computed cosine similarity in JavaScript
 * after loading candidate embeddings from the snippet_embeddings table.
 * That in-memory computation was considered acceptable for v1, with the
 * recommendation to pre-filter by FTS5 candidates before computing cosine
 * similarity for repositories with > 50k snippets.
 */
|
||||
|
||||
import type Database from 'better-sqlite3';
|
||||
import { SqliteVecStore } from './sqlite-vec.store.js';
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Types
|
||||
@@ -28,12 +24,6 @@ export interface VectorSearchOptions {
|
||||
limit?: number;
|
||||
}
|
||||
|
||||
/**
 * Raw DB row from snippet_embeddings joined with snippets.
 *
 * Field names are snake_case, matching the column names they are read from.
 */
|
||||
interface RawEmbeddingRow {
|
||||
/** Value of the snippet_embeddings.snippet_id column. */
snippet_id: string;
|
||||
/**
 * Stored embedding bytes from the snippet_embeddings.embedding column.
 * NOTE(review): presumably packed Float32 values — confirm against the writer.
 */
embedding: Buffer;
|
||||
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Math helpers
|
||||
// ---------------------------------------------------------------------------
|
||||
@@ -69,46 +59,26 @@ export function cosineSimilarity(a: Float32Array, b: Float32Array): number {
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/**
 * Nearest-neighbour search over stored snippet embeddings.
 *
 * All querying is delegated to a {@link SqliteVecStore} created on the same
 * database connection; this class only adapts the store's results to the
 * {@link VectorSearchResult} shape.
 */
export class VectorSearch {
	/** Store that executes the nearest-neighbour query against the database. */
	private readonly sqliteVecStore: SqliteVecStore;

	/**
	 * @param db - Open better-sqlite3 connection; it is handed to the
	 *   SqliteVecStore that backs every search.
	 */
	constructor(private readonly db: Database.Database) {
		this.sqliteVecStore = new SqliteVecStore(db);
	}

	/**
	 * Find the stored snippet embeddings nearest to the query embedding.
	 *
	 * The options object is forwarded unchanged to
	 * `SqliteVecStore.queryNearestNeighbors`, so filtering, defaults and the
	 * result limit are decided by the store rather than by this method.
	 *
	 * @param queryEmbedding - The embedded representation of the search query.
	 * @param options - Search options including repositoryId, optional versionId, profileId, and limit.
	 * @returns One `{ snippetId, score }` entry per store result, in the order
	 *   the store returned them (NOTE(review): presumably best match first —
	 *   confirm against SqliteVecStore).
	 */
	vectorSearch(queryEmbedding: Float32Array, options: VectorSearchOptions): VectorSearchResult[] {
		return (
			this.sqliteVecStore
				.queryNearestNeighbors(queryEmbedding, options)
				// Project to the public result shape so store-specific fields do not leak.
				.map((result) => ({ snippetId: result.snippetId, score: result.score }))
		);
	}
}
|
||||
|
||||
Reference in New Issue
Block a user