Files
trueref/src/lib/server/search/vector.search.ts
2026-04-01 14:09:19 +02:00

85 lines
2.6 KiB
TypeScript

/**
* Vector similarity search over stored snippet embeddings.
*
* Uses sqlite-vec vector_top_k() for ANN search instead of in-memory cosine
* similarity computation over all embeddings.
*/
import type Database from 'better-sqlite3';
import { SqliteVecStore } from './sqlite-vec.store.js';
// ---------------------------------------------------------------------------
// Types
// ---------------------------------------------------------------------------
/** A single hit returned by {@link VectorSearch.vectorSearch}. */
export interface VectorSearchResult {
/** Identifier of the matched snippet, copied from the store's result row. */
snippetId: string;
/**
 * Score reported by the underlying store for this snippet.
 * NOTE(review): presumably higher means more similar — confirm against SqliteVecStore.
 */
score: number;
}
/**
 * Options accepted by {@link VectorSearch.vectorSearch}.
 *
 * The object is forwarded unchanged to the underlying store, so the exact
 * filtering and default behaviour is defined there.
 */
export interface VectorSearchOptions {
/** Repository to search in (the only required option). */
repositoryId: string;
/** Optional version identifier — presumably narrows the search to one version; TODO confirm in the store. */
versionId?: string;
/** Optional profile identifier — presumably selects which embedding profile is searched; TODO confirm in the store. */
profileId?: string;
/** Optional cap on the number of results — the default when omitted is decided by the store; TODO confirm. */
limit?: number;
}
// ---------------------------------------------------------------------------
// Math helpers
// ---------------------------------------------------------------------------
/**
 * Compute the cosine similarity between two Float32Array vectors.
 *
 * The result is clamped to [-1, 1], where 1 means identical direction and -1
 * means opposite direction. Returns 0 when either vector has zero magnitude
 * (including two empty vectors) to avoid division by zero.
 *
 * @param a - First vector.
 * @param b - Second vector; must have the same length as `a`.
 * @returns The cosine of the angle between `a` and `b`, within [-1, 1].
 * @throws Error when the two vectors have different lengths.
 */
export function cosineSimilarity(a: Float32Array, b: Float32Array): number {
  if (a.length !== b.length) {
    throw new Error(`Embedding dimension mismatch: ${a.length} vs ${b.length}`);
  }

  let dot = 0;
  let normA = 0;
  let normB = 0;
  for (let i = 0; i < a.length; i++) {
    dot += a[i] * b[i];
    normA += a[i] * a[i];
    normB += b[i] * b[i];
  }

  // Take the square roots separately rather than sqrt(normA * normB) so the
  // intermediate product cannot overflow for very large components.
  const denom = Math.sqrt(normA) * Math.sqrt(normB);
  if (denom === 0) return 0;

  // Floating-point rounding can push the quotient a hair outside [-1, 1]
  // (e.g. [1,1,1] against itself yields 1.0000000000000002), which breaks the
  // documented range and makes Math.acos() return NaN. Clamp to restore the
  // contract; NaN inputs still propagate as NaN.
  return Math.max(-1, Math.min(1, dot / denom));
}
// ---------------------------------------------------------------------------
// VectorSearch class
// ---------------------------------------------------------------------------
export class VectorSearch {
  /** Store that performs the actual nearest-neighbour lookup. */
  private readonly sqliteVecStore: SqliteVecStore;

  /**
   * @param db - Database handle; a {@link SqliteVecStore} is created on top of it.
   */
  constructor(private readonly db: Database.Database) {
    this.sqliteVecStore = new SqliteVecStore(this.db);
  }

  /**
   * Find the stored snippets nearest to the query embedding.
   *
   * The lookup is delegated entirely to
   * `SqliteVecStore.queryNearestNeighbors`; no similarity math is performed
   * in this class. Each hit is reduced to its `snippetId` and `score`.
   *
   * @param queryEmbedding - The embedded representation of the search query.
   * @param options - Search options (repositoryId plus optional versionId,
   *   profileId and limit), passed through unchanged to the store.
   * @returns One entry per hit, in the order the store returned them.
   *   NOTE(review): presumably best match first — confirm in SqliteVecStore.
   */
  vectorSearch(queryEmbedding: Float32Array, options: VectorSearchOptions): VectorSearchResult[] {
    const neighbors = this.sqliteVecStore.queryNearestNeighbors(queryEmbedding, options);
    // Project to the public result shape so extra store fields do not leak out.
    return neighbors.map(({ snippetId, score }) => ({ snippetId, score }));
  }
}