/**
 * Vector similarity search over stored snippet embeddings.
 *
 * Uses sqlite-vec vector_top_k() for ANN search instead of in-memory cosine
 * similarity computation over all embeddings.
 */

import type Database from 'better-sqlite3';
import { SqliteVecStore } from './sqlite-vec.store.js';

// ---------------------------------------------------------------------------
// Types
// ---------------------------------------------------------------------------

/** A single search hit: the matched snippet and the score reported for it. */
export interface VectorSearchResult {
  snippetId: string;
  score: number;
}

/** Filters and limits forwarded unchanged to the underlying vector store. */
export interface VectorSearchOptions {
  repositoryId: string;
  versionId?: string;
  profileId?: string;
  limit?: number;
}

// ---------------------------------------------------------------------------
// Math helpers
// ---------------------------------------------------------------------------

/**
 * Compute cosine similarity between two Float32Array vectors.
 *
 * This helper is not called by {@link VectorSearch} in this file; it is
 * exported for callers that need to score two embeddings directly.
 *
 * @param a - First embedding.
 * @param b - Second embedding; must have the same length as `a`.
 * @returns A value in [-1, 1] where 1 is identical direction. Returns 0 when
 *   either vector has zero magnitude to avoid division by zero. NaN
 *   components propagate to a NaN result.
 * @throws Error when the two vectors have different lengths.
 */
export function cosineSimilarity(a: Float32Array, b: Float32Array): number {
  if (a.length !== b.length) {
    throw new Error(`Embedding dimension mismatch: ${a.length} vs ${b.length}`);
  }

  let dot = 0;
  let normA = 0;
  let normB = 0;

  for (let i = 0; i < a.length; i++) {
    dot += a[i] * b[i];
    normA += a[i] * a[i];
    normB += b[i] * b[i];
  }

  const denom = Math.sqrt(normA) * Math.sqrt(normB);
  if (denom === 0) return 0;

  // Rounding in the square roots can push the ratio a hair outside [-1, 1]
  // (e.g. [1, 1, 1] against itself yields 1.0000000000000002). Clamp so the
  // documented range holds; Math.min/Math.max pass NaN through unchanged.
  return Math.min(1, Math.max(-1, dot / denom));
}

// ---------------------------------------------------------------------------
// VectorSearch class
// ---------------------------------------------------------------------------

/**
 * Thin facade over {@link SqliteVecStore} that exposes nearest-neighbour
 * lookups as plain `{ snippetId, score }` pairs.
 */
export class VectorSearch {
  private readonly sqliteVecStore: SqliteVecStore;

  /**
   * @param db - Open better-sqlite3 handle; it is handed to the
   *   {@link SqliteVecStore} that performs the actual queries.
   */
  constructor(private readonly db: Database.Database) {
    this.sqliteVecStore = new SqliteVecStore(db);
  }

  /**
   * Find the stored embeddings nearest to the query embedding.
   *
   * The lookup is delegated entirely to
   * `SqliteVecStore.queryNearestNeighbors`; no similarity scoring is done in
   * this class. Each store result is reduced to its `snippetId` and `score`.
   *
   * @param queryEmbedding - The embedded representation of the search query.
   * @param options - Search options including repositoryId, optional
   *   versionId, profileId, and limit; passed through to the store as-is.
   * @returns One entry per store result, in the order the store returned
   *   them. NOTE(review): presumably best match first — confirm against
   *   SqliteVecStore.queryNearestNeighbors.
   */
  vectorSearch(queryEmbedding: Float32Array, options: VectorSearchOptions): VectorSearchResult[] {
    return this.sqliteVecStore
      .queryNearestNeighbors(queryEmbedding, options)
      .map((result) => ({ snippetId: result.snippetId, score: result.score }));
  }
}