import type Database from 'better-sqlite3';
import {
	loadSqliteVec,
	quoteSqliteIdentifier,
	sqliteVecRowidTableName,
	sqliteVecTableName
} from '$lib/server/db/sqlite-vec.js';

/**
 * Upper bound on the number of ids bound into a single `IN (...)` list.
 * Kept well below SQLite's bound-variable limit so that arbitrarily long id
 * lists can be processed in several statements instead of failing in one.
 */
const MAX_BOUND_IDS_PER_QUERY = 500;

/** Filters and limits accepted by {@link SqliteVecStore.queryNearestNeighbors}. */
export interface SqliteVecQueryOptions {
	/** Only snippets whose `repository_id` equals this value are returned. */
	repositoryId: string;
	/** When provided, additionally restricts results to this `version_id`. */
	versionId?: string;
	/** Embedding profile to search; defaults to `'local-default'`. */
	profileId?: string;
	/** Maximum number of results; defaults to 50. Values `<= 0` yield no results. */
	limit?: number;
}

/** One nearest-neighbour hit. */
export interface SqliteVecQueryResult {
	snippetId: string;
	/** `1 / (1 + distance)`, so smaller distances give larger scores. */
	score: number;
	/** Raw distance as reported by the vector table. */
	distance: number;
}

/** Row shape of `SELECT dimensions FROM embedding_profiles`. */
interface ProfileDimensionsRow {
	dimensions: number;
}

/** Aggregate over `snippet_embeddings.dimensions` for a single profile. */
interface StoredDimensionsRow {
	count: number;
	min_dimensions: number | null;
	max_dimensions: number | null;
}

/** Row shape of a rowid lookup in the per-profile rowid mapping table. */
interface SnippetRowidRow {
	rowid: number;
}

/** Row shape returned by the KNN query in `queryNearestNeighbors`. */
interface RawKnnRow {
	snippet_id: string;
	distance: number;
}

/** An embedding as stored in `snippet_embeddings`. */
interface CanonicalEmbeddingRow {
	snippet_id: string;
	embedding: Buffer;
}

/** Identifies one stored embedding by profile and snippet. */
interface StoredEmbeddingRef {
	profile_id: string;
	snippet_id: string;
}

/** Resolved table names (raw and quoted) plus vector width for one profile. */
interface ProfileStoreTables {
	vectorTableName: string;
	rowidTableName: string;
	quotedVectorTableName: string;
	quotedRowidTableName: string;
	dimensions: number;
}

/** Returns a Buffer that views (does not copy) the bytes of `values`. */
function toEmbeddingBuffer(values: Float32Array): Buffer {
	return Buffer.from(values.buffer, values.byteOffset, values.byteLength);
}

/**
 * Interprets `bytes` as float32 values.
 *
 * A view over the original memory is returned when the Buffer's byte offset is
 * 4-byte aligned. Otherwise the bytes are copied first, because constructing a
 * `Float32Array` at an unaligned offset throws.
 *
 * @param bytes - Raw float32 data.
 * @param dimensions - Number of floats to read; defaults to as many whole
 *   floats as `bytes` contains.
 * @throws RangeError when `dimensions` floats do not fit inside `bytes`. Without
 *   this check a view could extend past the Buffer into unrelated bytes of the
 *   same backing ArrayBuffer.
 */
function toFloat32Array(bytes: Buffer, dimensions?: number): Float32Array {
	const elementSize = Float32Array.BYTES_PER_ELEMENT;
	const length = dimensions ?? Math.floor(bytes.byteLength / elementSize);
	const requiredBytes = length * elementSize;

	if (requiredBytes > bytes.byteLength) {
		throw new RangeError(
			`Embedding buffer holds ${bytes.byteLength} bytes but ${length} float32 values require ${requiredBytes}`
		);
	}

	if (bytes.byteOffset % elementSize === 0) {
		return new Float32Array(bytes.buffer, bytes.byteOffset, length);
	}

	const aligned = new Uint8Array(requiredBytes);
	aligned.set(bytes.subarray(0, requiredBytes));
	return new Float32Array(aligned.buffer);
}

/** Maps a non-negative distance onto `(0, 1]`; a distance of 0 scores 1. */
function distanceToScore(distance: number): number {
	return 1 / (1 + distance);
}

/**
 * Maintains, per embedding profile, a `vec0` virtual table of vectors and a
 * companion table mapping vector rowids to snippet ids, and answers
 * nearest-neighbour queries against them.
 *
 * `snippet_embeddings` is treated as the source of truth: before each query the
 * per-profile tables are reconciled against it.
 */
export class SqliteVecStore {
	constructor(private readonly db: Database.Database) {}

	/**
	 * Creates the rowid-mapping table and the vector table for `profileId` if
	 * they do not exist yet.
	 *
	 * @param profileId - Id of a row in `embedding_profiles`.
	 * @param preferredDimensions - Vector width the caller intends to use.
	 * @returns The vector width in effect for the profile.
	 * @throws Error when the profile does not exist, or when
	 *   `preferredDimensions` contradicts the width of embeddings already stored
	 *   for the profile.
	 */
	ensureProfileStore(profileId: string, preferredDimensions?: number): number {
		const tables = this.getProfileStoreTables(profileId, preferredDimensions);
		this.createTablesIfMissing(tables);
		return tables.dimensions;
	}

	/**
	 * Inserts the vector for `snippetId`, or replaces it when one is already
	 * stored for that snippet.
	 *
	 * @throws Error when the profile does not exist or `embedding.length`
	 *   contradicts the width of embeddings already stored for the profile.
	 */
	upsertEmbedding(profileId: string, snippetId: string, embedding: Float32Array): void {
		const tables = this.getProfileStoreTables(profileId, embedding.length);
		this.createTablesIfMissing(tables);
		this.writeEmbedding(tables, snippetId, embedding);
	}

	/**
	 * Same as {@link upsertEmbedding}, but accepts the vector as raw float32
	 * bytes.
	 *
	 * @param dimensions - Number of floats to read from `embedding`; defaults to
	 *   as many whole floats as the buffer contains.
	 * @throws RangeError when `dimensions` floats do not fit inside `embedding`.
	 */
	upsertEmbeddingBuffer(
		profileId: string,
		snippetId: string,
		embedding: Buffer,
		dimensions?: number
	): void {
		this.upsertEmbedding(profileId, snippetId, toFloat32Array(embedding, dimensions));
	}

	/**
	 * Removes the vector stored for `snippetId` under `profileId`. Does nothing
	 * when no vector is stored for that snippet.
	 *
	 * The profile's tables are created first if missing, so the lookup cannot
	 * fail on a non-existent table.
	 *
	 * @throws Error when the profile does not exist.
	 */
	deleteEmbedding(profileId: string, snippetId: string): void {
		const tables = this.getProfileStoreTables(profileId);
		this.createTablesIfMissing(tables);
		this.removeEmbedding(tables, snippetId);
	}

	/**
	 * Removes the vectors of every snippet that belongs to one of
	 * `documentIds` and has a row in `snippet_embeddings`.
	 *
	 * Ids are de-duplicated and looked up in chunks so that very long lists do
	 * not exceed SQLite's bound-variable limit. The deletions themselves still
	 * run in a single transaction.
	 */
	deleteEmbeddingsForDocumentIds(documentIds: string[]): void {
		if (documentIds.length === 0) {
			return;
		}

		const uniqueIds = [...new Set(documentIds)];
		const refs: StoredEmbeddingRef[] = [];

		for (let start = 0; start < uniqueIds.length; start += MAX_BOUND_IDS_PER_QUERY) {
			const chunk = uniqueIds.slice(start, start + MAX_BOUND_IDS_PER_QUERY);
			const placeholders = chunk.map(() => '?').join(', ');
			const chunkRows = this.db
				.prepare<string[], StoredEmbeddingRef>(
					`SELECT DISTINCT se.profile_id, se.snippet_id
					 FROM snippet_embeddings se
					 INNER JOIN snippets s ON s.id = se.snippet_id
					 WHERE s.document_id IN (${placeholders})`
				)
				.all(...chunk);

			for (const row of chunkRows) {
				refs.push(row);
			}
		}

		this.deleteEmbeddingRefs(refs);
	}

	/** Removes the vectors of every embedded snippet in `repositoryId`. */
	deleteEmbeddingsForRepository(repositoryId: string): void {
		const rows = this.db
			.prepare<[string], StoredEmbeddingRef>(
				`SELECT DISTINCT se.profile_id, se.snippet_id
				 FROM snippet_embeddings se
				 INNER JOIN snippets s ON s.id = se.snippet_id
				 WHERE s.repository_id = ?`
			)
			.all(repositoryId);

		this.deleteEmbeddingRefs(rows);
	}

	/** Removes the vectors of every embedded snippet in one version of a repository. */
	deleteEmbeddingsForVersion(repositoryId: string, versionId: string): void {
		const rows = this.db
			.prepare<[string, string], StoredEmbeddingRef>(
				`SELECT DISTINCT se.profile_id, se.snippet_id
				 FROM snippet_embeddings se
				 INNER JOIN snippets s ON s.id = se.snippet_id
				 WHERE s.repository_id = ? AND s.version_id = ?`
			)
			.all(repositoryId, versionId);

		this.deleteEmbeddingRefs(rows);
	}

	/**
	 * Returns up to `limit` snippets of the given repository (and optionally
	 * version), ordered by ascending distance to `queryEmbedding`.
	 *
	 * Before searching, the profile's vector tables are reconciled with
	 * `snippet_embeddings`.
	 *
	 * @throws Error when the profile does not exist or `queryEmbedding.length`
	 *   contradicts the width of embeddings already stored for the profile.
	 */
	queryNearestNeighbors(
		queryEmbedding: Float32Array,
		options: SqliteVecQueryOptions
	): SqliteVecQueryResult[] {
		const { repositoryId, versionId, profileId = 'local-default', limit = 50 } = options;
		if (limit <= 0) {
			return [];
		}

		const tables = this.getProfileStoreTables(profileId, queryEmbedding.length);
		this.createTablesIfMissing(tables);

		const totalRows = this.synchronizeProfileStore(profileId, tables);
		if (totalRows === 0) {
			return [];
		}

		// k is set to the number of stored vectors so that the repository/version
		// filters below are applied to every candidate rather than to a
		// pre-truncated neighbour list.
		// NOTE(review): sqlite-vec may enforce an upper bound on k; confirm the
		// behaviour for profiles holding very many vectors.
		let sql = `
			SELECT rowids.snippet_id, vec.distance
			FROM ${tables.quotedVectorTableName} vec
			JOIN ${tables.quotedRowidTableName} rowids ON rowids.rowid = vec.rowid
			JOIN snippets s ON s.id = rowids.snippet_id
			WHERE vec.embedding MATCH ?
			  AND vec.k = ?
			  AND s.repository_id = ?
		`;
		const params: unknown[] = [toEmbeddingBuffer(queryEmbedding), totalRows, repositoryId];

		if (versionId !== undefined) {
			sql += ' AND s.version_id = ?';
			params.push(versionId);
		}

		sql += ' ORDER BY vec.distance ASC LIMIT ?';
		params.push(limit);

		const rows = this.db.prepare<unknown[], RawKnnRow>(sql).all(...params);

		return rows.map((row) => ({
			snippetId: row.snippet_id,
			score: distanceToScore(row.distance),
			distance: row.distance
		}));
	}

	/**
	 * Reconciles the profile's vector tables with `snippet_embeddings`:
	 *
	 * 1. drops mapping rows that have no matching canonical embedding (same
	 *    profile and dimensions) or no matching vector row,
	 * 2. drops vector rows that no longer have a mapping row,
	 * 3. inserts vectors for canonical embeddings that are not mapped yet.
	 *
	 * @returns Number of vectors that have a mapping row after reconciliation.
	 */
	private synchronizeProfileStore(profileId: string, tables: ProfileStoreTables): number {
		this.db
			.prepare<[string, number]>(
				`DELETE FROM ${tables.quotedRowidTableName}
				 WHERE rowid IN (
				 	SELECT rowids.rowid
				 	FROM ${tables.quotedRowidTableName} rowids
				 	LEFT JOIN snippet_embeddings se
				 		ON se.snippet_id = rowids.snippet_id
				 		AND se.profile_id = ?
				 		AND se.dimensions = ?
				 	LEFT JOIN ${tables.quotedVectorTableName} vec ON vec.rowid = rowids.rowid
				 	WHERE se.snippet_id IS NULL OR vec.rowid IS NULL
				 )`
			)
			.run(profileId, tables.dimensions);

		this.db
			.prepare(
				`DELETE FROM ${tables.quotedVectorTableName}
				 WHERE rowid NOT IN (SELECT rowid FROM ${tables.quotedRowidTableName})`
			)
			.run();

		const missingRows = this.db
			.prepare<[string, number], CanonicalEmbeddingRow>(
				`SELECT se.snippet_id, se.embedding
				 FROM snippet_embeddings se
				 LEFT JOIN ${tables.quotedRowidTableName} rowids ON rowids.snippet_id = se.snippet_id
				 WHERE se.profile_id = ?
				   AND se.dimensions = ?
				   AND rowids.snippet_id IS NULL`
			)
			.all(profileId, tables.dimensions);

		if (missingRows.length > 0) {
			// The tables were resolved and created by the caller, so rows are
			// written directly instead of re-resolving the profile for each one.
			const backfill = this.db.transaction((rows: CanonicalEmbeddingRow[]) => {
				for (const row of rows) {
					this.writeEmbedding(
						tables,
						row.snippet_id,
						toFloat32Array(row.embedding, tables.dimensions)
					);
				}
			});
			backfill(missingRows);
		}

		const countRow = this.db
			.prepare<[], { count: number }>(
				`SELECT COUNT(*) AS count
				 FROM ${tables.quotedVectorTableName} vec
				 JOIN ${tables.quotedRowidTableName} rowids ON rowids.rowid = vec.rowid`
			)
			.get();

		return countRow?.count ?? 0;
	}

	/**
	 * Deletes the vectors identified by `rows` inside one transaction. Each
	 * profile's tables are resolved (and created if missing) once, on first use.
	 *
	 * @throws Error when a referenced profile does not exist.
	 */
	private deleteEmbeddingRefs(rows: StoredEmbeddingRef[]): void {
		if (rows.length === 0) {
			return;
		}

		const removeRows = this.db.transaction((refs: StoredEmbeddingRef[]) => {
			const tablesByProfile = new Map<string, ProfileStoreTables>();

			for (const ref of refs) {
				let tables = tablesByProfile.get(ref.profile_id);
				if (tables === undefined) {
					tables = this.getProfileStoreTables(ref.profile_id);
					this.createTablesIfMissing(tables);
					tablesByProfile.set(ref.profile_id, tables);
				}
				this.removeEmbedding(tables, ref.snippet_id);
			}
		});

		removeRows(rows);
	}

	/** Issues the `CREATE ... IF NOT EXISTS` statements for both per-profile tables. */
	private createTablesIfMissing(tables: ProfileStoreTables): void {
		this.db.exec(`
			CREATE TABLE IF NOT EXISTS ${tables.quotedRowidTableName} (
				rowid INTEGER PRIMARY KEY,
				snippet_id TEXT NOT NULL UNIQUE REFERENCES snippets(id) ON DELETE CASCADE
			);
		`);
		this.db.exec(`
			CREATE VIRTUAL TABLE IF NOT EXISTS ${tables.quotedVectorTableName}
			USING vec0(embedding float[${tables.dimensions}]);
		`);
	}

	/**
	 * Writes `embedding` for `snippetId` into already-resolved, existing tables:
	 * updates the vector in place when the snippet is mapped, otherwise inserts
	 * a vector and records its rowid in the mapping table.
	 */
	private writeEmbedding(
		tables: ProfileStoreTables,
		snippetId: string,
		embedding: Float32Array
	): void {
		const existingRow = this.db
			.prepare<[string], SnippetRowidRow>(
				`SELECT rowid FROM ${tables.quotedRowidTableName} WHERE snippet_id = ?`
			)
			.get(snippetId);

		const embeddingBuffer = toEmbeddingBuffer(embedding);

		if (existingRow) {
			this.db
				.prepare<[Buffer, number]>(
					`UPDATE ${tables.quotedVectorTableName} SET embedding = ? WHERE rowid = ?`
				)
				.run(embeddingBuffer, existingRow.rowid);
			return;
		}

		const insertResult = this.db
			.prepare<[Buffer]>(`INSERT INTO ${tables.quotedVectorTableName} (embedding) VALUES (?)`)
			.run(embeddingBuffer);

		this.db
			.prepare<[number, string]>(
				`INSERT INTO ${tables.quotedRowidTableName} (rowid, snippet_id) VALUES (?, ?)`
			)
			.run(Number(insertResult.lastInsertRowid), snippetId);
	}

	/**
	 * Deletes the vector row and the mapping row for `snippetId` from
	 * already-resolved, existing tables. No-op when the snippet is not mapped.
	 */
	private removeEmbedding(tables: ProfileStoreTables, snippetId: string): void {
		const existingRow = this.db
			.prepare<[string], SnippetRowidRow>(
				`SELECT rowid FROM ${tables.quotedRowidTableName} WHERE snippet_id = ?`
			)
			.get(snippetId);

		if (!existingRow) {
			return;
		}

		this.db
			.prepare<[number]>(`DELETE FROM ${tables.quotedVectorTableName} WHERE rowid = ?`)
			.run(existingRow.rowid);
		this.db
			.prepare<[string]>(`DELETE FROM ${tables.quotedRowidTableName} WHERE snippet_id = ?`)
			.run(snippetId);
	}

	/**
	 * Loads the sqlite-vec extension, verifies that the profile exists, and
	 * resolves the profile's table names and effective vector width.
	 *
	 * @throws Error when the profile does not exist or the dimensions cannot be
	 *   resolved consistently (see {@link resolveDimensions}).
	 */
	private getProfileStoreTables(
		profileId: string,
		preferredDimensions?: number
	): ProfileStoreTables {
		loadSqliteVec(this.db);

		const dimensionsRow = this.db
			.prepare<[string], ProfileDimensionsRow>(
				'SELECT dimensions FROM embedding_profiles WHERE id = ?'
			)
			.get(profileId);

		if (!dimensionsRow) {
			throw new Error(`Embedding profile not found: ${profileId}`);
		}

		const storedDimensions = this.db
			.prepare<[string], StoredDimensionsRow>(
				`SELECT
					COUNT(*) AS count,
					MIN(dimensions) AS min_dimensions,
					MAX(dimensions) AS max_dimensions
				 FROM snippet_embeddings
				 WHERE profile_id = ?`
			)
			.get(profileId);

		const effectiveDimensions = this.resolveDimensions(
			profileId,
			dimensionsRow.dimensions,
			storedDimensions,
			preferredDimensions
		);

		const vectorTableName = sqliteVecTableName(profileId);
		const rowidTableName = sqliteVecRowidTableName(profileId);

		return {
			vectorTableName,
			rowidTableName,
			quotedVectorTableName: quoteSqliteIdentifier(vectorTableName),
			quotedRowidTableName: quoteSqliteIdentifier(rowidTableName),
			dimensions: effectiveDimensions
		};
	}

	/**
	 * Chooses the vector width for a profile.
	 *
	 * When embeddings are already stored for the profile, their (single) width
	 * wins and `preferredDimensions`, if given, must equal it. Otherwise
	 * `preferredDimensions` is used, falling back to the width configured on the
	 * profile.
	 *
	 * @throws Error when stored widths disagree with each other, are missing, or
	 *   differ from `preferredDimensions`.
	 */
	private resolveDimensions(
		profileId: string,
		profileDimensions: number,
		storedDimensions: StoredDimensionsRow | undefined,
		preferredDimensions?: number
	): number {
		if (storedDimensions && storedDimensions.count > 0) {
			if (storedDimensions.min_dimensions !== storedDimensions.max_dimensions) {
				throw new Error(`Stored embedding dimensions are inconsistent for profile ${profileId}`);
			}

			const canonicalDimensions = storedDimensions.min_dimensions;
			if (canonicalDimensions === null) {
				throw new Error(`Stored embedding dimensions are missing for profile ${profileId}`);
			}

			if (preferredDimensions !== undefined && preferredDimensions !== canonicalDimensions) {
				throw new Error(
					`Embedding dimension mismatch for profile ${profileId}: expected ${canonicalDimensions}, received ${preferredDimensions}`
				);
			}

			return canonicalDimensions;
		}

		return preferredDimensions ?? profileDimensions;
	}
}