85 lines
2.6 KiB
TypeScript
85 lines
2.6 KiB
TypeScript
/**
 * Vector similarity search over stored snippet embeddings.
 *
 * Uses sqlite-vec vector_top_k() for ANN search instead of in-memory cosine
 * similarity computation over all embeddings.
 */
|
|
|
|
import type Database from 'better-sqlite3';
|
|
import { SqliteVecStore } from './sqlite-vec.store.js';
|
|
|
|
// ---------------------------------------------------------------------------
// Types
// ---------------------------------------------------------------------------
|
|
|
|
/**
 * A single hit returned by {@link VectorSearch.vectorSearch}.
 */
export interface VectorSearchResult {
  /** Identifier of the matched snippet. */
  snippetId: string;
  /**
   * Relevance score for the snippet, copied through from the underlying store.
   * NOTE(review): presumably higher means more similar — confirm against
   * SqliteVecStore.queryNearestNeighbors.
   */
  score: number;
}
|
|
|
|
/**
 * Filters and limits for a vector search.
 *
 * The object is handed unchanged to SqliteVecStore.queryNearestNeighbors, so
 * how omitted optional fields are treated is decided there, not in this file.
 */
export interface VectorSearchOptions {
  /** Repository to search in (required). */
  repositoryId: string;
  /** Optional version identifier to narrow the search. */
  versionId?: string;
  /** Optional profile identifier to narrow the search. */
  profileId?: string;
  /**
   * Optional cap on the number of results.
   * NOTE(review): the default when omitted is chosen by the store — confirm.
   */
  limit?: number;
}
|
|
|
|
// ---------------------------------------------------------------------------
// Math helpers
// ---------------------------------------------------------------------------
|
|
|
|
/**
 * Compute cosine similarity between two Float32Array vectors.
 *
 * The result is clamped to [-1, 1], where 1 is identical direction. Without
 * the clamp, floating-point rounding in the norm product can push the raw
 * quotient slightly outside that range (e.g. [1, 1, 1] against itself yields
 * 1.0000000000000002), which breaks callers that feed the value to
 * `Math.acos` or compare against exact bounds.
 *
 * @param a - First vector.
 * @param b - Second vector; must have the same length as `a`.
 * @returns The cosine similarity, or 0 when either vector has zero magnitude
 *   (including two empty vectors) to avoid division by zero. NaN components
 *   propagate to a NaN result.
 * @throws Error if the two vectors differ in length.
 */
export function cosineSimilarity(a: Float32Array, b: Float32Array): number {
  if (a.length !== b.length) {
    throw new Error(`Embedding dimension mismatch: ${a.length} vs ${b.length}`);
  }

  let dot = 0;
  let normA = 0;
  let normB = 0;

  // Single pass: accumulate the dot product and both squared magnitudes.
  for (let i = 0; i < a.length; i++) {
    dot += a[i] * b[i];
    normA += a[i] * a[i];
    normB += b[i] * b[i];
  }

  const denom = Math.sqrt(normA) * Math.sqrt(normB);
  if (denom === 0) return 0;

  // Clamp away rounding overshoot so the documented [-1, 1] range holds.
  return Math.max(-1, Math.min(1, dot / denom));
}
|
|
|
|
// ---------------------------------------------------------------------------
// VectorSearch class
// ---------------------------------------------------------------------------
|
|
|
|
/**
 * Thin facade over {@link SqliteVecStore} that exposes nearest-neighbour
 * lookups as plain `{ snippetId, score }` results.
 */
export class VectorSearch {
  /** Store that performs the actual nearest-neighbour query. */
  private readonly sqliteVecStore: SqliteVecStore;

  /**
   * @param db - better-sqlite3 database handle; it is retained on the
   *   instance and also used to construct the backing SqliteVecStore.
   */
  constructor(private readonly db: Database.Database) {
    this.sqliteVecStore = new SqliteVecStore(db);
  }

  /**
   * Find the stored snippets nearest to the query embedding.
   *
   * The lookup is delegated entirely to SqliteVecStore.queryNearestNeighbors;
   * no similarity is computed in this class. Each hit returned by the store
   * is narrowed to its `snippetId` and `score` fields, preserving the store's
   * order.
   *
   * @param queryEmbedding - The embedded representation of the search query.
   * @param options - Search options including repositoryId, optional
   *   versionId, profileId, and limit; passed through to the store unchanged.
   * @returns One result per hit returned by the store, in the store's order.
   *   NOTE(review): presumably sorted by descending similarity — confirm
   *   against SqliteVecStore.queryNearestNeighbors.
   */
  vectorSearch(queryEmbedding: Float32Array, options: VectorSearchOptions): VectorSearchResult[] {
    return this.sqliteVecStore
      .queryNearestNeighbors(queryEmbedding, options)
      .map((result) => ({ snippetId: result.snippetId, score: result.score }));
  }
}
|