import { randomUUID } from 'node:crypto';
import type Database from 'better-sqlite3';
import type { NewDocument, NewSnippet } from '$lib/types';
import { SqliteVecStore } from '$lib/server/search/sqlite-vec.store.js';
import type {
	SerializedDocument,
	SerializedEmbedding,
	SerializedFields,
	SerializedSnippet
} from './worker-types.js';

/** Document fields needed for insertion; `indexedAt` may be a Date or an epoch number. */
type DocumentLike = Pick<
	NewDocument,
	| 'id'
	| 'repositoryId'
	| 'versionId'
	| 'filePath'
	| 'title'
	| 'language'
	| 'tokenCount'
	| 'checksum'
> & {
	indexedAt: Date | number;
};

/** Snippet fields needed for insertion; `createdAt` may be a Date or an epoch number. */
type SnippetLike = Pick<
	NewSnippet,
	| 'id'
	| 'documentId'
	| 'repositoryId'
	| 'versionId'
	| 'type'
	| 'title'
	| 'content'
	| 'language'
	| 'breadcrumb'
	| 'tokenCount'
> & {
	createdAt: Date | number;
};

/** Parameters for {@link cloneFromAncestor}. */
export interface CloneFromAncestorRequest {
	/** Version whose documents are used as the copy source. */
	ancestorVersionId: string;
	/** Version id written onto every cloned document and snippet. */
	targetVersionId: string;
	/** Repository id written onto every cloned document and snippet. */
	repositoryId: string;
	/** File paths (matched against `documents.file_path`) to clone. */
	unchangedPaths: string[];
}

/** One embedding row as accepted by {@link upsertEmbeddings}. */
export interface PersistedEmbedding {
	snippetId: string;
	profileId: string;
	model: string;
	dimensions: number;
	embedding: Buffer | Uint8Array;
}

/** Columns read from `documents` rows while cloning. */
type AncestorDocumentRow = {
	id: string;
	repository_id: string;
	file_path: string;
	title: string | null;
	language: string | null;
	token_count: number;
	checksum: string;
	indexed_at: number;
};

/** Columns read from `snippets` rows while cloning. */
type AncestorSnippetRow = {
	id: string;
	document_id: string;
	repository_id: string;
	version_id: string | null;
	type: string;
	title: string | null;
	content: string;
	language: string | null;
	breadcrumb: string | null;
	token_count: number;
	created_at: number;
};

/** Columns read from `snippet_embeddings` rows while cloning. */
type AncestorEmbeddingRow = {
	snippet_id: string;
	profile_id: string;
	model: string;
	dimensions: number;
	embedding: Buffer;
	created_at: number;
};

/**
 * Maximum number of keys bound into a single `IN (...)` list.
 *
 * SQLite caps the number of bound parameters per statement
 * (SQLITE_MAX_VARIABLE_NUMBER, historically 999 by default), so long key lists
 * are split into chunks that stay below that cap even with one extra leading
 * parameter.
 */
const IN_CLAUSE_CHUNK_SIZE = 900;

/**
 * Converts a Date to whole epoch seconds; numbers are returned unchanged.
 *
 * @param value - A Date, or a number that is passed through as-is.
 */
function toEpochSeconds(value: Date | number): number {
	if (value instanceof Date) {
		return Math.floor(value.getTime() / 1000);
	}
	return value;
}

/** Converts a camelCase key to snake_case by prefixing each ASCII capital with `_`. */
function toSnake(key: string): string {
	return key.replace(/[A-Z]/g, (char) => `_${char.toLowerCase()}`);
}

/** Splits `items` into consecutive slices of at most `size` elements. */
function chunked<T>(items: readonly T[], size: number): T[][] {
	const chunks: T[][] = [];
	for (let start = 0; start < items.length; start += size) {
		chunks.push(items.slice(start, start + size));
	}
	return chunks;
}

/** Builds a comma-separated list of `count` positional placeholders. */
function placeholdersFor(count: number): string {
	return Array.from({ length: count }, () => '?').join(',');
}

/**
 * Runs a `SELECT ... IN (...)` query over `keys` in bounded chunks and returns
 * all rows.
 *
 * @param db - Open database handle.
 * @param sqlFor - Builds the statement text for a given placeholder list.
 * @param leadingParams - Parameters bound before the chunk's keys.
 * @param keys - Values for the `IN (...)` list; callers must pass distinct
 *   values, otherwise a row could be returned once per chunk that contains its key.
 */
function selectInChunks<Row>(
	db: Database.Database,
	sqlFor: (placeholders: string) => string,
	leadingParams: readonly unknown[],
	keys: readonly string[]
): Row[] {
	const rows: Row[] = [];
	for (const keyChunk of chunked(keys, IN_CLAUSE_CHUNK_SIZE)) {
		const found = db
			.prepare(sqlFor(placeholdersFor(keyChunk.length)))
			.all(...leadingParams, ...keyChunk) as Row[];
		// Plain loop instead of push(...found): a chunk may match many rows.
		for (const row of found) {
			rows.push(row);
		}
	}
	return rows;
}

/**
 * Shared implementation of {@link replaceSnippets} and
 * {@link replaceSerializedSnippets}.
 *
 * In one transaction: asks the vector store to delete embeddings for
 * `changedDocIds`, deletes those rows from `documents`, then inserts the new
 * documents and snippets.
 */
function replaceSnippetsInternal(
	db: Database.Database,
	changedDocIds: string[],
	newDocuments: DocumentLike[],
	newSnippets: SnippetLike[]
): void {
	const sqliteVecStore = new SqliteVecStore(db);
	const insertDoc = db.prepare(
		`INSERT INTO documents (id, repository_id, version_id, file_path, title, language, token_count, checksum, indexed_at) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)`
	);
	const insertSnippet = db.prepare(
		`INSERT INTO snippets (id, document_id, repository_id, version_id, type, title, content, language, breadcrumb, token_count, created_at) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)`
	);

	db.transaction(() => {
		sqliteVecStore.deleteEmbeddingsForDocumentIds(changedDocIds);

		// Only `documents` rows are deleted here; snippet rows are presumably
		// removed by a foreign-key cascade — confirm against the schema.
		// Chunked so a large change set cannot exceed the bound-parameter limit.
		for (const idChunk of chunked(changedDocIds, IN_CLAUSE_CHUNK_SIZE)) {
			db.prepare(`DELETE FROM documents WHERE id IN (${placeholdersFor(idChunk.length)})`).run(
				...idChunk
			);
		}

		for (const doc of newDocuments) {
			insertDoc.run(
				doc.id,
				doc.repositoryId,
				doc.versionId ?? null,
				doc.filePath,
				doc.title ?? null,
				doc.language ?? null,
				doc.tokenCount ?? 0,
				doc.checksum,
				toEpochSeconds(doc.indexedAt)
			);
		}

		for (const snippet of newSnippets) {
			insertSnippet.run(
				snippet.id,
				snippet.documentId,
				snippet.repositoryId,
				snippet.versionId ?? null,
				snippet.type,
				snippet.title ?? null,
				snippet.content,
				snippet.language ?? null,
				snippet.breadcrumb ?? null,
				snippet.tokenCount ?? 0,
				toEpochSeconds(snippet.createdAt)
			);
		}
	})();
}

/**
 * Replaces the stored documents/snippets for a set of changed documents.
 *
 * @param db - Open database handle.
 * @param changedDocIds - Ids of existing documents to remove first.
 * @param newDocuments - Documents to insert.
 * @param newSnippets - Snippets to insert.
 */
export function replaceSnippets(
	db: Database.Database,
	changedDocIds: string[],
	newDocuments: NewDocument[],
	newSnippets: NewSnippet[]
): void {
	replaceSnippetsInternal(db, changedDocIds, newDocuments, newSnippets);
}

/**
 * Same as {@link replaceSnippets}, but accepts the serialized document and
 * snippet shapes from `worker-types`.
 */
export function replaceSerializedSnippets(
	db: Database.Database,
	changedDocIds: string[],
	documents: SerializedDocument[],
	snippets: SerializedSnippet[]
): void {
	replaceSnippetsInternal(db, changedDocIds, documents, snippets);
}

/**
 * Copies documents, their snippets, and the snippets' embeddings from an
 * ancestor version into a target version, assigning fresh UUIDs to the copied
 * documents and snippets.
 *
 * Everything runs in a single transaction. Nothing is written when
 * `unchangedPaths` is empty or matches no ancestor document. Cloned documents
 * get the current time as `indexed_at`; cloned snippets and embeddings keep
 * their original `created_at`.
 *
 * @param db - Open database handle.
 * @param request - Source/target versions, repository id, and paths to clone.
 * @throws Error if a fetched snippet or embedding refers to a parent row that
 *   was not cloned (should be impossible, since rows are selected by those ids).
 */
export function cloneFromAncestor(db: Database.Database, request: CloneFromAncestorRequest): void {
	const sqliteVecStore = new SqliteVecStore(db);
	const { ancestorVersionId, targetVersionId, repositoryId, unchangedPaths } = request;

	db.transaction(() => {
		// De-duplicate so a path repeated across chunks cannot clone a document twice.
		const pathList = [...new Set(unchangedPaths)];
		if (pathList.length === 0) {
			return;
		}

		// --- documents -------------------------------------------------------
		const ancestorDocs = selectInChunks<AncestorDocumentRow>(
			db,
			(placeholders) =>
				`SELECT * FROM documents WHERE version_id = ? AND file_path IN (${placeholders})`,
			[ancestorVersionId],
			pathList
		);

		const insertDoc = db.prepare(
			`INSERT INTO documents (id, repository_id, version_id, file_path, title, language, token_count, checksum, indexed_at) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)`
		);
		// Maps ancestor document id -> cloned document id.
		const docIdMap = new Map<string, string>();
		const nowEpoch = Math.floor(Date.now() / 1000);

		for (const doc of ancestorDocs) {
			const newDocId = randomUUID();
			docIdMap.set(doc.id, newDocId);
			insertDoc.run(
				newDocId,
				repositoryId,
				targetVersionId,
				doc.file_path,
				doc.title,
				doc.language,
				doc.token_count,
				doc.checksum,
				nowEpoch
			);
		}

		if (docIdMap.size === 0) {
			return;
		}

		// --- snippets --------------------------------------------------------
		const ancestorSnippets = selectInChunks<AncestorSnippetRow>(
			db,
			(placeholders) => `SELECT * FROM snippets WHERE document_id IN (${placeholders})`,
			[],
			[...docIdMap.keys()]
		);

		const insertSnippet = db.prepare(
			`INSERT INTO snippets (id, document_id, repository_id, version_id, type, title, content, language, breadcrumb, token_count, created_at) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)`
		);
		// Maps ancestor snippet id -> cloned snippet id.
		const snippetIdMap = new Map<string, string>();

		for (const snippet of ancestorSnippets) {
			const newDocId = docIdMap.get(snippet.document_id);
			if (newDocId === undefined) {
				throw new Error(`cloneFromAncestor: no cloned document for snippet ${snippet.id}`);
			}
			const newSnippetId = randomUUID();
			snippetIdMap.set(snippet.id, newSnippetId);
			insertSnippet.run(
				newSnippetId,
				newDocId,
				repositoryId,
				targetVersionId,
				snippet.type,
				snippet.title,
				snippet.content,
				snippet.language,
				snippet.breadcrumb,
				snippet.token_count,
				snippet.created_at
			);
		}

		if (snippetIdMap.size === 0) {
			return;
		}

		// --- embeddings ------------------------------------------------------
		const ancestorEmbeddings = selectInChunks<AncestorEmbeddingRow>(
			db,
			(placeholders) => `SELECT * FROM snippet_embeddings WHERE snippet_id IN (${placeholders})`,
			[],
			[...snippetIdMap.keys()]
		);

		const insertEmbedding = db.prepare(
			`INSERT INTO snippet_embeddings (snippet_id, profile_id, model, dimensions, embedding, created_at) VALUES (?, ?, ?, ?, ?, ?)`
		);

		for (const emb of ancestorEmbeddings) {
			const newSnippetId = snippetIdMap.get(emb.snippet_id);
			if (newSnippetId === undefined) {
				throw new Error(`cloneFromAncestor: no cloned snippet for embedding of ${emb.snippet_id}`);
			}
			insertEmbedding.run(
				newSnippetId,
				emb.profile_id,
				emb.model,
				emb.dimensions,
				emb.embedding,
				emb.created_at
			);
			// Mirror the row into the vector store alongside the table insert.
			sqliteVecStore.upsertEmbeddingBuffer(
				emb.profile_id,
				newSnippetId,
				emb.embedding,
				emb.dimensions
			);
		}
	})();
}

/**
 * Inserts or replaces embedding rows and mirrors each one into the vector
 * store, all inside one transaction. `created_at` is set by SQLite's
 * `unixepoch()`. Does nothing for an empty list.
 *
 * @param db - Open database handle.
 * @param embeddings - Embeddings to persist; `Uint8Array` payloads are copied
 *   into a Buffer before binding.
 */
export function upsertEmbeddings(db: Database.Database, embeddings: PersistedEmbedding[]): void {
	if (embeddings.length === 0) {
		return;
	}

	const sqliteVecStore = new SqliteVecStore(db);
	const insert = db.prepare<[string, string, string, number, Buffer]>(`
		INSERT OR REPLACE INTO snippet_embeddings (snippet_id, profile_id, model, dimensions, embedding, created_at)
		VALUES (?, ?, ?, ?, ?, unixepoch())
	`);

	db.transaction(() => {
		for (const item of embeddings) {
			const embeddingBuffer = Buffer.isBuffer(item.embedding)
				? item.embedding
				: Buffer.from(item.embedding);
			insert.run(item.snippetId, item.profileId, item.model, item.dimensions, embeddingBuffer);
			sqliteVecStore.upsertEmbeddingBuffer(
				item.profileId,
				item.snippetId,
				embeddingBuffer,
				item.dimensions
			);
		}
	})();
}

/**
 * Adapts serialized embeddings to {@link PersistedEmbedding} and forwards them
 * to {@link upsertEmbeddings}.
 */
export function upsertSerializedEmbeddings(
	db: Database.Database,
	embeddings: SerializedEmbedding[]
): void {
	upsertEmbeddings(
		db,
		embeddings.map((item) => ({
			snippetId: item.snippetId,
			profileId: item.profileId,
			model: item.model,
			dimensions: item.dimensions,
			embedding: item.embedding
		}))
	);
}

/**
 * Issues `UPDATE <table> SET ... WHERE id = ?` with one assignment per key of
 * `fields` (keys are converted to snake_case column names).
 *
 * An empty `fields` object is a no-op, since `UPDATE ... SET WHERE` would be a
 * SQL syntax error.
 *
 * NOTE(review): column names are interpolated into the statement text, so
 * `fields` keys must come from trusted code, never from external input.
 */
function updateRowById(
	db: Database.Database,
	table: 'repositories' | 'indexing_jobs' | 'repository_versions',
	id: string,
	fields: object
): void {
	const keys = Object.keys(fields);
	if (keys.length === 0) {
		return;
	}
	const sets = keys.map((key) => `${toSnake(key)} = ?`).join(', ');
	const values = [...Object.values(fields), id];
	db.prepare(`UPDATE ${table} SET ${sets} WHERE id = ?`).run(...values);
}

/**
 * Updates columns of one `repositories` row and always sets `updated_at` to
 * the current epoch seconds (overriding any `updatedAt` in `fields`).
 *
 * @param db - Open database handle.
 * @param repositoryId - Value matched against `repositories.id`.
 * @param fields - camelCase field names mapped to the values to store.
 */
export function updateRepo(
	db: Database.Database,
	repositoryId: string,
	fields: SerializedFields
): void {
	const now = Math.floor(Date.now() / 1000);
	updateRowById(db, 'repositories', repositoryId, { ...fields, updatedAt: now });
}

/**
 * Updates columns of one `indexing_jobs` row. Does nothing when `fields` has
 * no keys.
 *
 * @param db - Open database handle.
 * @param jobId - Value matched against `indexing_jobs.id`.
 * @param fields - camelCase field names mapped to the values to store.
 */
export function updateJob(db: Database.Database, jobId: string, fields: SerializedFields): void {
	updateRowById(db, 'indexing_jobs', jobId, fields);
}

/**
 * Updates columns of one `repository_versions` row. Does nothing when `fields`
 * has no keys.
 *
 * @param db - Open database handle.
 * @param versionId - Value matched against `repository_versions.id`.
 * @param fields - camelCase field names mapped to the values to store.
 */
export function updateVersion(
	db: Database.Database,
	versionId: string,
	fields: SerializedFields
): void {
	updateRowById(db, 'repository_versions', versionId, fields);
}