feat(TRUEREF-0023): add sqlite-vec search pipeline

This commit is contained in:
Giancarmine Salucci
2026-04-01 14:09:19 +02:00
parent 0752636847
commit 9525c58e9a
45 changed files with 4009 additions and 614 deletions

View File

@@ -12,6 +12,12 @@ import { migrate } from 'drizzle-orm/better-sqlite3/migrator';
import { readFileSync } from 'node:fs';
import { join } from 'node:path';
import * as schema from '../db/schema.js';
import {
loadSqliteVec,
sqliteVecRowidTableName,
sqliteVecTableName
} from '../db/sqlite-vec.js';
import { SqliteVecStore } from '../search/sqlite-vec.store.js';
import { NoopEmbeddingProvider, EmbeddingError, type EmbeddingVector } from './provider.js';
import { OpenAIEmbeddingProvider } from './openai.provider.js';
@@ -31,6 +37,7 @@ import { createProviderFromProfile } from './registry.js';
function createTestDb() {
const client = new Database(':memory:');
client.pragma('foreign_keys = ON');
loadSqliteVec(client);
const db = drizzle(client, { schema });
const migrationsFolder = join(import.meta.dirname, '../db/migrations');
@@ -387,10 +394,19 @@ describe('EmbeddingService', () => {
embedding: Buffer;
profile_id: string;
};
expect((row as Record<string, unknown>).vec_embedding).toBeUndefined();
expect(row.model).toBe('test-model');
expect(row.dimensions).toBe(4);
expect(row.profile_id).toBe('local-default');
expect(row.embedding).toBeInstanceOf(Buffer);
const queryEmbedding = service.getEmbedding(snippetId, 'local-default');
const matches = new SqliteVecStore(client).queryNearestNeighbors(queryEmbedding!, {
repositoryId: '/test/embed-repo',
profileId: 'local-default',
limit: 5
});
expect(matches[0]?.snippetId).toBe(snippetId);
});
it('stores embeddings as retrievable Float32Array blobs', async () => {
@@ -436,6 +452,22 @@ describe('EmbeddingService', () => {
.prepare('SELECT profile_id FROM snippet_embeddings WHERE snippet_id = ?')
.get(snippetId) as { profile_id: string };
expect(row.profile_id).toBe('openai-custom');
const queryEmbedding = service.getEmbedding(snippetId, 'openai-custom');
const store = new SqliteVecStore(client);
const customMatches = store.queryNearestNeighbors(queryEmbedding!, {
repositoryId: '/test/embed-repo',
profileId: 'openai-custom',
limit: 5
});
const defaultMatches = store.queryNearestNeighbors(new Float32Array([1, 0, 0, 0]), {
repositoryId: '/test/embed-repo',
profileId: 'local-default',
limit: 5
});
expect(customMatches[0]?.snippetId).toBe(snippetId);
expect(defaultMatches).toHaveLength(0);
});
it('is idempotent — re-embedding replaces the existing row', async () => {
@@ -450,6 +482,17 @@ describe('EmbeddingService', () => {
.prepare('SELECT COUNT(*) as cnt FROM snippet_embeddings WHERE snippet_id = ?')
.get(snippetId) as { cnt: number };
expect(rows.cnt).toBe(1);
const vecTable = sqliteVecTableName('local-default');
const rowidTable = sqliteVecRowidTableName('local-default');
const vecRows = client.prepare(`SELECT COUNT(*) as cnt FROM "${vecTable}"`).get() as {
cnt: number;
};
const rowidRows = client.prepare(`SELECT COUNT(*) as cnt FROM "${rowidTable}"`).get() as {
cnt: number;
};
expect(vecRows.cnt).toBe(1);
expect(rowidRows.cnt).toBe(1);
});
it('calls onProgress after each batch', async () => {