feat(TRUEREF-0023): add sqlite-vec search pipeline

This commit is contained in:
Giancarmine Salucci
2026-04-01 14:09:19 +02:00
parent 0752636847
commit 9525c58e9a
45 changed files with 4009 additions and 614 deletions

View File

@@ -15,6 +15,8 @@ import { HybridSearchService } from './hybrid.search.service.js';
import { VectorSearch, cosineSimilarity } from './vector.search.js';
import { reciprocalRankFusion } from './rrf.js';
import type { EmbeddingProvider, EmbeddingVector } from '../embeddings/provider.js';
import { loadSqliteVec } from '../db/sqlite-vec.js';
import { SqliteVecStore } from './sqlite-vec.store.js';
// ---------------------------------------------------------------------------
// In-memory DB factory
@@ -23,6 +25,7 @@ import type { EmbeddingProvider, EmbeddingVector } from '../embeddings/provider.
function createTestDb(): Database.Database {
const client = new Database(':memory:');
client.pragma('foreign_keys = ON');
loadSqliteVec(client);
const migrationsFolder = join(import.meta.dirname, '../db/migrations');
@@ -30,7 +33,11 @@ function createTestDb(): Database.Database {
const migrations = [
'0000_large_master_chief.sql',
'0001_quick_nighthawk.sql',
'0002_silky_stellaris.sql'
'0002_silky_stellaris.sql',
'0003_multiversion_config.sql',
'0004_complete_sentry.sql',
'0005_fix_stage_defaults.sql',
'0006_yielding_centennial.sql'
];
for (const migrationFile of migrations) {
const migrationSql = readFileSync(join(migrationsFolder, migrationFile), 'utf-8');
@@ -121,6 +128,7 @@ function seedEmbedding(
VALUES (?, ?, ?, ?, ?, ?)`
)
.run(snippetId, profileId, model, values.length, Buffer.from(f32.buffer), NOW_S);
new SqliteVecStore(client).upsertEmbedding(profileId, snippetId, f32);
}
// ---------------------------------------------------------------------------
@@ -368,6 +376,42 @@ describe('VectorSearch', () => {
const results = vs.vectorSearch(new Float32Array([-0.5, 0.5]), { repositoryId: repoId });
expect(results[0].score).toBeCloseTo(1.0, 4);
});
it('filters by profileId using per-profile vec tables', () => {
	// Register an additional, non-default embedding profile with 2 dimensions.
	// Bind values follow the column order of the INSERT statement below.
	const secondaryProfileRow = [
		'secondary-profile', // id
		'local-transformers', // provider_kind
		'Secondary', // title
		1, // enabled
		0, // is_default
		'test-model', // model
		2, // dimensions
		'{}', // config
		NOW_S, // created_at
		NOW_S // updated_at
	];
	const insertProfile = client.prepare(
		`INSERT INTO embedding_profiles (id, provider_kind, title, enabled, is_default, model, dimensions, config, created_at, updated_at)
VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?)`
	);
	insertProfile.run(...secondaryProfileRow);

	// One snippet per profile, both living in the same repository and document.
	const snippetForDefault = seedSnippet(client, {
		repositoryId: repoId,
		documentId: docId,
		content: 'default profile snippet'
	});
	const snippetForSecondary = seedSnippet(client, {
		repositoryId: repoId,
		documentId: docId,
		content: 'secondary profile snippet'
	});

	// Both snippets get the same vector, so the profile id is the only thing
	// that distinguishes them in the searches below.
	// NOTE(review): 'local-default' is presumably created by the suite setup — confirm.
	seedEmbedding(client, snippetForDefault, [1, 0], 'local-default');
	seedEmbedding(client, snippetForSecondary, [1, 0], 'secondary-profile');

	const search = new VectorSearch(client);
	const hitsForDefault = search.vectorSearch(new Float32Array([1, 0]), {
		repositoryId: repoId,
		profileId: 'local-default'
	});
	const hitsForSecondary = search.vectorSearch(new Float32Array([1, 0]), {
		repositoryId: repoId,
		profileId: 'secondary-profile'
	});

	// Each profile-scoped search must return exactly its own snippet.
	expect(hitsForDefault.map((hit) => hit.snippetId)).toEqual([snippetForDefault]);
	expect(hitsForSecondary.map((hit) => hit.snippetId)).toEqual([snippetForSecondary]);
});
});
// ===========================================================================