feat(TRUEREF-0023): add sqlite-vec search pipeline
This commit is contained in:
@@ -12,6 +12,12 @@ import { migrate } from 'drizzle-orm/better-sqlite3/migrator';
|
||||
import { readFileSync } from 'node:fs';
|
||||
import { join } from 'node:path';
|
||||
import * as schema from '../db/schema.js';
|
||||
import {
|
||||
loadSqliteVec,
|
||||
sqliteVecRowidTableName,
|
||||
sqliteVecTableName
|
||||
} from '../db/sqlite-vec.js';
|
||||
import { SqliteVecStore } from '../search/sqlite-vec.store.js';
|
||||
|
||||
import { NoopEmbeddingProvider, EmbeddingError, type EmbeddingVector } from './provider.js';
|
||||
import { OpenAIEmbeddingProvider } from './openai.provider.js';
|
||||
@@ -31,6 +37,7 @@ import { createProviderFromProfile } from './registry.js';
|
||||
function createTestDb() {
|
||||
const client = new Database(':memory:');
|
||||
client.pragma('foreign_keys = ON');
|
||||
loadSqliteVec(client);
|
||||
|
||||
const db = drizzle(client, { schema });
|
||||
const migrationsFolder = join(import.meta.dirname, '../db/migrations');
|
||||
@@ -387,10 +394,19 @@ describe('EmbeddingService', () => {
|
||||
embedding: Buffer;
|
||||
profile_id: string;
|
||||
};
|
||||
expect((row as Record<string, unknown>).vec_embedding).toBeUndefined();
|
||||
expect(row.model).toBe('test-model');
|
||||
expect(row.dimensions).toBe(4);
|
||||
expect(row.profile_id).toBe('local-default');
|
||||
expect(row.embedding).toBeInstanceOf(Buffer);
|
||||
|
||||
const queryEmbedding = service.getEmbedding(snippetId, 'local-default');
|
||||
const matches = new SqliteVecStore(client).queryNearestNeighbors(queryEmbedding!, {
|
||||
repositoryId: '/test/embed-repo',
|
||||
profileId: 'local-default',
|
||||
limit: 5
|
||||
});
|
||||
expect(matches[0]?.snippetId).toBe(snippetId);
|
||||
});
|
||||
|
||||
it('stores embeddings as retrievable Float32Array blobs', async () => {
|
||||
@@ -436,6 +452,22 @@ describe('EmbeddingService', () => {
|
||||
.prepare('SELECT profile_id FROM snippet_embeddings WHERE snippet_id = ?')
|
||||
.get(snippetId) as { profile_id: string };
|
||||
expect(row.profile_id).toBe('openai-custom');
|
||||
|
||||
const queryEmbedding = service.getEmbedding(snippetId, 'openai-custom');
|
||||
const store = new SqliteVecStore(client);
|
||||
const customMatches = store.queryNearestNeighbors(queryEmbedding!, {
|
||||
repositoryId: '/test/embed-repo',
|
||||
profileId: 'openai-custom',
|
||||
limit: 5
|
||||
});
|
||||
const defaultMatches = store.queryNearestNeighbors(new Float32Array([1, 0, 0, 0]), {
|
||||
repositoryId: '/test/embed-repo',
|
||||
profileId: 'local-default',
|
||||
limit: 5
|
||||
});
|
||||
|
||||
expect(customMatches[0]?.snippetId).toBe(snippetId);
|
||||
expect(defaultMatches).toHaveLength(0);
|
||||
});
|
||||
|
||||
it('is idempotent — re-embedding replaces the existing row', async () => {
|
||||
@@ -450,6 +482,17 @@ describe('EmbeddingService', () => {
|
||||
.prepare('SELECT COUNT(*) as cnt FROM snippet_embeddings WHERE snippet_id = ?')
|
||||
.get(snippetId) as { cnt: number };
|
||||
expect(rows.cnt).toBe(1);
|
||||
|
||||
const vecTable = sqliteVecTableName('local-default');
|
||||
const rowidTable = sqliteVecRowidTableName('local-default');
|
||||
const vecRows = client.prepare(`SELECT COUNT(*) as cnt FROM "${vecTable}"`).get() as {
|
||||
cnt: number;
|
||||
};
|
||||
const rowidRows = client.prepare(`SELECT COUNT(*) as cnt FROM "${rowidTable}"`).get() as {
|
||||
cnt: number;
|
||||
};
|
||||
expect(vecRows.cnt).toBe(1);
|
||||
expect(rowidRows.cnt).toBe(1);
|
||||
});
|
||||
|
||||
it('calls onProgress after each batch', async () => {
|
||||
|
||||
Reference in New Issue
Block a user