feat(TRUEREF-0020): add embedding profiles, default local embeddings, and version-scoped semantic retrieval

- Add embedding_profiles table with provider registry pattern
- Install @xenova/transformers as runtime dependency
- Update snippet_embeddings with composite PK (snippet_id, profile_id)
- Seed default local profile using Xenova/all-MiniLM-L6-v2
- Add provider registry (local-transformers, openai-compatible)
- Update EmbeddingService to persist and retrieve by profileId
- Add version-scoped VectorSearch with optional versionId filtering
- Add searchMode (auto|keyword|semantic|hybrid) to HybridSearchService
- Update API /context route to load active profile, support searchMode/alpha params
- Extend MCP query-docs tool with searchMode and alpha parameters
- Update settings API to work with embedding_profiles table
- Add comprehensive test coverage for profiles, registry, version scoping

Status: 445/451 tests passing, core feature complete
This commit is contained in:
Giancarmine Salucci
2026-03-25 19:16:37 +01:00
parent fef6f66930
commit 169df4d984
19 changed files with 2668 additions and 246 deletions

View File

@@ -248,6 +248,99 @@ describe('OpenAIEmbeddingProvider', () => {
});
});
// ---------------------------------------------------------------------------
// Migration Tests — embedding_profiles table
// ---------------------------------------------------------------------------
/**
 * Migration tests for the `embedding_profiles` table.
 *
 * Each test builds its own database through `createTestDb()` and inspects it
 * with raw SQL, so the assertions are against the physical schema and seed
 * data (snake_case column names), not against an ORM mapping.
 */
describe('Migration — embedding_profiles', () => {
	it('creates the embedding_profiles table', () => {
		const { client } = createTestDb();
		// sqlite_master lists schema objects; exactly one matching row means the table exists.
		const tables = client
			.prepare("SELECT name FROM sqlite_master WHERE type='table' AND name='embedding_profiles'")
			.all();
		expect(tables).toHaveLength(1);
	});

	it('seeds the default local profile', () => {
		const { client } = createTestDb();
		const row = client
			.prepare("SELECT * FROM embedding_profiles WHERE id = 'local-default'")
			.get();
		// The row is left untyped (no `as any`): toMatchObject checks the seeded
		// columns structurally and fails with a readable diff if any is wrong.
		expect(row).toBeDefined();
		expect(row).toMatchObject({
			is_default: 1, // asserted as the integer 1, as read back through raw SQL
			provider_kind: 'local-transformers',
			model: 'Xenova/all-MiniLM-L6-v2',
			dimensions: 384
		});
	});
});
// ---------------------------------------------------------------------------
// Provider Registry Tests
// ---------------------------------------------------------------------------
/**
 * Provider registry tests.
 *
 * `createProviderFromProfile` is expected to map a profile's `providerKind`
 * to a provider whose `name` identifies the implementation:
 * 'local-transformers' -> 'local', 'openai-compatible' -> 'openai', and any
 * other kind -> 'noop'.
 */
describe('Provider Registry', () => {
	/**
	 * Loads the registry lazily via dynamic `import()` instead of `require()`.
	 * A dynamic import goes through the same TypeScript-aware module resolution
	 * as static imports (so the `.js` specifier can resolve to the `.ts`
	 * source) and keeps the factory fully typed, whereas `require` yields `any`.
	 */
	const loadFactory = async () => {
		const registry = await import('./registry.js');
		return registry.createProviderFromProfile;
	};

	it('creates LocalEmbeddingProvider for local-transformers', async () => {
		const createProviderFromProfile = await loadFactory();
		const profile: schema.EmbeddingProfile = {
			id: 'test-local',
			providerKind: 'local-transformers',
			title: 'Test Local',
			enabled: true,
			isDefault: false,
			model: 'Xenova/all-MiniLM-L6-v2',
			dimensions: 384,
			config: {},
			createdAt: Date.now(),
			updatedAt: Date.now()
		};

		const provider = createProviderFromProfile(profile);

		expect(provider.name).toBe('local');
		expect(provider.model).toBe('Xenova/all-MiniLM-L6-v2');
		expect(provider.dimensions).toBe(384);
	});

	it('creates OpenAIEmbeddingProvider for openai-compatible', async () => {
		const createProviderFromProfile = await loadFactory();
		const profile: schema.EmbeddingProfile = {
			id: 'test-openai',
			providerKind: 'openai-compatible',
			title: 'Test OpenAI',
			enabled: true,
			isDefault: false,
			model: 'text-embedding-3-small',
			dimensions: 1536,
			// Provider-specific settings live in `config`; the key is a dummy value.
			config: {
				baseUrl: 'https://api.openai.com/v1',
				apiKey: 'test-key',
				model: 'text-embedding-3-small'
			},
			createdAt: Date.now(),
			updatedAt: Date.now()
		};

		const provider = createProviderFromProfile(profile);

		expect(provider.name).toBe('openai');
		expect(provider.model).toBe('text-embedding-3-small');
	});

	it('returns NoopEmbeddingProvider for unknown providerKind', async () => {
		const createProviderFromProfile = await loadFactory();
		const profile: schema.EmbeddingProfile = {
			id: 'test-unknown',
			providerKind: 'unknown-provider',
			title: 'Unknown',
			enabled: true,
			isDefault: false,
			model: 'unknown',
			dimensions: 0,
			config: {},
			createdAt: Date.now(),
			updatedAt: Date.now()
		};

		// An unrecognised kind must degrade to the no-op provider, not throw.
		const provider = createProviderFromProfile(profile);

		expect(provider.name).toBe('noop');
	});
});
// ---------------------------------------------------------------------------
// EmbeddingService — storage logic
// ---------------------------------------------------------------------------
@@ -281,23 +374,36 @@ describe('EmbeddingService', () => {
// Embeds one seeded snippet and checks the row written to snippet_embeddings:
// its model, dimensions, profile_id, and that the embedding column is a Buffer.
// NOTE(review): this span is a rendered diff hunk with the +/- markers stripped,
// so `service`, `rows` and `row` each appear twice (removed line, then added line).
it('stores embeddings in snippet_embeddings table', async () => {
const snippetId = seedSnippet(db, client);
const provider = makeProvider(4);
const service = new EmbeddingService(client, provider);
const service = new EmbeddingService(client, provider, 'test-profile');
await service.embedSnippets([snippetId]);
const rows = client.prepare('SELECT * FROM snippet_embeddings WHERE snippet_id = ?').all(snippetId);
// The profile-aware variant filters on both snippet_id and profile_id.
const rows = client
.prepare('SELECT * FROM snippet_embeddings WHERE snippet_id = ? AND profile_id = ?')
.all(snippetId, 'test-profile');
expect(rows).toHaveLength(1);
const row = rows[0] as { model: string; dimensions: number; embedding: Buffer };
const row = rows[0] as { model: string; dimensions: number; embedding: Buffer; profile_id: string };
// 'test-model' and 4 presumably come from makeProvider(4) — helper not visible here; confirm.
expect(row.model).toBe('test-model');
expect(row.dimensions).toBe(4);
expect(row.profile_id).toBe('test-profile');
expect(row.embedding).toBeInstanceOf(Buffer);
});
// Embeds one seeded snippet, reads it back with getEmbedding(snippetId, 'test-profile'),
// and checks that the result is a Float32Array of length 3 with the expected values.
// NOTE(review): rendered diff hunk with markers stripped — the two `service`
// declarations below are the removed and the added version of the same line.
it('stores embeddings as retrievable Float32Array blobs', async () => {
const snippetId = seedSnippet(db, client);
const provider = makeProvider(3);
const service = new EmbeddingService(client, provider);
const service = new EmbeddingService(client, provider, 'test-profile');
await service.embedSnippets([snippetId]);
const embedding = service.getEmbedding(snippetId, 'test-profile');
expect(embedding).toBeInstanceOf(Float32Array);
expect(embedding).toHaveLength(3);
// Expected values 0.0, 0.1, 0.2 presumably mirror what makeProvider(3) emits — confirm against the helper.
// toBeCloseTo is given a precision of 5 digits rather than comparing exactly.
expect(embedding![0]).toBeCloseTo(0.0, 5);
expect(embedding![1]).toBeCloseTo(0.1, 5);
expect(embedding![2]).toBeCloseTo(0.2, 5);
});
await service.embedSnippets([snippetId]);