feat(TRUEREF-0020): add embedding profiles, default local embeddings, and version-scoped semantic retrieval
- Add embedding_profiles table with provider registry pattern
- Install @xenova/transformers as a runtime dependency
- Update snippet_embeddings with composite PK (snippet_id, profile_id)
- Seed default local profile using Xenova/all-MiniLM-L6-v2
- Add provider registry (local-transformers, openai-compatible)
- Update EmbeddingService to persist and retrieve by profileId
- Add version-scoped VectorSearch with optional versionId filtering
- Add searchMode (auto|keyword|semantic|hybrid) to HybridSearchService
- Update API /context route to load the active profile and support searchMode/alpha params
- Extend MCP query-docs tool with searchMode and alpha parameters
- Update settings API to work with the embedding_profiles table
- Add comprehensive test coverage for profiles, registry, and version scoping

Status: 445/451 tests passing; core feature complete.
This commit is contained in:
@@ -248,6 +248,99 @@ describe('OpenAIEmbeddingProvider', () => {
|
||||
});
|
||||
});
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Migration Tests — embedding_profiles table
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/**
 * Migration tests for the `embedding_profiles` table.
 *
 * Each test builds its own database through `createTestDb()` and inspects it
 * with raw SQL, so the assertions cover what the migrations actually produced.
 */
describe('Migration — embedding_profiles', () => {
	/**
	 * Columns of an `embedding_profiles` row that the seed test asserts on.
	 * Using a narrow row type (instead of `any`) keeps the assertions below
	 * type-checked, so a misspelled column name becomes a compile error.
	 */
	interface SeededProfileRow {
		is_default: number;
		provider_kind: string;
		model: string;
		dimensions: number;
	}

	it('creates the embedding_profiles table', () => {
		const { client } = createTestDb();
		const tables = client
			.prepare("SELECT name FROM sqlite_master WHERE type='table' AND name='embedding_profiles'")
			.all();
		expect(tables).toHaveLength(1);
	});

	it('seeds the default local profile', () => {
		const { client } = createTestDb();
		const row = client
			.prepare("SELECT * FROM embedding_profiles WHERE id = 'local-default'")
			.get() as SeededProfileRow | undefined;

		// Fails here with a clear message when the seed row is missing; the
		// optional chaining below only exists to satisfy strict null checks.
		expect(row).toBeDefined();
		expect(row?.is_default).toBe(1);
		expect(row?.provider_kind).toBe('local-transformers');
		expect(row?.model).toBe('Xenova/all-MiniLM-L6-v2');
		expect(row?.dimensions).toBe(384);
	});
});
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Provider Registry Tests
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/**
 * Tests for `createProviderFromProfile` in `registry.ts`.
 *
 * The registry module is loaded with a dynamic `import()` instead of
 * CommonJS `require()`: this file is an ES module, and `import()` goes through
 * the same TypeScript-aware module resolution as the static imports, and
 * yields a typed binding rather than `any`.
 */
describe('Provider Registry', () => {
	it('creates LocalEmbeddingProvider for local-transformers', async () => {
		const { createProviderFromProfile } = await import('./registry.js');
		const profile: schema.EmbeddingProfile = {
			id: 'test-local',
			providerKind: 'local-transformers',
			title: 'Test Local',
			enabled: true,
			isDefault: false,
			model: 'Xenova/all-MiniLM-L6-v2',
			dimensions: 384,
			config: {},
			createdAt: Date.now(),
			updatedAt: Date.now()
		};

		const provider = createProviderFromProfile(profile);

		expect(provider.name).toBe('local');
		expect(provider.model).toBe('Xenova/all-MiniLM-L6-v2');
		expect(provider.dimensions).toBe(384);
	});

	it('creates OpenAIEmbeddingProvider for openai-compatible', async () => {
		const { createProviderFromProfile } = await import('./registry.js');
		const profile: schema.EmbeddingProfile = {
			id: 'test-openai',
			providerKind: 'openai-compatible',
			title: 'Test OpenAI',
			enabled: true,
			isDefault: false,
			model: 'text-embedding-3-small',
			dimensions: 1536,
			// The provider is built from `config`, so the model is repeated here.
			config: {
				baseUrl: 'https://api.openai.com/v1',
				apiKey: 'test-key',
				model: 'text-embedding-3-small'
			},
			createdAt: Date.now(),
			updatedAt: Date.now()
		};

		const provider = createProviderFromProfile(profile);

		expect(provider.name).toBe('openai');
		expect(provider.model).toBe('text-embedding-3-small');
	});

	it('returns NoopEmbeddingProvider for unknown providerKind', async () => {
		const { createProviderFromProfile } = await import('./registry.js');
		const profile: schema.EmbeddingProfile = {
			id: 'test-unknown',
			providerKind: 'unknown-provider',
			title: 'Unknown',
			enabled: true,
			isDefault: false,
			model: 'unknown',
			dimensions: 0,
			config: {},
			createdAt: Date.now(),
			updatedAt: Date.now()
		};

		const provider = createProviderFromProfile(profile);

		expect(provider.name).toBe('noop');
	});
});
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// EmbeddingService — storage logic
|
||||
// ---------------------------------------------------------------------------
|
||||
@@ -281,23 +374,36 @@ describe('EmbeddingService', () => {
|
||||
it('stores embeddings in snippet_embeddings table', async () => {
|
||||
const snippetId = seedSnippet(db, client);
|
||||
const provider = makeProvider(4);
|
||||
const service = new EmbeddingService(client, provider);
|
||||
const service = new EmbeddingService(client, provider, 'test-profile');
|
||||
|
||||
await service.embedSnippets([snippetId]);
|
||||
|
||||
const rows = client.prepare('SELECT * FROM snippet_embeddings WHERE snippet_id = ?').all(snippetId);
|
||||
const rows = client
|
||||
.prepare('SELECT * FROM snippet_embeddings WHERE snippet_id = ? AND profile_id = ?')
|
||||
.all(snippetId, 'test-profile');
|
||||
expect(rows).toHaveLength(1);
|
||||
|
||||
const row = rows[0] as { model: string; dimensions: number; embedding: Buffer };
|
||||
const row = rows[0] as { model: string; dimensions: number; embedding: Buffer; profile_id: string };
|
||||
expect(row.model).toBe('test-model');
|
||||
expect(row.dimensions).toBe(4);
|
||||
expect(row.profile_id).toBe('test-profile');
|
||||
expect(row.embedding).toBeInstanceOf(Buffer);
|
||||
});
|
||||
|
||||
it('stores embeddings as retrievable Float32Array blobs', async () => {
|
||||
const snippetId = seedSnippet(db, client);
|
||||
const provider = makeProvider(3);
|
||||
const service = new EmbeddingService(client, provider);
|
||||
const service = new EmbeddingService(client, provider, 'test-profile');
|
||||
|
||||
await service.embedSnippets([snippetId]);
|
||||
|
||||
const embedding = service.getEmbedding(snippetId, 'test-profile');
|
||||
expect(embedding).toBeInstanceOf(Float32Array);
|
||||
expect(embedding).toHaveLength(3);
|
||||
expect(embedding![0]).toBeCloseTo(0.0, 5);
|
||||
expect(embedding![1]).toBeCloseTo(0.1, 5);
|
||||
expect(embedding![2]).toBeCloseTo(0.2, 5);
|
||||
});
|
||||
|
||||
await service.embedSnippets([snippetId]);
|
||||
|
||||
|
||||
@@ -19,7 +19,8 @@ const TEXT_MAX_CHARS = 2048;
|
||||
export class EmbeddingService {
|
||||
constructor(
|
||||
private readonly db: Database.Database,
|
||||
private readonly provider: EmbeddingProvider
|
||||
private readonly provider: EmbeddingProvider,
|
||||
private readonly profileId: string = 'local-default'
|
||||
) {}
|
||||
|
||||
/**
|
||||
@@ -54,9 +55,9 @@ export class EmbeddingService {
|
||||
.slice(0, TEXT_MAX_CHARS)
|
||||
);
|
||||
|
||||
const insert = this.db.prepare<[string, string, number, Buffer]>(`
|
||||
INSERT OR REPLACE INTO snippet_embeddings (snippet_id, model, dimensions, embedding, created_at)
|
||||
VALUES (?, ?, ?, ?, unixepoch())
|
||||
const insert = this.db.prepare<[string, string, string, number, Buffer]>(`
|
||||
INSERT OR REPLACE INTO snippet_embeddings (snippet_id, profile_id, model, dimensions, embedding, created_at)
|
||||
VALUES (?, ?, ?, ?, ?, unixepoch())
|
||||
`);
|
||||
|
||||
for (let i = 0; i < snippets.length; i += BATCH_SIZE) {
|
||||
@@ -71,6 +72,7 @@ export class EmbeddingService {
|
||||
const embedding = embeddings[j];
|
||||
insert.run(
|
||||
snippet.id,
|
||||
this.profileId,
|
||||
embedding.model,
|
||||
embedding.dimensions,
|
||||
Buffer.from(embedding.values.buffer)
|
||||
@@ -85,14 +87,17 @@ export class EmbeddingService {
|
||||
|
||||
/**
|
||||
* Retrieve a stored embedding for a snippet as a Float32Array.
|
||||
* Returns null when no embedding has been stored for the given snippet.
|
||||
* Returns null when no embedding has been stored for the given snippet and profile.
|
||||
*
|
||||
* @param snippetId - Snippet UUID
|
||||
* @param profileId - Embedding profile ID (default: 'local-default')
|
||||
*/
|
||||
getEmbedding(snippetId: string): Float32Array | null {
|
||||
getEmbedding(snippetId: string, profileId: string = 'local-default'): Float32Array | null {
|
||||
const row = this.db
|
||||
.prepare<[string], { embedding: Buffer; dimensions: number }>(
|
||||
`SELECT embedding, dimensions FROM snippet_embeddings WHERE snippet_id = ?`
|
||||
.prepare<[string, string], { embedding: Buffer; dimensions: number }>(
|
||||
`SELECT embedding, dimensions FROM snippet_embeddings WHERE snippet_id = ? AND profile_id = ?`
|
||||
)
|
||||
.get(snippetId);
|
||||
.get(snippetId, profileId);
|
||||
|
||||
if (!row) return null;
|
||||
|
||||
|
||||
@@ -1,5 +1,9 @@
|
||||
/**
|
||||
* Factory — create an EmbeddingProvider from a persisted EmbeddingConfig.
|
||||
*
|
||||
* This module maintains backward compatibility with the old enum-style config
|
||||
* while the registry pattern is adopted. Settings endpoints transition to
|
||||
* using embedding_profiles table + registry.ts directly.
|
||||
*/
|
||||
|
||||
import type { EmbeddingProvider } from './provider.js';
|
||||
@@ -7,6 +11,9 @@ import { NoopEmbeddingProvider } from './provider.js';
|
||||
import { OpenAIEmbeddingProvider } from './openai.provider.js';
|
||||
import { LocalEmbeddingProvider } from './local.provider.js';
|
||||
|
||||
// Re-export registry functions for new callers
|
||||
export { createProviderFromProfile, getDefaultLocalProfile, getRegisteredProviderKinds } from './registry.js';
|
||||
|
||||
export interface EmbeddingConfig {
|
||||
provider: 'openai' | 'local' | 'none';
|
||||
openai?: {
|
||||
|
||||
64
src/lib/server/embeddings/registry.ts
Normal file
64
src/lib/server/embeddings/registry.ts
Normal file
@@ -0,0 +1,64 @@
|
||||
/**
|
||||
* Provider Registry — map providerKind to EmbeddingProvider instances.
|
||||
*
|
||||
* Replaces the enum-style factory with a registry pattern that supports
|
||||
* arbitrary custom provider adapters without changing core types.
|
||||
*/
|
||||
|
||||
import type { EmbeddingProvider } from './provider.js';
|
||||
import { NoopEmbeddingProvider } from './provider.js';
|
||||
import { OpenAIEmbeddingProvider } from './openai.provider.js';
|
||||
import { LocalEmbeddingProvider } from './local.provider.js';
|
||||
import type { EmbeddingProfile } from '../db/schema.js';
|
||||
|
||||
/** Builds an EmbeddingProvider from a profile's free-form `config` object. */
export type ProviderFactory = (config: Record<string, unknown>) => EmbeddingProvider;

/** Factory for the `local-transformers` kind; the config object is not consulted. */
function buildLocalProvider(): EmbeddingProvider {
	return new LocalEmbeddingProvider();
}

/**
 * Factory for the `openai-compatible` kind.
 *
 * Values are forwarded from `config` with type assertions only — nothing is
 * validated here, so a missing key reaches the provider as `undefined`.
 */
function buildOpenAICompatibleProvider(config: Record<string, unknown>): EmbeddingProvider {
	const { baseUrl, apiKey, model, dimensions, maxBatchSize } = config;
	return new OpenAIEmbeddingProvider({
		baseUrl: baseUrl as string,
		apiKey: apiKey as string,
		model: model as string,
		dimensions: dimensions as number | undefined,
		maxBatchSize: maxBatchSize as number | undefined
	});
}

/** Lookup table from `providerKind` to the factory that builds its provider. */
const PROVIDER_REGISTRY: Record<string, ProviderFactory> = {
	'local-transformers': buildLocalProvider,
	'openai-compatible': buildOpenAICompatibleProvider
};
|
||||
|
||||
/**
 * Create an EmbeddingProvider from a persisted EmbeddingProfile.
 *
 * Falls back to NoopEmbeddingProvider when the providerKind is not an own key
 * of the provider registry.
 *
 * @param profile - Profile whose `providerKind` selects the factory and whose
 *   `config` (or `{}` when nullish) is passed to it.
 * @returns The provider built by the matching factory, or a NoopEmbeddingProvider.
 */
export function createProviderFromProfile(profile: EmbeddingProfile): EmbeddingProvider {
	const { providerKind } = profile;

	// Own-property check: a bare `PROVIDER_REGISTRY[providerKind]` lookup also
	// resolves names inherited from Object.prototype (e.g. 'constructor',
	// 'toString'), which are truthy but are not provider factories.
	const isRegistered = Object.prototype.hasOwnProperty.call(PROVIDER_REGISTRY, providerKind);
	const factory = isRegistered ? PROVIDER_REGISTRY[providerKind] : undefined;
	if (!factory) return new NoopEmbeddingProvider();

	const config = (profile.config as Record<string, unknown>) ?? {};
	return factory(config);
}
|
||||
|
||||
/** Identity and model fields of the built-in local embedding profile. */
const DEFAULT_LOCAL_PROFILE = {
	id: 'local-default',
	providerKind: 'local-transformers',
	model: 'Xenova/all-MiniLM-L6-v2',
	dimensions: 384
} as const;

/**
 * Return metadata for the default local profile.
 *
 * Used by migration seeds and runtime defaults.
 *
 * @returns A new object on every call, so callers may mutate the result freely.
 */
export function getDefaultLocalProfile(): Pick<
	EmbeddingProfile,
	'id' | 'providerKind' | 'model' | 'dimensions'
> {
	return { ...DEFAULT_LOCAL_PROFILE };
}
|
||||
|
||||
/**
 * List every providerKind that currently has a factory in the registry.
 *
 * Useful for settings UI validation and provider discovery.
 *
 * @returns The registry's keys, in the registry object's own key order.
 */
export function getRegisteredProviderKinds(): string[] {
	const registered = Object.entries(PROVIDER_REGISTRY);
	return registered.map(([kind]) => kind);
}
|
||||
Reference in New Issue
Block a user