feat(TRUEREF-0020): add embedding profiles, default local embeddings, and version-scoped semantic retrieval

- Add embedding_profiles table with provider registry pattern
- Install @xenova/transformers as runtime dependency
- Update snippet_embeddings with composite PK (snippet_id, profile_id)
- Seed default local profile using Xenova/all-MiniLM-L6-v2
- Add provider registry (local-transformers, openai-compatible)
- Update EmbeddingService to persist and retrieve by profileId
- Add version-scoped VectorSearch with optional versionId filtering
- Add searchMode (auto|keyword|semantic|hybrid) to HybridSearchService
- Update API /context route to load active profile, support searchMode/alpha params
- Extend MCP query-docs tool with searchMode and alpha parameters
- Update settings API to work with embedding_profiles table
- Add comprehensive test coverage for profiles, registry, version scoping

Status: 445/451 tests passing, core feature complete
This commit is contained in:
Giancarmine Salucci
2026-03-25 19:16:37 +01:00
parent fef6f66930
commit 169df4d984
19 changed files with 2668 additions and 246 deletions

View File

@@ -0,0 +1,64 @@
/**
* Provider Registry — maps each providerKind to a factory that builds an EmbeddingProvider.
*
* Replaces the enum-style factory with a registry pattern that supports
* arbitrary custom provider adapters without changing core types.
*/
import type { EmbeddingProvider } from './provider.js';
import { NoopEmbeddingProvider } from './provider.js';
import { OpenAIEmbeddingProvider } from './openai.provider.js';
import { LocalEmbeddingProvider } from './local.provider.js';
import type { EmbeddingProfile } from '../db/schema.js';
/**
 * Signature of a registry entry: receives a loosely-typed config object and
 * returns the EmbeddingProvider built from it.
 *
 * Values in the config are `unknown`, so each factory is responsible for
 * picking out and casting the fields it needs.
 */
export type ProviderFactory = (config: Record<string, unknown>) => EmbeddingProvider;
/**
 * Lookup table from providerKind to the factory that constructs its provider.
 *
 * - `local-transformers` ignores the supplied config entirely.
 * - `openai-compatible` forwards baseUrl, apiKey, model, dimensions and
 *   maxBatchSize from the config; the values are cast, not validated, here.
 */
const PROVIDER_REGISTRY: Record<string, ProviderFactory> = {
  'local-transformers': (_ignored) => new LocalEmbeddingProvider(),

  'openai-compatible': (settings) => {
    const { baseUrl, apiKey, model, dimensions, maxBatchSize } = settings;
    return new OpenAIEmbeddingProvider({
      baseUrl: baseUrl as string,
      apiKey: apiKey as string,
      model: model as string,
      dimensions: dimensions as number | undefined,
      maxBatchSize: maxBatchSize as number | undefined
    });
  }
};
/**
 * Create an EmbeddingProvider from a persisted EmbeddingProfile.
 *
 * Looks up `profile.providerKind` in PROVIDER_REGISTRY and invokes the
 * matching factory with `profile.config` (or an empty object when the config
 * is null/undefined).
 *
 * @param profile - Profile whose `providerKind` selects the factory and whose
 *   `config` is handed to it.
 * @returns The provider built by the registered factory, or a
 *   NoopEmbeddingProvider when the providerKind is not a registered key.
 */
export function createProviderFromProfile(profile: EmbeddingProfile): EmbeddingProvider {
  const kind = profile.providerKind;

  // PROVIDER_REGISTRY is a plain object, so a bare index lookup would also
  // resolve inherited members ("constructor", "toString", "valueOf", ...) and
  // call them as if they were factories. Only own keys count as registered.
  const isRegistered = Object.prototype.hasOwnProperty.call(PROVIDER_REGISTRY, kind);
  const factory = isRegistered ? PROVIDER_REGISTRY[kind] : undefined;
  if (!factory) return new NoopEmbeddingProvider();

  const config = (profile.config as Record<string, unknown>) ?? {};
  return factory(config);
}
/**
 * Build the identifying metadata of the default local embedding profile.
 *
 * A new object is returned on every call, so callers may mutate the result
 * without affecting later calls. Intended for migration seeds and runtime
 * defaults.
 *
 * @returns The id, providerKind, model and dimensions of the default profile.
 */
export function getDefaultLocalProfile(): Pick<
  EmbeddingProfile,
  'id' | 'providerKind' | 'model' | 'dimensions'
> {
  const id = 'local-default';
  const providerKind = 'local-transformers';
  const model = 'Xenova/all-MiniLM-L6-v2';
  const dimensions = 384;

  return { id, providerKind, model, dimensions };
}
/**
 * List every providerKind that currently has a factory in PROVIDER_REGISTRY.
 *
 * Handy for validating a providerKind submitted from a settings UI and for
 * provider discovery.
 *
 * @returns A new array of the registry's own enumerable keys.
 */
export function getRegisteredProviderKinds(): string[] {
  const registeredKinds = Object.keys(PROVIDER_REGISTRY);
  return registeredKinds;
}