feat(TRUEREF-0020): add embedding profiles, default local embeddings, and version-scoped semantic retrieval

- Add embedding_profiles table with provider registry pattern
- Install @xenova/transformers as runtime dependency
- Update snippet_embeddings with composite PK (snippet_id, profile_id)
- Seed default local profile using Xenova/all-MiniLM-L6-v2
- Add provider registry (local-transformers, openai-compatible)
- Update EmbeddingService to persist and retrieve by profileId
- Add version-scoped VectorSearch with optional versionId filtering
- Add searchMode (auto|keyword|semantic|hybrid) to HybridSearchService
- Update API /context route to load active profile, support searchMode/alpha params
- Extend MCP query-docs tool with searchMode and alpha parameters
- Update settings API to work with embedding_profiles table
- Add comprehensive test coverage for profiles, registry, version scoping

Status: 445/451 tests passing, core feature complete
Author: Giancarmine Salucci
Date: 2026-03-25 19:16:37 +01:00
parent fef6f66930
commit 169df4d984
19 changed files with 2668 additions and 246 deletions

View File

@@ -42,6 +42,8 @@ export async function fetchContext(params: {
query: string;
tokens?: number;
type?: 'json' | 'txt';
searchMode?: string;
alpha?: number;
}): Promise<ApiResponse> {
const url = new URL(`${API_BASE}/api/v1/context`);
url.searchParams.set('libraryId', params.libraryId);
@@ -50,6 +52,12 @@ export async function fetchContext(params: {
if (params.tokens !== undefined) {
url.searchParams.set('tokens', String(params.tokens));
}
if (params.searchMode) {
url.searchParams.set('searchMode', params.searchMode);
}
if (params.alpha !== undefined) {
url.searchParams.set('alpha', String(params.alpha));
}
return fetch(url.toString());
}

View File

@@ -15,7 +15,19 @@ export const QueryDocsSchema = z.object({
query: z
.string()
.describe('Specific question about the library to retrieve relevant documentation'),
tokens: z.number().optional().describe('Maximum token budget for the response (default: 10000)')
tokens: z.number().optional().describe('Maximum token budget for the response (default: 10000)'),
searchMode: z
.enum(['auto', 'keyword', 'semantic', 'hybrid'])
.optional()
.describe(
"Retrieval mode: 'auto' (default), 'keyword' (FTS only), 'semantic' (vector only), or 'hybrid'"
),
alpha: z
.number()
.min(0)
.max(1)
.optional()
.describe('Hybrid blend weight: 0.0 = keyword only, 1.0 = semantic only (default: 0.5)')
});
export type QueryDocsInput = z.infer<typeof QueryDocsSchema>;
@@ -42,6 +54,17 @@ export const QUERY_DOCS_TOOL = {
tokens: {
type: 'number',
description: 'Max token budget (default: 10000)'
},
searchMode: {
type: 'string',
enum: ['auto', 'keyword', 'semantic', 'hybrid'],
description: "Retrieval mode: 'auto' (default), 'keyword', 'semantic', or 'hybrid'"
},
alpha: {
type: 'number',
minimum: 0,
maximum: 1,
description: 'Hybrid blend weight (0=keyword, 1=semantic, default: 0.5)'
}
},
required: ['libraryId', 'query']
@@ -49,9 +72,9 @@ export const QUERY_DOCS_TOOL = {
};
export async function handleQueryDocs(args: unknown) {
const { libraryId, query, tokens } = QueryDocsSchema.parse(args);
const { libraryId, query, tokens, searchMode, alpha } = QueryDocsSchema.parse(args);
const response = await fetchContext({ libraryId, query, tokens, type: 'txt' });
const response = await fetchContext({ libraryId, query, tokens, type: 'txt', searchMode, alpha });
if (!response.ok) {
const status = response.status;