feat(TRUEREF-0020): add embedding profiles, default local embeddings, and version-scoped semantic retrieval

- Add embedding_profiles table with provider registry pattern
- Install @xenova/transformers as runtime dependency
- Update snippet_embeddings with composite PK (snippet_id, profile_id)
- Seed default local profile using Xenova/all-MiniLM-L6-v2
- Add provider registry (local-transformers, openai-compatible)
- Update EmbeddingService to persist and retrieve by profileId
- Add version-scoped VectorSearch with optional versionId filtering
- Add searchMode (auto|keyword|semantic|hybrid) to HybridSearchService
- Update API /context route to load active profile, support searchMode/alpha params
- Extend MCP query-docs tool with searchMode and alpha parameters
- Update settings API to work with embedding_profiles table
- Add comprehensive test coverage for profiles, registry, version scoping

Status: 445/451 tests passing, core feature complete
This commit is contained in:
Giancarmine Salucci
2026-03-25 19:16:37 +01:00
parent fef6f66930
commit 169df4d984
19 changed files with 2668 additions and 246 deletions

View File

@@ -16,6 +16,8 @@ import { getClient } from '$lib/server/db/client';
import { dtoJsonResponse } from '$lib/server/api/dto-response';
import { SearchService } from '$lib/server/search/search.service';
import { HybridSearchService } from '$lib/server/search/hybrid.search.service';
import { createProviderFromProfile } from '$lib/server/embeddings/registry';
import type { EmbeddingProfile } from '$lib/server/db/schema';
import { parseLibraryId } from '$lib/server/api/library-id';
import { selectSnippetsWithinBudget, DEFAULT_TOKEN_BUDGET } from '$lib/server/api/token-budget';
import {
@@ -28,12 +30,20 @@ import {
// Helpers
// ---------------------------------------------------------------------------
// NOTE(review): this span is rendered diff output with the +/- markers stripped,
// so the previous and the updated versions of getServices appear interleaved.
// The two lines below look like the previous header and first statement — confirm
// against the real file before relying on them.
function getServices() {
const db = getClient();
/**
 * Builds the search services used by this route from an existing database handle.
 *
 * @param db - Database client, typed as the return value of `getClient()`.
 * @returns The same `db`, a `SearchService`, a `HybridSearchService`, and
 *   `profileId` (the `id` of the selected embedding profile row, or `undefined`
 *   when no row was selected).
 */
function getServices(db: ReturnType<typeof getClient>) {
const searchService = new SearchService(db);
// NOTE(review): the next three lines (null provider, old return shape) appear to
// belong to the previous implementation — confirm against the real file.
// No embedding provider — pure FTS5 mode (alpha=0 equivalent).
const hybridService = new HybridSearchService(db, searchService, null);
return { db, searchService, hybridService };
// Load the active embedding profile from the database
// Selects at most one row that is flagged both is_default = 1 and enabled = 1.
const profileRow = db
.prepare<[], EmbeddingProfile>(
'SELECT * FROM embedding_profiles WHERE is_default = 1 AND enabled = 1 LIMIT 1'
)
.get();
// When no profile row was selected, the provider is null; presumably the hybrid
// service then runs without embeddings — confirm in HybridSearchService.
const provider = profileRow ? createProviderFromProfile(profileRow) : null;
const hybridService = new HybridSearchService(db, searchService, provider);
// profileId is undefined when profileRow is missing (optional chaining).
return { db, searchService, hybridService, profileId: profileRow?.id };
}
interface RawRepoConfig {
@@ -93,6 +103,14 @@ export const GET: RequestHandler = async ({ url }) => {
const tokensRaw = parseInt(url.searchParams.get('tokens') ?? String(DEFAULT_TOKEN_BUDGET), 10);
const maxTokens = isNaN(tokensRaw) || tokensRaw < 1 ? DEFAULT_TOKEN_BUDGET : tokensRaw;
// Parse searchMode and alpha
const rawMode = url.searchParams.get('searchMode') ?? 'auto';
const searchMode = ['auto', 'keyword', 'semantic', 'hybrid'].includes(rawMode)
? (rawMode as 'auto' | 'keyword' | 'semantic' | 'hybrid')
: 'auto';
const alphaRaw = parseFloat(url.searchParams.get('alpha') ?? '0.5');
const alpha = isNaN(alphaRaw) ? 0.5 : Math.max(0, Math.min(1, alphaRaw));
// Parse the libraryId
let parsed: ReturnType<typeof parseLibraryId>;
try {
@@ -108,7 +126,8 @@ export const GET: RequestHandler = async ({ url }) => {
}
try {
const { db, hybridService } = getServices();
const db = getClient();
const { hybridService, profileId } = getServices(db);
// Verify the repository exists and check its state.
const repo = db
@@ -158,7 +177,10 @@ export const GET: RequestHandler = async ({ url }) => {
const searchResults = await hybridService.search(query, {
repositoryId: parsed.repositoryId,
versionId,
limit: 50 // fetch more than needed; token budget will trim
limit: 50, // fetch more than needed; token budget will trim
searchMode,
alpha,
profileId
});
// Apply token budget.