feat(TRUEREF-0020): add embedding profiles, default local embeddings, and version-scoped semantic retrieval

- Add embedding_profiles table with provider registry pattern
- Install @xenova/transformers as runtime dependency
- Update snippet_embeddings with composite PK (snippet_id, profile_id)
- Seed default local profile using Xenova/all-MiniLM-L6-v2
- Add provider registry (local-transformers, openai-compatible)
- Update EmbeddingService to persist and retrieve by profileId
- Add version-scoped VectorSearch with optional versionId filtering
- Add searchMode (auto|keyword|semantic|hybrid) to HybridSearchService
- Update API /context route to load active profile, support searchMode/alpha params
- Extend MCP query-docs tool with searchMode and alpha parameters
- Update settings API to work with embedding_profiles table
- Add comprehensive test coverage for profiles, registry, version scoping

Status: 445/451 tests passing, core feature complete
This commit is contained in:
Giancarmine Salucci
2026-03-25 19:16:37 +01:00
parent fef6f66930
commit 169df4d984
19 changed files with 2668 additions and 246 deletions

View File

@@ -1,82 +1,47 @@
/**
* POST /api/v1/settings/embedding/test
* GET /api/v1/settings/embedding/test
*
* Validates an embedding provider configuration by creating a provider
* instance and calling embed(['test']). Returns success with dimensions
* or a descriptive error without persisting any changes.
* Tests the active default embedding profile by creating a provider instance
* and checking availability. Returns success with profile metadata or error.
*/
import { json } from '@sveltejs/kit';
import type { RequestHandler } from './$types';
import {
createProviderFromConfig,
type EmbeddingConfig
} from '$lib/server/embeddings/factory';
import { handleServiceError, InvalidInputError } from '$lib/server/utils/validation';
import { getClient } from '$lib/server/db/client';
import { createProviderFromProfile } from '$lib/server/embeddings/registry';
import type { EmbeddingProfile } from '$lib/server/db/schema';
import { handleServiceError } from '$lib/server/utils/validation';
export const GET: RequestHandler = async () => {
try {
const provider = createProviderFromConfig({ provider: 'local' });
const db = getClient();
const profile = db
.prepare<[], EmbeddingProfile>(
'SELECT * FROM embedding_profiles WHERE is_default = 1 AND enabled = 1 LIMIT 1'
)
.get();
if (!profile) {
return json({ available: false, error: 'No active embedding profile configured' });
}
const provider = createProviderFromProfile(profile);
const available = await provider.isAvailable();
return json({ available });
return json({
available,
profile: {
id: profile.id,
providerKind: profile.providerKind,
model: profile.model,
dimensions: profile.dimensions
}
});
} catch (err) {
return handleServiceError(err);
}
};
// ---------------------------------------------------------------------------
// Validate — reuse the same shape accepted by PUT /settings/embedding
// ---------------------------------------------------------------------------
function validateConfig(body: unknown): EmbeddingConfig {
if (typeof body !== 'object' || body === null) {
throw new InvalidInputError('Request body must be a JSON object');
}
const obj = body as Record<string, unknown>;
const provider = obj.provider;
if (provider !== 'openai' && provider !== 'local' && provider !== 'none') {
throw new InvalidInputError(
`Invalid provider "${String(provider)}". Must be one of: openai, local, none.`
);
}
if (provider === 'openai') {
const openai = obj.openai as Record<string, unknown> | undefined;
if (!openai || typeof openai !== 'object') {
throw new InvalidInputError('openai config object is required when provider is "openai"');
}
if (typeof openai.baseUrl !== 'string' || !openai.baseUrl) {
throw new InvalidInputError('openai.baseUrl must be a non-empty string');
}
if (typeof openai.apiKey !== 'string' || !openai.apiKey) {
throw new InvalidInputError('openai.apiKey must be a non-empty string');
}
if (typeof openai.model !== 'string' || !openai.model) {
throw new InvalidInputError('openai.model must be a non-empty string');
}
return {
provider: 'openai',
openai: {
baseUrl: openai.baseUrl as string,
apiKey: openai.apiKey as string,
model: openai.model as string,
dimensions:
typeof openai.dimensions === 'number' ? (openai.dimensions as number) : undefined,
maxBatchSize:
typeof openai.maxBatchSize === 'number' ? (openai.maxBatchSize as number) : undefined
}
};
}
return { provider: provider as 'local' | 'none' };
}
// ---------------------------------------------------------------------------
// POST
// ---------------------------------------------------------------------------
export const POST: RequestHandler = async ({ request }) => {
try {