feat(TRUEREF-0020): add embedding profiles, default local embeddings, and version-scoped semantic retrieval
- Add embedding_profiles table with provider registry pattern - Install @xenova/transformers as runtime dependency - Update snippet_embeddings with composite PK (snippet_id, profile_id) - Seed default local profile using Xenova/all-MiniLM-L6-v2 - Add provider registry (local-transformers, openai-compatible) - Update EmbeddingService to persist and retrieve by profileId - Add version-scoped VectorSearch with optional versionId filtering - Add searchMode (auto|keyword|semantic|hybrid) to HybridSearchService - Update API /context route to load active profile, support searchMode/alpha params - Extend MCP query-docs tool with searchMode and alpha parameters - Update settings API to work with embedding_profiles table - Add comprehensive test coverage for profiles, registry, version scoping Status: 445/451 tests passing, core feature complete
This commit is contained in:
@@ -21,6 +21,13 @@ export interface VectorSearchResult {
|
||||
score: number;
|
||||
}
|
||||
|
||||
export interface VectorSearchOptions {
|
||||
repositoryId: string;
|
||||
versionId?: string;
|
||||
profileId?: string;
|
||||
limit?: number;
|
||||
}
|
||||
|
||||
/** Raw DB row from snippet_embeddings joined with snippets. */
|
||||
interface RawEmbeddingRow {
|
||||
snippet_id: string;
|
||||
@@ -64,32 +71,33 @@ export function cosineSimilarity(a: Float32Array, b: Float32Array): number {
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
export class VectorSearch {
|
||||
private readonly stmt: Database.Statement<[string], RawEmbeddingRow>;
|
||||
|
||||
constructor(private readonly db: Database.Database) {
|
||||
// Prepare once — reused for every call.
|
||||
this.stmt = this.db.prepare<[string], RawEmbeddingRow>(`
|
||||
SELECT se.snippet_id, se.embedding
|
||||
FROM snippet_embeddings se
|
||||
JOIN snippets s ON s.id = se.snippet_id
|
||||
WHERE s.repository_id = ?
|
||||
`);
|
||||
}
|
||||
constructor(private readonly db: Database.Database) {}
|
||||
|
||||
/**
|
||||
* Search stored embeddings by cosine similarity to the query embedding.
|
||||
*
|
||||
* @param queryEmbedding - The embedded representation of the search query.
|
||||
* @param repositoryId - Scope the search to a single repository.
|
||||
* @param limit - Maximum number of results to return. Default: 50.
|
||||
* @param options - Search options including repositoryId, optional versionId, profileId, and limit.
|
||||
* @returns Results sorted by descending cosine similarity score.
|
||||
*/
|
||||
vectorSearch(
|
||||
queryEmbedding: Float32Array,
|
||||
repositoryId: string,
|
||||
limit = 50
|
||||
): VectorSearchResult[] {
|
||||
const rows = this.stmt.all(repositoryId);
|
||||
vectorSearch(queryEmbedding: Float32Array, options: VectorSearchOptions): VectorSearchResult[] {
|
||||
const { repositoryId, versionId, profileId = 'local-default', limit = 50 } = options;
|
||||
|
||||
let sql = `
|
||||
SELECT se.snippet_id, se.embedding
|
||||
FROM snippet_embeddings se
|
||||
JOIN snippets s ON s.id = se.snippet_id
|
||||
WHERE s.repository_id = ?
|
||||
AND se.profile_id = ?
|
||||
`;
|
||||
const params: unknown[] = [repositoryId, profileId];
|
||||
|
||||
if (versionId) {
|
||||
sql += ' AND s.version_id = ?';
|
||||
params.push(versionId);
|
||||
}
|
||||
|
||||
const rows = this.db.prepare<unknown[], RawEmbeddingRow>(sql).all(...params);
|
||||
|
||||
const scored: VectorSearchResult[] = rows.map((row) => {
|
||||
const embedding = new Float32Array(
|
||||
|
||||
Reference in New Issue
Block a user