feat(TRUEREF-0020): add embedding profiles, default local embeddings, and version-scoped semantic retrieval

- Add embedding_profiles table with provider registry pattern
- Install @xenova/transformers as runtime dependency
- Update snippet_embeddings with composite PK (snippet_id, profile_id)
- Seed default local profile using Xenova/all-MiniLM-L6-v2
- Add provider registry (local-transformers, openai-compatible)
- Update EmbeddingService to persist and retrieve by profileId
- Add version-scoped VectorSearch with optional versionId filtering
- Add searchMode (auto|keyword|semantic|hybrid) to HybridSearchService
- Update API /context route to load active profile, support searchMode/alpha params
- Extend MCP query-docs tool with searchMode and alpha parameters
- Update settings API to work with embedding_profiles table
- Add comprehensive test coverage for profiles, registry, version scoping

Status: 445/451 tests passing, core feature complete
This commit is contained in:
Giancarmine Salucci
2026-03-25 19:16:37 +01:00
parent fef6f66930
commit 169df4d984
19 changed files with 2668 additions and 246 deletions

View File

@@ -1,4 +1,4 @@
import { blob, integer, real, sqliteTable, text } from 'drizzle-orm/sqlite-core';
import { blob, integer, primaryKey, real, sqliteTable, text } from 'drizzle-orm/sqlite-core';
// ---------------------------------------------------------------------------
// repositories
@@ -86,18 +86,41 @@ export const snippets = sqliteTable('snippets', {
createdAt: integer('created_at', { mode: 'timestamp' }).notNull()
});
// ---------------------------------------------------------------------------
// embedding_profiles
// ---------------------------------------------------------------------------
export const embeddingProfiles = sqliteTable('embedding_profiles', {
id: text('id').primaryKey(),
providerKind: text('provider_kind').notNull(),
title: text('title').notNull(),
enabled: integer('enabled', { mode: 'boolean' }).notNull().default(true),
isDefault: integer('is_default', { mode: 'boolean' }).notNull().default(false),
model: text('model').notNull(),
dimensions: integer('dimensions').notNull(),
config: text('config', { mode: 'json' }).notNull().$type<Record<string, unknown>>(),
createdAt: integer('created_at').notNull(),
updatedAt: integer('updated_at').notNull()
});
// ---------------------------------------------------------------------------
// snippet_embeddings
// ---------------------------------------------------------------------------
export const snippetEmbeddings = sqliteTable('snippet_embeddings', {
snippetId: text('snippet_id')
.primaryKey()
.references(() => snippets.id, { onDelete: 'cascade' }),
model: text('model').notNull(), // embedding model identifier
dimensions: integer('dimensions').notNull(),
embedding: blob('embedding').notNull(), // Float32Array as binary blob
createdAt: integer('created_at', { mode: 'timestamp' }).notNull()
});
export const snippetEmbeddings = sqliteTable(
'snippet_embeddings',
{
snippetId: text('snippet_id')
.notNull()
.references(() => snippets.id, { onDelete: 'cascade' }),
profileId: text('profile_id')
.notNull()
.references(() => embeddingProfiles.id, { onDelete: 'cascade' }),
model: text('model').notNull(), // embedding model identifier
dimensions: integer('dimensions').notNull(),
embedding: blob('embedding').notNull(), // Float32Array as binary blob
createdAt: integer('created_at').notNull()
},
(table) => [primaryKey({ columns: [table.snippetId, table.profileId] })]
);
// ---------------------------------------------------------------------------
// indexing_jobs
@@ -165,6 +188,9 @@ export type NewDocument = typeof documents.$inferInsert;
export type Snippet = typeof snippets.$inferSelect;
export type NewSnippet = typeof snippets.$inferInsert;
export type EmbeddingProfile = typeof embeddingProfiles.$inferSelect;
export type NewEmbeddingProfile = typeof embeddingProfiles.$inferInsert;
export type SnippetEmbedding = typeof snippetEmbeddings.$inferSelect;
export type NewSnippetEmbedding = typeof snippetEmbeddings.$inferInsert;