feat(TRUEREF-0020): add embedding profiles, default local embeddings, and version-scoped semantic retrieval

- Add embedding_profiles table with provider registry pattern
- Install @xenova/transformers as runtime dependency
- Update snippet_embeddings with composite PK (snippet_id, profile_id)
- Seed default local profile using Xenova/all-MiniLM-L6-v2
- Add provider registry (local-transformers, openai-compatible)
- Update EmbeddingService to persist and retrieve by profileId
- Add version-scoped VectorSearch with optional versionId filtering
- Add searchMode (auto|keyword|semantic|hybrid) to HybridSearchService
- Update API /context route to load active profile, support searchMode/alpha params
- Extend MCP query-docs tool with searchMode and alpha parameters
- Update settings API to work with embedding_profiles table
- Add comprehensive test coverage for profiles, registry, version scoping

Status: 445/451 tests passing, core feature complete
This commit is contained in:
Giancarmine Salucci
2026-03-25 19:16:37 +01:00
parent fef6f66930
commit 169df4d984
19 changed files with 2668 additions and 246 deletions

View File

@@ -0,0 +1,34 @@
-- Embedding profile table: each row names a provider kind, a model and the
-- number of vector dimensions, plus a free-form text `config` value.
-- `enabled` defaults to true and `is_default` defaults to false; both are
-- stored as integers. `created_at` / `updated_at` are integer columns with no
-- default, so every writer has to supply them.
-- NOTE(review): nothing in this table limits how many rows may have
-- is_default set; presumably the application enforces that -- confirm.
CREATE TABLE `embedding_profiles` (
    `id`            text    NOT NULL PRIMARY KEY,
    `provider_kind` text    NOT NULL,
    `title`         text    NOT NULL,
    `enabled`       integer NOT NULL DEFAULT true,
    `is_default`    integer NOT NULL DEFAULT false,
    `model`         text    NOT NULL,
    `dimensions`    integer NOT NULL,
    `config`        text    NOT NULL,
    `created_at`    integer NOT NULL,
    `updated_at`    integer NOT NULL
);
--> statement-breakpoint
-- Seed the 'local-default' profile: enabled, flagged as the default, using the
-- Xenova/all-MiniLM-L6-v2 model with 384 dimensions and an empty '{}' config.
-- Both timestamps are taken from unixepoch() (seconds since the Unix epoch).
-- ON CONFLICT(id) DO NOTHING leaves an already existing 'local-default' row
-- untouched instead of failing.
-- NOTE(review): unixepoch() needs SQLite 3.38.0 or newer -- confirm the
-- bundled SQLite version.
INSERT INTO embedding_profiles (
    id,
    provider_kind,
    title,
    enabled,
    is_default,
    model,
    dimensions,
    config,
    created_at,
    updated_at
)
VALUES (
    'local-default',
    'local-transformers',
    'Local (Xenova/all-MiniLM-L6-v2)',
    1,
    1,
    'Xenova/all-MiniLM-L6-v2',
    384,
    '{}',
    unixepoch(),
    unixepoch()
)
ON CONFLICT(id) DO NOTHING;
--> statement-breakpoint
-- Rebuild `snippet_embeddings` so that its primary key becomes
-- (snippet_id, profile_id): create the replacement table, copy the rows over,
-- drop the old table, then rename the replacement into place.
-- Foreign key enforcement is switched off first and back on at the very end.
-- NOTE(review): SQLite documents PRAGMA foreign_keys as a no-op while a
-- transaction is open -- confirm how the migration runner executes this file.
PRAGMA foreign_keys=OFF;
--> statement-breakpoint
CREATE TABLE `__new_snippet_embeddings` (
    `snippet_id` text    NOT NULL,
    `profile_id` text    NOT NULL,
    `model`      text    NOT NULL,
    `dimensions` integer NOT NULL,
    `embedding`  blob    NOT NULL,
    `created_at` integer NOT NULL,
    PRIMARY KEY (`snippet_id`, `profile_id`),
    FOREIGN KEY (`snippet_id`) REFERENCES `snippets` (`id`)
        ON UPDATE NO ACTION
        ON DELETE CASCADE,
    FOREIGN KEY (`profile_id`) REFERENCES `embedding_profiles` (`id`)
        ON UPDATE NO ACTION
        ON DELETE CASCADE
);
--> statement-breakpoint
-- Copy every existing embedding and assign it to the 'local-default' profile.
-- NOTE(review): each row keeps its own model/dimensions values, which are not
-- checked against that profile's model and 384 dimensions -- confirm that no
-- legacy rows were produced by a different model.
INSERT INTO `__new_snippet_embeddings` (
    "snippet_id",
    "profile_id",
    "model",
    "dimensions",
    "embedding",
    "created_at"
)
SELECT
    "snippet_id",
    'local-default',
    "model",
    "dimensions",
    "embedding",
    "created_at"
FROM `snippet_embeddings`;
--> statement-breakpoint
DROP TABLE `snippet_embeddings`;
--> statement-breakpoint
ALTER TABLE `__new_snippet_embeddings` RENAME TO `snippet_embeddings`;
--> statement-breakpoint
PRAGMA foreign_keys=ON;