feat(TRUEREF-0020): add embedding profiles, default local embeddings, and version-scoped semantic retrieval
- Add embedding_profiles table with provider registry pattern - Install @xenova/transformers as runtime dependency - Update snippet_embeddings with composite PK (snippet_id, profile_id) - Seed default local profile using Xenova/all-MiniLM-L6-v2 - Add provider registry (local-transformers, openai-compatible) - Update EmbeddingService to persist and retrieve by profileId - Add version-scoped VectorSearch with optional versionId filtering - Add searchMode (auto|keyword|semantic|hybrid) to HybridSearchService - Update API /context route to load active profile, support searchMode/alpha params - Extend MCP query-docs tool with searchMode and alpha parameters - Update settings API to work with embedding_profiles table - Add comprehensive test coverage for profiles, registry, version scoping Status: 445/451 tests passing, core feature complete
This commit is contained in:
34
src/lib/server/db/migrations/0002_silky_stellaris.sql
Normal file
34
src/lib/server/db/migrations/0002_silky_stellaris.sql
Normal file
@@ -0,0 +1,34 @@
|
||||
CREATE TABLE `embedding_profiles` (
|
||||
`id` text PRIMARY KEY NOT NULL,
|
||||
`provider_kind` text NOT NULL,
|
||||
`title` text NOT NULL,
|
||||
`enabled` integer DEFAULT true NOT NULL,
|
||||
`is_default` integer DEFAULT false NOT NULL,
|
||||
`model` text NOT NULL,
|
||||
`dimensions` integer NOT NULL,
|
||||
`config` text NOT NULL,
|
||||
`created_at` integer NOT NULL,
|
||||
`updated_at` integer NOT NULL
|
||||
);
|
||||
--> statement-breakpoint
|
||||
INSERT INTO embedding_profiles (id, provider_kind, title, enabled, is_default, model, dimensions, config, created_at, updated_at)
|
||||
VALUES ('local-default', 'local-transformers', 'Local (Xenova/all-MiniLM-L6-v2)', 1, 1, 'Xenova/all-MiniLM-L6-v2', 384, '{}', unixepoch(), unixepoch())
|
||||
ON CONFLICT(id) DO NOTHING;
|
||||
--> statement-breakpoint
|
||||
PRAGMA foreign_keys=OFF;--> statement-breakpoint
|
||||
CREATE TABLE `__new_snippet_embeddings` (
|
||||
`snippet_id` text NOT NULL,
|
||||
`profile_id` text NOT NULL,
|
||||
`model` text NOT NULL,
|
||||
`dimensions` integer NOT NULL,
|
||||
`embedding` blob NOT NULL,
|
||||
`created_at` integer NOT NULL,
|
||||
PRIMARY KEY(`snippet_id`, `profile_id`),
|
||||
FOREIGN KEY (`snippet_id`) REFERENCES `snippets`(`id`) ON UPDATE no action ON DELETE cascade,
|
||||
FOREIGN KEY (`profile_id`) REFERENCES `embedding_profiles`(`id`) ON UPDATE no action ON DELETE cascade
|
||||
);
|
||||
--> statement-breakpoint
|
||||
INSERT INTO `__new_snippet_embeddings`("snippet_id", "profile_id", "model", "dimensions", "embedding", "created_at") SELECT "snippet_id", 'local-default', "model", "dimensions", "embedding", "created_at" FROM `snippet_embeddings`;--> statement-breakpoint
|
||||
DROP TABLE `snippet_embeddings`;--> statement-breakpoint
|
||||
ALTER TABLE `__new_snippet_embeddings` RENAME TO `snippet_embeddings`;--> statement-breakpoint
|
||||
PRAGMA foreign_keys=ON;
|
||||
856
src/lib/server/db/migrations/meta/0002_snapshot.json
Normal file
856
src/lib/server/db/migrations/meta/0002_snapshot.json
Normal file
@@ -0,0 +1,856 @@
|
||||
{
|
||||
"version": "6",
|
||||
"dialect": "sqlite",
|
||||
"id": "31531dab-a199-4fc5-a889-1884940039cd",
|
||||
"prevId": "60c9a1b5-449f-45fd-9b2d-1ab4cca78ab6",
|
||||
"tables": {
|
||||
"documents": {
|
||||
"name": "documents",
|
||||
"columns": {
|
||||
"id": {
|
||||
"name": "id",
|
||||
"type": "text",
|
||||
"primaryKey": true,
|
||||
"notNull": true,
|
||||
"autoincrement": false
|
||||
},
|
||||
"repository_id": {
|
||||
"name": "repository_id",
|
||||
"type": "text",
|
||||
"primaryKey": false,
|
||||
"notNull": true,
|
||||
"autoincrement": false
|
||||
},
|
||||
"version_id": {
|
||||
"name": "version_id",
|
||||
"type": "text",
|
||||
"primaryKey": false,
|
||||
"notNull": false,
|
||||
"autoincrement": false
|
||||
},
|
||||
"file_path": {
|
||||
"name": "file_path",
|
||||
"type": "text",
|
||||
"primaryKey": false,
|
||||
"notNull": true,
|
||||
"autoincrement": false
|
||||
},
|
||||
"title": {
|
||||
"name": "title",
|
||||
"type": "text",
|
||||
"primaryKey": false,
|
||||
"notNull": false,
|
||||
"autoincrement": false
|
||||
},
|
||||
"language": {
|
||||
"name": "language",
|
||||
"type": "text",
|
||||
"primaryKey": false,
|
||||
"notNull": false,
|
||||
"autoincrement": false
|
||||
},
|
||||
"token_count": {
|
||||
"name": "token_count",
|
||||
"type": "integer",
|
||||
"primaryKey": false,
|
||||
"notNull": false,
|
||||
"autoincrement": false,
|
||||
"default": 0
|
||||
},
|
||||
"checksum": {
|
||||
"name": "checksum",
|
||||
"type": "text",
|
||||
"primaryKey": false,
|
||||
"notNull": true,
|
||||
"autoincrement": false
|
||||
},
|
||||
"indexed_at": {
|
||||
"name": "indexed_at",
|
||||
"type": "integer",
|
||||
"primaryKey": false,
|
||||
"notNull": true,
|
||||
"autoincrement": false
|
||||
}
|
||||
},
|
||||
"indexes": {},
|
||||
"foreignKeys": {
|
||||
"documents_repository_id_repositories_id_fk": {
|
||||
"name": "documents_repository_id_repositories_id_fk",
|
||||
"tableFrom": "documents",
|
||||
"tableTo": "repositories",
|
||||
"columnsFrom": [
|
||||
"repository_id"
|
||||
],
|
||||
"columnsTo": [
|
||||
"id"
|
||||
],
|
||||
"onDelete": "cascade",
|
||||
"onUpdate": "no action"
|
||||
},
|
||||
"documents_version_id_repository_versions_id_fk": {
|
||||
"name": "documents_version_id_repository_versions_id_fk",
|
||||
"tableFrom": "documents",
|
||||
"tableTo": "repository_versions",
|
||||
"columnsFrom": [
|
||||
"version_id"
|
||||
],
|
||||
"columnsTo": [
|
||||
"id"
|
||||
],
|
||||
"onDelete": "cascade",
|
||||
"onUpdate": "no action"
|
||||
}
|
||||
},
|
||||
"compositePrimaryKeys": {},
|
||||
"uniqueConstraints": {},
|
||||
"checkConstraints": {}
|
||||
},
|
||||
"embedding_profiles": {
|
||||
"name": "embedding_profiles",
|
||||
"columns": {
|
||||
"id": {
|
||||
"name": "id",
|
||||
"type": "text",
|
||||
"primaryKey": true,
|
||||
"notNull": true,
|
||||
"autoincrement": false
|
||||
},
|
||||
"provider_kind": {
|
||||
"name": "provider_kind",
|
||||
"type": "text",
|
||||
"primaryKey": false,
|
||||
"notNull": true,
|
||||
"autoincrement": false
|
||||
},
|
||||
"title": {
|
||||
"name": "title",
|
||||
"type": "text",
|
||||
"primaryKey": false,
|
||||
"notNull": true,
|
||||
"autoincrement": false
|
||||
},
|
||||
"enabled": {
|
||||
"name": "enabled",
|
||||
"type": "integer",
|
||||
"primaryKey": false,
|
||||
"notNull": true,
|
||||
"autoincrement": false,
|
||||
"default": true
|
||||
},
|
||||
"is_default": {
|
||||
"name": "is_default",
|
||||
"type": "integer",
|
||||
"primaryKey": false,
|
||||
"notNull": true,
|
||||
"autoincrement": false,
|
||||
"default": false
|
||||
},
|
||||
"model": {
|
||||
"name": "model",
|
||||
"type": "text",
|
||||
"primaryKey": false,
|
||||
"notNull": true,
|
||||
"autoincrement": false
|
||||
},
|
||||
"dimensions": {
|
||||
"name": "dimensions",
|
||||
"type": "integer",
|
||||
"primaryKey": false,
|
||||
"notNull": true,
|
||||
"autoincrement": false
|
||||
},
|
||||
"config": {
|
||||
"name": "config",
|
||||
"type": "text",
|
||||
"primaryKey": false,
|
||||
"notNull": true,
|
||||
"autoincrement": false
|
||||
},
|
||||
"created_at": {
|
||||
"name": "created_at",
|
||||
"type": "integer",
|
||||
"primaryKey": false,
|
||||
"notNull": true,
|
||||
"autoincrement": false
|
||||
},
|
||||
"updated_at": {
|
||||
"name": "updated_at",
|
||||
"type": "integer",
|
||||
"primaryKey": false,
|
||||
"notNull": true,
|
||||
"autoincrement": false
|
||||
}
|
||||
},
|
||||
"indexes": {},
|
||||
"foreignKeys": {},
|
||||
"compositePrimaryKeys": {},
|
||||
"uniqueConstraints": {},
|
||||
"checkConstraints": {}
|
||||
},
|
||||
"indexing_jobs": {
|
||||
"name": "indexing_jobs",
|
||||
"columns": {
|
||||
"id": {
|
||||
"name": "id",
|
||||
"type": "text",
|
||||
"primaryKey": true,
|
||||
"notNull": true,
|
||||
"autoincrement": false
|
||||
},
|
||||
"repository_id": {
|
||||
"name": "repository_id",
|
||||
"type": "text",
|
||||
"primaryKey": false,
|
||||
"notNull": true,
|
||||
"autoincrement": false
|
||||
},
|
||||
"version_id": {
|
||||
"name": "version_id",
|
||||
"type": "text",
|
||||
"primaryKey": false,
|
||||
"notNull": false,
|
||||
"autoincrement": false
|
||||
},
|
||||
"status": {
|
||||
"name": "status",
|
||||
"type": "text",
|
||||
"primaryKey": false,
|
||||
"notNull": true,
|
||||
"autoincrement": false,
|
||||
"default": "'queued'"
|
||||
},
|
||||
"progress": {
|
||||
"name": "progress",
|
||||
"type": "integer",
|
||||
"primaryKey": false,
|
||||
"notNull": false,
|
||||
"autoincrement": false,
|
||||
"default": 0
|
||||
},
|
||||
"total_files": {
|
||||
"name": "total_files",
|
||||
"type": "integer",
|
||||
"primaryKey": false,
|
||||
"notNull": false,
|
||||
"autoincrement": false,
|
||||
"default": 0
|
||||
},
|
||||
"processed_files": {
|
||||
"name": "processed_files",
|
||||
"type": "integer",
|
||||
"primaryKey": false,
|
||||
"notNull": false,
|
||||
"autoincrement": false,
|
||||
"default": 0
|
||||
},
|
||||
"error": {
|
||||
"name": "error",
|
||||
"type": "text",
|
||||
"primaryKey": false,
|
||||
"notNull": false,
|
||||
"autoincrement": false
|
||||
},
|
||||
"started_at": {
|
||||
"name": "started_at",
|
||||
"type": "integer",
|
||||
"primaryKey": false,
|
||||
"notNull": false,
|
||||
"autoincrement": false
|
||||
},
|
||||
"completed_at": {
|
||||
"name": "completed_at",
|
||||
"type": "integer",
|
||||
"primaryKey": false,
|
||||
"notNull": false,
|
||||
"autoincrement": false
|
||||
},
|
||||
"created_at": {
|
||||
"name": "created_at",
|
||||
"type": "integer",
|
||||
"primaryKey": false,
|
||||
"notNull": true,
|
||||
"autoincrement": false
|
||||
}
|
||||
},
|
||||
"indexes": {},
|
||||
"foreignKeys": {
|
||||
"indexing_jobs_repository_id_repositories_id_fk": {
|
||||
"name": "indexing_jobs_repository_id_repositories_id_fk",
|
||||
"tableFrom": "indexing_jobs",
|
||||
"tableTo": "repositories",
|
||||
"columnsFrom": [
|
||||
"repository_id"
|
||||
],
|
||||
"columnsTo": [
|
||||
"id"
|
||||
],
|
||||
"onDelete": "cascade",
|
||||
"onUpdate": "no action"
|
||||
}
|
||||
},
|
||||
"compositePrimaryKeys": {},
|
||||
"uniqueConstraints": {},
|
||||
"checkConstraints": {}
|
||||
},
|
||||
"repositories": {
|
||||
"name": "repositories",
|
||||
"columns": {
|
||||
"id": {
|
||||
"name": "id",
|
||||
"type": "text",
|
||||
"primaryKey": true,
|
||||
"notNull": true,
|
||||
"autoincrement": false
|
||||
},
|
||||
"title": {
|
||||
"name": "title",
|
||||
"type": "text",
|
||||
"primaryKey": false,
|
||||
"notNull": true,
|
||||
"autoincrement": false
|
||||
},
|
||||
"description": {
|
||||
"name": "description",
|
||||
"type": "text",
|
||||
"primaryKey": false,
|
||||
"notNull": false,
|
||||
"autoincrement": false
|
||||
},
|
||||
"source": {
|
||||
"name": "source",
|
||||
"type": "text",
|
||||
"primaryKey": false,
|
||||
"notNull": true,
|
||||
"autoincrement": false
|
||||
},
|
||||
"source_url": {
|
||||
"name": "source_url",
|
||||
"type": "text",
|
||||
"primaryKey": false,
|
||||
"notNull": true,
|
||||
"autoincrement": false
|
||||
},
|
||||
"branch": {
|
||||
"name": "branch",
|
||||
"type": "text",
|
||||
"primaryKey": false,
|
||||
"notNull": false,
|
||||
"autoincrement": false,
|
||||
"default": "'main'"
|
||||
},
|
||||
"state": {
|
||||
"name": "state",
|
||||
"type": "text",
|
||||
"primaryKey": false,
|
||||
"notNull": true,
|
||||
"autoincrement": false,
|
||||
"default": "'pending'"
|
||||
},
|
||||
"total_snippets": {
|
||||
"name": "total_snippets",
|
||||
"type": "integer",
|
||||
"primaryKey": false,
|
||||
"notNull": false,
|
||||
"autoincrement": false,
|
||||
"default": 0
|
||||
},
|
||||
"total_tokens": {
|
||||
"name": "total_tokens",
|
||||
"type": "integer",
|
||||
"primaryKey": false,
|
||||
"notNull": false,
|
||||
"autoincrement": false,
|
||||
"default": 0
|
||||
},
|
||||
"trust_score": {
|
||||
"name": "trust_score",
|
||||
"type": "real",
|
||||
"primaryKey": false,
|
||||
"notNull": false,
|
||||
"autoincrement": false,
|
||||
"default": 0
|
||||
},
|
||||
"benchmark_score": {
|
||||
"name": "benchmark_score",
|
||||
"type": "real",
|
||||
"primaryKey": false,
|
||||
"notNull": false,
|
||||
"autoincrement": false,
|
||||
"default": 0
|
||||
},
|
||||
"stars": {
|
||||
"name": "stars",
|
||||
"type": "integer",
|
||||
"primaryKey": false,
|
||||
"notNull": false,
|
||||
"autoincrement": false
|
||||
},
|
||||
"github_token": {
|
||||
"name": "github_token",
|
||||
"type": "text",
|
||||
"primaryKey": false,
|
||||
"notNull": false,
|
||||
"autoincrement": false
|
||||
},
|
||||
"last_indexed_at": {
|
||||
"name": "last_indexed_at",
|
||||
"type": "integer",
|
||||
"primaryKey": false,
|
||||
"notNull": false,
|
||||
"autoincrement": false
|
||||
},
|
||||
"created_at": {
|
||||
"name": "created_at",
|
||||
"type": "integer",
|
||||
"primaryKey": false,
|
||||
"notNull": true,
|
||||
"autoincrement": false
|
||||
},
|
||||
"updated_at": {
|
||||
"name": "updated_at",
|
||||
"type": "integer",
|
||||
"primaryKey": false,
|
||||
"notNull": true,
|
||||
"autoincrement": false
|
||||
}
|
||||
},
|
||||
"indexes": {},
|
||||
"foreignKeys": {},
|
||||
"compositePrimaryKeys": {},
|
||||
"uniqueConstraints": {},
|
||||
"checkConstraints": {}
|
||||
},
|
||||
"repository_configs": {
|
||||
"name": "repository_configs",
|
||||
"columns": {
|
||||
"repository_id": {
|
||||
"name": "repository_id",
|
||||
"type": "text",
|
||||
"primaryKey": true,
|
||||
"notNull": true,
|
||||
"autoincrement": false
|
||||
},
|
||||
"project_title": {
|
||||
"name": "project_title",
|
||||
"type": "text",
|
||||
"primaryKey": false,
|
||||
"notNull": false,
|
||||
"autoincrement": false
|
||||
},
|
||||
"description": {
|
||||
"name": "description",
|
||||
"type": "text",
|
||||
"primaryKey": false,
|
||||
"notNull": false,
|
||||
"autoincrement": false
|
||||
},
|
||||
"folders": {
|
||||
"name": "folders",
|
||||
"type": "text",
|
||||
"primaryKey": false,
|
||||
"notNull": false,
|
||||
"autoincrement": false
|
||||
},
|
||||
"exclude_folders": {
|
||||
"name": "exclude_folders",
|
||||
"type": "text",
|
||||
"primaryKey": false,
|
||||
"notNull": false,
|
||||
"autoincrement": false
|
||||
},
|
||||
"exclude_files": {
|
||||
"name": "exclude_files",
|
||||
"type": "text",
|
||||
"primaryKey": false,
|
||||
"notNull": false,
|
||||
"autoincrement": false
|
||||
},
|
||||
"rules": {
|
||||
"name": "rules",
|
||||
"type": "text",
|
||||
"primaryKey": false,
|
||||
"notNull": false,
|
||||
"autoincrement": false
|
||||
},
|
||||
"previous_versions": {
|
||||
"name": "previous_versions",
|
||||
"type": "text",
|
||||
"primaryKey": false,
|
||||
"notNull": false,
|
||||
"autoincrement": false
|
||||
},
|
||||
"updated_at": {
|
||||
"name": "updated_at",
|
||||
"type": "integer",
|
||||
"primaryKey": false,
|
||||
"notNull": true,
|
||||
"autoincrement": false
|
||||
}
|
||||
},
|
||||
"indexes": {},
|
||||
"foreignKeys": {
|
||||
"repository_configs_repository_id_repositories_id_fk": {
|
||||
"name": "repository_configs_repository_id_repositories_id_fk",
|
||||
"tableFrom": "repository_configs",
|
||||
"tableTo": "repositories",
|
||||
"columnsFrom": [
|
||||
"repository_id"
|
||||
],
|
||||
"columnsTo": [
|
||||
"id"
|
||||
],
|
||||
"onDelete": "cascade",
|
||||
"onUpdate": "no action"
|
||||
}
|
||||
},
|
||||
"compositePrimaryKeys": {},
|
||||
"uniqueConstraints": {},
|
||||
"checkConstraints": {}
|
||||
},
|
||||
"repository_versions": {
|
||||
"name": "repository_versions",
|
||||
"columns": {
|
||||
"id": {
|
||||
"name": "id",
|
||||
"type": "text",
|
||||
"primaryKey": true,
|
||||
"notNull": true,
|
||||
"autoincrement": false
|
||||
},
|
||||
"repository_id": {
|
||||
"name": "repository_id",
|
||||
"type": "text",
|
||||
"primaryKey": false,
|
||||
"notNull": true,
|
||||
"autoincrement": false
|
||||
},
|
||||
"tag": {
|
||||
"name": "tag",
|
||||
"type": "text",
|
||||
"primaryKey": false,
|
||||
"notNull": true,
|
||||
"autoincrement": false
|
||||
},
|
||||
"title": {
|
||||
"name": "title",
|
||||
"type": "text",
|
||||
"primaryKey": false,
|
||||
"notNull": false,
|
||||
"autoincrement": false
|
||||
},
|
||||
"commit_hash": {
|
||||
"name": "commit_hash",
|
||||
"type": "text",
|
||||
"primaryKey": false,
|
||||
"notNull": false,
|
||||
"autoincrement": false
|
||||
},
|
||||
"state": {
|
||||
"name": "state",
|
||||
"type": "text",
|
||||
"primaryKey": false,
|
||||
"notNull": true,
|
||||
"autoincrement": false,
|
||||
"default": "'pending'"
|
||||
},
|
||||
"total_snippets": {
|
||||
"name": "total_snippets",
|
||||
"type": "integer",
|
||||
"primaryKey": false,
|
||||
"notNull": false,
|
||||
"autoincrement": false,
|
||||
"default": 0
|
||||
},
|
||||
"indexed_at": {
|
||||
"name": "indexed_at",
|
||||
"type": "integer",
|
||||
"primaryKey": false,
|
||||
"notNull": false,
|
||||
"autoincrement": false
|
||||
},
|
||||
"created_at": {
|
||||
"name": "created_at",
|
||||
"type": "integer",
|
||||
"primaryKey": false,
|
||||
"notNull": true,
|
||||
"autoincrement": false
|
||||
}
|
||||
},
|
||||
"indexes": {},
|
||||
"foreignKeys": {
|
||||
"repository_versions_repository_id_repositories_id_fk": {
|
||||
"name": "repository_versions_repository_id_repositories_id_fk",
|
||||
"tableFrom": "repository_versions",
|
||||
"tableTo": "repositories",
|
||||
"columnsFrom": [
|
||||
"repository_id"
|
||||
],
|
||||
"columnsTo": [
|
||||
"id"
|
||||
],
|
||||
"onDelete": "cascade",
|
||||
"onUpdate": "no action"
|
||||
}
|
||||
},
|
||||
"compositePrimaryKeys": {},
|
||||
"uniqueConstraints": {},
|
||||
"checkConstraints": {}
|
||||
},
|
||||
"settings": {
|
||||
"name": "settings",
|
||||
"columns": {
|
||||
"key": {
|
||||
"name": "key",
|
||||
"type": "text",
|
||||
"primaryKey": true,
|
||||
"notNull": true,
|
||||
"autoincrement": false
|
||||
},
|
||||
"value": {
|
||||
"name": "value",
|
||||
"type": "text",
|
||||
"primaryKey": false,
|
||||
"notNull": false,
|
||||
"autoincrement": false
|
||||
},
|
||||
"updated_at": {
|
||||
"name": "updated_at",
|
||||
"type": "integer",
|
||||
"primaryKey": false,
|
||||
"notNull": true,
|
||||
"autoincrement": false
|
||||
}
|
||||
},
|
||||
"indexes": {},
|
||||
"foreignKeys": {},
|
||||
"compositePrimaryKeys": {},
|
||||
"uniqueConstraints": {},
|
||||
"checkConstraints": {}
|
||||
},
|
||||
"snippet_embeddings": {
|
||||
"name": "snippet_embeddings",
|
||||
"columns": {
|
||||
"snippet_id": {
|
||||
"name": "snippet_id",
|
||||
"type": "text",
|
||||
"primaryKey": false,
|
||||
"notNull": true,
|
||||
"autoincrement": false
|
||||
},
|
||||
"profile_id": {
|
||||
"name": "profile_id",
|
||||
"type": "text",
|
||||
"primaryKey": false,
|
||||
"notNull": true,
|
||||
"autoincrement": false
|
||||
},
|
||||
"model": {
|
||||
"name": "model",
|
||||
"type": "text",
|
||||
"primaryKey": false,
|
||||
"notNull": true,
|
||||
"autoincrement": false
|
||||
},
|
||||
"dimensions": {
|
||||
"name": "dimensions",
|
||||
"type": "integer",
|
||||
"primaryKey": false,
|
||||
"notNull": true,
|
||||
"autoincrement": false
|
||||
},
|
||||
"embedding": {
|
||||
"name": "embedding",
|
||||
"type": "blob",
|
||||
"primaryKey": false,
|
||||
"notNull": true,
|
||||
"autoincrement": false
|
||||
},
|
||||
"created_at": {
|
||||
"name": "created_at",
|
||||
"type": "integer",
|
||||
"primaryKey": false,
|
||||
"notNull": true,
|
||||
"autoincrement": false
|
||||
}
|
||||
},
|
||||
"indexes": {},
|
||||
"foreignKeys": {
|
||||
"snippet_embeddings_snippet_id_snippets_id_fk": {
|
||||
"name": "snippet_embeddings_snippet_id_snippets_id_fk",
|
||||
"tableFrom": "snippet_embeddings",
|
||||
"tableTo": "snippets",
|
||||
"columnsFrom": [
|
||||
"snippet_id"
|
||||
],
|
||||
"columnsTo": [
|
||||
"id"
|
||||
],
|
||||
"onDelete": "cascade",
|
||||
"onUpdate": "no action"
|
||||
},
|
||||
"snippet_embeddings_profile_id_embedding_profiles_id_fk": {
|
||||
"name": "snippet_embeddings_profile_id_embedding_profiles_id_fk",
|
||||
"tableFrom": "snippet_embeddings",
|
||||
"tableTo": "embedding_profiles",
|
||||
"columnsFrom": [
|
||||
"profile_id"
|
||||
],
|
||||
"columnsTo": [
|
||||
"id"
|
||||
],
|
||||
"onDelete": "cascade",
|
||||
"onUpdate": "no action"
|
||||
}
|
||||
},
|
||||
"compositePrimaryKeys": {
|
||||
"snippet_embeddings_snippet_id_profile_id_pk": {
|
||||
"columns": [
|
||||
"snippet_id",
|
||||
"profile_id"
|
||||
],
|
||||
"name": "snippet_embeddings_snippet_id_profile_id_pk"
|
||||
}
|
||||
},
|
||||
"uniqueConstraints": {},
|
||||
"checkConstraints": {}
|
||||
},
|
||||
"snippets": {
|
||||
"name": "snippets",
|
||||
"columns": {
|
||||
"id": {
|
||||
"name": "id",
|
||||
"type": "text",
|
||||
"primaryKey": true,
|
||||
"notNull": true,
|
||||
"autoincrement": false
|
||||
},
|
||||
"document_id": {
|
||||
"name": "document_id",
|
||||
"type": "text",
|
||||
"primaryKey": false,
|
||||
"notNull": true,
|
||||
"autoincrement": false
|
||||
},
|
||||
"repository_id": {
|
||||
"name": "repository_id",
|
||||
"type": "text",
|
||||
"primaryKey": false,
|
||||
"notNull": true,
|
||||
"autoincrement": false
|
||||
},
|
||||
"version_id": {
|
||||
"name": "version_id",
|
||||
"type": "text",
|
||||
"primaryKey": false,
|
||||
"notNull": false,
|
||||
"autoincrement": false
|
||||
},
|
||||
"type": {
|
||||
"name": "type",
|
||||
"type": "text",
|
||||
"primaryKey": false,
|
||||
"notNull": true,
|
||||
"autoincrement": false
|
||||
},
|
||||
"title": {
|
||||
"name": "title",
|
||||
"type": "text",
|
||||
"primaryKey": false,
|
||||
"notNull": false,
|
||||
"autoincrement": false
|
||||
},
|
||||
"content": {
|
||||
"name": "content",
|
||||
"type": "text",
|
||||
"primaryKey": false,
|
||||
"notNull": true,
|
||||
"autoincrement": false
|
||||
},
|
||||
"language": {
|
||||
"name": "language",
|
||||
"type": "text",
|
||||
"primaryKey": false,
|
||||
"notNull": false,
|
||||
"autoincrement": false
|
||||
},
|
||||
"breadcrumb": {
|
||||
"name": "breadcrumb",
|
||||
"type": "text",
|
||||
"primaryKey": false,
|
||||
"notNull": false,
|
||||
"autoincrement": false
|
||||
},
|
||||
"token_count": {
|
||||
"name": "token_count",
|
||||
"type": "integer",
|
||||
"primaryKey": false,
|
||||
"notNull": false,
|
||||
"autoincrement": false,
|
||||
"default": 0
|
||||
},
|
||||
"created_at": {
|
||||
"name": "created_at",
|
||||
"type": "integer",
|
||||
"primaryKey": false,
|
||||
"notNull": true,
|
||||
"autoincrement": false
|
||||
}
|
||||
},
|
||||
"indexes": {},
|
||||
"foreignKeys": {
|
||||
"snippets_document_id_documents_id_fk": {
|
||||
"name": "snippets_document_id_documents_id_fk",
|
||||
"tableFrom": "snippets",
|
||||
"tableTo": "documents",
|
||||
"columnsFrom": [
|
||||
"document_id"
|
||||
],
|
||||
"columnsTo": [
|
||||
"id"
|
||||
],
|
||||
"onDelete": "cascade",
|
||||
"onUpdate": "no action"
|
||||
},
|
||||
"snippets_repository_id_repositories_id_fk": {
|
||||
"name": "snippets_repository_id_repositories_id_fk",
|
||||
"tableFrom": "snippets",
|
||||
"tableTo": "repositories",
|
||||
"columnsFrom": [
|
||||
"repository_id"
|
||||
],
|
||||
"columnsTo": [
|
||||
"id"
|
||||
],
|
||||
"onDelete": "cascade",
|
||||
"onUpdate": "no action"
|
||||
},
|
||||
"snippets_version_id_repository_versions_id_fk": {
|
||||
"name": "snippets_version_id_repository_versions_id_fk",
|
||||
"tableFrom": "snippets",
|
||||
"tableTo": "repository_versions",
|
||||
"columnsFrom": [
|
||||
"version_id"
|
||||
],
|
||||
"columnsTo": [
|
||||
"id"
|
||||
],
|
||||
"onDelete": "cascade",
|
||||
"onUpdate": "no action"
|
||||
}
|
||||
},
|
||||
"compositePrimaryKeys": {},
|
||||
"uniqueConstraints": {},
|
||||
"checkConstraints": {}
|
||||
}
|
||||
},
|
||||
"views": {},
|
||||
"enums": {},
|
||||
"_meta": {
|
||||
"schemas": {},
|
||||
"tables": {},
|
||||
"columns": {}
|
||||
},
|
||||
"internal": {
|
||||
"indexes": {}
|
||||
}
|
||||
}
|
||||
@@ -15,6 +15,13 @@
|
||||
"when": 1774448049161,
|
||||
"tag": "0001_quick_nighthawk",
|
||||
"breakpoints": true
|
||||
},
|
||||
{
|
||||
"idx": 2,
|
||||
"version": "6",
|
||||
"when": 1774461897742,
|
||||
"tag": "0002_silky_stellaris",
|
||||
"breakpoints": true
|
||||
}
|
||||
]
|
||||
}
|
||||
@@ -1,4 +1,4 @@
|
||||
import { blob, integer, real, sqliteTable, text } from 'drizzle-orm/sqlite-core';
|
||||
import { blob, integer, primaryKey, real, sqliteTable, text } from 'drizzle-orm/sqlite-core';
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// repositories
|
||||
@@ -86,18 +86,41 @@ export const snippets = sqliteTable('snippets', {
|
||||
createdAt: integer('created_at', { mode: 'timestamp' }).notNull()
|
||||
});
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// embedding_profiles
|
||||
// ---------------------------------------------------------------------------
|
||||
export const embeddingProfiles = sqliteTable('embedding_profiles', {
|
||||
id: text('id').primaryKey(),
|
||||
providerKind: text('provider_kind').notNull(),
|
||||
title: text('title').notNull(),
|
||||
enabled: integer('enabled', { mode: 'boolean' }).notNull().default(true),
|
||||
isDefault: integer('is_default', { mode: 'boolean' }).notNull().default(false),
|
||||
model: text('model').notNull(),
|
||||
dimensions: integer('dimensions').notNull(),
|
||||
config: text('config', { mode: 'json' }).notNull().$type<Record<string, unknown>>(),
|
||||
createdAt: integer('created_at').notNull(),
|
||||
updatedAt: integer('updated_at').notNull()
|
||||
});
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// snippet_embeddings
|
||||
// ---------------------------------------------------------------------------
|
||||
export const snippetEmbeddings = sqliteTable('snippet_embeddings', {
|
||||
snippetId: text('snippet_id')
|
||||
.primaryKey()
|
||||
.references(() => snippets.id, { onDelete: 'cascade' }),
|
||||
model: text('model').notNull(), // embedding model identifier
|
||||
dimensions: integer('dimensions').notNull(),
|
||||
embedding: blob('embedding').notNull(), // Float32Array as binary blob
|
||||
createdAt: integer('created_at', { mode: 'timestamp' }).notNull()
|
||||
});
|
||||
export const snippetEmbeddings = sqliteTable(
|
||||
'snippet_embeddings',
|
||||
{
|
||||
snippetId: text('snippet_id')
|
||||
.notNull()
|
||||
.references(() => snippets.id, { onDelete: 'cascade' }),
|
||||
profileId: text('profile_id')
|
||||
.notNull()
|
||||
.references(() => embeddingProfiles.id, { onDelete: 'cascade' }),
|
||||
model: text('model').notNull(), // embedding model identifier
|
||||
dimensions: integer('dimensions').notNull(),
|
||||
embedding: blob('embedding').notNull(), // Float32Array as binary blob
|
||||
createdAt: integer('created_at').notNull()
|
||||
},
|
||||
(table) => [primaryKey({ columns: [table.snippetId, table.profileId] })]
|
||||
);
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// indexing_jobs
|
||||
@@ -165,6 +188,9 @@ export type NewDocument = typeof documents.$inferInsert;
|
||||
export type Snippet = typeof snippets.$inferSelect;
|
||||
export type NewSnippet = typeof snippets.$inferInsert;
|
||||
|
||||
export type EmbeddingProfile = typeof embeddingProfiles.$inferSelect;
|
||||
export type NewEmbeddingProfile = typeof embeddingProfiles.$inferInsert;
|
||||
|
||||
export type SnippetEmbedding = typeof snippetEmbeddings.$inferSelect;
|
||||
export type NewSnippetEmbedding = typeof snippetEmbeddings.$inferInsert;
|
||||
|
||||
|
||||
@@ -248,6 +248,99 @@ describe('OpenAIEmbeddingProvider', () => {
|
||||
});
|
||||
});
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Migration Tests — embedding_profiles table
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
describe('Migration — embedding_profiles', () => {
|
||||
it('creates the embedding_profiles table', () => {
|
||||
const { client } = createTestDb();
|
||||
const tables = client
|
||||
.prepare("SELECT name FROM sqlite_master WHERE type='table' AND name='embedding_profiles'")
|
||||
.all();
|
||||
expect(tables).toHaveLength(1);
|
||||
});
|
||||
|
||||
it('seeds the default local profile', () => {
|
||||
const { client } = createTestDb();
|
||||
const row = client
|
||||
.prepare("SELECT * FROM embedding_profiles WHERE id = 'local-default'")
|
||||
.get() as any;
|
||||
expect(row).toBeDefined();
|
||||
expect(row.is_default).toBe(1);
|
||||
expect(row.provider_kind).toBe('local-transformers');
|
||||
expect(row.model).toBe('Xenova/all-MiniLM-L6-v2');
|
||||
expect(row.dimensions).toBe(384);
|
||||
});
|
||||
});
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Provider Registry Tests
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
describe('Provider Registry', () => {
|
||||
it('creates LocalEmbeddingProvider for local-transformers', () => {
|
||||
const { createProviderFromProfile } = require('./registry.js');
|
||||
const profile: schema.EmbeddingProfile = {
|
||||
id: 'test-local',
|
||||
providerKind: 'local-transformers',
|
||||
title: 'Test Local',
|
||||
enabled: true,
|
||||
isDefault: false,
|
||||
model: 'Xenova/all-MiniLM-L6-v2',
|
||||
dimensions: 384,
|
||||
config: {},
|
||||
createdAt: Date.now(),
|
||||
updatedAt: Date.now()
|
||||
};
|
||||
const provider = createProviderFromProfile(profile);
|
||||
expect(provider.name).toBe('local');
|
||||
expect(provider.model).toBe('Xenova/all-MiniLM-L6-v2');
|
||||
expect(provider.dimensions).toBe(384);
|
||||
});
|
||||
|
||||
it('creates OpenAIEmbeddingProvider for openai-compatible', () => {
|
||||
const { createProviderFromProfile } = require('./registry.js');
|
||||
const profile: schema.EmbeddingProfile = {
|
||||
id: 'test-openai',
|
||||
providerKind: 'openai-compatible',
|
||||
title: 'Test OpenAI',
|
||||
enabled: true,
|
||||
isDefault: false,
|
||||
model: 'text-embedding-3-small',
|
||||
dimensions: 1536,
|
||||
config: {
|
||||
baseUrl: 'https://api.openai.com/v1',
|
||||
apiKey: 'test-key',
|
||||
model: 'text-embedding-3-small'
|
||||
},
|
||||
createdAt: Date.now(),
|
||||
updatedAt: Date.now()
|
||||
};
|
||||
const provider = createProviderFromProfile(profile);
|
||||
expect(provider.name).toBe('openai');
|
||||
expect(provider.model).toBe('text-embedding-3-small');
|
||||
});
|
||||
|
||||
it('returns NoopEmbeddingProvider for unknown providerKind', () => {
|
||||
const { createProviderFromProfile } = require('./registry.js');
|
||||
const profile: schema.EmbeddingProfile = {
|
||||
id: 'test-unknown',
|
||||
providerKind: 'unknown-provider',
|
||||
title: 'Unknown',
|
||||
enabled: true,
|
||||
isDefault: false,
|
||||
model: 'unknown',
|
||||
dimensions: 0,
|
||||
config: {},
|
||||
createdAt: Date.now(),
|
||||
updatedAt: Date.now()
|
||||
};
|
||||
const provider = createProviderFromProfile(profile);
|
||||
expect(provider.name).toBe('noop');
|
||||
});
|
||||
});
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// EmbeddingService — storage logic
|
||||
// ---------------------------------------------------------------------------
|
||||
@@ -281,23 +374,36 @@ describe('EmbeddingService', () => {
|
||||
it('stores embeddings in snippet_embeddings table', async () => {
|
||||
const snippetId = seedSnippet(db, client);
|
||||
const provider = makeProvider(4);
|
||||
const service = new EmbeddingService(client, provider);
|
||||
const service = new EmbeddingService(client, provider, 'test-profile');
|
||||
|
||||
await service.embedSnippets([snippetId]);
|
||||
|
||||
const rows = client.prepare('SELECT * FROM snippet_embeddings WHERE snippet_id = ?').all(snippetId);
|
||||
const rows = client
|
||||
.prepare('SELECT * FROM snippet_embeddings WHERE snippet_id = ? AND profile_id = ?')
|
||||
.all(snippetId, 'test-profile');
|
||||
expect(rows).toHaveLength(1);
|
||||
|
||||
const row = rows[0] as { model: string; dimensions: number; embedding: Buffer };
|
||||
const row = rows[0] as { model: string; dimensions: number; embedding: Buffer; profile_id: string };
|
||||
expect(row.model).toBe('test-model');
|
||||
expect(row.dimensions).toBe(4);
|
||||
expect(row.profile_id).toBe('test-profile');
|
||||
expect(row.embedding).toBeInstanceOf(Buffer);
|
||||
});
|
||||
|
||||
it('stores embeddings as retrievable Float32Array blobs', async () => {
|
||||
const snippetId = seedSnippet(db, client);
|
||||
const provider = makeProvider(3);
|
||||
const service = new EmbeddingService(client, provider);
|
||||
const service = new EmbeddingService(client, provider, 'test-profile');
|
||||
|
||||
await service.embedSnippets([snippetId]);
|
||||
|
||||
const embedding = service.getEmbedding(snippetId, 'test-profile');
|
||||
expect(embedding).toBeInstanceOf(Float32Array);
|
||||
expect(embedding).toHaveLength(3);
|
||||
expect(embedding![0]).toBeCloseTo(0.0, 5);
|
||||
expect(embedding![1]).toBeCloseTo(0.1, 5);
|
||||
expect(embedding![2]).toBeCloseTo(0.2, 5);
|
||||
});
|
||||
|
||||
await service.embedSnippets([snippetId]);
|
||||
|
||||
|
||||
@@ -19,7 +19,8 @@ const TEXT_MAX_CHARS = 2048;
|
||||
export class EmbeddingService {
|
||||
constructor(
|
||||
private readonly db: Database.Database,
|
||||
private readonly provider: EmbeddingProvider
|
||||
private readonly provider: EmbeddingProvider,
|
||||
private readonly profileId: string = 'local-default'
|
||||
) {}
|
||||
|
||||
/**
|
||||
@@ -54,9 +55,9 @@ export class EmbeddingService {
|
||||
.slice(0, TEXT_MAX_CHARS)
|
||||
);
|
||||
|
||||
const insert = this.db.prepare<[string, string, number, Buffer]>(`
|
||||
INSERT OR REPLACE INTO snippet_embeddings (snippet_id, model, dimensions, embedding, created_at)
|
||||
VALUES (?, ?, ?, ?, unixepoch())
|
||||
const insert = this.db.prepare<[string, string, string, number, Buffer]>(`
|
||||
INSERT OR REPLACE INTO snippet_embeddings (snippet_id, profile_id, model, dimensions, embedding, created_at)
|
||||
VALUES (?, ?, ?, ?, ?, unixepoch())
|
||||
`);
|
||||
|
||||
for (let i = 0; i < snippets.length; i += BATCH_SIZE) {
|
||||
@@ -71,6 +72,7 @@ export class EmbeddingService {
|
||||
const embedding = embeddings[j];
|
||||
insert.run(
|
||||
snippet.id,
|
||||
this.profileId,
|
||||
embedding.model,
|
||||
embedding.dimensions,
|
||||
Buffer.from(embedding.values.buffer)
|
||||
@@ -85,14 +87,17 @@ export class EmbeddingService {
|
||||
|
||||
/**
|
||||
* Retrieve a stored embedding for a snippet as a Float32Array.
|
||||
* Returns null when no embedding has been stored for the given snippet.
|
||||
* Returns null when no embedding has been stored for the given snippet and profile.
|
||||
*
|
||||
* @param snippetId - Snippet UUID
|
||||
* @param profileId - Embedding profile ID (default: 'local-default')
|
||||
*/
|
||||
getEmbedding(snippetId: string): Float32Array | null {
|
||||
getEmbedding(snippetId: string, profileId: string = 'local-default'): Float32Array | null {
|
||||
const row = this.db
|
||||
.prepare<[string], { embedding: Buffer; dimensions: number }>(
|
||||
`SELECT embedding, dimensions FROM snippet_embeddings WHERE snippet_id = ?`
|
||||
.prepare<[string, string], { embedding: Buffer; dimensions: number }>(
|
||||
`SELECT embedding, dimensions FROM snippet_embeddings WHERE snippet_id = ? AND profile_id = ?`
|
||||
)
|
||||
.get(snippetId);
|
||||
.get(snippetId, profileId);
|
||||
|
||||
if (!row) return null;
|
||||
|
||||
|
||||
@@ -1,5 +1,9 @@
|
||||
/**
|
||||
* Factory — create an EmbeddingProvider from a persisted EmbeddingConfig.
|
||||
*
|
||||
* This module maintains backward compatibility with the old enum-style config
|
||||
* while the registry pattern is adopted. Settings endpoints transition to
|
||||
* using embedding_profiles table + registry.ts directly.
|
||||
*/
|
||||
|
||||
import type { EmbeddingProvider } from './provider.js';
|
||||
@@ -7,6 +11,9 @@ import { NoopEmbeddingProvider } from './provider.js';
|
||||
import { OpenAIEmbeddingProvider } from './openai.provider.js';
|
||||
import { LocalEmbeddingProvider } from './local.provider.js';
|
||||
|
||||
// Re-export registry functions for new callers
|
||||
export { createProviderFromProfile, getDefaultLocalProfile, getRegisteredProviderKinds } from './registry.js';
|
||||
|
||||
export interface EmbeddingConfig {
|
||||
provider: 'openai' | 'local' | 'none';
|
||||
openai?: {
|
||||
|
||||
64
src/lib/server/embeddings/registry.ts
Normal file
64
src/lib/server/embeddings/registry.ts
Normal file
@@ -0,0 +1,64 @@
|
||||
/**
|
||||
* Provider Registry — map providerKind to EmbeddingProvider instances.
|
||||
*
|
||||
* Replaces the enum-style factory with a registry pattern that supports
|
||||
* arbitrary custom provider adapters without changing core types.
|
||||
*/
|
||||
|
||||
import type { EmbeddingProvider } from './provider.js';
|
||||
import { NoopEmbeddingProvider } from './provider.js';
|
||||
import { OpenAIEmbeddingProvider } from './openai.provider.js';
|
||||
import { LocalEmbeddingProvider } from './local.provider.js';
|
||||
import type { EmbeddingProfile } from '../db/schema.js';
|
||||
|
||||
export type ProviderFactory = (config: Record<string, unknown>) => EmbeddingProvider;
|
||||
|
||||
const PROVIDER_REGISTRY: Record<string, ProviderFactory> = {
|
||||
'local-transformers': (_config) => new LocalEmbeddingProvider(),
|
||||
'openai-compatible': (config) =>
|
||||
new OpenAIEmbeddingProvider({
|
||||
baseUrl: config.baseUrl as string,
|
||||
apiKey: config.apiKey as string,
|
||||
model: config.model as string,
|
||||
dimensions: config.dimensions as number | undefined,
|
||||
maxBatchSize: config.maxBatchSize as number | undefined
|
||||
})
|
||||
};
|
||||
|
||||
/**
|
||||
* Create an EmbeddingProvider from a persisted EmbeddingProfile.
|
||||
*
|
||||
* Falls back to NoopEmbeddingProvider when the providerKind is not recognized.
|
||||
*/
|
||||
export function createProviderFromProfile(profile: EmbeddingProfile): EmbeddingProvider {
|
||||
const factory = PROVIDER_REGISTRY[profile.providerKind];
|
||||
if (!factory) return new NoopEmbeddingProvider();
|
||||
const config = (profile.config as Record<string, unknown>) ?? {};
|
||||
return factory(config);
|
||||
}
|
||||
|
||||
/**
|
||||
* Return metadata for the default local profile.
|
||||
*
|
||||
* Used by migration seeds and runtime defaults.
|
||||
*/
|
||||
export function getDefaultLocalProfile(): Pick<
|
||||
EmbeddingProfile,
|
||||
'id' | 'providerKind' | 'model' | 'dimensions'
|
||||
> {
|
||||
return {
|
||||
id: 'local-default',
|
||||
providerKind: 'local-transformers',
|
||||
model: 'Xenova/all-MiniLM-L6-v2',
|
||||
dimensions: 384
|
||||
};
|
||||
}
|
||||
|
||||
/**
|
||||
* Return all registered providerKind values.
|
||||
*
|
||||
* Useful for settings UI validation and provider discovery.
|
||||
*/
|
||||
export function getRegisteredProviderKinds(): string[] {
|
||||
return Object.keys(PROVIDER_REGISTRY);
|
||||
}
|
||||
@@ -25,16 +25,18 @@ function createTestDb(): Database.Database {
|
||||
client.pragma('foreign_keys = ON');
|
||||
|
||||
const migrationsFolder = join(import.meta.dirname, '../db/migrations');
|
||||
const migrationSql = readFileSync(
|
||||
join(migrationsFolder, '0000_large_master_chief.sql'),
|
||||
'utf-8'
|
||||
);
|
||||
const statements = migrationSql
|
||||
.split('--> statement-breakpoint')
|
||||
.map((s) => s.trim())
|
||||
.filter(Boolean);
|
||||
for (const stmt of statements) {
|
||||
client.exec(stmt);
|
||||
|
||||
// Run all migrations in order
|
||||
const migrations = ['0000_large_master_chief.sql', '0001_quick_nighthawk.sql', '0002_silky_stellaris.sql'];
|
||||
for (const migrationFile of migrations) {
|
||||
const migrationSql = readFileSync(join(migrationsFolder, migrationFile), 'utf-8');
|
||||
const statements = migrationSql
|
||||
.split('--> statement-breakpoint')
|
||||
.map((s) => s.trim())
|
||||
.filter(Boolean);
|
||||
for (const stmt of statements) {
|
||||
client.exec(stmt);
|
||||
}
|
||||
}
|
||||
|
||||
const ftsSql = readFileSync(join(import.meta.dirname, '../db/fts.sql'), 'utf-8');
|
||||
@@ -104,16 +106,17 @@ function seedEmbedding(
|
||||
client: Database.Database,
|
||||
snippetId: string,
|
||||
values: number[],
|
||||
profileId = 'local-default',
|
||||
model = 'test-model'
|
||||
): void {
|
||||
const f32 = new Float32Array(values);
|
||||
client
|
||||
.prepare(
|
||||
`INSERT OR REPLACE INTO snippet_embeddings
|
||||
(snippet_id, model, dimensions, embedding, created_at)
|
||||
VALUES (?, ?, ?, ?, ?)`
|
||||
(snippet_id, profile_id, model, dimensions, embedding, created_at)
|
||||
VALUES (?, ?, ?, ?, ?, ?)`
|
||||
)
|
||||
.run(snippetId, model, values.length, Buffer.from(f32.buffer), NOW_S);
|
||||
.run(snippetId, profileId, model, values.length, Buffer.from(f32.buffer), NOW_S);
|
||||
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
@@ -621,4 +624,203 @@ describe('HybridSearchService', () => {
|
||||
const results = await svc.search('default alpha hybrid', { repositoryId: repoId });
|
||||
expect(Array.isArray(results)).toBe(true);
|
||||
});
|
||||
|
||||
it('filters by versionId — excludes snippets from other versions', async () => {
|
||||
const client = createTestDb();
|
||||
const repoId = seedRepo(client);
|
||||
const docId = seedDocument(client, repoId);
|
||||
|
||||
// Create two versions
|
||||
client
|
||||
.prepare(
|
||||
`INSERT INTO repository_versions (id, repository_id, tag, state, total_snippets, created_at)
|
||||
VALUES (?, ?, ?, ?, ?, ?)`
|
||||
)
|
||||
.run('/test/repo/v1.0', repoId, 'v1.0', 'indexed', 0, NOW_S);
|
||||
client
|
||||
.prepare(
|
||||
`INSERT INTO repository_versions (id, repository_id, tag, state, total_snippets, created_at)
|
||||
VALUES (?, ?, ?, ?, ?, ?)`
|
||||
)
|
||||
.run('/test/repo/v2.0', repoId, 'v2.0', 'indexed', 0, NOW_S);
|
||||
|
||||
// Create embedding profile
|
||||
client
|
||||
.prepare(
|
||||
`INSERT INTO embedding_profiles (id, provider_kind, title, enabled, is_default, model, dimensions, config, created_at, updated_at)
|
||||
VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?)`
|
||||
)
|
||||
.run('test-profile', 'local-transformers', 'Test', 1, 1, 'test-model', 4, '{}', NOW_S, NOW_S);
|
||||
|
||||
// Snippet A in version 1.0
|
||||
const snippetA = seedSnippet(client, {
|
||||
repositoryId: repoId,
|
||||
documentId: docId,
|
||||
content: 'version 1 text'
|
||||
});
|
||||
client
|
||||
.prepare('UPDATE snippets SET version_id = ? WHERE id = ?')
|
||||
.run('/test/repo/v1.0', snippetA);
|
||||
|
||||
// Seed embedding for snippetA
|
||||
const embedA = [0.1, 0.2, 0.3, 0.4];
|
||||
const f32A = new Float32Array(embedA);
|
||||
client
|
||||
.prepare(
|
||||
`INSERT INTO snippet_embeddings (snippet_id, profile_id, model, dimensions, embedding, created_at)
|
||||
VALUES (?, ?, ?, ?, ?, ?)`
|
||||
)
|
||||
.run(snippetA, 'test-profile', 'test-model', 4, Buffer.from(f32A.buffer), NOW_S);
|
||||
|
||||
// Snippet B in version 2.0
|
||||
const snippetB = seedSnippet(client, {
|
||||
repositoryId: repoId,
|
||||
documentId: docId,
|
||||
content: 'version 2 text'
|
||||
});
|
||||
client
|
||||
.prepare('UPDATE snippets SET version_id = ? WHERE id = ?')
|
||||
.run('/test/repo/v2.0', snippetB);
|
||||
|
||||
// Seed embedding for snippetB
|
||||
const embedB = [0.2, 0.3, 0.4, 0.5];
|
||||
const f32B = new Float32Array(embedB);
|
||||
client
|
||||
.prepare(
|
||||
`INSERT INTO snippet_embeddings (snippet_id, profile_id, model, dimensions, embedding, created_at)
|
||||
VALUES (?, ?, ?, ?, ?, ?)`
|
||||
)
|
||||
.run(snippetB, 'test-profile', 'test-model', 4, Buffer.from(f32B.buffer), NOW_S);
|
||||
|
||||
const vs = new VectorSearch(client);
|
||||
const query = new Float32Array([0.1, 0.2, 0.3, 0.4]);
|
||||
|
||||
// Query with versionId v1.0 should only return snippetA
|
||||
const resultsV1 = vs.vectorSearch(query, {
|
||||
repositoryId: repoId,
|
||||
versionId: '/test/repo/v1.0',
|
||||
profileId: 'test-profile'
|
||||
});
|
||||
expect(resultsV1.map((r) => r.snippetId)).toContain(snippetA);
|
||||
expect(resultsV1.map((r) => r.snippetId)).not.toContain(snippetB);
|
||||
|
||||
// Query with versionId v2.0 should only return snippetB
|
||||
const resultsV2 = vs.vectorSearch(query, {
|
||||
repositoryId: repoId,
|
||||
versionId: '/test/repo/v2.0',
|
||||
profileId: 'test-profile'
|
||||
});
|
||||
expect(resultsV2.map((r) => r.snippetId)).not.toContain(snippetA);
|
||||
expect(resultsV2.map((r) => r.snippetId)).toContain(snippetB);
|
||||
|
||||
// Query without versionId should return both
|
||||
const resultsAll = vs.vectorSearch(query, {
|
||||
repositoryId: repoId,
|
||||
profileId: 'test-profile'
|
||||
});
|
||||
expect(resultsAll.map((r) => r.snippetId)).toContain(snippetA);
|
||||
expect(resultsAll.map((r) => r.snippetId)).toContain(snippetB);
|
||||
});
|
||||
|
||||
it('searchMode=keyword never calls provider.embed()', async () => {
|
||||
const client = createTestDb();
|
||||
const repoId = seedRepo(client);
|
||||
const docId = seedDocument(client, repoId);
|
||||
|
||||
const snippetId = seedSnippet(client, {
|
||||
repositoryId: repoId,
|
||||
documentId: docId,
|
||||
content: 'keyword only test'
|
||||
});
|
||||
|
||||
client.exec(
|
||||
`INSERT INTO snippets_fts (id, repository_id, version_id, title, breadcrumb, content)
|
||||
VALUES ('${snippetId}', '${repoId}', NULL, NULL, NULL, 'keyword only test')`
|
||||
);
|
||||
|
||||
let embedCalled = false;
|
||||
const mockProvider: EmbeddingProvider = {
|
||||
name: 'mock',
|
||||
dimensions: 4,
|
||||
model: 'test-model',
|
||||
async embed() {
|
||||
embedCalled = true;
|
||||
return [];
|
||||
},
|
||||
async isAvailable() {
|
||||
return true;
|
||||
}
|
||||
};
|
||||
|
||||
const searchService = new SearchService(client);
|
||||
const hybridService = new HybridSearchService(client, searchService, mockProvider);
|
||||
|
||||
const results = await hybridService.search('keyword', {
|
||||
repositoryId: repoId,
|
||||
searchMode: 'keyword'
|
||||
});
|
||||
|
||||
expect(embedCalled).toBe(false);
|
||||
expect(results.length).toBeGreaterThan(0);
|
||||
});
|
||||
|
||||
it('searchMode=semantic uses only vector search', async () => {
|
||||
const client = createTestDb();
|
||||
const repoId = seedRepo(client);
|
||||
const docId = seedDocument(client, repoId);
|
||||
|
||||
// Create profile
|
||||
client
|
||||
.prepare(
|
||||
`INSERT INTO embedding_profiles (id, provider_kind, title, enabled, is_default, model, dimensions, config, created_at, updated_at)
|
||||
VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?)`
|
||||
)
|
||||
.run('test-profile', 'local-transformers', 'Test', 1, 1, 'test-model', 4, '{}', NOW_S, NOW_S);
|
||||
|
||||
const snippetId = seedSnippet(client, {
|
||||
repositoryId: repoId,
|
||||
documentId: docId,
|
||||
content: 'semantic test'
|
||||
});
|
||||
|
||||
// Seed embedding
|
||||
const embed = [0.5, 0.5, 0.5, 0.5];
|
||||
const f32 = new Float32Array(embed);
|
||||
client
|
||||
.prepare(
|
||||
`INSERT INTO snippet_embeddings (snippet_id, profile_id, model, dimensions, embedding, created_at)
|
||||
VALUES (?, ?, ?, ?, ?, ?)`
|
||||
)
|
||||
.run(snippetId, 'test-profile', 'test-model', 4, Buffer.from(f32.buffer), NOW_S);
|
||||
|
||||
const mockProvider: EmbeddingProvider = {
|
||||
name: 'mock',
|
||||
dimensions: 4,
|
||||
model: 'test-model',
|
||||
async embed() {
|
||||
return [
|
||||
{
|
||||
values: new Float32Array([0.5, 0.5, 0.5, 0.5]),
|
||||
dimensions: 4,
|
||||
model: 'test-model'
|
||||
}
|
||||
];
|
||||
},
|
||||
async isAvailable() {
|
||||
return true;
|
||||
}
|
||||
};
|
||||
|
||||
const searchService = new SearchService(client);
|
||||
const hybridService = new HybridSearchService(client, searchService, mockProvider);
|
||||
|
||||
const results = await hybridService.search('semantic', {
|
||||
repositoryId: repoId,
|
||||
searchMode: 'semantic',
|
||||
profileId: 'test-profile'
|
||||
});
|
||||
|
||||
// Should return results (alpha=1 pure vector mode)
|
||||
expect(results.length).toBeGreaterThan(0);
|
||||
});
|
||||
});
|
||||
|
||||
@@ -36,6 +36,16 @@ export interface HybridSearchOptions {
|
||||
* Default: 0.5.
|
||||
*/
|
||||
alpha?: number;
|
||||
/**
|
||||
* Search mode: 'auto' (default), 'keyword', 'semantic', or 'hybrid'.
|
||||
* Overrides alpha when set to 'keyword' (forces 0) or 'semantic' (forces 1).
|
||||
*/
|
||||
searchMode?: 'auto' | 'keyword' | 'semantic' | 'hybrid';
|
||||
/**
|
||||
* Embedding profile ID for vector search.
|
||||
* Default: 'local-default'.
|
||||
*/
|
||||
profileId?: string;
|
||||
}
|
||||
|
||||
/**
|
||||
@@ -90,7 +100,24 @@ export class HybridSearchService {
|
||||
options: HybridSearchOptions
|
||||
): Promise<SnippetSearchResult[]> {
|
||||
const limit = options.limit ?? 20;
|
||||
const alpha = options.alpha ?? 0.5;
|
||||
const mode = options.searchMode ?? 'auto';
|
||||
|
||||
// Resolve alpha from searchMode
|
||||
let alpha: number;
|
||||
switch (mode) {
|
||||
case 'keyword':
|
||||
alpha = 0;
|
||||
break;
|
||||
case 'semantic':
|
||||
alpha = 1;
|
||||
break;
|
||||
case 'hybrid':
|
||||
alpha = options.alpha ?? 0.5;
|
||||
break;
|
||||
default:
|
||||
// 'auto'
|
||||
alpha = options.alpha ?? 0.5;
|
||||
}
|
||||
|
||||
// Always run FTS5 — it is synchronous and fast.
|
||||
const ftsResults = this.searchService.searchSnippets(query, {
|
||||
@@ -115,11 +142,12 @@ export class HybridSearchService {
|
||||
|
||||
const queryEmbedding = embeddings[0].values;
|
||||
|
||||
const vectorResults = this.vectorSearch.vectorSearch(
|
||||
queryEmbedding,
|
||||
options.repositoryId,
|
||||
limit * 3
|
||||
);
|
||||
const vectorResults = this.vectorSearch.vectorSearch(queryEmbedding, {
|
||||
repositoryId: options.repositoryId,
|
||||
versionId: options.versionId,
|
||||
profileId: options.profileId,
|
||||
limit: limit * 3
|
||||
});
|
||||
|
||||
// Pure vector mode: skip RRF and return vector results directly.
|
||||
if (alpha === 1) {
|
||||
|
||||
@@ -21,6 +21,13 @@ export interface VectorSearchResult {
|
||||
score: number;
|
||||
}
|
||||
|
||||
export interface VectorSearchOptions {
|
||||
repositoryId: string;
|
||||
versionId?: string;
|
||||
profileId?: string;
|
||||
limit?: number;
|
||||
}
|
||||
|
||||
/** Raw DB row from snippet_embeddings joined with snippets. */
|
||||
interface RawEmbeddingRow {
|
||||
snippet_id: string;
|
||||
@@ -64,32 +71,33 @@ export function cosineSimilarity(a: Float32Array, b: Float32Array): number {
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
export class VectorSearch {
|
||||
private readonly stmt: Database.Statement<[string], RawEmbeddingRow>;
|
||||
|
||||
constructor(private readonly db: Database.Database) {
|
||||
// Prepare once — reused for every call.
|
||||
this.stmt = this.db.prepare<[string], RawEmbeddingRow>(`
|
||||
SELECT se.snippet_id, se.embedding
|
||||
FROM snippet_embeddings se
|
||||
JOIN snippets s ON s.id = se.snippet_id
|
||||
WHERE s.repository_id = ?
|
||||
`);
|
||||
}
|
||||
constructor(private readonly db: Database.Database) {}
|
||||
|
||||
/**
|
||||
* Search stored embeddings by cosine similarity to the query embedding.
|
||||
*
|
||||
* @param queryEmbedding - The embedded representation of the search query.
|
||||
* @param repositoryId - Scope the search to a single repository.
|
||||
* @param limit - Maximum number of results to return. Default: 50.
|
||||
* @param options - Search options including repositoryId, optional versionId, profileId, and limit.
|
||||
* @returns Results sorted by descending cosine similarity score.
|
||||
*/
|
||||
vectorSearch(
|
||||
queryEmbedding: Float32Array,
|
||||
repositoryId: string,
|
||||
limit = 50
|
||||
): VectorSearchResult[] {
|
||||
const rows = this.stmt.all(repositoryId);
|
||||
vectorSearch(queryEmbedding: Float32Array, options: VectorSearchOptions): VectorSearchResult[] {
|
||||
const { repositoryId, versionId, profileId = 'local-default', limit = 50 } = options;
|
||||
|
||||
let sql = `
|
||||
SELECT se.snippet_id, se.embedding
|
||||
FROM snippet_embeddings se
|
||||
JOIN snippets s ON s.id = se.snippet_id
|
||||
WHERE s.repository_id = ?
|
||||
AND se.profile_id = ?
|
||||
`;
|
||||
const params: unknown[] = [repositoryId, profileId];
|
||||
|
||||
if (versionId) {
|
||||
sql += ' AND s.version_id = ?';
|
||||
params.push(versionId);
|
||||
}
|
||||
|
||||
const rows = this.db.prepare<unknown[], RawEmbeddingRow>(sql).all(...params);
|
||||
|
||||
const scored: VectorSearchResult[] = rows.map((row) => {
|
||||
const embedding = new Float32Array(
|
||||
|
||||
@@ -42,6 +42,8 @@ export async function fetchContext(params: {
|
||||
query: string;
|
||||
tokens?: number;
|
||||
type?: 'json' | 'txt';
|
||||
searchMode?: string;
|
||||
alpha?: number;
|
||||
}): Promise<ApiResponse> {
|
||||
const url = new URL(`${API_BASE}/api/v1/context`);
|
||||
url.searchParams.set('libraryId', params.libraryId);
|
||||
@@ -50,6 +52,12 @@ export async function fetchContext(params: {
|
||||
if (params.tokens !== undefined) {
|
||||
url.searchParams.set('tokens', String(params.tokens));
|
||||
}
|
||||
if (params.searchMode) {
|
||||
url.searchParams.set('searchMode', params.searchMode);
|
||||
}
|
||||
if (params.alpha !== undefined) {
|
||||
url.searchParams.set('alpha', String(params.alpha));
|
||||
}
|
||||
|
||||
return fetch(url.toString());
|
||||
}
|
||||
|
||||
@@ -15,7 +15,19 @@ export const QueryDocsSchema = z.object({
|
||||
query: z
|
||||
.string()
|
||||
.describe('Specific question about the library to retrieve relevant documentation'),
|
||||
tokens: z.number().optional().describe('Maximum token budget for the response (default: 10000)')
|
||||
tokens: z.number().optional().describe('Maximum token budget for the response (default: 10000)'),
|
||||
searchMode: z
|
||||
.enum(['auto', 'keyword', 'semantic', 'hybrid'])
|
||||
.optional()
|
||||
.describe(
|
||||
"Retrieval mode: 'auto' (default), 'keyword' (FTS only), 'semantic' (vector only), or 'hybrid'"
|
||||
),
|
||||
alpha: z
|
||||
.number()
|
||||
.min(0)
|
||||
.max(1)
|
||||
.optional()
|
||||
.describe('Hybrid blend weight: 0.0 = keyword only, 1.0 = semantic only (default: 0.5)')
|
||||
});
|
||||
|
||||
export type QueryDocsInput = z.infer<typeof QueryDocsSchema>;
|
||||
@@ -42,6 +54,17 @@ export const QUERY_DOCS_TOOL = {
|
||||
tokens: {
|
||||
type: 'number',
|
||||
description: 'Max token budget (default: 10000)'
|
||||
},
|
||||
searchMode: {
|
||||
type: 'string',
|
||||
enum: ['auto', 'keyword', 'semantic', 'hybrid'],
|
||||
description: "Retrieval mode: 'auto' (default), 'keyword', 'semantic', or 'hybrid'"
|
||||
},
|
||||
alpha: {
|
||||
type: 'number',
|
||||
minimum: 0,
|
||||
maximum: 1,
|
||||
description: 'Hybrid blend weight (0=keyword, 1=semantic, default: 0.5)'
|
||||
}
|
||||
},
|
||||
required: ['libraryId', 'query']
|
||||
@@ -49,9 +72,9 @@ export const QUERY_DOCS_TOOL = {
|
||||
};
|
||||
|
||||
export async function handleQueryDocs(args: unknown) {
|
||||
const { libraryId, query, tokens } = QueryDocsSchema.parse(args);
|
||||
const { libraryId, query, tokens, searchMode, alpha } = QueryDocsSchema.parse(args);
|
||||
|
||||
const response = await fetchContext({ libraryId, query, tokens, type: 'txt' });
|
||||
const response = await fetchContext({ libraryId, query, tokens, type: 'txt', searchMode, alpha });
|
||||
|
||||
if (!response.ok) {
|
||||
const status = response.status;
|
||||
|
||||
@@ -16,6 +16,8 @@ import { getClient } from '$lib/server/db/client';
|
||||
import { dtoJsonResponse } from '$lib/server/api/dto-response';
|
||||
import { SearchService } from '$lib/server/search/search.service';
|
||||
import { HybridSearchService } from '$lib/server/search/hybrid.search.service';
|
||||
import { createProviderFromProfile } from '$lib/server/embeddings/registry';
|
||||
import type { EmbeddingProfile } from '$lib/server/db/schema';
|
||||
import { parseLibraryId } from '$lib/server/api/library-id';
|
||||
import { selectSnippetsWithinBudget, DEFAULT_TOKEN_BUDGET } from '$lib/server/api/token-budget';
|
||||
import {
|
||||
@@ -28,12 +30,20 @@ import {
|
||||
// Helpers
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
function getServices() {
|
||||
const db = getClient();
|
||||
function getServices(db: ReturnType<typeof getClient>) {
|
||||
const searchService = new SearchService(db);
|
||||
// No embedding provider — pure FTS5 mode (alpha=0 equivalent).
|
||||
const hybridService = new HybridSearchService(db, searchService, null);
|
||||
return { db, searchService, hybridService };
|
||||
|
||||
// Load the active embedding profile from the database
|
||||
const profileRow = db
|
||||
.prepare<[], EmbeddingProfile>(
|
||||
'SELECT * FROM embedding_profiles WHERE is_default = 1 AND enabled = 1 LIMIT 1'
|
||||
)
|
||||
.get();
|
||||
|
||||
const provider = profileRow ? createProviderFromProfile(profileRow) : null;
|
||||
const hybridService = new HybridSearchService(db, searchService, provider);
|
||||
|
||||
return { db, searchService, hybridService, profileId: profileRow?.id };
|
||||
}
|
||||
|
||||
interface RawRepoConfig {
|
||||
@@ -93,6 +103,14 @@ export const GET: RequestHandler = async ({ url }) => {
|
||||
const tokensRaw = parseInt(url.searchParams.get('tokens') ?? String(DEFAULT_TOKEN_BUDGET), 10);
|
||||
const maxTokens = isNaN(tokensRaw) || tokensRaw < 1 ? DEFAULT_TOKEN_BUDGET : tokensRaw;
|
||||
|
||||
// Parse searchMode and alpha
|
||||
const rawMode = url.searchParams.get('searchMode') ?? 'auto';
|
||||
const searchMode = ['auto', 'keyword', 'semantic', 'hybrid'].includes(rawMode)
|
||||
? (rawMode as 'auto' | 'keyword' | 'semantic' | 'hybrid')
|
||||
: 'auto';
|
||||
const alphaRaw = parseFloat(url.searchParams.get('alpha') ?? '0.5');
|
||||
const alpha = isNaN(alphaRaw) ? 0.5 : Math.max(0, Math.min(1, alphaRaw));
|
||||
|
||||
// Parse the libraryId
|
||||
let parsed: ReturnType<typeof parseLibraryId>;
|
||||
try {
|
||||
@@ -108,7 +126,8 @@ export const GET: RequestHandler = async ({ url }) => {
|
||||
}
|
||||
|
||||
try {
|
||||
const { db, hybridService } = getServices();
|
||||
const db = getClient();
|
||||
const { hybridService, profileId } = getServices(db);
|
||||
|
||||
// Verify the repository exists and check its state.
|
||||
const repo = db
|
||||
@@ -158,7 +177,10 @@ export const GET: RequestHandler = async ({ url }) => {
|
||||
const searchResults = await hybridService.search(query, {
|
||||
repositoryId: parsed.repositoryId,
|
||||
versionId,
|
||||
limit: 50 // fetch more than needed; token budget will trim
|
||||
limit: 50, // fetch more than needed; token budget will trim
|
||||
searchMode,
|
||||
alpha,
|
||||
profileId
|
||||
});
|
||||
|
||||
// Apply token budget.
|
||||
|
||||
@@ -1,147 +1,149 @@
|
||||
/**
|
||||
* GET /api/v1/settings/embedding — retrieve current embedding configuration
|
||||
* PUT /api/v1/settings/embedding — update embedding configuration
|
||||
* GET /api/v1/settings/embedding — retrieve all embedding profiles
|
||||
* POST /api/v1/settings/embedding — create or update an embedding profile
|
||||
* PUT /api/v1/settings/embedding — alias for POST (backward compat)
|
||||
*/
|
||||
|
||||
import { json } from '@sveltejs/kit';
|
||||
import type { RequestHandler } from './$types';
|
||||
import { getClient } from '$lib/server/db/client';
|
||||
import {
|
||||
EMBEDDING_CONFIG_KEY,
|
||||
createProviderFromConfig,
|
||||
defaultEmbeddingConfig,
|
||||
type EmbeddingConfig
|
||||
} from '$lib/server/embeddings/factory';
|
||||
import { createProviderFromProfile } from '$lib/server/embeddings/registry';
|
||||
import type { EmbeddingProfile, NewEmbeddingProfile } from '$lib/server/db/schema';
|
||||
import { handleServiceError, InvalidInputError } from '$lib/server/utils/validation';
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Helpers
|
||||
// GET — Return all profiles
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
function readConfig(db: ReturnType<typeof getClient>): EmbeddingConfig {
|
||||
const row = db
|
||||
.prepare(`SELECT value FROM settings WHERE key = ?`)
|
||||
.get(EMBEDDING_CONFIG_KEY) as { value: string } | undefined;
|
||||
|
||||
if (!row) return defaultEmbeddingConfig();
|
||||
|
||||
export const GET: RequestHandler = () => {
|
||||
try {
|
||||
return JSON.parse(row.value) as EmbeddingConfig;
|
||||
} catch {
|
||||
return defaultEmbeddingConfig();
|
||||
}
|
||||
}
|
||||
const db = getClient();
|
||||
const profiles = db
|
||||
.prepare('SELECT * FROM embedding_profiles ORDER BY is_default DESC, created_at ASC')
|
||||
.all() as EmbeddingProfile[];
|
||||
|
||||
function validateConfig(body: unknown): EmbeddingConfig {
|
||||
// Sanitize: remove sensitive config fields like apiKey
|
||||
const safeProfiles = profiles.map(sanitizeProfile);
|
||||
return json({ profiles: safeProfiles });
|
||||
} catch (err) {
|
||||
return handleServiceError(err);
|
||||
}
|
||||
};
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// POST/PUT — Create or update a profile
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
async function upsertProfile(body: unknown) {
|
||||
if (typeof body !== 'object' || body === null) {
|
||||
throw new InvalidInputError('Request body must be a JSON object');
|
||||
}
|
||||
|
||||
const obj = body as Record<string, unknown>;
|
||||
|
||||
const provider = obj.provider;
|
||||
if (provider !== 'openai' && provider !== 'local' && provider !== 'none') {
|
||||
// Required fields
|
||||
if (typeof obj.id !== 'string' || !obj.id) {
|
||||
throw new InvalidInputError('id is required');
|
||||
}
|
||||
if (typeof obj.providerKind !== 'string' || !obj.providerKind) {
|
||||
throw new InvalidInputError('providerKind is required');
|
||||
}
|
||||
if (typeof obj.title !== 'string' || !obj.title) {
|
||||
throw new InvalidInputError('title is required');
|
||||
}
|
||||
if (typeof obj.model !== 'string' || !obj.model) {
|
||||
throw new InvalidInputError('model is required');
|
||||
}
|
||||
if (typeof obj.dimensions !== 'number') {
|
||||
throw new InvalidInputError('dimensions must be a number');
|
||||
}
|
||||
|
||||
const profile: NewEmbeddingProfile = {
|
||||
id: obj.id,
|
||||
providerKind: obj.providerKind,
|
||||
title: obj.title,
|
||||
enabled: typeof obj.enabled === 'boolean' ? obj.enabled : true,
|
||||
isDefault: typeof obj.isDefault === 'boolean' ? obj.isDefault : false,
|
||||
model: obj.model,
|
||||
dimensions: obj.dimensions,
|
||||
config: (obj.config as Record<string, unknown>) ?? {},
|
||||
createdAt: Date.now(),
|
||||
updatedAt: Date.now()
|
||||
};
|
||||
|
||||
// Validate provider availability before persisting
|
||||
const provider = createProviderFromProfile(profile as EmbeddingProfile);
|
||||
const available = await provider.isAvailable();
|
||||
if (!available) {
|
||||
throw new InvalidInputError(
|
||||
`Invalid provider "${String(provider)}". Must be one of: openai, local, none.`
|
||||
`Could not connect to the "${profile.providerKind}" provider. Check your configuration.`
|
||||
);
|
||||
}
|
||||
|
||||
if (provider === 'openai') {
|
||||
const openai = obj.openai as Record<string, unknown> | undefined;
|
||||
if (!openai || typeof openai !== 'object') {
|
||||
throw new InvalidInputError('openai config object is required when provider is "openai"');
|
||||
}
|
||||
if (typeof openai.baseUrl !== 'string' || !openai.baseUrl) {
|
||||
throw new InvalidInputError('openai.baseUrl must be a non-empty string');
|
||||
}
|
||||
if (typeof openai.apiKey !== 'string' || !openai.apiKey) {
|
||||
throw new InvalidInputError('openai.apiKey must be a non-empty string');
|
||||
}
|
||||
if (typeof openai.model !== 'string' || !openai.model) {
|
||||
throw new InvalidInputError('openai.model must be a non-empty string');
|
||||
}
|
||||
const db = getClient();
|
||||
|
||||
const config: EmbeddingConfig = {
|
||||
provider: 'openai',
|
||||
openai: {
|
||||
baseUrl: openai.baseUrl as string,
|
||||
apiKey: openai.apiKey as string,
|
||||
model: openai.model as string,
|
||||
dimensions:
|
||||
typeof openai.dimensions === 'number' ? (openai.dimensions as number) : undefined,
|
||||
maxBatchSize:
|
||||
typeof openai.maxBatchSize === 'number'
|
||||
? (openai.maxBatchSize as number)
|
||||
: undefined
|
||||
}
|
||||
};
|
||||
return config;
|
||||
// If setting as default, clear other defaults first
|
||||
if (profile.isDefault) {
|
||||
db.prepare('UPDATE embedding_profiles SET is_default = 0').run();
|
||||
}
|
||||
|
||||
return { provider: provider as 'local' | 'none' };
|
||||
// Upsert the profile
|
||||
db.prepare(
|
||||
`INSERT INTO embedding_profiles
|
||||
(id, provider_kind, title, enabled, is_default, model, dimensions, config, created_at, updated_at)
|
||||
VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
|
||||
ON CONFLICT(id) DO UPDATE SET
|
||||
provider_kind = excluded.provider_kind,
|
||||
title = excluded.title,
|
||||
enabled = excluded.enabled,
|
||||
is_default = excluded.is_default,
|
||||
model = excluded.model,
|
||||
dimensions = excluded.dimensions,
|
||||
config = excluded.config,
|
||||
updated_at = excluded.updated_at`
|
||||
).run(
|
||||
profile.id,
|
||||
profile.providerKind,
|
||||
profile.title,
|
||||
profile.enabled ? 1 : 0,
|
||||
profile.isDefault ? 1 : 0,
|
||||
profile.model,
|
||||
profile.dimensions,
|
||||
JSON.stringify(profile.config),
|
||||
profile.createdAt,
|
||||
profile.updatedAt
|
||||
);
|
||||
|
||||
const inserted = db
|
||||
.prepare('SELECT * FROM embedding_profiles WHERE id = ?')
|
||||
.get(profile.id) as EmbeddingProfile;
|
||||
|
||||
return sanitizeProfile(inserted);
|
||||
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// GET
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
export const GET: RequestHandler = () => {
|
||||
try {
|
||||
const db = getClient();
|
||||
const config = readConfig(db);
|
||||
|
||||
// Strip the apiKey from the response for security.
|
||||
const safeConfig = sanitizeForResponse(config);
|
||||
return json(safeConfig);
|
||||
} catch (err) {
|
||||
return handleServiceError(err);
|
||||
}
|
||||
};
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// PUT
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
export const PUT: RequestHandler = async ({ request }) => {
|
||||
export const POST: RequestHandler = async ({ request }) => {
|
||||
try {
|
||||
const body = await request.json();
|
||||
const config = validateConfig(body);
|
||||
|
||||
// Verify provider connectivity before persisting (skip for noop).
|
||||
if (config.provider !== 'none') {
|
||||
const provider = createProviderFromConfig(config);
|
||||
const available = await provider.isAvailable();
|
||||
if (!available) {
|
||||
throw new InvalidInputError(
|
||||
`Could not connect to the "${config.provider}" embedding provider. Check your configuration.`
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
const db = getClient();
|
||||
db.prepare(
|
||||
`INSERT INTO settings (key, value, updated_at)
|
||||
VALUES (?, ?, unixepoch())
|
||||
ON CONFLICT (key) DO UPDATE SET value = excluded.value, updated_at = excluded.updated_at`
|
||||
).run(EMBEDDING_CONFIG_KEY, JSON.stringify(config));
|
||||
|
||||
const safeConfig = sanitizeForResponse(config);
|
||||
return json(safeConfig);
|
||||
const profile = await upsertProfile(body);
|
||||
return json(profile);
|
||||
} catch (err) {
|
||||
return handleServiceError(err);
|
||||
}
|
||||
};
|
||||
|
||||
// Backward compat alias
|
||||
export const PUT: RequestHandler = POST;
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Sanitize — remove sensitive fields before returning to clients
|
||||
// Sanitize — remove sensitive config fields before returning to clients
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
function sanitizeForResponse(config: EmbeddingConfig): Omit<EmbeddingConfig, 'openai'> & {
|
||||
openai?: Omit<NonNullable<EmbeddingConfig['openai']>, 'apiKey'>;
|
||||
} {
|
||||
if (config.provider === 'openai' && config.openai) {
|
||||
const { apiKey: _apiKey, ...rest } = config.openai;
|
||||
return { ...config, openai: rest };
|
||||
function sanitizeProfile(profile: EmbeddingProfile): EmbeddingProfile {
|
||||
const config = profile.config as Record<string, unknown>;
|
||||
if (config && config.apiKey) {
|
||||
const { apiKey: _apiKey, ...rest } = config;
|
||||
return { ...profile, config: rest };
|
||||
}
|
||||
return config;
|
||||
return profile;
|
||||
}
|
||||
|
||||
|
||||
@@ -1,82 +1,47 @@
|
||||
/**
|
||||
* POST /api/v1/settings/embedding/test
|
||||
* GET /api/v1/settings/embedding/test
|
||||
*
|
||||
* Validates an embedding provider configuration by creating a provider
|
||||
* instance and calling embed(['test']). Returns success with dimensions
|
||||
* or a descriptive error without persisting any changes.
|
||||
* Tests the active default embedding profile by creating a provider instance
|
||||
* and checking availability. Returns success with profile metadata or error.
|
||||
*/
|
||||
|
||||
import { json } from '@sveltejs/kit';
|
||||
import type { RequestHandler } from './$types';
|
||||
import {
|
||||
createProviderFromConfig,
|
||||
type EmbeddingConfig
|
||||
} from '$lib/server/embeddings/factory';
|
||||
import { handleServiceError, InvalidInputError } from '$lib/server/utils/validation';
|
||||
import { getClient } from '$lib/server/db/client';
|
||||
import { createProviderFromProfile } from '$lib/server/embeddings/registry';
|
||||
import type { EmbeddingProfile } from '$lib/server/db/schema';
|
||||
import { handleServiceError } from '$lib/server/utils/validation';
|
||||
|
||||
export const GET: RequestHandler = async () => {
|
||||
try {
|
||||
const provider = createProviderFromConfig({ provider: 'local' });
|
||||
const db = getClient();
|
||||
const profile = db
|
||||
.prepare<[], EmbeddingProfile>(
|
||||
'SELECT * FROM embedding_profiles WHERE is_default = 1 AND enabled = 1 LIMIT 1'
|
||||
)
|
||||
.get();
|
||||
|
||||
if (!profile) {
|
||||
return json({ available: false, error: 'No active embedding profile configured' });
|
||||
}
|
||||
|
||||
const provider = createProviderFromProfile(profile);
|
||||
const available = await provider.isAvailable();
|
||||
return json({ available });
|
||||
|
||||
return json({
|
||||
available,
|
||||
profile: {
|
||||
id: profile.id,
|
||||
providerKind: profile.providerKind,
|
||||
model: profile.model,
|
||||
dimensions: profile.dimensions
|
||||
}
|
||||
});
|
||||
} catch (err) {
|
||||
return handleServiceError(err);
|
||||
}
|
||||
};
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Validate — reuse the same shape accepted by PUT /settings/embedding
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
function validateConfig(body: unknown): EmbeddingConfig {
|
||||
if (typeof body !== 'object' || body === null) {
|
||||
throw new InvalidInputError('Request body must be a JSON object');
|
||||
}
|
||||
|
||||
const obj = body as Record<string, unknown>;
|
||||
|
||||
const provider = obj.provider;
|
||||
if (provider !== 'openai' && provider !== 'local' && provider !== 'none') {
|
||||
throw new InvalidInputError(
|
||||
`Invalid provider "${String(provider)}". Must be one of: openai, local, none.`
|
||||
);
|
||||
}
|
||||
|
||||
if (provider === 'openai') {
|
||||
const openai = obj.openai as Record<string, unknown> | undefined;
|
||||
if (!openai || typeof openai !== 'object') {
|
||||
throw new InvalidInputError('openai config object is required when provider is "openai"');
|
||||
}
|
||||
if (typeof openai.baseUrl !== 'string' || !openai.baseUrl) {
|
||||
throw new InvalidInputError('openai.baseUrl must be a non-empty string');
|
||||
}
|
||||
if (typeof openai.apiKey !== 'string' || !openai.apiKey) {
|
||||
throw new InvalidInputError('openai.apiKey must be a non-empty string');
|
||||
}
|
||||
if (typeof openai.model !== 'string' || !openai.model) {
|
||||
throw new InvalidInputError('openai.model must be a non-empty string');
|
||||
}
|
||||
|
||||
return {
|
||||
provider: 'openai',
|
||||
openai: {
|
||||
baseUrl: openai.baseUrl as string,
|
||||
apiKey: openai.apiKey as string,
|
||||
model: openai.model as string,
|
||||
dimensions:
|
||||
typeof openai.dimensions === 'number' ? (openai.dimensions as number) : undefined,
|
||||
maxBatchSize:
|
||||
typeof openai.maxBatchSize === 'number' ? (openai.maxBatchSize as number) : undefined
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
return { provider: provider as 'local' | 'none' };
|
||||
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// POST
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
export const POST: RequestHandler = async ({ request }) => {
|
||||
try {
|
||||
|
||||
Reference in New Issue
Block a user