feat(TRUEREF-0023): add sqlite-vec search pipeline
This commit is contained in:
@@ -4,6 +4,7 @@
|
||||
*/
|
||||
import Database from 'better-sqlite3';
|
||||
import { env } from '$env/dynamic/private';
|
||||
import { loadSqliteVec } from './sqlite-vec';
|
||||
|
||||
let _client: Database.Database | null = null;
|
||||
|
||||
@@ -14,6 +15,12 @@ export function getClient(): Database.Database {
|
||||
_client.pragma('journal_mode = WAL');
|
||||
_client.pragma('foreign_keys = ON');
|
||||
_client.pragma('busy_timeout = 5000');
|
||||
_client.pragma('synchronous = NORMAL');
|
||||
_client.pragma('cache_size = -65536');
|
||||
_client.pragma('temp_store = MEMORY');
|
||||
_client.pragma('mmap_size = 268435456');
|
||||
_client.pragma('wal_autocheckpoint = 1000');
|
||||
loadSqliteVec(_client);
|
||||
}
|
||||
return _client;
|
||||
}
|
||||
|
||||
@@ -5,6 +5,7 @@ import { readFileSync } from 'node:fs';
|
||||
import { fileURLToPath } from 'node:url';
|
||||
import { join, dirname } from 'node:path';
|
||||
import * as schema from './schema';
|
||||
import { loadSqliteVec } from './sqlite-vec';
|
||||
import { env } from '$env/dynamic/private';
|
||||
|
||||
if (!env.DATABASE_URL) throw new Error('DATABASE_URL is not set');
|
||||
@@ -19,6 +20,12 @@ client.pragma('foreign_keys = ON');
|
||||
// Prevents SQLITE_BUSY errors when the indexing pipeline holds the write lock
|
||||
// and an HTTP request arrives simultaneously.
|
||||
client.pragma('busy_timeout = 5000');
|
||||
client.pragma('synchronous = NORMAL');
|
||||
client.pragma('cache_size = -65536');
|
||||
client.pragma('temp_store = MEMORY');
|
||||
client.pragma('mmap_size = 268435456');
|
||||
client.pragma('wal_autocheckpoint = 1000');
|
||||
loadSqliteVec(client);
|
||||
|
||||
export const db = drizzle(client, { schema });
|
||||
|
||||
|
||||
@@ -0,0 +1,6 @@
|
||||
CREATE INDEX `idx_embeddings_profile` ON `snippet_embeddings` (`profile_id`,`snippet_id`);--> statement-breakpoint
|
||||
CREATE INDEX `idx_documents_repo_version` ON `documents` (`repository_id`,`version_id`);--> statement-breakpoint
|
||||
CREATE INDEX `idx_jobs_repo_status` ON `indexing_jobs` (`repository_id`,`status`);--> statement-breakpoint
|
||||
CREATE INDEX `idx_repositories_state` ON `repositories` (`state`);--> statement-breakpoint
|
||||
CREATE INDEX `idx_snippets_repo_version` ON `snippets` (`repository_id`,`version_id`);--> statement-breakpoint
|
||||
CREATE INDEX `idx_snippets_repo_type` ON `snippets` (`repository_id`,`type`);
|
||||
948
src/lib/server/db/migrations/meta/0006_snapshot.json
Normal file
948
src/lib/server/db/migrations/meta/0006_snapshot.json
Normal file
@@ -0,0 +1,948 @@
|
||||
{
|
||||
"version": "6",
|
||||
"dialect": "sqlite",
|
||||
"id": "b8998bda-f89b-41bc-b923-3f676d153c79",
|
||||
"prevId": "c326dcbe-1771-4a90-a566-0ebd1eca47ec",
|
||||
"tables": {
|
||||
"documents": {
|
||||
"name": "documents",
|
||||
"columns": {
|
||||
"id": {
|
||||
"name": "id",
|
||||
"type": "text",
|
||||
"primaryKey": true,
|
||||
"notNull": true,
|
||||
"autoincrement": false
|
||||
},
|
||||
"repository_id": {
|
||||
"name": "repository_id",
|
||||
"type": "text",
|
||||
"primaryKey": false,
|
||||
"notNull": true,
|
||||
"autoincrement": false
|
||||
},
|
||||
"version_id": {
|
||||
"name": "version_id",
|
||||
"type": "text",
|
||||
"primaryKey": false,
|
||||
"notNull": false,
|
||||
"autoincrement": false
|
||||
},
|
||||
"file_path": {
|
||||
"name": "file_path",
|
||||
"type": "text",
|
||||
"primaryKey": false,
|
||||
"notNull": true,
|
||||
"autoincrement": false
|
||||
},
|
||||
"title": {
|
||||
"name": "title",
|
||||
"type": "text",
|
||||
"primaryKey": false,
|
||||
"notNull": false,
|
||||
"autoincrement": false
|
||||
},
|
||||
"language": {
|
||||
"name": "language",
|
||||
"type": "text",
|
||||
"primaryKey": false,
|
||||
"notNull": false,
|
||||
"autoincrement": false
|
||||
},
|
||||
"token_count": {
|
||||
"name": "token_count",
|
||||
"type": "integer",
|
||||
"primaryKey": false,
|
||||
"notNull": false,
|
||||
"autoincrement": false,
|
||||
"default": 0
|
||||
},
|
||||
"checksum": {
|
||||
"name": "checksum",
|
||||
"type": "text",
|
||||
"primaryKey": false,
|
||||
"notNull": true,
|
||||
"autoincrement": false
|
||||
},
|
||||
"indexed_at": {
|
||||
"name": "indexed_at",
|
||||
"type": "integer",
|
||||
"primaryKey": false,
|
||||
"notNull": true,
|
||||
"autoincrement": false
|
||||
}
|
||||
},
|
||||
"indexes": {
|
||||
"idx_documents_repo_version": {
|
||||
"name": "idx_documents_repo_version",
|
||||
"columns": [
|
||||
"repository_id",
|
||||
"version_id"
|
||||
],
|
||||
"isUnique": false
|
||||
}
|
||||
},
|
||||
"foreignKeys": {
|
||||
"documents_repository_id_repositories_id_fk": {
|
||||
"name": "documents_repository_id_repositories_id_fk",
|
||||
"tableFrom": "documents",
|
||||
"tableTo": "repositories",
|
||||
"columnsFrom": [
|
||||
"repository_id"
|
||||
],
|
||||
"columnsTo": [
|
||||
"id"
|
||||
],
|
||||
"onDelete": "cascade",
|
||||
"onUpdate": "no action"
|
||||
},
|
||||
"documents_version_id_repository_versions_id_fk": {
|
||||
"name": "documents_version_id_repository_versions_id_fk",
|
||||
"tableFrom": "documents",
|
||||
"tableTo": "repository_versions",
|
||||
"columnsFrom": [
|
||||
"version_id"
|
||||
],
|
||||
"columnsTo": [
|
||||
"id"
|
||||
],
|
||||
"onDelete": "cascade",
|
||||
"onUpdate": "no action"
|
||||
}
|
||||
},
|
||||
"compositePrimaryKeys": {},
|
||||
"uniqueConstraints": {},
|
||||
"checkConstraints": {}
|
||||
},
|
||||
"embedding_profiles": {
|
||||
"name": "embedding_profiles",
|
||||
"columns": {
|
||||
"id": {
|
||||
"name": "id",
|
||||
"type": "text",
|
||||
"primaryKey": true,
|
||||
"notNull": true,
|
||||
"autoincrement": false
|
||||
},
|
||||
"provider_kind": {
|
||||
"name": "provider_kind",
|
||||
"type": "text",
|
||||
"primaryKey": false,
|
||||
"notNull": true,
|
||||
"autoincrement": false
|
||||
},
|
||||
"title": {
|
||||
"name": "title",
|
||||
"type": "text",
|
||||
"primaryKey": false,
|
||||
"notNull": true,
|
||||
"autoincrement": false
|
||||
},
|
||||
"enabled": {
|
||||
"name": "enabled",
|
||||
"type": "integer",
|
||||
"primaryKey": false,
|
||||
"notNull": true,
|
||||
"autoincrement": false,
|
||||
"default": true
|
||||
},
|
||||
"is_default": {
|
||||
"name": "is_default",
|
||||
"type": "integer",
|
||||
"primaryKey": false,
|
||||
"notNull": true,
|
||||
"autoincrement": false,
|
||||
"default": false
|
||||
},
|
||||
"model": {
|
||||
"name": "model",
|
||||
"type": "text",
|
||||
"primaryKey": false,
|
||||
"notNull": true,
|
||||
"autoincrement": false
|
||||
},
|
||||
"dimensions": {
|
||||
"name": "dimensions",
|
||||
"type": "integer",
|
||||
"primaryKey": false,
|
||||
"notNull": true,
|
||||
"autoincrement": false
|
||||
},
|
||||
"config": {
|
||||
"name": "config",
|
||||
"type": "text",
|
||||
"primaryKey": false,
|
||||
"notNull": true,
|
||||
"autoincrement": false
|
||||
},
|
||||
"created_at": {
|
||||
"name": "created_at",
|
||||
"type": "integer",
|
||||
"primaryKey": false,
|
||||
"notNull": true,
|
||||
"autoincrement": false
|
||||
},
|
||||
"updated_at": {
|
||||
"name": "updated_at",
|
||||
"type": "integer",
|
||||
"primaryKey": false,
|
||||
"notNull": true,
|
||||
"autoincrement": false
|
||||
}
|
||||
},
|
||||
"indexes": {},
|
||||
"foreignKeys": {},
|
||||
"compositePrimaryKeys": {},
|
||||
"uniqueConstraints": {},
|
||||
"checkConstraints": {}
|
||||
},
|
||||
"indexing_jobs": {
|
||||
"name": "indexing_jobs",
|
||||
"columns": {
|
||||
"id": {
|
||||
"name": "id",
|
||||
"type": "text",
|
||||
"primaryKey": true,
|
||||
"notNull": true,
|
||||
"autoincrement": false
|
||||
},
|
||||
"repository_id": {
|
||||
"name": "repository_id",
|
||||
"type": "text",
|
||||
"primaryKey": false,
|
||||
"notNull": true,
|
||||
"autoincrement": false
|
||||
},
|
||||
"version_id": {
|
||||
"name": "version_id",
|
||||
"type": "text",
|
||||
"primaryKey": false,
|
||||
"notNull": false,
|
||||
"autoincrement": false
|
||||
},
|
||||
"status": {
|
||||
"name": "status",
|
||||
"type": "text",
|
||||
"primaryKey": false,
|
||||
"notNull": true,
|
||||
"autoincrement": false,
|
||||
"default": "'queued'"
|
||||
},
|
||||
"progress": {
|
||||
"name": "progress",
|
||||
"type": "integer",
|
||||
"primaryKey": false,
|
||||
"notNull": false,
|
||||
"autoincrement": false,
|
||||
"default": 0
|
||||
},
|
||||
"total_files": {
|
||||
"name": "total_files",
|
||||
"type": "integer",
|
||||
"primaryKey": false,
|
||||
"notNull": false,
|
||||
"autoincrement": false,
|
||||
"default": 0
|
||||
},
|
||||
"processed_files": {
|
||||
"name": "processed_files",
|
||||
"type": "integer",
|
||||
"primaryKey": false,
|
||||
"notNull": false,
|
||||
"autoincrement": false,
|
||||
"default": 0
|
||||
},
|
||||
"stage": {
|
||||
"name": "stage",
|
||||
"type": "text",
|
||||
"primaryKey": false,
|
||||
"notNull": true,
|
||||
"autoincrement": false,
|
||||
"default": "'queued'"
|
||||
},
|
||||
"stage_detail": {
|
||||
"name": "stage_detail",
|
||||
"type": "text",
|
||||
"primaryKey": false,
|
||||
"notNull": false,
|
||||
"autoincrement": false
|
||||
},
|
||||
"error": {
|
||||
"name": "error",
|
||||
"type": "text",
|
||||
"primaryKey": false,
|
||||
"notNull": false,
|
||||
"autoincrement": false
|
||||
},
|
||||
"started_at": {
|
||||
"name": "started_at",
|
||||
"type": "integer",
|
||||
"primaryKey": false,
|
||||
"notNull": false,
|
||||
"autoincrement": false
|
||||
},
|
||||
"completed_at": {
|
||||
"name": "completed_at",
|
||||
"type": "integer",
|
||||
"primaryKey": false,
|
||||
"notNull": false,
|
||||
"autoincrement": false
|
||||
},
|
||||
"created_at": {
|
||||
"name": "created_at",
|
||||
"type": "integer",
|
||||
"primaryKey": false,
|
||||
"notNull": true,
|
||||
"autoincrement": false
|
||||
}
|
||||
},
|
||||
"indexes": {
|
||||
"idx_jobs_repo_status": {
|
||||
"name": "idx_jobs_repo_status",
|
||||
"columns": [
|
||||
"repository_id",
|
||||
"status"
|
||||
],
|
||||
"isUnique": false
|
||||
}
|
||||
},
|
||||
"foreignKeys": {
|
||||
"indexing_jobs_repository_id_repositories_id_fk": {
|
||||
"name": "indexing_jobs_repository_id_repositories_id_fk",
|
||||
"tableFrom": "indexing_jobs",
|
||||
"tableTo": "repositories",
|
||||
"columnsFrom": [
|
||||
"repository_id"
|
||||
],
|
||||
"columnsTo": [
|
||||
"id"
|
||||
],
|
||||
"onDelete": "cascade",
|
||||
"onUpdate": "no action"
|
||||
}
|
||||
},
|
||||
"compositePrimaryKeys": {},
|
||||
"uniqueConstraints": {},
|
||||
"checkConstraints": {}
|
||||
},
|
||||
"repositories": {
|
||||
"name": "repositories",
|
||||
"columns": {
|
||||
"id": {
|
||||
"name": "id",
|
||||
"type": "text",
|
||||
"primaryKey": true,
|
||||
"notNull": true,
|
||||
"autoincrement": false
|
||||
},
|
||||
"title": {
|
||||
"name": "title",
|
||||
"type": "text",
|
||||
"primaryKey": false,
|
||||
"notNull": true,
|
||||
"autoincrement": false
|
||||
},
|
||||
"description": {
|
||||
"name": "description",
|
||||
"type": "text",
|
||||
"primaryKey": false,
|
||||
"notNull": false,
|
||||
"autoincrement": false
|
||||
},
|
||||
"source": {
|
||||
"name": "source",
|
||||
"type": "text",
|
||||
"primaryKey": false,
|
||||
"notNull": true,
|
||||
"autoincrement": false
|
||||
},
|
||||
"source_url": {
|
||||
"name": "source_url",
|
||||
"type": "text",
|
||||
"primaryKey": false,
|
||||
"notNull": true,
|
||||
"autoincrement": false
|
||||
},
|
||||
"branch": {
|
||||
"name": "branch",
|
||||
"type": "text",
|
||||
"primaryKey": false,
|
||||
"notNull": false,
|
||||
"autoincrement": false,
|
||||
"default": "'main'"
|
||||
},
|
||||
"state": {
|
||||
"name": "state",
|
||||
"type": "text",
|
||||
"primaryKey": false,
|
||||
"notNull": true,
|
||||
"autoincrement": false,
|
||||
"default": "'pending'"
|
||||
},
|
||||
"total_snippets": {
|
||||
"name": "total_snippets",
|
||||
"type": "integer",
|
||||
"primaryKey": false,
|
||||
"notNull": false,
|
||||
"autoincrement": false,
|
||||
"default": 0
|
||||
},
|
||||
"total_tokens": {
|
||||
"name": "total_tokens",
|
||||
"type": "integer",
|
||||
"primaryKey": false,
|
||||
"notNull": false,
|
||||
"autoincrement": false,
|
||||
"default": 0
|
||||
},
|
||||
"trust_score": {
|
||||
"name": "trust_score",
|
||||
"type": "real",
|
||||
"primaryKey": false,
|
||||
"notNull": false,
|
||||
"autoincrement": false,
|
||||
"default": 0
|
||||
},
|
||||
"benchmark_score": {
|
||||
"name": "benchmark_score",
|
||||
"type": "real",
|
||||
"primaryKey": false,
|
||||
"notNull": false,
|
||||
"autoincrement": false,
|
||||
"default": 0
|
||||
},
|
||||
"stars": {
|
||||
"name": "stars",
|
||||
"type": "integer",
|
||||
"primaryKey": false,
|
||||
"notNull": false,
|
||||
"autoincrement": false
|
||||
},
|
||||
"github_token": {
|
||||
"name": "github_token",
|
||||
"type": "text",
|
||||
"primaryKey": false,
|
||||
"notNull": false,
|
||||
"autoincrement": false
|
||||
},
|
||||
"last_indexed_at": {
|
||||
"name": "last_indexed_at",
|
||||
"type": "integer",
|
||||
"primaryKey": false,
|
||||
"notNull": false,
|
||||
"autoincrement": false
|
||||
},
|
||||
"created_at": {
|
||||
"name": "created_at",
|
||||
"type": "integer",
|
||||
"primaryKey": false,
|
||||
"notNull": true,
|
||||
"autoincrement": false
|
||||
},
|
||||
"updated_at": {
|
||||
"name": "updated_at",
|
||||
"type": "integer",
|
||||
"primaryKey": false,
|
||||
"notNull": true,
|
||||
"autoincrement": false
|
||||
}
|
||||
},
|
||||
"indexes": {
|
||||
"idx_repositories_state": {
|
||||
"name": "idx_repositories_state",
|
||||
"columns": [
|
||||
"state"
|
||||
],
|
||||
"isUnique": false
|
||||
}
|
||||
},
|
||||
"foreignKeys": {},
|
||||
"compositePrimaryKeys": {},
|
||||
"uniqueConstraints": {},
|
||||
"checkConstraints": {}
|
||||
},
|
||||
"repository_configs": {
|
||||
"name": "repository_configs",
|
||||
"columns": {
|
||||
"repository_id": {
|
||||
"name": "repository_id",
|
||||
"type": "text",
|
||||
"primaryKey": false,
|
||||
"notNull": true,
|
||||
"autoincrement": false
|
||||
},
|
||||
"version_id": {
|
||||
"name": "version_id",
|
||||
"type": "text",
|
||||
"primaryKey": false,
|
||||
"notNull": false,
|
||||
"autoincrement": false
|
||||
},
|
||||
"project_title": {
|
||||
"name": "project_title",
|
||||
"type": "text",
|
||||
"primaryKey": false,
|
||||
"notNull": false,
|
||||
"autoincrement": false
|
||||
},
|
||||
"description": {
|
||||
"name": "description",
|
||||
"type": "text",
|
||||
"primaryKey": false,
|
||||
"notNull": false,
|
||||
"autoincrement": false
|
||||
},
|
||||
"folders": {
|
||||
"name": "folders",
|
||||
"type": "text",
|
||||
"primaryKey": false,
|
||||
"notNull": false,
|
||||
"autoincrement": false
|
||||
},
|
||||
"exclude_folders": {
|
||||
"name": "exclude_folders",
|
||||
"type": "text",
|
||||
"primaryKey": false,
|
||||
"notNull": false,
|
||||
"autoincrement": false
|
||||
},
|
||||
"exclude_files": {
|
||||
"name": "exclude_files",
|
||||
"type": "text",
|
||||
"primaryKey": false,
|
||||
"notNull": false,
|
||||
"autoincrement": false
|
||||
},
|
||||
"rules": {
|
||||
"name": "rules",
|
||||
"type": "text",
|
||||
"primaryKey": false,
|
||||
"notNull": false,
|
||||
"autoincrement": false
|
||||
},
|
||||
"previous_versions": {
|
||||
"name": "previous_versions",
|
||||
"type": "text",
|
||||
"primaryKey": false,
|
||||
"notNull": false,
|
||||
"autoincrement": false
|
||||
},
|
||||
"updated_at": {
|
||||
"name": "updated_at",
|
||||
"type": "integer",
|
||||
"primaryKey": false,
|
||||
"notNull": true,
|
||||
"autoincrement": false
|
||||
}
|
||||
},
|
||||
"indexes": {
|
||||
"uniq_repo_config_base": {
|
||||
"name": "uniq_repo_config_base",
|
||||
"columns": [
|
||||
"repository_id"
|
||||
],
|
||||
"isUnique": true,
|
||||
"where": "\"repository_configs\".\"version_id\" IS NULL"
|
||||
},
|
||||
"uniq_repo_config_version": {
|
||||
"name": "uniq_repo_config_version",
|
||||
"columns": [
|
||||
"repository_id",
|
||||
"version_id"
|
||||
],
|
||||
"isUnique": true,
|
||||
"where": "\"repository_configs\".\"version_id\" IS NOT NULL"
|
||||
}
|
||||
},
|
||||
"foreignKeys": {
|
||||
"repository_configs_repository_id_repositories_id_fk": {
|
||||
"name": "repository_configs_repository_id_repositories_id_fk",
|
||||
"tableFrom": "repository_configs",
|
||||
"tableTo": "repositories",
|
||||
"columnsFrom": [
|
||||
"repository_id"
|
||||
],
|
||||
"columnsTo": [
|
||||
"id"
|
||||
],
|
||||
"onDelete": "cascade",
|
||||
"onUpdate": "no action"
|
||||
}
|
||||
},
|
||||
"compositePrimaryKeys": {},
|
||||
"uniqueConstraints": {},
|
||||
"checkConstraints": {}
|
||||
},
|
||||
"repository_versions": {
|
||||
"name": "repository_versions",
|
||||
"columns": {
|
||||
"id": {
|
||||
"name": "id",
|
||||
"type": "text",
|
||||
"primaryKey": true,
|
||||
"notNull": true,
|
||||
"autoincrement": false
|
||||
},
|
||||
"repository_id": {
|
||||
"name": "repository_id",
|
||||
"type": "text",
|
||||
"primaryKey": false,
|
||||
"notNull": true,
|
||||
"autoincrement": false
|
||||
},
|
||||
"tag": {
|
||||
"name": "tag",
|
||||
"type": "text",
|
||||
"primaryKey": false,
|
||||
"notNull": true,
|
||||
"autoincrement": false
|
||||
},
|
||||
"title": {
|
||||
"name": "title",
|
||||
"type": "text",
|
||||
"primaryKey": false,
|
||||
"notNull": false,
|
||||
"autoincrement": false
|
||||
},
|
||||
"commit_hash": {
|
||||
"name": "commit_hash",
|
||||
"type": "text",
|
||||
"primaryKey": false,
|
||||
"notNull": false,
|
||||
"autoincrement": false
|
||||
},
|
||||
"state": {
|
||||
"name": "state",
|
||||
"type": "text",
|
||||
"primaryKey": false,
|
||||
"notNull": true,
|
||||
"autoincrement": false,
|
||||
"default": "'pending'"
|
||||
},
|
||||
"total_snippets": {
|
||||
"name": "total_snippets",
|
||||
"type": "integer",
|
||||
"primaryKey": false,
|
||||
"notNull": false,
|
||||
"autoincrement": false,
|
||||
"default": 0
|
||||
},
|
||||
"indexed_at": {
|
||||
"name": "indexed_at",
|
||||
"type": "integer",
|
||||
"primaryKey": false,
|
||||
"notNull": false,
|
||||
"autoincrement": false
|
||||
},
|
||||
"created_at": {
|
||||
"name": "created_at",
|
||||
"type": "integer",
|
||||
"primaryKey": false,
|
||||
"notNull": true,
|
||||
"autoincrement": false
|
||||
}
|
||||
},
|
||||
"indexes": {},
|
||||
"foreignKeys": {
|
||||
"repository_versions_repository_id_repositories_id_fk": {
|
||||
"name": "repository_versions_repository_id_repositories_id_fk",
|
||||
"tableFrom": "repository_versions",
|
||||
"tableTo": "repositories",
|
||||
"columnsFrom": [
|
||||
"repository_id"
|
||||
],
|
||||
"columnsTo": [
|
||||
"id"
|
||||
],
|
||||
"onDelete": "cascade",
|
||||
"onUpdate": "no action"
|
||||
}
|
||||
},
|
||||
"compositePrimaryKeys": {},
|
||||
"uniqueConstraints": {},
|
||||
"checkConstraints": {}
|
||||
},
|
||||
"settings": {
|
||||
"name": "settings",
|
||||
"columns": {
|
||||
"key": {
|
||||
"name": "key",
|
||||
"type": "text",
|
||||
"primaryKey": true,
|
||||
"notNull": true,
|
||||
"autoincrement": false
|
||||
},
|
||||
"value": {
|
||||
"name": "value",
|
||||
"type": "text",
|
||||
"primaryKey": false,
|
||||
"notNull": false,
|
||||
"autoincrement": false
|
||||
},
|
||||
"updated_at": {
|
||||
"name": "updated_at",
|
||||
"type": "integer",
|
||||
"primaryKey": false,
|
||||
"notNull": true,
|
||||
"autoincrement": false
|
||||
}
|
||||
},
|
||||
"indexes": {},
|
||||
"foreignKeys": {},
|
||||
"compositePrimaryKeys": {},
|
||||
"uniqueConstraints": {},
|
||||
"checkConstraints": {}
|
||||
},
|
||||
"snippet_embeddings": {
|
||||
"name": "snippet_embeddings",
|
||||
"columns": {
|
||||
"snippet_id": {
|
||||
"name": "snippet_id",
|
||||
"type": "text",
|
||||
"primaryKey": false,
|
||||
"notNull": true,
|
||||
"autoincrement": false
|
||||
},
|
||||
"profile_id": {
|
||||
"name": "profile_id",
|
||||
"type": "text",
|
||||
"primaryKey": false,
|
||||
"notNull": true,
|
||||
"autoincrement": false
|
||||
},
|
||||
"model": {
|
||||
"name": "model",
|
||||
"type": "text",
|
||||
"primaryKey": false,
|
||||
"notNull": true,
|
||||
"autoincrement": false
|
||||
},
|
||||
"dimensions": {
|
||||
"name": "dimensions",
|
||||
"type": "integer",
|
||||
"primaryKey": false,
|
||||
"notNull": true,
|
||||
"autoincrement": false
|
||||
},
|
||||
"embedding": {
|
||||
"name": "embedding",
|
||||
"type": "blob",
|
||||
"primaryKey": false,
|
||||
"notNull": true,
|
||||
"autoincrement": false
|
||||
},
|
||||
"created_at": {
|
||||
"name": "created_at",
|
||||
"type": "integer",
|
||||
"primaryKey": false,
|
||||
"notNull": true,
|
||||
"autoincrement": false
|
||||
}
|
||||
},
|
||||
"indexes": {
|
||||
"idx_embeddings_profile": {
|
||||
"name": "idx_embeddings_profile",
|
||||
"columns": [
|
||||
"profile_id",
|
||||
"snippet_id"
|
||||
],
|
||||
"isUnique": false
|
||||
}
|
||||
},
|
||||
"foreignKeys": {
|
||||
"snippet_embeddings_snippet_id_snippets_id_fk": {
|
||||
"name": "snippet_embeddings_snippet_id_snippets_id_fk",
|
||||
"tableFrom": "snippet_embeddings",
|
||||
"tableTo": "snippets",
|
||||
"columnsFrom": [
|
||||
"snippet_id"
|
||||
],
|
||||
"columnsTo": [
|
||||
"id"
|
||||
],
|
||||
"onDelete": "cascade",
|
||||
"onUpdate": "no action"
|
||||
},
|
||||
"snippet_embeddings_profile_id_embedding_profiles_id_fk": {
|
||||
"name": "snippet_embeddings_profile_id_embedding_profiles_id_fk",
|
||||
"tableFrom": "snippet_embeddings",
|
||||
"tableTo": "embedding_profiles",
|
||||
"columnsFrom": [
|
||||
"profile_id"
|
||||
],
|
||||
"columnsTo": [
|
||||
"id"
|
||||
],
|
||||
"onDelete": "cascade",
|
||||
"onUpdate": "no action"
|
||||
}
|
||||
},
|
||||
"compositePrimaryKeys": {
|
||||
"snippet_embeddings_snippet_id_profile_id_pk": {
|
||||
"columns": [
|
||||
"snippet_id",
|
||||
"profile_id"
|
||||
],
|
||||
"name": "snippet_embeddings_snippet_id_profile_id_pk"
|
||||
}
|
||||
},
|
||||
"uniqueConstraints": {},
|
||||
"checkConstraints": {}
|
||||
},
|
||||
"snippets": {
|
||||
"name": "snippets",
|
||||
"columns": {
|
||||
"id": {
|
||||
"name": "id",
|
||||
"type": "text",
|
||||
"primaryKey": true,
|
||||
"notNull": true,
|
||||
"autoincrement": false
|
||||
},
|
||||
"document_id": {
|
||||
"name": "document_id",
|
||||
"type": "text",
|
||||
"primaryKey": false,
|
||||
"notNull": true,
|
||||
"autoincrement": false
|
||||
},
|
||||
"repository_id": {
|
||||
"name": "repository_id",
|
||||
"type": "text",
|
||||
"primaryKey": false,
|
||||
"notNull": true,
|
||||
"autoincrement": false
|
||||
},
|
||||
"version_id": {
|
||||
"name": "version_id",
|
||||
"type": "text",
|
||||
"primaryKey": false,
|
||||
"notNull": false,
|
||||
"autoincrement": false
|
||||
},
|
||||
"type": {
|
||||
"name": "type",
|
||||
"type": "text",
|
||||
"primaryKey": false,
|
||||
"notNull": true,
|
||||
"autoincrement": false
|
||||
},
|
||||
"title": {
|
||||
"name": "title",
|
||||
"type": "text",
|
||||
"primaryKey": false,
|
||||
"notNull": false,
|
||||
"autoincrement": false
|
||||
},
|
||||
"content": {
|
||||
"name": "content",
|
||||
"type": "text",
|
||||
"primaryKey": false,
|
||||
"notNull": true,
|
||||
"autoincrement": false
|
||||
},
|
||||
"language": {
|
||||
"name": "language",
|
||||
"type": "text",
|
||||
"primaryKey": false,
|
||||
"notNull": false,
|
||||
"autoincrement": false
|
||||
},
|
||||
"breadcrumb": {
|
||||
"name": "breadcrumb",
|
||||
"type": "text",
|
||||
"primaryKey": false,
|
||||
"notNull": false,
|
||||
"autoincrement": false
|
||||
},
|
||||
"token_count": {
|
||||
"name": "token_count",
|
||||
"type": "integer",
|
||||
"primaryKey": false,
|
||||
"notNull": false,
|
||||
"autoincrement": false,
|
||||
"default": 0
|
||||
},
|
||||
"created_at": {
|
||||
"name": "created_at",
|
||||
"type": "integer",
|
||||
"primaryKey": false,
|
||||
"notNull": true,
|
||||
"autoincrement": false
|
||||
}
|
||||
},
|
||||
"indexes": {
|
||||
"idx_snippets_repo_version": {
|
||||
"name": "idx_snippets_repo_version",
|
||||
"columns": [
|
||||
"repository_id",
|
||||
"version_id"
|
||||
],
|
||||
"isUnique": false
|
||||
},
|
||||
"idx_snippets_repo_type": {
|
||||
"name": "idx_snippets_repo_type",
|
||||
"columns": [
|
||||
"repository_id",
|
||||
"type"
|
||||
],
|
||||
"isUnique": false
|
||||
}
|
||||
},
|
||||
"foreignKeys": {
|
||||
"snippets_document_id_documents_id_fk": {
|
||||
"name": "snippets_document_id_documents_id_fk",
|
||||
"tableFrom": "snippets",
|
||||
"tableTo": "documents",
|
||||
"columnsFrom": [
|
||||
"document_id"
|
||||
],
|
||||
"columnsTo": [
|
||||
"id"
|
||||
],
|
||||
"onDelete": "cascade",
|
||||
"onUpdate": "no action"
|
||||
},
|
||||
"snippets_repository_id_repositories_id_fk": {
|
||||
"name": "snippets_repository_id_repositories_id_fk",
|
||||
"tableFrom": "snippets",
|
||||
"tableTo": "repositories",
|
||||
"columnsFrom": [
|
||||
"repository_id"
|
||||
],
|
||||
"columnsTo": [
|
||||
"id"
|
||||
],
|
||||
"onDelete": "cascade",
|
||||
"onUpdate": "no action"
|
||||
},
|
||||
"snippets_version_id_repository_versions_id_fk": {
|
||||
"name": "snippets_version_id_repository_versions_id_fk",
|
||||
"tableFrom": "snippets",
|
||||
"tableTo": "repository_versions",
|
||||
"columnsFrom": [
|
||||
"version_id"
|
||||
],
|
||||
"columnsTo": [
|
||||
"id"
|
||||
],
|
||||
"onDelete": "cascade",
|
||||
"onUpdate": "no action"
|
||||
}
|
||||
},
|
||||
"compositePrimaryKeys": {},
|
||||
"uniqueConstraints": {},
|
||||
"checkConstraints": {}
|
||||
}
|
||||
},
|
||||
"views": {},
|
||||
"enums": {},
|
||||
"_meta": {
|
||||
"schemas": {},
|
||||
"tables": {},
|
||||
"columns": {}
|
||||
},
|
||||
"internal": {
|
||||
"indexes": {}
|
||||
}
|
||||
}
|
||||
@@ -43,6 +43,13 @@
|
||||
"when": 1774890536284,
|
||||
"tag": "0005_fix_stage_defaults",
|
||||
"breakpoints": true
|
||||
},
|
||||
{
|
||||
"idx": 6,
|
||||
"version": "6",
|
||||
"when": 1775038799913,
|
||||
"tag": "0006_yielding_centennial",
|
||||
"breakpoints": true
|
||||
}
|
||||
]
|
||||
}
|
||||
@@ -6,6 +6,7 @@ import { readFileSync } from 'node:fs';
|
||||
import { join } from 'node:path';
|
||||
import { eq } from 'drizzle-orm';
|
||||
import * as schema from './schema';
|
||||
import { loadSqliteVec, sqliteVecRowidTableName, sqliteVecTableName } from './sqlite-vec';
|
||||
import {
|
||||
repositories,
|
||||
repositoryVersions,
|
||||
@@ -24,6 +25,7 @@ import {
|
||||
function createTestDb() {
|
||||
const client = new Database(':memory:');
|
||||
client.pragma('foreign_keys = ON');
|
||||
loadSqliteVec(client);
|
||||
|
||||
const db = drizzle(client, { schema });
|
||||
|
||||
@@ -266,10 +268,11 @@ describe('snippets table', () => {
|
||||
|
||||
describe('snippet_embeddings table', () => {
|
||||
let db: ReturnType<typeof createTestDb>['db'];
|
||||
let client: Database.Database;
|
||||
let snippetId: string;
|
||||
|
||||
beforeEach(() => {
|
||||
({ db } = createTestDb());
|
||||
({ db, client } = createTestDb());
|
||||
db.insert(repositories).values(makeRepo()).run();
|
||||
const docId = crypto.randomUUID();
|
||||
db.insert(documents)
|
||||
@@ -344,6 +347,30 @@ describe('snippet_embeddings table', () => {
|
||||
const result = db.select().from(snippetEmbeddings).all();
|
||||
expect(result).toHaveLength(0);
|
||||
});
|
||||
|
||||
it('keeps the relational schema free of vec_embedding and retains the profile index', () => {
|
||||
const columns = client
|
||||
.prepare("PRAGMA table_info('snippet_embeddings')")
|
||||
.all() as Array<{ name: string }>;
|
||||
expect(columns.map((column) => column.name)).not.toContain('vec_embedding');
|
||||
|
||||
const indexes = client
|
||||
.prepare("PRAGMA index_list('snippet_embeddings')")
|
||||
.all() as Array<{ name: string }>;
|
||||
expect(indexes.map((index) => index.name)).toContain('idx_embeddings_profile');
|
||||
});
|
||||
|
||||
it('loads sqlite-vec idempotently and derives deterministic per-profile table names', () => {
|
||||
expect(() => loadSqliteVec(client)).not.toThrow();
|
||||
const tableName = sqliteVecTableName('local-default');
|
||||
const rowidTableName = sqliteVecRowidTableName('local-default');
|
||||
|
||||
expect(tableName).toMatch(/^snippet_embeddings_vec_local_default_[0-9a-f]{8}$/);
|
||||
expect(rowidTableName).toMatch(/^snippet_embeddings_vec_rowids_local_default_[0-9a-f]{8}$/);
|
||||
expect(sqliteVecTableName('local-default')).toBe(tableName);
|
||||
expect(sqliteVecRowidTableName('local-default')).toBe(rowidTableName);
|
||||
expect(sqliteVecTableName('local-default')).not.toBe(sqliteVecTableName('openai/custom'));
|
||||
});
|
||||
});
|
||||
|
||||
describe('indexing_jobs table', () => {
|
||||
|
||||
@@ -1,6 +1,7 @@
|
||||
import { sql } from 'drizzle-orm';
|
||||
import {
|
||||
blob,
|
||||
index,
|
||||
integer,
|
||||
primaryKey,
|
||||
real,
|
||||
@@ -34,7 +35,7 @@ export const repositories = sqliteTable('repositories', {
|
||||
lastIndexedAt: integer('last_indexed_at', { mode: 'timestamp' }),
|
||||
createdAt: integer('created_at', { mode: 'timestamp' }).notNull(),
|
||||
updatedAt: integer('updated_at', { mode: 'timestamp' }).notNull()
|
||||
});
|
||||
}, (t) => [index('idx_repositories_state').on(t.state)]);
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// repository_versions
|
||||
@@ -72,7 +73,7 @@ export const documents = sqliteTable('documents', {
|
||||
tokenCount: integer('token_count').default(0),
|
||||
checksum: text('checksum').notNull(), // SHA-256 of file content
|
||||
indexedAt: integer('indexed_at', { mode: 'timestamp' }).notNull()
|
||||
});
|
||||
}, (t) => [index('idx_documents_repo_version').on(t.repositoryId, t.versionId)]);
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// snippets
|
||||
@@ -93,7 +94,10 @@ export const snippets = sqliteTable('snippets', {
|
||||
breadcrumb: text('breadcrumb'), // e.g. "Installation > Getting Started"
|
||||
tokenCount: integer('token_count').default(0),
|
||||
createdAt: integer('created_at', { mode: 'timestamp' }).notNull()
|
||||
});
|
||||
}, (t) => [
|
||||
index('idx_snippets_repo_version').on(t.repositoryId, t.versionId),
|
||||
index('idx_snippets_repo_type').on(t.repositoryId, t.type),
|
||||
]);
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// embedding_profiles
|
||||
@@ -128,7 +132,10 @@ export const snippetEmbeddings = sqliteTable(
|
||||
embedding: blob('embedding').notNull(), // Float32Array as binary blob
|
||||
createdAt: integer('created_at').notNull()
|
||||
},
|
||||
(table) => [primaryKey({ columns: [table.snippetId, table.profileId] })]
|
||||
(table) => [
|
||||
primaryKey({ columns: [table.snippetId, table.profileId] }),
|
||||
index('idx_embeddings_profile').on(table.profileId, table.snippetId),
|
||||
]
|
||||
);
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
@@ -154,7 +161,7 @@ export const indexingJobs = sqliteTable('indexing_jobs', {
|
||||
startedAt: integer('started_at', { mode: 'timestamp' }),
|
||||
completedAt: integer('completed_at', { mode: 'timestamp' }),
|
||||
createdAt: integer('created_at', { mode: 'timestamp' }).notNull()
|
||||
});
|
||||
}, (t) => [index('idx_jobs_repo_status').on(t.repositoryId, t.status)]);
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// repository_configs
|
||||
|
||||
49
src/lib/server/db/sqlite-vec.ts
Normal file
49
src/lib/server/db/sqlite-vec.ts
Normal file
@@ -0,0 +1,49 @@
|
||||
import type Database from 'better-sqlite3';
|
||||
import * as sqliteVec from 'sqlite-vec';
|
||||
|
||||
// Tracks which better-sqlite3 connections already have the sqlite-vec
// extension loaded, making loadSqliteVec() idempotent per connection.
// A WeakSet is used so a closed/discarded connection can still be GC'd.
const loadedConnections = new WeakSet<Database.Database>();
|
||||
|
||||
function stableHash(value: string): string {
|
||||
let hash = 2166136261;
|
||||
|
||||
for (let index = 0; index < value.length; index += 1) {
|
||||
hash ^= value.charCodeAt(index);
|
||||
hash = Math.imul(hash, 16777619);
|
||||
}
|
||||
|
||||
return (hash >>> 0).toString(16).padStart(8, '0');
|
||||
}
|
||||
|
||||
function sanitizeIdentifierPart(value: string): string {
|
||||
const sanitized = value
|
||||
.toLowerCase()
|
||||
.replace(/[^a-z0-9]+/g, '_')
|
||||
.replace(/^_+|_+$/g, '');
|
||||
|
||||
return sanitized.length > 0 ? sanitized.slice(0, 32) : 'profile';
|
||||
}
|
||||
|
||||
export function sqliteVecTableSuffix(profileId: string): string {
|
||||
return `${sanitizeIdentifierPart(profileId)}_${stableHash(profileId)}`;
|
||||
}
|
||||
|
||||
export function sqliteVecTableName(profileId: string): string {
|
||||
return `snippet_embeddings_vec_${sqliteVecTableSuffix(profileId)}`;
|
||||
}
|
||||
|
||||
export function sqliteVecRowidTableName(profileId: string): string {
|
||||
return `snippet_embeddings_vec_rowids_${sqliteVecTableSuffix(profileId)}`;
|
||||
}
|
||||
|
||||
export function quoteSqliteIdentifier(identifier: string): string {
|
||||
return `"${identifier.replace(/"/g, '""')}"`;
|
||||
}
|
||||
|
||||
export function loadSqliteVec(db: Database.Database): void {
|
||||
if (loadedConnections.has(db)) {
|
||||
return;
|
||||
}
|
||||
|
||||
sqliteVec.load(db);
|
||||
loadedConnections.add(db);
|
||||
}
|
||||
2
src/lib/server/db/vectors.sql
Normal file
2
src/lib/server/db/vectors.sql
Normal file
@@ -0,0 +1,2 @@
|
||||
-- Relational vec_embedding bootstrap removed in iteration 2.
|
||||
-- Downstream sqlite-vec vec0 tables are created on demand in application code.
|
||||
@@ -12,6 +12,12 @@ import { migrate } from 'drizzle-orm/better-sqlite3/migrator';
|
||||
import { readFileSync } from 'node:fs';
|
||||
import { join } from 'node:path';
|
||||
import * as schema from '../db/schema.js';
|
||||
import {
|
||||
loadSqliteVec,
|
||||
sqliteVecRowidTableName,
|
||||
sqliteVecTableName
|
||||
} from '../db/sqlite-vec.js';
|
||||
import { SqliteVecStore } from '../search/sqlite-vec.store.js';
|
||||
|
||||
import { NoopEmbeddingProvider, EmbeddingError, type EmbeddingVector } from './provider.js';
|
||||
import { OpenAIEmbeddingProvider } from './openai.provider.js';
|
||||
@@ -31,6 +37,7 @@ import { createProviderFromProfile } from './registry.js';
|
||||
function createTestDb() {
|
||||
const client = new Database(':memory:');
|
||||
client.pragma('foreign_keys = ON');
|
||||
loadSqliteVec(client);
|
||||
|
||||
const db = drizzle(client, { schema });
|
||||
const migrationsFolder = join(import.meta.dirname, '../db/migrations');
|
||||
@@ -387,10 +394,19 @@ describe('EmbeddingService', () => {
|
||||
embedding: Buffer;
|
||||
profile_id: string;
|
||||
};
|
||||
expect((row as Record<string, unknown>).vec_embedding).toBeUndefined();
|
||||
expect(row.model).toBe('test-model');
|
||||
expect(row.dimensions).toBe(4);
|
||||
expect(row.profile_id).toBe('local-default');
|
||||
expect(row.embedding).toBeInstanceOf(Buffer);
|
||||
|
||||
const queryEmbedding = service.getEmbedding(snippetId, 'local-default');
|
||||
const matches = new SqliteVecStore(client).queryNearestNeighbors(queryEmbedding!, {
|
||||
repositoryId: '/test/embed-repo',
|
||||
profileId: 'local-default',
|
||||
limit: 5
|
||||
});
|
||||
expect(matches[0]?.snippetId).toBe(snippetId);
|
||||
});
|
||||
|
||||
it('stores embeddings as retrievable Float32Array blobs', async () => {
|
||||
@@ -436,6 +452,22 @@ describe('EmbeddingService', () => {
|
||||
.prepare('SELECT profile_id FROM snippet_embeddings WHERE snippet_id = ?')
|
||||
.get(snippetId) as { profile_id: string };
|
||||
expect(row.profile_id).toBe('openai-custom');
|
||||
|
||||
const queryEmbedding = service.getEmbedding(snippetId, 'openai-custom');
|
||||
const store = new SqliteVecStore(client);
|
||||
const customMatches = store.queryNearestNeighbors(queryEmbedding!, {
|
||||
repositoryId: '/test/embed-repo',
|
||||
profileId: 'openai-custom',
|
||||
limit: 5
|
||||
});
|
||||
const defaultMatches = store.queryNearestNeighbors(new Float32Array([1, 0, 0, 0]), {
|
||||
repositoryId: '/test/embed-repo',
|
||||
profileId: 'local-default',
|
||||
limit: 5
|
||||
});
|
||||
|
||||
expect(customMatches[0]?.snippetId).toBe(snippetId);
|
||||
expect(defaultMatches).toHaveLength(0);
|
||||
});
|
||||
|
||||
it('is idempotent — re-embedding replaces the existing row', async () => {
|
||||
@@ -450,6 +482,17 @@ describe('EmbeddingService', () => {
|
||||
.prepare('SELECT COUNT(*) as cnt FROM snippet_embeddings WHERE snippet_id = ?')
|
||||
.get(snippetId) as { cnt: number };
|
||||
expect(rows.cnt).toBe(1);
|
||||
|
||||
const vecTable = sqliteVecTableName('local-default');
|
||||
const rowidTable = sqliteVecRowidTableName('local-default');
|
||||
const vecRows = client.prepare(`SELECT COUNT(*) as cnt FROM "${vecTable}"`).get() as {
|
||||
cnt: number;
|
||||
};
|
||||
const rowidRows = client.prepare(`SELECT COUNT(*) as cnt FROM "${rowidTable}"`).get() as {
|
||||
cnt: number;
|
||||
};
|
||||
expect(vecRows.cnt).toBe(1);
|
||||
expect(rowidRows.cnt).toBe(1);
|
||||
});
|
||||
|
||||
it('calls onProgress after each batch', async () => {
|
||||
|
||||
@@ -5,6 +5,7 @@
|
||||
|
||||
import type Database from 'better-sqlite3';
|
||||
import type { EmbeddingProvider } from './provider.js';
|
||||
import { SqliteVecStore } from '$lib/server/search/sqlite-vec.store.js';
|
||||
|
||||
interface SnippetRow {
|
||||
id: string;
|
||||
@@ -17,11 +18,15 @@ const BATCH_SIZE = 50;
|
||||
const TEXT_MAX_CHARS = 2048;
|
||||
|
||||
export class EmbeddingService {
|
||||
private readonly sqliteVecStore: SqliteVecStore;
|
||||
|
||||
constructor(
|
||||
private readonly db: Database.Database,
|
||||
private readonly provider: EmbeddingProvider,
|
||||
private readonly profileId: string = 'local-default'
|
||||
) {}
|
||||
) {
|
||||
this.sqliteVecStore = new SqliteVecStore(db);
|
||||
}
|
||||
|
||||
findSnippetIdsMissingEmbeddings(repositoryId: string, versionId: string | null): string[] {
|
||||
if (versionId) {
|
||||
@@ -104,13 +109,19 @@ export class EmbeddingService {
|
||||
for (let j = 0; j < batchSnippets.length; j++) {
|
||||
const snippet = batchSnippets[j];
|
||||
const embedding = embeddings[j];
|
||||
|
||||
insert.run(
|
||||
snippet.id,
|
||||
this.profileId,
|
||||
embedding.model,
|
||||
embedding.dimensions,
|
||||
Buffer.from(embedding.values.buffer)
|
||||
Buffer.from(
|
||||
embedding.values.buffer,
|
||||
embedding.values.byteOffset,
|
||||
embedding.values.byteLength
|
||||
)
|
||||
);
|
||||
this.sqliteVecStore.upsertEmbedding(this.profileId, snippet.id, embedding.values);
|
||||
}
|
||||
});
|
||||
insertMany();
|
||||
|
||||
@@ -21,6 +21,11 @@ const db = new Database(dbPath);
|
||||
db.pragma('journal_mode = WAL');
|
||||
db.pragma('foreign_keys = ON');
|
||||
db.pragma('busy_timeout = 5000');
|
||||
db.pragma('synchronous = NORMAL');
|
||||
db.pragma('cache_size = -65536');
|
||||
db.pragma('temp_store = MEMORY');
|
||||
db.pragma('mmap_size = 268435456');
|
||||
db.pragma('wal_autocheckpoint = 1000');
|
||||
|
||||
// Load the embedding profile from DB
|
||||
const rawProfile = db.prepare('SELECT * FROM embedding_profiles WHERE id = ?').get(embeddingProfileId);
|
||||
|
||||
@@ -13,6 +13,9 @@ import { JobQueue } from './job-queue.js';
|
||||
import { IndexingPipeline } from './indexing.pipeline.js';
|
||||
import { recoverStaleJobs } from './startup.js';
|
||||
import { EmbeddingService } from '$lib/server/embeddings/embedding.service.js';
|
||||
import { loadSqliteVec } from '$lib/server/db/sqlite-vec.js';
|
||||
import { SqliteVecStore } from '$lib/server/search/sqlite-vec.store.js';
|
||||
import { sqliteVecRowidTableName, sqliteVecTableName } from '$lib/server/db/sqlite-vec.js';
|
||||
import * as diffStrategy from './differential-strategy.js';
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
@@ -22,6 +25,7 @@ import * as diffStrategy from './differential-strategy.js';
|
||||
function createTestDb(): Database.Database {
|
||||
const client = new Database(':memory:');
|
||||
client.pragma('foreign_keys = ON');
|
||||
loadSqliteVec(client);
|
||||
|
||||
const migrationsFolder = join(import.meta.dirname, '../db/migrations');
|
||||
for (const migrationFile of [
|
||||
@@ -29,7 +33,9 @@ function createTestDb(): Database.Database {
|
||||
'0001_quick_nighthawk.sql',
|
||||
'0002_silky_stellaris.sql',
|
||||
'0003_multiversion_config.sql',
|
||||
'0004_complete_sentry.sql'
|
||||
'0004_complete_sentry.sql',
|
||||
'0005_fix_stage_defaults.sql',
|
||||
'0006_yielding_centennial.sql'
|
||||
]) {
|
||||
const migrationSql = readFileSync(join(migrationsFolder, migrationFile), 'utf-8');
|
||||
|
||||
@@ -539,6 +545,52 @@ describe('IndexingPipeline', () => {
|
||||
expect(finalChecksum).toBe('sha-v2');
|
||||
});
|
||||
|
||||
it('removes derived vec rows when changed documents are replaced', async () => {
|
||||
const docId = crypto.randomUUID();
|
||||
const snippetId = crypto.randomUUID();
|
||||
const embedding = Float32Array.from([1, 0, 0]);
|
||||
const vecStore = new SqliteVecStore(db);
|
||||
|
||||
db.prepare(
|
||||
`INSERT INTO documents (id, repository_id, version_id, file_path, checksum, indexed_at)
|
||||
VALUES (?, '/test/repo', NULL, 'README.md', 'stale-doc', ?)`
|
||||
).run(docId, now);
|
||||
db.prepare(
|
||||
`INSERT INTO snippets (id, document_id, repository_id, version_id, type, content, created_at)
|
||||
VALUES (?, ?, '/test/repo', NULL, 'info', 'stale snippet', ?)`
|
||||
).run(snippetId, docId, now);
|
||||
db.prepare(
|
||||
`INSERT INTO snippet_embeddings (snippet_id, profile_id, model, dimensions, embedding, created_at)
|
||||
VALUES (?, 'local-default', 'test-model', 3, ?, ?)`
|
||||
).run(snippetId, Buffer.from(embedding.buffer), now);
|
||||
vecStore.upsertEmbedding('local-default', snippetId, embedding);
|
||||
|
||||
const pipeline = makePipeline({
|
||||
files: [
|
||||
{
|
||||
path: 'README.md',
|
||||
content: '# Updated\n\nFresh content.',
|
||||
sha: 'sha-fresh',
|
||||
language: 'markdown'
|
||||
}
|
||||
],
|
||||
totalFiles: 1
|
||||
});
|
||||
const job = makeJob();
|
||||
|
||||
await pipeline.run(job as never);
|
||||
|
||||
const vecTable = sqliteVecTableName('local-default');
|
||||
const rowidTable = sqliteVecRowidTableName('local-default');
|
||||
const vecCount = db.prepare(`SELECT COUNT(*) as n FROM "${vecTable}"`).get() as { n: number };
|
||||
const rowidCount = db.prepare(`SELECT COUNT(*) as n FROM "${rowidTable}"`).get() as {
|
||||
n: number;
|
||||
};
|
||||
|
||||
expect(vecCount.n).toBe(0);
|
||||
expect(rowidCount.n).toBe(0);
|
||||
});
|
||||
|
||||
it('updates job progress as files are processed', async () => {
|
||||
const files = Array.from({ length: 5 }, (_, i) => ({
|
||||
path: `file${i}.md`,
|
||||
@@ -700,6 +752,60 @@ describe('IndexingPipeline', () => {
|
||||
expect(version.indexed_at).not.toBeNull();
|
||||
});
|
||||
|
||||
it('clones ancestor embeddings into the derived vec store for differential indexing', async () => {
|
||||
const ancestorVersionId = insertVersion(db, { tag: 'v1.0.0', state: 'indexed' });
|
||||
const targetVersionId = insertVersion(db, { tag: 'v1.1.0', state: 'pending' });
|
||||
const vecStore = new SqliteVecStore(db);
|
||||
const docId = crypto.randomUUID();
|
||||
const snippetId = crypto.randomUUID();
|
||||
const embedding = Float32Array.from([0.2, 0.4, 0.6]);
|
||||
|
||||
db.prepare(
|
||||
`INSERT INTO documents (id, repository_id, version_id, file_path, checksum, indexed_at)
|
||||
VALUES (?, '/test/repo', ?, 'README.md', 'ancestor-doc', ?)`
|
||||
).run(docId, ancestorVersionId, now);
|
||||
db.prepare(
|
||||
`INSERT INTO snippets (id, document_id, repository_id, version_id, type, content, created_at)
|
||||
VALUES (?, ?, '/test/repo', ?, 'info', 'ancestor snippet', ?)`
|
||||
).run(snippetId, docId, ancestorVersionId, now);
|
||||
db.prepare(
|
||||
`INSERT INTO snippet_embeddings (snippet_id, profile_id, model, dimensions, embedding, created_at)
|
||||
VALUES (?, 'local-default', 'test-model', 3, ?, ?)`
|
||||
).run(snippetId, Buffer.from(embedding.buffer), now);
|
||||
vecStore.upsertEmbedding('local-default', snippetId, embedding);
|
||||
|
||||
vi.spyOn(diffStrategy, 'buildDifferentialPlan').mockResolvedValue({
|
||||
ancestorTag: 'v1.0.0',
|
||||
ancestorVersionId,
|
||||
changedPaths: new Set<string>(),
|
||||
unchangedPaths: new Set<string>(['README.md'])
|
||||
});
|
||||
|
||||
const pipeline = makePipeline({ files: [], totalFiles: 0 });
|
||||
const job = makeJob('/test/repo', targetVersionId);
|
||||
|
||||
await pipeline.run(job as never);
|
||||
|
||||
const targetRows = db
|
||||
.prepare(
|
||||
`SELECT se.snippet_id, se.embedding
|
||||
FROM snippet_embeddings se
|
||||
INNER JOIN snippets s ON s.id = se.snippet_id
|
||||
WHERE s.version_id = ?`
|
||||
)
|
||||
.all(targetVersionId) as Array<{ snippet_id: string; embedding: Buffer }>;
|
||||
|
||||
expect(targetRows).toHaveLength(1);
|
||||
const matches = vecStore.queryNearestNeighbors(embedding, {
|
||||
repositoryId: '/test/repo',
|
||||
versionId: targetVersionId,
|
||||
profileId: 'local-default',
|
||||
limit: 5
|
||||
});
|
||||
|
||||
expect(matches[0]?.snippetId).toBe(targetRows[0].snippet_id);
|
||||
});
|
||||
|
||||
it('updates repository_versions state to error when pipeline throws and job has versionId', async () => {
|
||||
const versionId = insertVersion(db, { tag: 'v1.0.0', state: 'pending' });
|
||||
const errorCrawl = vi.fn().mockRejectedValue(new Error('crawl failed'));
|
||||
|
||||
@@ -22,6 +22,7 @@ import type { EmbeddingService } from '$lib/server/embeddings/embedding.service.
|
||||
import { RepositoryMapper } from '$lib/server/mappers/repository.mapper.js';
|
||||
import { IndexingJob } from '$lib/server/models/indexing-job.js';
|
||||
import { Repository, RepositoryEntity } from '$lib/server/models/repository.js';
|
||||
import { SqliteVecStore } from '$lib/server/search/sqlite-vec.store.js';
|
||||
import { resolveConfig, type ParsedConfig } from '$lib/server/config/config-parser.js';
|
||||
import { parseFile } from '$lib/server/parser/index.js';
|
||||
import { computeTrustScore } from '$lib/server/search/trust-score.js';
|
||||
@@ -63,12 +64,16 @@ function sha256(content: string): string {
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
export class IndexingPipeline {
|
||||
private readonly sqliteVecStore: SqliteVecStore;
|
||||
|
||||
constructor(
|
||||
private readonly db: Database.Database,
|
||||
private readonly githubCrawl: typeof GithubCrawlFn,
|
||||
private readonly localCrawler: LocalCrawler,
|
||||
private readonly embeddingService: EmbeddingService | null
|
||||
) {}
|
||||
) {
|
||||
this.sqliteVecStore = new SqliteVecStore(db);
|
||||
}
|
||||
|
||||
// -------------------------------------------------------------------------
|
||||
// Public — run a job end to end
|
||||
@@ -593,6 +598,12 @@ export class IndexingPipeline {
|
||||
emb.embedding,
|
||||
emb.created_at
|
||||
);
|
||||
this.sqliteVecStore.upsertEmbeddingBuffer(
|
||||
emb.profile_id,
|
||||
newSnippetId,
|
||||
emb.embedding,
|
||||
emb.dimensions
|
||||
);
|
||||
}
|
||||
}
|
||||
})();
|
||||
@@ -623,6 +634,8 @@ export class IndexingPipeline {
|
||||
);
|
||||
|
||||
this.db.transaction(() => {
|
||||
this.sqliteVecStore.deleteEmbeddingsForDocumentIds(changedDocIds);
|
||||
|
||||
// Delete stale documents (cascade deletes their snippets via FK).
|
||||
if (changedDocIds.length > 0) {
|
||||
const placeholders = changedDocIds.map(() => '?').join(',');
|
||||
|
||||
@@ -17,6 +17,54 @@ import type { WorkerPool } from './worker-pool.js';
|
||||
|
||||
const JOB_SELECT = `SELECT * FROM indexing_jobs`;
|
||||
|
||||
type JobStatusFilter = IndexingJob['status'] | Array<IndexingJob['status']>;
|
||||
|
||||
function escapeLikePattern(value: string): string {
|
||||
return value.replaceAll('\\', '\\\\').replaceAll('%', '\\%').replaceAll('_', '\\_');
|
||||
}
|
||||
|
||||
function isSpecificRepositoryId(repositoryId: string): boolean {
|
||||
return repositoryId.split('/').filter(Boolean).length >= 2;
|
||||
}
|
||||
|
||||
function normalizeStatuses(status?: JobStatusFilter): Array<IndexingJob['status']> {
|
||||
if (!status) {
|
||||
return [];
|
||||
}
|
||||
|
||||
const statuses = Array.isArray(status) ? status : [status];
|
||||
return [...new Set(statuses)];
|
||||
}
|
||||
|
||||
function buildJobFilterQuery(options?: {
|
||||
repositoryId?: string;
|
||||
status?: JobStatusFilter;
|
||||
}): { where: string; params: unknown[] } {
|
||||
const conditions: string[] = [];
|
||||
const params: unknown[] = [];
|
||||
|
||||
if (options?.repositoryId) {
|
||||
if (isSpecificRepositoryId(options.repositoryId)) {
|
||||
conditions.push('repository_id = ?');
|
||||
params.push(options.repositoryId);
|
||||
} else {
|
||||
conditions.push(`(repository_id = ? OR repository_id LIKE ? ESCAPE '\\')`);
|
||||
params.push(options.repositoryId, `${escapeLikePattern(options.repositoryId)}/%`);
|
||||
}
|
||||
}
|
||||
|
||||
const statuses = normalizeStatuses(options?.status);
|
||||
if (statuses.length > 0) {
|
||||
conditions.push(`status IN (${statuses.map(() => '?').join(', ')})`);
|
||||
params.push(...statuses);
|
||||
}
|
||||
|
||||
return {
|
||||
where: conditions.length > 0 ? `WHERE ${conditions.join(' AND ')}` : '',
|
||||
params
|
||||
};
|
||||
}
|
||||
|
||||
export class JobQueue {
|
||||
private workerPool: WorkerPool | null = null;
|
||||
|
||||
@@ -144,23 +192,11 @@ export class JobQueue {
|
||||
*/
|
||||
listJobs(options?: {
|
||||
repositoryId?: string;
|
||||
status?: IndexingJob['status'];
|
||||
status?: JobStatusFilter;
|
||||
limit?: number;
|
||||
}): IndexingJob[] {
|
||||
const limit = Math.min(options?.limit ?? 20, 200);
|
||||
const conditions: string[] = [];
|
||||
const params: unknown[] = [];
|
||||
|
||||
if (options?.repositoryId) {
|
||||
conditions.push('repository_id = ?');
|
||||
params.push(options.repositoryId);
|
||||
}
|
||||
if (options?.status) {
|
||||
conditions.push('status = ?');
|
||||
params.push(options.status);
|
||||
}
|
||||
|
||||
const where = conditions.length > 0 ? `WHERE ${conditions.join(' AND ')}` : '';
|
||||
const { where, params } = buildJobFilterQuery(options);
|
||||
const sql = `${JOB_SELECT} ${where} ORDER BY created_at DESC LIMIT ?`;
|
||||
params.push(limit);
|
||||
|
||||
@@ -194,19 +230,7 @@ export class JobQueue {
|
||||
* Count all jobs matching optional filters.
|
||||
*/
|
||||
countJobs(options?: { repositoryId?: string; status?: IndexingJob['status'] }): number {
|
||||
const conditions: string[] = [];
|
||||
const params: unknown[] = [];
|
||||
|
||||
if (options?.repositoryId) {
|
||||
conditions.push('repository_id = ?');
|
||||
params.push(options.repositoryId);
|
||||
}
|
||||
if (options?.status) {
|
||||
conditions.push('status = ?');
|
||||
params.push(options.status);
|
||||
}
|
||||
|
||||
const where = conditions.length > 0 ? `WHERE ${conditions.join(' AND')}` : '';
|
||||
const { where, params } = buildJobFilterQuery(options);
|
||||
const sql = `SELECT COUNT(*) as n FROM indexing_jobs ${where}`;
|
||||
const row = this.db.prepare<unknown[], { n: number }>(sql).get(...params);
|
||||
return row?.n ?? 0;
|
||||
|
||||
@@ -171,4 +171,25 @@ describe('ProgressBroadcaster', () => {
|
||||
reader1.cancel();
|
||||
reader2.cancel();
|
||||
});
|
||||
|
||||
// Verifies that worker-status events reach *global* SSE subscribers
// (subscribeAll), not just per-job streams.
it('broadcastWorkerStatus sends worker-status events to global subscribers', async () => {
  const broadcaster = new ProgressBroadcaster();
  const stream = broadcaster.subscribeAll();
  const reader = stream.getReader();

  // Snapshot of a pool with one running and one idle worker.
  broadcaster.broadcastWorkerStatus({
    concurrency: 2,
    active: 1,
    idle: 1,
    workers: [{ index: 0, state: 'running', jobId: 'job-1', repositoryId: '/repo/1', versionId: null }]
  });

  const { value } = await reader.read();
  const text = value as string;

  // The SSE frame must carry the event name and the JSON-serialised payload.
  expect(text).toContain('event: worker-status');
  expect(text).toContain('"active":1');

  // Release the stream so the broadcaster can drop the subscriber.
  reader.cancel();
});
|
||||
});
|
||||
|
||||
@@ -10,6 +10,7 @@ export class ProgressBroadcaster {
|
||||
private allSubscribers = new Set<ReadableStreamDefaultController<string>>();
|
||||
private lastEventCache = new Map<string, SSEEvent>();
|
||||
private eventCounters = new Map<string, number>();
|
||||
private globalEventCounter = 0;
|
||||
|
||||
subscribe(jobId: string): ReadableStream<string> {
|
||||
return new ReadableStream({
|
||||
@@ -135,6 +136,24 @@ export class ProgressBroadcaster {
|
||||
}
|
||||
}
|
||||
|
||||
broadcastWorkerStatus(data: object): void {
|
||||
this.globalEventCounter += 1;
|
||||
const event: SSEEvent = {
|
||||
id: this.globalEventCounter,
|
||||
event: 'worker-status',
|
||||
data: JSON.stringify(data)
|
||||
};
|
||||
const sse = this.formatSSE(event);
|
||||
|
||||
for (const controller of this.allSubscribers) {
|
||||
try {
|
||||
controller.enqueue(sse);
|
||||
} catch {
|
||||
// Controller might be closed or errored
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
getLastEvent(jobId: string): SSEEvent | null {
|
||||
return this.lastEventCache.get(jobId) ?? null;
|
||||
}
|
||||
|
||||
@@ -16,7 +16,6 @@ import { LocalCrawler } from '$lib/server/crawler/local.crawler.js';
|
||||
import { IndexingPipeline } from './indexing.pipeline.js';
|
||||
import { JobQueue } from './job-queue.js';
|
||||
import { WorkerPool } from './worker-pool.js';
|
||||
import type { ParseWorkerResponse } from './worker-types.js';
|
||||
import { initBroadcaster } from './progress-broadcaster.js';
|
||||
import type { ProgressBroadcaster } from './progress-broadcaster.js';
|
||||
import path from 'node:path';
|
||||
@@ -90,17 +89,28 @@ export function initializePipeline(
|
||||
if (options?.dbPath) {
|
||||
_broadcaster = initBroadcaster();
|
||||
|
||||
const getRepositoryIdForJob = (jobId: string): string => {
|
||||
const row = db
|
||||
.prepare<[string], { repository_id: string }>(
|
||||
`SELECT repository_id FROM indexing_jobs WHERE id = ?`
|
||||
)
|
||||
.get(jobId);
|
||||
return row?.repository_id ?? '';
|
||||
};
|
||||
|
||||
// Resolve worker script paths relative to this file (build/workers/ directory)
|
||||
const __filename = fileURLToPath(import.meta.url);
|
||||
const __dirname = path.dirname(__filename);
|
||||
const workerScript = path.join(__dirname, '../../../build/workers/worker-entry.mjs');
|
||||
const embedWorkerScript = path.join(__dirname, '../../../build/workers/embed-worker-entry.mjs');
|
||||
const writeWorkerScript = path.join(__dirname, '../../../build/workers/write-worker-entry.mjs');
|
||||
|
||||
try {
|
||||
_pool = new WorkerPool({
|
||||
concurrency: options.concurrency ?? 2,
|
||||
workerScript,
|
||||
embedWorkerScript,
|
||||
writeWorkerScript,
|
||||
dbPath: options.dbPath,
|
||||
onProgress: (jobId, msg) => {
|
||||
// Update DB with progress
|
||||
@@ -112,7 +122,10 @@ export function initializePipeline(
|
||||
|
||||
// Broadcast progress event
|
||||
if (_broadcaster) {
|
||||
_broadcaster.broadcast(jobId, '', 'progress', msg);
|
||||
_broadcaster.broadcast(jobId, getRepositoryIdForJob(jobId), 'job-progress', {
|
||||
...msg,
|
||||
status: 'running'
|
||||
});
|
||||
}
|
||||
},
|
||||
onJobDone: (jobId: string) => {
|
||||
@@ -123,7 +136,10 @@ export function initializePipeline(
|
||||
|
||||
// Broadcast done event
|
||||
if (_broadcaster) {
|
||||
_broadcaster.broadcast(jobId, '', 'job-done', { jobId });
|
||||
_broadcaster.broadcast(jobId, getRepositoryIdForJob(jobId), 'job-done', {
|
||||
jobId,
|
||||
status: 'done'
|
||||
});
|
||||
}
|
||||
},
|
||||
onJobFailed: (jobId: string, error: string) => {
|
||||
@@ -134,7 +150,11 @@ export function initializePipeline(
|
||||
|
||||
// Broadcast failed event
|
||||
if (_broadcaster) {
|
||||
_broadcaster.broadcast(jobId, '', 'job-failed', { jobId, error });
|
||||
_broadcaster.broadcast(jobId, getRepositoryIdForJob(jobId), 'job-failed', {
|
||||
jobId,
|
||||
status: 'failed',
|
||||
error
|
||||
});
|
||||
}
|
||||
},
|
||||
onEmbedDone: (jobId: string) => {
|
||||
@@ -142,6 +162,9 @@ export function initializePipeline(
|
||||
},
|
||||
onEmbedFailed: (jobId: string, error: string) => {
|
||||
console.error('[WorkerPool] Embedding failed for job:', jobId, error);
|
||||
},
|
||||
onWorkerStatus: (status) => {
|
||||
_broadcaster?.broadcastWorkerStatus(status);
|
||||
}
|
||||
});
|
||||
|
||||
|
||||
@@ -13,6 +13,11 @@ const db = new Database(dbPath);
|
||||
db.pragma('journal_mode = WAL');
|
||||
db.pragma('foreign_keys = ON');
|
||||
db.pragma('busy_timeout = 5000');
|
||||
db.pragma('synchronous = NORMAL');
|
||||
db.pragma('cache_size = -65536');
|
||||
db.pragma('temp_store = MEMORY');
|
||||
db.pragma('mmap_size = 268435456');
|
||||
db.pragma('wal_autocheckpoint = 1000');
|
||||
|
||||
const pipeline = new IndexingPipeline(db, githubCrawl, new LocalCrawler(), null);
|
||||
let currentJobId: string | null = null;
|
||||
|
||||
@@ -1,11 +1,19 @@
|
||||
import { Worker } from 'node:worker_threads';
|
||||
import { existsSync } from 'node:fs';
|
||||
import type { ParseWorkerRequest, ParseWorkerResponse, EmbedWorkerRequest, EmbedWorkerResponse, WorkerInitData } from './worker-types.js';
|
||||
import type {
|
||||
ParseWorkerRequest,
|
||||
ParseWorkerResponse,
|
||||
EmbedWorkerRequest,
|
||||
EmbedWorkerResponse,
|
||||
WorkerInitData,
|
||||
WriteWorkerResponse
|
||||
} from './worker-types.js';
|
||||
|
||||
export interface WorkerPoolOptions {
|
||||
concurrency: number;
|
||||
workerScript: string;
|
||||
embedWorkerScript: string;
|
||||
writeWorkerScript?: string;
|
||||
dbPath: string;
|
||||
embeddingProfileId?: string;
|
||||
onProgress: (jobId: string, msg: Extract<ParseWorkerResponse, { type: 'progress' }>) => void;
|
||||
@@ -13,6 +21,22 @@ export interface WorkerPoolOptions {
|
||||
onJobFailed: (jobId: string, error: string) => void;
|
||||
onEmbedDone: (jobId: string) => void;
|
||||
onEmbedFailed: (jobId: string, error: string) => void;
|
||||
onWorkerStatus?: (status: WorkerPoolStatus) => void;
|
||||
}
|
||||
|
||||
export interface WorkerStatusEntry {
|
||||
index: number;
|
||||
state: 'idle' | 'running';
|
||||
jobId: string | null;
|
||||
repositoryId: string | null;
|
||||
versionId: string | null;
|
||||
}
|
||||
|
||||
export interface WorkerPoolStatus {
|
||||
concurrency: number;
|
||||
active: number;
|
||||
idle: number;
|
||||
workers: WorkerStatusEntry[];
|
||||
}
|
||||
|
||||
interface QueuedJob {
|
||||
@@ -24,6 +48,7 @@ interface QueuedJob {
|
||||
interface RunningJob {
|
||||
jobId: string;
|
||||
repositoryId: string;
|
||||
versionId?: string | null;
|
||||
}
|
||||
|
||||
interface EmbedQueuedJob {
|
||||
@@ -36,10 +61,12 @@ export class WorkerPool {
|
||||
private workers: Worker[] = [];
|
||||
private idleWorkers: Worker[] = [];
|
||||
private embedWorker: Worker | null = null;
|
||||
private writeWorker: Worker | null = null;
|
||||
private embedReady = false;
|
||||
private writeReady = false;
|
||||
private jobQueue: QueuedJob[] = [];
|
||||
private runningJobs = new Map<Worker, RunningJob>();
|
||||
private runningRepoIds = new Set<string>();
|
||||
private runningJobKeys = new Set<string>();
|
||||
private embedQueue: EmbedQueuedJob[] = [];
|
||||
private options: WorkerPoolOptions;
|
||||
private fallbackMode = false;
|
||||
@@ -66,6 +93,12 @@ export class WorkerPool {
|
||||
if (options.embeddingProfileId && existsSync(options.embedWorkerScript)) {
|
||||
this.embedWorker = this.spawnEmbedWorker();
|
||||
}
|
||||
|
||||
if (options.writeWorkerScript && existsSync(options.writeWorkerScript)) {
|
||||
this.writeWorker = this.spawnWriteWorker(options.writeWorkerScript);
|
||||
}
|
||||
|
||||
this.emitStatusChanged();
|
||||
}
|
||||
|
||||
private spawnParseWorker(): Worker {
|
||||
@@ -94,6 +127,22 @@ export class WorkerPool {
|
||||
return worker;
|
||||
}
|
||||
|
||||
/**
 * Spawns the dedicated write worker thread and wires its lifecycle handlers.
 * On exit the ready flag and reference are cleared so later code sees the
 * writer as unavailable; it is not respawned automatically here.
 */
private spawnWriteWorker(writeWorkerScript: string): Worker {
  const initData = { dbPath: this.options.dbPath } satisfies WorkerInitData;
  const worker = new Worker(writeWorkerScript, { workerData: initData });

  worker.on('message', (msg: WriteWorkerResponse) => {
    this.onWriteWorkerMessage(msg);
  });
  worker.on('exit', () => {
    this.writeReady = false;
    this.writeWorker = null;
  });

  return worker;
}
|
||||
|
||||
public enqueue(jobId: string, repositoryId: string, versionId?: string | null): void {
|
||||
if (this.shuttingDown) {
|
||||
console.warn('WorkerPool is shutting down, ignoring enqueue request');
|
||||
@@ -109,10 +158,18 @@ export class WorkerPool {
|
||||
this.dispatch();
|
||||
}
|
||||
|
||||
/**
 * Builds the de-duplication key identifying a (repository, version) pair in
 * `runningJobKeys` / queue dispatch.
 *
 * Uses the NUL character as the separator instead of ':' so a repository id
 * that itself contains the separator cannot collide with a different pair
 * (previously ("r:x", "y") and ("r", "x:y") produced the same key). NUL is
 * not expected to occur in path-style repository ids or version ids, making
 * the encoding unambiguous. A null/undefined version maps to the empty
 * string, preserving the original "no version" behaviour.
 */
private static jobKey(repositoryId: string, versionId?: string | null): string {
  return `${repositoryId}\u0000${versionId ?? ''}`;
}
|
||||
|
||||
private dispatch(): void {
|
||||
let statusChanged = false;
|
||||
|
||||
while (this.idleWorkers.length > 0 && this.jobQueue.length > 0) {
|
||||
// Find first job whose repositoryId is not currently running
|
||||
const jobIdx = this.jobQueue.findIndex((j) => !this.runningRepoIds.has(j.repositoryId));
|
||||
// Find first job whose (repositoryId, versionId) compound key is not currently running
|
||||
const jobIdx = this.jobQueue.findIndex(
|
||||
(j) => !this.runningJobKeys.has(WorkerPool.jobKey(j.repositoryId, j.versionId))
|
||||
);
|
||||
|
||||
if (jobIdx === -1) {
|
||||
// No eligible job found (all repos have running jobs)
|
||||
@@ -122,12 +179,17 @@ export class WorkerPool {
|
||||
const job = this.jobQueue.splice(jobIdx, 1)[0];
|
||||
const worker = this.idleWorkers.pop()!;
|
||||
|
||||
this.runningJobs.set(worker, { jobId: job.jobId, repositoryId: job.repositoryId });
|
||||
this.runningRepoIds.add(job.repositoryId);
|
||||
this.runningJobs.set(worker, { jobId: job.jobId, repositoryId: job.repositoryId, versionId: job.versionId });
|
||||
this.runningJobKeys.add(WorkerPool.jobKey(job.repositoryId, job.versionId));
|
||||
statusChanged = true;
|
||||
|
||||
const msg: ParseWorkerRequest = { type: 'run', jobId: job.jobId };
|
||||
worker.postMessage(msg);
|
||||
}
|
||||
|
||||
if (statusChanged) {
|
||||
this.emitStatusChanged();
|
||||
}
|
||||
}
|
||||
|
||||
private onWorkerMessage(worker: Worker, msg: ParseWorkerResponse): void {
|
||||
@@ -137,15 +199,20 @@ export class WorkerPool {
|
||||
const runningJob = this.runningJobs.get(worker);
|
||||
if (runningJob) {
|
||||
this.runningJobs.delete(worker);
|
||||
this.runningRepoIds.delete(runningJob.repositoryId);
|
||||
this.runningJobKeys.delete(WorkerPool.jobKey(runningJob.repositoryId, runningJob.versionId));
|
||||
}
|
||||
this.idleWorkers.push(worker);
|
||||
this.options.onJobDone(msg.jobId);
|
||||
this.emitStatusChanged();
|
||||
|
||||
// If embedding configured, enqueue embed request
|
||||
if (this.embedWorker && this.options.embeddingProfileId) {
|
||||
const runningJobData = runningJob || { jobId: msg.jobId, repositoryId: '' };
|
||||
this.enqueueEmbed(msg.jobId, runningJobData.repositoryId, null);
|
||||
const runningJobData = runningJob || { jobId: msg.jobId, repositoryId: '', versionId: null };
|
||||
this.enqueueEmbed(
|
||||
msg.jobId,
|
||||
runningJobData.repositoryId,
|
||||
runningJobData.versionId ?? null
|
||||
);
|
||||
}
|
||||
|
||||
this.dispatch();
|
||||
@@ -153,10 +220,11 @@ export class WorkerPool {
|
||||
const runningJob = this.runningJobs.get(worker);
|
||||
if (runningJob) {
|
||||
this.runningJobs.delete(worker);
|
||||
this.runningRepoIds.delete(runningJob.repositoryId);
|
||||
this.runningJobKeys.delete(WorkerPool.jobKey(runningJob.repositoryId, runningJob.versionId));
|
||||
}
|
||||
this.idleWorkers.push(worker);
|
||||
this.options.onJobFailed(msg.jobId, msg.error);
|
||||
this.emitStatusChanged();
|
||||
this.dispatch();
|
||||
}
|
||||
}
|
||||
@@ -176,13 +244,15 @@ export class WorkerPool {
|
||||
const runningJob = this.runningJobs.get(worker);
|
||||
if (runningJob && code !== 0) {
|
||||
this.runningJobs.delete(worker);
|
||||
this.runningRepoIds.delete(runningJob.repositoryId);
|
||||
this.runningJobKeys.delete(WorkerPool.jobKey(runningJob.repositoryId, runningJob.versionId));
|
||||
this.options.onJobFailed(runningJob.jobId, `Worker crashed with code ${code}`);
|
||||
} else if (runningJob) {
|
||||
this.runningJobs.delete(worker);
|
||||
this.runningRepoIds.delete(runningJob.repositoryId);
|
||||
this.runningJobKeys.delete(WorkerPool.jobKey(runningJob.repositoryId, runningJob.versionId));
|
||||
}
|
||||
|
||||
this.emitStatusChanged();
|
||||
|
||||
// Remove from workers array
|
||||
const workerIdx = this.workers.indexOf(worker);
|
||||
if (workerIdx !== -1) {
|
||||
@@ -212,6 +282,17 @@ export class WorkerPool {
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Handles messages from the dedicated write worker.
 * 'ready' marks the worker available; 'write_error' is logged; other
 * acknowledgements (write_ack) are intentionally ignored here.
 */
private onWriteWorkerMessage(msg: WriteWorkerResponse): void {
  switch (msg.type) {
    case 'ready':
      this.writeReady = true;
      break;
    case 'write_error':
      console.error('[WorkerPool] Write worker failed for job:', msg.jobId, msg.error);
      break;
    default:
      // write_ack and any future message types require no action in the pool.
      break;
  }
}
|
||||
|
||||
private processEmbedQueue(): void {
|
||||
if (!this.embedWorker || !this.embedReady) {
|
||||
return;
|
||||
@@ -250,6 +331,7 @@ export class WorkerPool {
|
||||
}
|
||||
|
||||
public setMaxConcurrency(n: number): void {
|
||||
this.options.concurrency = n;
|
||||
const current = this.workers.length;
|
||||
|
||||
if (n > current) {
|
||||
@@ -274,6 +356,8 @@ export class WorkerPool {
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
this.emitStatusChanged();
|
||||
}
|
||||
|
||||
public async shutdown(): Promise<void> {
|
||||
@@ -300,6 +384,14 @@ export class WorkerPool {
|
||||
}
|
||||
}
|
||||
|
||||
if (this.writeWorker) {
|
||||
try {
|
||||
this.writeWorker.postMessage({ type: 'shutdown' });
|
||||
} catch {
|
||||
// Worker might already be exited
|
||||
}
|
||||
}
|
||||
|
||||
// Wait for workers to exit with timeout
|
||||
const timeout = 5000;
|
||||
const startTime = Date.now();
|
||||
@@ -329,9 +421,41 @@ export class WorkerPool {
|
||||
}
|
||||
}
|
||||
|
||||
if (this.writeWorker) {
|
||||
try {
|
||||
this.writeWorker.terminate();
|
||||
} catch {
|
||||
// Already terminated
|
||||
}
|
||||
}
|
||||
|
||||
this.workers = [];
|
||||
this.idleWorkers = [];
|
||||
this.embedWorker = null;
|
||||
this.writeWorker = null;
|
||||
this.emitStatusChanged();
|
||||
}
|
||||
|
||||
/**
 * Builds a point-in-time snapshot of the pool: configured concurrency,
 * active/idle counts, and a per-worker state entry.
 */
public getStatus(): WorkerPoolStatus {
  return {
    concurrency: this.options.concurrency,
    active: this.runningJobs.size,
    idle: this.idleWorkers.length,
    workers: this.workers.map((poolWorker, index) => {
      const job = this.runningJobs.get(poolWorker);
      if (!job) {
        // No entry in runningJobs means the worker is idle.
        return { index, state: 'idle', jobId: null, repositoryId: null, versionId: null };
      }
      return {
        index,
        state: 'running',
        jobId: job.jobId ?? null,
        repositoryId: job.repositoryId ?? null,
        versionId: job.versionId ?? null
      };
    })
  };
}
|
||||
|
||||
/** Notifies the optional onWorkerStatus listener with a fresh pool snapshot. */
private emitStatusChanged(): void {
  // Optional-call operator: getStatus() is only evaluated when a listener is
  // registered, and the callback keeps its `this` bound to this.options.
  this.options.onWorkerStatus?.(this.getStatus());
}
|
||||
|
||||
public get isFallbackMode(): boolean {
|
||||
|
||||
@@ -19,7 +19,61 @@ export type EmbedWorkerResponse =
|
||||
| { type: 'embed-done'; jobId: string }
|
||||
| { type: 'embed-failed'; jobId: string; error: string };
|
||||
|
||||
/** Messages accepted by the write worker: batched writes plus a shutdown signal. */
export type WriteWorkerRequest = WriteRequest | { type: 'shutdown' };

/** Messages the write worker posts back to the pool. */
export type WriteWorkerResponse =
  | { type: 'ready' }
  | WriteAck
  | WriteError;

/** Initialization payload passed as workerData when spawning a worker. */
export interface WorkerInitData {
  // Path to the SQLite database file the worker opens independently.
  dbPath: string;
  // Embedding profile to use; omitted when embedding is not configured.
  embeddingProfileId?: string;
}

// Write worker message types (Phase 6)

/**
 * Structured-clone-safe representation of a documents-table row, suitable for
 * transfer over worker postMessage.
 * NOTE(review): indexedAt appears to be a unix timestamp — confirm units
 * (seconds vs milliseconds) against the producing code.
 */
export interface SerializedDocument {
  id: string;
  repositoryId: string;
  versionId: string | null;
  filePath: string;
  title: string | null;
  language: string | null;
  tokenCount: number;
  checksum: string;
  indexedAt: number;
}

/**
 * Structured-clone-safe representation of a snippets-table row.
 * NOTE(review): createdAt is presumably the same timestamp convention as
 * SerializedDocument.indexedAt — verify.
 */
export interface SerializedSnippet {
  id: string;
  documentId: string;
  repositoryId: string;
  versionId: string | null;
  // Discriminates code snippets from informational/prose snippets.
  type: 'code' | 'info';
  title: string | null;
  content: string;
  language: string | null;
  breadcrumb: string | null;
  tokenCount: number;
  createdAt: number;
}

/** A batched write of parsed documents and snippets, keyed by the parse job id. */
export type WriteRequest = {
  type: 'write';
  jobId: string;
  documents: SerializedDocument[];
  snippets: SerializedSnippet[];
};

/** Successful write acknowledgement, echoing the row counts that were written. */
export type WriteAck = {
  type: 'write_ack';
  jobId: string;
  documentCount: number;
  snippetCount: number;
};

/** Failed write notification; error is a human-readable message string. */
export type WriteError = {
  type: 'write_error';
  jobId: string;
  error: string;
};
|
||||
|
||||
93
src/lib/server/pipeline/write-worker-entry.ts
Normal file
93
src/lib/server/pipeline/write-worker-entry.ts
Normal file
@@ -0,0 +1,93 @@
|
||||
/**
 * Dedicated write-worker entry point.
 *
 * Runs in its own worker thread with its own SQLite connection, receives
 * batched document/snippet rows from the pool, and writes each batch in a
 * single transaction, replying with write_ack or write_error per job.
 */
import { workerData, parentPort } from 'node:worker_threads';
import Database from 'better-sqlite3';
import type {
  SerializedDocument,
  SerializedSnippet,
  WorkerInitData,
  WriteWorkerRequest,
  WriteWorkerResponse
} from './worker-types.js';

// Module-level setup runs once at worker start; order matters: pragmas are
// applied before statements are prepared, and statements before the
// transaction wrapper that uses them.
const { dbPath } = workerData as WorkerInitData;
const db = new Database(dbPath);
db.pragma('journal_mode = WAL');
db.pragma('foreign_keys = ON');
// Wait up to 5s for a competing writer instead of failing with SQLITE_BUSY.
db.pragma('busy_timeout = 5000');
db.pragma('synchronous = NORMAL');
// Negative value = cache size in KiB: 64 MiB page cache.
db.pragma('cache_size = -65536');
db.pragma('temp_store = MEMORY');
// 256 MiB memory-mapped I/O window.
db.pragma('mmap_size = 268435456');
db.pragma('wal_autocheckpoint = 1000');

// INSERT OR REPLACE keeps re-indexing idempotent: rows keyed by id are
// overwritten rather than duplicated.
const insertDocument = db.prepare(
  `INSERT OR REPLACE INTO documents
   (id, repository_id, version_id, file_path, title, language, token_count, checksum, indexed_at)
   VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)`
);

const insertSnippet = db.prepare(
  `INSERT OR REPLACE INTO snippets
   (id, document_id, repository_id, version_id, type, title, content, language, breadcrumb, token_count, created_at)
   VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)`
);

// One transaction per batch: either all documents and snippets of a job land,
// or none do (better-sqlite3 rolls back when the function throws).
const writeBatch = db.transaction((documents: SerializedDocument[], snippets: SerializedSnippet[]) => {
  for (const document of documents) {
    insertDocument.run(
      document.id,
      document.repositoryId,
      document.versionId,
      document.filePath,
      document.title,
      document.language,
      document.tokenCount,
      document.checksum,
      document.indexedAt
    );
  }

  for (const snippet of snippets) {
    insertSnippet.run(
      snippet.id,
      snippet.documentId,
      snippet.repositoryId,
      snippet.versionId,
      snippet.type,
      snippet.title,
      snippet.content,
      snippet.language,
      snippet.breadcrumb,
      snippet.tokenCount,
      snippet.createdAt
    );
  }
});

// Signal the pool that this worker's DB connection is open and statements are
// prepared, so write requests may start flowing.
parentPort?.postMessage({ type: 'ready' } satisfies WriteWorkerResponse);

parentPort?.on('message', (msg: WriteWorkerRequest) => {
  if (msg.type === 'shutdown') {
    // Close cleanly so WAL state is finalized, then exit the thread.
    db.close();
    process.exit(0);
  }

  if (msg.type !== 'write') {
    return;
  }

  try {
    writeBatch(msg.documents, msg.snippets);
    parentPort?.postMessage({
      type: 'write_ack',
      jobId: msg.jobId,
      documentCount: msg.documents.length,
      snippetCount: msg.snippets.length
    } satisfies WriteWorkerResponse);
  } catch (error) {
    // Report the failure to the pool rather than crashing the worker; the
    // transaction above has already rolled back.
    parentPort?.postMessage({
      type: 'write_error',
      jobId: msg.jobId,
      error: error instanceof Error ? error.message : String(error)
    } satisfies WriteWorkerResponse);
  }
});
|
||||
@@ -15,6 +15,8 @@ import { HybridSearchService } from './hybrid.search.service.js';
|
||||
import { VectorSearch, cosineSimilarity } from './vector.search.js';
|
||||
import { reciprocalRankFusion } from './rrf.js';
|
||||
import type { EmbeddingProvider, EmbeddingVector } from '../embeddings/provider.js';
|
||||
import { loadSqliteVec } from '../db/sqlite-vec.js';
|
||||
import { SqliteVecStore } from './sqlite-vec.store.js';
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// In-memory DB factory
|
||||
@@ -23,6 +25,7 @@ import type { EmbeddingProvider, EmbeddingVector } from '../embeddings/provider.
|
||||
function createTestDb(): Database.Database {
|
||||
const client = new Database(':memory:');
|
||||
client.pragma('foreign_keys = ON');
|
||||
loadSqliteVec(client);
|
||||
|
||||
const migrationsFolder = join(import.meta.dirname, '../db/migrations');
|
||||
|
||||
@@ -30,7 +33,11 @@ function createTestDb(): Database.Database {
|
||||
const migrations = [
|
||||
'0000_large_master_chief.sql',
|
||||
'0001_quick_nighthawk.sql',
|
||||
'0002_silky_stellaris.sql'
|
||||
'0002_silky_stellaris.sql',
|
||||
'0003_multiversion_config.sql',
|
||||
'0004_complete_sentry.sql',
|
||||
'0005_fix_stage_defaults.sql',
|
||||
'0006_yielding_centennial.sql'
|
||||
];
|
||||
for (const migrationFile of migrations) {
|
||||
const migrationSql = readFileSync(join(migrationsFolder, migrationFile), 'utf-8');
|
||||
@@ -121,6 +128,7 @@ function seedEmbedding(
|
||||
VALUES (?, ?, ?, ?, ?, ?)`
|
||||
)
|
||||
.run(snippetId, profileId, model, values.length, Buffer.from(f32.buffer), NOW_S);
|
||||
new SqliteVecStore(client).upsertEmbedding(profileId, snippetId, f32);
|
||||
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
@@ -368,6 +376,42 @@ describe('VectorSearch', () => {
|
||||
const results = vs.vectorSearch(new Float32Array([-0.5, 0.5]), { repositoryId: repoId });
|
||||
expect(results[0].score).toBeCloseTo(1.0, 4);
|
||||
});
|
||||
|
||||
it('filters by profileId using per-profile vec tables', () => {
|
||||
client
|
||||
.prepare(
|
||||
`INSERT INTO embedding_profiles (id, provider_kind, title, enabled, is_default, model, dimensions, config, created_at, updated_at)
|
||||
VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?)`
|
||||
)
|
||||
.run('secondary-profile', 'local-transformers', 'Secondary', 1, 0, 'test-model', 2, '{}', NOW_S, NOW_S);
|
||||
|
||||
const defaultSnippet = seedSnippet(client, {
|
||||
repositoryId: repoId,
|
||||
documentId: docId,
|
||||
content: 'default profile snippet'
|
||||
});
|
||||
const secondarySnippet = seedSnippet(client, {
|
||||
repositoryId: repoId,
|
||||
documentId: docId,
|
||||
content: 'secondary profile snippet'
|
||||
});
|
||||
|
||||
seedEmbedding(client, defaultSnippet, [1, 0], 'local-default');
|
||||
seedEmbedding(client, secondarySnippet, [1, 0], 'secondary-profile');
|
||||
|
||||
const vs = new VectorSearch(client);
|
||||
const defaultResults = vs.vectorSearch(new Float32Array([1, 0]), {
|
||||
repositoryId: repoId,
|
||||
profileId: 'local-default'
|
||||
});
|
||||
const secondaryResults = vs.vectorSearch(new Float32Array([1, 0]), {
|
||||
repositoryId: repoId,
|
||||
profileId: 'secondary-profile'
|
||||
});
|
||||
|
||||
expect(defaultResults.map((result) => result.snippetId)).toEqual([defaultSnippet]);
|
||||
expect(secondaryResults.map((result) => result.snippetId)).toEqual([secondarySnippet]);
|
||||
});
|
||||
});
|
||||
|
||||
// ===========================================================================
|
||||
|
||||
@@ -148,7 +148,12 @@ export class HybridSearchService {
|
||||
|
||||
const topIds = vectorResults.slice(0, limit).map((r) => r.snippetId);
|
||||
return {
|
||||
results: this.fetchSnippetsByIds(topIds, options.repositoryId, options.type),
|
||||
results: this.fetchSnippetsByIds(
|
||||
topIds,
|
||||
options.repositoryId,
|
||||
options.versionId,
|
||||
options.type
|
||||
),
|
||||
searchModeUsed: 'semantic'
|
||||
};
|
||||
}
|
||||
@@ -194,7 +199,12 @@ export class HybridSearchService {
|
||||
|
||||
const topIds = vectorResults.slice(0, limit).map((r) => r.snippetId);
|
||||
return {
|
||||
results: this.fetchSnippetsByIds(topIds, options.repositoryId, options.type),
|
||||
results: this.fetchSnippetsByIds(
|
||||
topIds,
|
||||
options.repositoryId,
|
||||
options.versionId,
|
||||
options.type
|
||||
),
|
||||
searchModeUsed: 'keyword_fallback'
|
||||
};
|
||||
}
|
||||
@@ -220,7 +230,12 @@ export class HybridSearchService {
|
||||
if (alpha === 1) {
|
||||
const topIds = vectorResults.slice(0, limit).map((r) => r.snippetId);
|
||||
return {
|
||||
results: this.fetchSnippetsByIds(topIds, options.repositoryId, options.type),
|
||||
results: this.fetchSnippetsByIds(
|
||||
topIds,
|
||||
options.repositoryId,
|
||||
options.versionId,
|
||||
options.type
|
||||
),
|
||||
searchModeUsed: 'semantic'
|
||||
};
|
||||
}
|
||||
@@ -234,7 +249,12 @@ export class HybridSearchService {
|
||||
|
||||
const topIds = fused.slice(0, limit).map((r) => r.id);
|
||||
return {
|
||||
results: this.fetchSnippetsByIds(topIds, options.repositoryId, options.type),
|
||||
results: this.fetchSnippetsByIds(
|
||||
topIds,
|
||||
options.repositoryId,
|
||||
options.versionId,
|
||||
options.type
|
||||
),
|
||||
searchModeUsed: 'hybrid'
|
||||
};
|
||||
}
|
||||
@@ -253,13 +273,19 @@ export class HybridSearchService {
|
||||
private fetchSnippetsByIds(
|
||||
ids: string[],
|
||||
repositoryId: string,
|
||||
versionId?: string,
|
||||
type?: 'code' | 'info'
|
||||
): SnippetSearchResult[] {
|
||||
if (ids.length === 0) return [];
|
||||
|
||||
const placeholders = ids.map(() => '?').join(', ');
|
||||
const params: unknown[] = [...ids, repositoryId];
|
||||
let versionClause = '';
|
||||
let typeClause = '';
|
||||
if (versionId !== undefined) {
|
||||
versionClause = ' AND s.version_id = ?';
|
||||
params.push(versionId);
|
||||
}
|
||||
if (type !== undefined) {
|
||||
typeClause = ' AND s.type = ?';
|
||||
params.push(type);
|
||||
@@ -276,7 +302,7 @@ export class HybridSearchService {
|
||||
FROM snippets s
|
||||
JOIN repositories r ON r.id = s.repository_id
|
||||
WHERE s.id IN (${placeholders})
|
||||
AND s.repository_id = ?${typeClause}`
|
||||
AND s.repository_id = ?${versionClause}${typeClause}`
|
||||
)
|
||||
.all(...params) as RawSnippetById[];
|
||||
|
||||
|
||||
394
src/lib/server/search/sqlite-vec.store.ts
Normal file
394
src/lib/server/search/sqlite-vec.store.ts
Normal file
@@ -0,0 +1,394 @@
|
||||
import type Database from 'better-sqlite3';
|
||||
import {
|
||||
loadSqliteVec,
|
||||
quoteSqliteIdentifier,
|
||||
sqliteVecRowidTableName,
|
||||
sqliteVecTableName
|
||||
} from '$lib/server/db/sqlite-vec.js';
|
||||
|
||||
/** Filters and limits for a nearest-neighbor query against a profile's vec table. */
export interface SqliteVecQueryOptions {
  repositoryId: string;
  // When set, restricts matches to snippets of this version.
  versionId?: string;
  // Embedding profile whose vec table is searched; defaults applied by the store.
  profileId?: string;
  limit?: number;
}

/** One KNN hit: the matched snippet, a normalized score, and the raw distance. */
export interface SqliteVecQueryResult {
  snippetId: string;
  // Derived from distance via 1 / (1 + distance); higher is closer.
  score: number;
  distance: number;
}

/** Row shape for the embedding_profiles dimensions lookup. */
interface ProfileDimensionsRow {
  dimensions: number;
}

/** Aggregate over snippet_embeddings used to validate stored dimensions. */
interface StoredDimensionsRow {
  count: number;
  // NULL when no rows exist for the profile.
  min_dimensions: number | null;
  max_dimensions: number | null;
}

/** Row shape for snippet-id -> rowid lookups in the mapping table. */
interface SnippetRowidRow {
  rowid: number;
}

/** Raw KNN result row from the vec0 MATCH query. */
interface RawKnnRow {
  snippet_id: string;
  distance: number;
}

/** Canonical embedding bytes read back from snippet_embeddings for backfill. */
interface CanonicalEmbeddingRow {
  snippet_id: string;
  embedding: Buffer;
}

/** (profile_id, snippet_id) pair identifying one stored embedding. */
interface StoredEmbeddingRef {
  profile_id: string;
  snippet_id: string;
}

/** Resolved per-profile table names (raw and identifier-quoted) plus dimensions. */
interface ProfileStoreTables {
  vectorTableName: string;
  rowidTableName: string;
  // Pre-quoted for safe interpolation into SQL text.
  quotedVectorTableName: string;
  quotedRowidTableName: string;
  dimensions: number;
}
|
||||
|
||||
function toEmbeddingBuffer(values: Float32Array): Buffer {
|
||||
return Buffer.from(values.buffer, values.byteOffset, values.byteLength);
|
||||
}
|
||||
|
||||
function distanceToScore(distance: number): number {
|
||||
return 1 / (1 + distance);
|
||||
}
|
||||
|
||||
/**
 * Persistence layer for per-profile sqlite-vec vector indexes.
 *
 * Each embedding profile gets two tables: a vec0 virtual table holding the
 * vectors, and a plain mapping table that ties vec rowids to snippet ids
 * (vec0 tables cannot hold arbitrary columns). The canonical embedding data
 * lives in snippet_embeddings; the vec tables are a derived index that this
 * class keeps in sync.
 */
export class SqliteVecStore {
  constructor(private readonly db: Database.Database) {}

  /**
   * Creates the mapping table and vec0 virtual table for a profile if they do
   * not exist, and returns the dimension count used for the vec0 schema.
   */
  ensureProfileStore(profileId: string, preferredDimensions?: number): number {
    const tables = this.getProfileStoreTables(profileId, preferredDimensions);

    // Table names are identifier-quoted by getProfileStoreTables, so string
    // interpolation here does not open an injection path via profile ids.
    this.db.exec(`
      CREATE TABLE IF NOT EXISTS ${tables.quotedRowidTableName} (
        rowid INTEGER PRIMARY KEY,
        snippet_id TEXT NOT NULL UNIQUE REFERENCES snippets(id) ON DELETE CASCADE
      );
    `);
    this.db.exec(`
      CREATE VIRTUAL TABLE IF NOT EXISTS ${tables.quotedVectorTableName}
      USING vec0(embedding float[${tables.dimensions}]);
    `);

    return tables.dimensions;
  }

  /**
   * Inserts or updates one snippet's vector in the profile's vec table,
   * keeping the rowid mapping consistent.
   */
  upsertEmbedding(profileId: string, snippetId: string, embedding: Float32Array): void {
    const tables = this.getProfileStoreTables(profileId, embedding.length);

    this.ensureProfileStore(profileId, tables.dimensions);

    const existingRow = this.db
      .prepare<[string], SnippetRowidRow>(
        `SELECT rowid FROM ${tables.quotedRowidTableName} WHERE snippet_id = ?`
      )
      .get(snippetId);

    const embeddingBuffer = toEmbeddingBuffer(embedding);
    if (existingRow) {
      // Snippet already indexed: overwrite the vector in place.
      this.db
        .prepare<[Buffer, number]>(
          `UPDATE ${tables.quotedVectorTableName} SET embedding = ? WHERE rowid = ?`
        )
        .run(embeddingBuffer, existingRow.rowid);
      return;
    }

    // New snippet: insert the vector first, then record the rowid mapping
    // from the auto-assigned vec0 rowid.
    const insertResult = this.db
      .prepare<[Buffer]>(`INSERT INTO ${tables.quotedVectorTableName} (embedding) VALUES (?)`)
      .run(embeddingBuffer);
    this.db
      .prepare<[number, string]>(
        `INSERT INTO ${tables.quotedRowidTableName} (rowid, snippet_id) VALUES (?, ?)`
      )
      .run(Number(insertResult.lastInsertRowid), snippetId);
  }

  /**
   * Convenience wrapper: reinterprets a raw byte Buffer as Float32 values and
   * upserts it.
   * NOTE(review): the Float32Array view requires embedding.byteOffset to be
   * 4-byte aligned; node Buffer slices are not guaranteed aligned — confirm
   * callers always pass aligned buffers.
   */
  upsertEmbeddingBuffer(
    profileId: string,
    snippetId: string,
    embedding: Buffer,
    dimensions?: number
  ): void {
    const vector = new Float32Array(
      embedding.buffer,
      embedding.byteOffset,
      dimensions ?? Math.floor(embedding.byteLength / Float32Array.BYTES_PER_ELEMENT)
    );
    this.upsertEmbedding(profileId, snippetId, vector);
  }

  /** Removes one snippet's vector and its rowid mapping; no-op if absent. */
  deleteEmbedding(profileId: string, snippetId: string): void {
    const tables = this.getProfileStoreTables(profileId);
    this.ensureProfileStore(profileId);

    const existingRow = this.db
      .prepare<[string], SnippetRowidRow>(
        `SELECT rowid FROM ${tables.quotedRowidTableName} WHERE snippet_id = ?`
      )
      .get(snippetId);

    if (!existingRow) {
      return;
    }

    // Delete the vec row first, then the mapping row that pointed at it.
    this.db
      .prepare<[number]>(`DELETE FROM ${tables.quotedVectorTableName} WHERE rowid = ?`)
      .run(existingRow.rowid);
    this.db
      .prepare<[string]>(`DELETE FROM ${tables.quotedRowidTableName} WHERE snippet_id = ?`)
      .run(snippetId);
  }

  /** Deletes vec rows for every embedding whose snippet belongs to the given documents. */
  deleteEmbeddingsForDocumentIds(documentIds: string[]): void {
    if (documentIds.length === 0) {
      return;
    }

    const placeholders = documentIds.map(() => '?').join(', ');
    const rows = this.db
      .prepare<unknown[], StoredEmbeddingRef>(
        `SELECT DISTINCT se.profile_id, se.snippet_id
         FROM snippet_embeddings se
         INNER JOIN snippets s ON s.id = se.snippet_id
         WHERE s.document_id IN (${placeholders})`
      )
      .all(...documentIds);

    this.deleteEmbeddingRefs(rows);
  }

  /** Deletes vec rows for every embedding whose snippet belongs to the repository. */
  deleteEmbeddingsForRepository(repositoryId: string): void {
    const rows = this.db
      .prepare<[string], StoredEmbeddingRef>(
        `SELECT DISTINCT se.profile_id, se.snippet_id
         FROM snippet_embeddings se
         INNER JOIN snippets s ON s.id = se.snippet_id
         WHERE s.repository_id = ?`
      )
      .all(repositoryId);

    this.deleteEmbeddingRefs(rows);
  }

  /** Deletes vec rows for every embedding belonging to one repository version. */
  deleteEmbeddingsForVersion(repositoryId: string, versionId: string): void {
    const rows = this.db
      .prepare<[string, string], StoredEmbeddingRef>(
        `SELECT DISTINCT se.profile_id, se.snippet_id
         FROM snippet_embeddings se
         INNER JOIN snippets s ON s.id = se.snippet_id
         WHERE s.repository_id = ? AND s.version_id = ?`
      )
      .all(repositoryId, versionId);

    this.deleteEmbeddingRefs(rows);
  }

  /**
   * K-nearest-neighbor search over a profile's vec table, filtered to one
   * repository (and optionally one version). Synchronizes the derived vec
   * index with snippet_embeddings before querying.
   */
  queryNearestNeighbors(
    queryEmbedding: Float32Array,
    options: SqliteVecQueryOptions
  ): SqliteVecQueryResult[] {
    const { repositoryId, versionId, profileId = 'local-default', limit = 50 } = options;
    if (limit <= 0) {
      return [];
    }

    const tables = this.getProfileStoreTables(profileId, queryEmbedding.length);

    this.ensureProfileStore(profileId, tables.dimensions);
    const totalRows = this.synchronizeProfileStore(profileId, tables);
    if (totalRows === 0) {
      return [];
    }

    // vec.k is set to the total row count (not `limit`) so that rows removed
    // by the repository/version joins cannot starve the final LIMIT — the KNN
    // scan returns everything and SQL filters afterwards.
    let sql = `
      SELECT rowids.snippet_id, vec.distance
      FROM ${tables.quotedVectorTableName} vec
      JOIN ${tables.quotedRowidTableName} rowids ON rowids.rowid = vec.rowid
      JOIN snippets s ON s.id = rowids.snippet_id
      WHERE vec.embedding MATCH ?
        AND vec.k = ?
        AND s.repository_id = ?
    `;
    const params: unknown[] = [toEmbeddingBuffer(queryEmbedding), totalRows, repositoryId];

    if (versionId !== undefined) {
      sql += ' AND s.version_id = ?';
      params.push(versionId);
    }

    sql += ' ORDER BY vec.distance ASC LIMIT ?';
    params.push(limit);

    const rows = this.db.prepare<unknown[], RawKnnRow>(sql).all(...params);
    return rows.map((row) => ({
      snippetId: row.snippet_id,
      score: distanceToScore(row.distance),
      distance: row.distance
    }));
  }

  /**
   * Reconciles the derived vec index with the canonical snippet_embeddings
   * table: prunes stale mapping rows, prunes orphaned vec rows, backfills
   * missing vectors, and returns the resulting vec row count.
   */
  private synchronizeProfileStore(profileId: string, tables: ProfileStoreTables): number {
    // 1. Drop mapping rows whose canonical embedding (for this profile and
    //    dimension) is gone, or whose vec row no longer exists.
    this.db
      .prepare<[string, number]>(
        `DELETE FROM ${tables.quotedRowidTableName}
         WHERE rowid IN (
           SELECT rowids.rowid
           FROM ${tables.quotedRowidTableName} rowids
           LEFT JOIN snippet_embeddings se
             ON se.snippet_id = rowids.snippet_id
             AND se.profile_id = ?
             AND se.dimensions = ?
           LEFT JOIN ${tables.quotedVectorTableName} vec ON vec.rowid = rowids.rowid
           WHERE se.snippet_id IS NULL OR vec.rowid IS NULL
         )`
      )
      .run(profileId, tables.dimensions);

    // 2. Drop vec rows with no surviving mapping row.
    this.db
      .prepare(
        `DELETE FROM ${tables.quotedVectorTableName}
         WHERE rowid NOT IN (SELECT rowid FROM ${tables.quotedRowidTableName})`
      )
      .run();

    // 3. Backfill canonical embeddings that are missing from the vec index.
    const missingRows = this.db
      .prepare<[string, number], CanonicalEmbeddingRow>(
        `SELECT se.snippet_id, se.embedding
         FROM snippet_embeddings se
         LEFT JOIN ${tables.quotedRowidTableName} rowids ON rowids.snippet_id = se.snippet_id
         WHERE se.profile_id = ?
           AND se.dimensions = ?
           AND rowids.snippet_id IS NULL`
      )
      .all(profileId, tables.dimensions);

    if (missingRows.length > 0) {
      const backfill = this.db.transaction((rows: CanonicalEmbeddingRow[]) => {
        for (const row of rows) {
          this.upsertEmbedding(
            profileId,
            row.snippet_id,
            new Float32Array(
              row.embedding.buffer,
              row.embedding.byteOffset,
              tables.dimensions
            )
          );
        }
      });
      backfill(missingRows);
    }

    // 4. Report how many usable (mapped) vectors the index now holds.
    return (
      this.db
        .prepare<[], { count: number }>(
          `SELECT COUNT(*) AS count
           FROM ${tables.quotedVectorTableName} vec
           JOIN ${tables.quotedRowidTableName} rowids ON rowids.rowid = vec.rowid`
        )
        .get()?.count ?? 0
    );
  }

  /** Deletes each referenced embedding inside one transaction. */
  private deleteEmbeddingRefs(rows: StoredEmbeddingRef[]): void {
    if (rows.length === 0) {
      return;
    }

    const removeRows = this.db.transaction((refs: StoredEmbeddingRef[]) => {
      for (const ref of refs) {
        this.deleteEmbedding(ref.profile_id, ref.snippet_id);
      }
    });

    removeRows(rows);
  }

  /**
   * Resolves the table names and effective dimension count for a profile.
   * Loads the sqlite-vec extension on this connection as a side effect.
   * @throws Error when the profile does not exist or dimensions conflict.
   */
  private getProfileStoreTables(
    profileId: string,
    preferredDimensions?: number
  ): ProfileStoreTables {
    loadSqliteVec(this.db);

    const dimensionsRow = this.db
      .prepare<[string], ProfileDimensionsRow>(
        'SELECT dimensions FROM embedding_profiles WHERE id = ?'
      )
      .get(profileId);
    if (!dimensionsRow) {
      throw new Error(`Embedding profile not found: ${profileId}`);
    }

    // Aggregate over stored embeddings to detect dimension drift per profile.
    const storedDimensions = this.db
      .prepare<[string], StoredDimensionsRow>(
        `SELECT
           COUNT(*) AS count,
           MIN(dimensions) AS min_dimensions,
           MAX(dimensions) AS max_dimensions
         FROM snippet_embeddings
         WHERE profile_id = ?`
      )
      .get(profileId);

    const effectiveDimensions = this.resolveDimensions(
      profileId,
      dimensionsRow.dimensions,
      storedDimensions,
      preferredDimensions
    );

    const vectorTableName = sqliteVecTableName(profileId);
    const rowidTableName = sqliteVecRowidTableName(profileId);

    return {
      vectorTableName,
      rowidTableName,
      quotedVectorTableName: quoteSqliteIdentifier(vectorTableName),
      quotedRowidTableName: quoteSqliteIdentifier(rowidTableName),
      dimensions: effectiveDimensions
    };
  }

  /**
   * Picks the dimension count to use for a profile's vec table.
   * Stored embeddings win (and must be internally consistent); otherwise the
   * caller's preference, then the profile's declared dimensions.
   * @throws Error on inconsistent or mismatched dimensions.
   */
  private resolveDimensions(
    profileId: string,
    profileDimensions: number,
    storedDimensions: StoredDimensionsRow | undefined,
    preferredDimensions?: number
  ): number {
    if (storedDimensions && storedDimensions.count > 0) {
      if (storedDimensions.min_dimensions !== storedDimensions.max_dimensions) {
        throw new Error(`Stored embedding dimensions are inconsistent for profile ${profileId}`);
      }

      const canonicalDimensions = storedDimensions.min_dimensions;
      if (canonicalDimensions === null) {
        // Defensive: count > 0 should imply a non-null MIN, but guard anyway.
        throw new Error(`Stored embedding dimensions are missing for profile ${profileId}`);
      }

      if (
        preferredDimensions !== undefined &&
        preferredDimensions !== canonicalDimensions
      ) {
        throw new Error(
          `Embedding dimension mismatch for profile ${profileId}: expected ${canonicalDimensions}, received ${preferredDimensions}`
        );
      }

      return canonicalDimensions;
    }

    return preferredDimensions ?? profileDimensions;
  }
}
|
||||
@@ -1,16 +1,12 @@
|
||||
/**
|
||||
* Vector similarity search over stored snippet embeddings.
|
||||
*
|
||||
* SQLite does not natively support vector operations, so cosine similarity is
|
||||
* computed in JavaScript after loading candidate embeddings from the
|
||||
* snippet_embeddings table.
|
||||
*
|
||||
* Performance note: For repositories with > 50k snippets, pre-filtering by
|
||||
* FTS5 candidates before computing cosine similarity is recommended. For v1,
|
||||
* in-memory computation is acceptable.
|
||||
* Uses sqlite-vec vector_top_k() for ANN search instead of in-memory cosine
|
||||
* similarity computation over all embeddings.
|
||||
*/
|
||||
|
||||
import type Database from 'better-sqlite3';
|
||||
import { SqliteVecStore } from './sqlite-vec.store.js';
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Types
|
||||
@@ -28,12 +24,6 @@ export interface VectorSearchOptions {
|
||||
limit?: number;
|
||||
}
|
||||
|
||||
/** Raw DB row from snippet_embeddings joined with snippets. */
|
||||
interface RawEmbeddingRow {
|
||||
snippet_id: string;
|
||||
embedding: Buffer;
|
||||
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Math helpers
|
||||
// ---------------------------------------------------------------------------
|
||||
@@ -69,46 +59,26 @@ export function cosineSimilarity(a: Float32Array, b: Float32Array): number {
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
export class VectorSearch {
|
||||
constructor(private readonly db: Database.Database) {}
|
||||
private readonly sqliteVecStore: SqliteVecStore;
|
||||
|
||||
constructor(private readonly db: Database.Database) {
|
||||
this.sqliteVecStore = new SqliteVecStore(db);
|
||||
}
|
||||
|
||||
/**
|
||||
* Search stored embeddings by cosine similarity to the query embedding.
|
||||
*
|
||||
* Uses in-memory cosine similarity computation. The vec_embedding column
|
||||
* stores raw Float32 bytes for forward compatibility with vector-capable
|
||||
* libSQL builds; scoring is performed in JS using the same bytes.
|
||||
*
|
||||
* @param queryEmbedding - The embedded representation of the search query.
|
||||
* @param options - Search options including repositoryId, optional versionId, profileId, and limit.
|
||||
* @returns Results sorted by descending cosine similarity score.
|
||||
*/
|
||||
vectorSearch(queryEmbedding: Float32Array, options: VectorSearchOptions): VectorSearchResult[] {
|
||||
const { repositoryId, versionId, profileId = 'local-default', limit = 50 } = options;
|
||||
|
||||
let sql = `
|
||||
SELECT se.snippet_id, se.embedding
|
||||
FROM snippet_embeddings se
|
||||
JOIN snippets s ON s.id = se.snippet_id
|
||||
WHERE s.repository_id = ?
|
||||
AND se.profile_id = ?
|
||||
`;
|
||||
const params: unknown[] = [repositoryId, profileId];
|
||||
|
||||
if (versionId) {
|
||||
sql += ' AND s.version_id = ?';
|
||||
params.push(versionId);
|
||||
}
|
||||
|
||||
const rows = this.db.prepare<unknown[], RawEmbeddingRow>(sql).all(...params);
|
||||
|
||||
const scored: VectorSearchResult[] = rows.map((row) => {
|
||||
const embedding = new Float32Array(
|
||||
row.embedding.buffer,
|
||||
row.embedding.byteOffset,
|
||||
row.embedding.byteLength / 4
|
||||
);
|
||||
return {
|
||||
snippetId: row.snippet_id,
|
||||
score: cosineSimilarity(queryEmbedding, embedding)
|
||||
};
|
||||
});
|
||||
|
||||
return scored.sort((a, b) => b.score - a.score).slice(0, limit);
|
||||
return this.sqliteVecStore
|
||||
.queryNearestNeighbors(queryEmbedding, options)
|
||||
.map((result) => ({ snippetId: result.snippetId, score: result.score }));
|
||||
}
|
||||
}
|
||||
|
||||
@@ -11,6 +11,8 @@ import Database from 'better-sqlite3';
|
||||
import { readFileSync } from 'node:fs';
|
||||
import { join } from 'node:path';
|
||||
import { RepositoryService } from './repository.service';
|
||||
import { loadSqliteVec, sqliteVecRowidTableName, sqliteVecTableName } from '$lib/server/db/sqlite-vec.js';
|
||||
import { SqliteVecStore } from '$lib/server/search/sqlite-vec.store.js';
|
||||
import {
|
||||
AlreadyExistsError,
|
||||
InvalidInputError,
|
||||
@@ -25,6 +27,7 @@ import {
|
||||
function createTestDb(): Database.Database {
|
||||
const client = new Database(':memory:');
|
||||
client.pragma('foreign_keys = ON');
|
||||
loadSqliteVec(client);
|
||||
|
||||
const migrationsFolder = join(import.meta.dirname, '../db/migrations');
|
||||
|
||||
@@ -33,7 +36,9 @@ function createTestDb(): Database.Database {
|
||||
'0001_quick_nighthawk.sql',
|
||||
'0002_silky_stellaris.sql',
|
||||
'0003_multiversion_config.sql',
|
||||
'0004_complete_sentry.sql'
|
||||
'0004_complete_sentry.sql',
|
||||
'0005_fix_stage_defaults.sql',
|
||||
'0006_yielding_centennial.sql'
|
||||
]) {
|
||||
const statements = readFileSync(join(migrationsFolder, migration), 'utf-8')
|
||||
.split('--> statement-breakpoint')
|
||||
@@ -331,6 +336,41 @@ describe('RepositoryService.remove()', () => {
|
||||
it('throws NotFoundError when the repository does not exist', () => {
|
||||
expect(() => service.remove('/not/found')).toThrow(NotFoundError);
|
||||
});
|
||||
|
||||
it('removes derived vec rows before the repository cascade deletes snippets', () => {
|
||||
const docId = crypto.randomUUID();
|
||||
const snippetId = crypto.randomUUID();
|
||||
const embedding = Float32Array.from([1, 0, 0]);
|
||||
const vecStore = new SqliteVecStore((service as unknown as { db: Database.Database }).db);
|
||||
const db = (service as unknown as { db: Database.Database }).db;
|
||||
const now = Math.floor(Date.now() / 1000);
|
||||
|
||||
db.prepare(
|
||||
`INSERT INTO documents (id, repository_id, version_id, file_path, checksum, indexed_at)
|
||||
VALUES (?, '/facebook/react', NULL, 'README.md', 'repo-doc', ?)`
|
||||
).run(docId, now);
|
||||
db.prepare(
|
||||
`INSERT INTO snippets (id, document_id, repository_id, version_id, type, content, created_at)
|
||||
VALUES (?, ?, '/facebook/react', NULL, 'info', 'repo snippet', ?)`
|
||||
).run(snippetId, docId, now);
|
||||
db.prepare(
|
||||
`INSERT INTO snippet_embeddings (snippet_id, profile_id, model, dimensions, embedding, created_at)
|
||||
VALUES (?, 'local-default', 'test-model', 3, ?, ?)`
|
||||
).run(snippetId, Buffer.from(embedding.buffer), now);
|
||||
vecStore.upsertEmbedding('local-default', snippetId, embedding);
|
||||
|
||||
service.remove('/facebook/react');
|
||||
|
||||
const vecTable = sqliteVecTableName('local-default');
|
||||
const rowidTable = sqliteVecRowidTableName('local-default');
|
||||
const vecCount = db.prepare(`SELECT COUNT(*) as n FROM "${vecTable}"`).get() as { n: number };
|
||||
const rowidCount = db.prepare(`SELECT COUNT(*) as n FROM "${rowidTable}"`).get() as {
|
||||
n: number;
|
||||
};
|
||||
|
||||
expect(vecCount.n).toBe(0);
|
||||
expect(rowidCount.n).toBe(0);
|
||||
});
|
||||
});
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
@@ -8,6 +8,7 @@ import { RepositoryMapper } from '$lib/server/mappers/repository.mapper.js';
|
||||
import { IndexingJobMapper } from '$lib/server/mappers/indexing-job.mapper.js';
|
||||
import { Repository, RepositoryEntity } from '$lib/server/models/repository.js';
|
||||
import { IndexingJob, IndexingJobEntity } from '$lib/server/models/indexing-job.js';
|
||||
import { SqliteVecStore } from '$lib/server/search/sqlite-vec.store.js';
|
||||
import { resolveGitHubId, resolveLocalId } from '$lib/server/utils/id-resolver';
|
||||
import {
|
||||
AlreadyExistsError,
|
||||
@@ -230,7 +231,11 @@ export class RepositoryService {
|
||||
const existing = this.get(id);
|
||||
if (!existing) throw new NotFoundError(`Repository ${id} not found`);
|
||||
|
||||
this.db.prepare(`DELETE FROM repositories WHERE id = ?`).run(id);
|
||||
const sqliteVecStore = new SqliteVecStore(this.db);
|
||||
this.db.transaction(() => {
|
||||
sqliteVecStore.deleteEmbeddingsForRepository(id);
|
||||
this.db.prepare(`DELETE FROM repositories WHERE id = ?`).run(id);
|
||||
})();
|
||||
}
|
||||
|
||||
/**
|
||||
|
||||
@@ -10,6 +10,8 @@ import { describe, it, expect } from 'vitest';
|
||||
import Database from 'better-sqlite3';
|
||||
import { readFileSync } from 'node:fs';
|
||||
import { join } from 'node:path';
|
||||
import { loadSqliteVec, sqliteVecRowidTableName, sqliteVecTableName } from '$lib/server/db/sqlite-vec.js';
|
||||
import { SqliteVecStore } from '$lib/server/search/sqlite-vec.store.js';
|
||||
import { VersionService } from './version.service';
|
||||
import { RepositoryService } from './repository.service';
|
||||
import { AlreadyExistsError, NotFoundError } from '$lib/server/utils/validation';
|
||||
@@ -21,31 +23,27 @@ import { AlreadyExistsError, NotFoundError } from '$lib/server/utils/validation'
|
||||
function createTestDb(): Database.Database {
|
||||
const client = new Database(':memory:');
|
||||
client.pragma('foreign_keys = ON');
|
||||
loadSqliteVec(client);
|
||||
|
||||
const migrationsFolder = join(import.meta.dirname, '../db/migrations');
|
||||
|
||||
// Apply all migration files in order
|
||||
const migration0 = readFileSync(join(migrationsFolder, '0000_large_master_chief.sql'), 'utf-8');
|
||||
const migration1 = readFileSync(join(migrationsFolder, '0001_quick_nighthawk.sql'), 'utf-8');
|
||||
for (const migration of [
|
||||
'0000_large_master_chief.sql',
|
||||
'0001_quick_nighthawk.sql',
|
||||
'0002_silky_stellaris.sql',
|
||||
'0003_multiversion_config.sql',
|
||||
'0004_complete_sentry.sql',
|
||||
'0005_fix_stage_defaults.sql',
|
||||
'0006_yielding_centennial.sql'
|
||||
]) {
|
||||
const statements = readFileSync(join(migrationsFolder, migration), 'utf-8')
|
||||
.split('--> statement-breakpoint')
|
||||
.map((statement) => statement.trim())
|
||||
.filter(Boolean);
|
||||
|
||||
// Apply first migration
|
||||
const statements0 = migration0
|
||||
.split('--> statement-breakpoint')
|
||||
.map((s) => s.trim())
|
||||
.filter(Boolean);
|
||||
|
||||
for (const stmt of statements0) {
|
||||
client.exec(stmt);
|
||||
}
|
||||
|
||||
// Apply second migration
|
||||
const statements1 = migration1
|
||||
.split('--> statement-breakpoint')
|
||||
.map((s) => s.trim())
|
||||
.filter(Boolean);
|
||||
|
||||
for (const stmt of statements1) {
|
||||
client.exec(stmt);
|
||||
for (const statement of statements) {
|
||||
client.exec(statement);
|
||||
}
|
||||
}
|
||||
|
||||
return client;
|
||||
@@ -198,6 +196,44 @@ describe('VersionService.remove()', () => {
|
||||
const doc = client.prepare(`SELECT id FROM documents WHERE id = ?`).get(docId);
|
||||
expect(doc).toBeUndefined();
|
||||
});
|
||||
|
||||
it('removes derived vec rows before deleting the version', () => {
|
||||
const { client, versionService } = setup();
|
||||
const version = versionService.add('/facebook/react', 'v18.3.0');
|
||||
const docId = crypto.randomUUID();
|
||||
const snippetId = crypto.randomUUID();
|
||||
const embedding = Float32Array.from([0.5, 0.25, 0.125]);
|
||||
const now = Math.floor(Date.now() / 1000);
|
||||
const vecStore = new SqliteVecStore(client);
|
||||
|
||||
client.prepare(
|
||||
`INSERT INTO documents (id, repository_id, version_id, file_path, checksum, indexed_at)
|
||||
VALUES (?, '/facebook/react', ?, 'README.md', 'version-doc', ?)`
|
||||
).run(docId, version.id, now);
|
||||
client.prepare(
|
||||
`INSERT INTO snippets (id, document_id, repository_id, version_id, type, content, created_at)
|
||||
VALUES (?, ?, '/facebook/react', ?, 'info', 'version snippet', ?)`
|
||||
).run(snippetId, docId, version.id, now);
|
||||
client.prepare(
|
||||
`INSERT INTO snippet_embeddings (snippet_id, profile_id, model, dimensions, embedding, created_at)
|
||||
VALUES (?, 'local-default', 'test-model', 3, ?, ?)`
|
||||
).run(snippetId, Buffer.from(embedding.buffer), now);
|
||||
vecStore.upsertEmbedding('local-default', snippetId, embedding);
|
||||
|
||||
versionService.remove('/facebook/react', 'v18.3.0');
|
||||
|
||||
const vecTable = sqliteVecTableName('local-default');
|
||||
const rowidTable = sqliteVecRowidTableName('local-default');
|
||||
const vecCount = client.prepare(`SELECT COUNT(*) as n FROM "${vecTable}"`).get() as {
|
||||
n: number;
|
||||
};
|
||||
const rowidCount = client.prepare(`SELECT COUNT(*) as n FROM "${rowidTable}"`).get() as {
|
||||
n: number;
|
||||
};
|
||||
|
||||
expect(vecCount.n).toBe(0);
|
||||
expect(rowidCount.n).toBe(0);
|
||||
});
|
||||
});
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
@@ -11,6 +11,7 @@ import {
|
||||
RepositoryVersion,
|
||||
RepositoryVersionEntity
|
||||
} from '$lib/server/models/repository-version.js';
|
||||
import { SqliteVecStore } from '$lib/server/search/sqlite-vec.store.js';
|
||||
import { AlreadyExistsError, NotFoundError } from '$lib/server/utils/validation';
|
||||
import { resolveTagToCommit, discoverVersionTags } from '$lib/server/utils/git.js';
|
||||
|
||||
@@ -99,9 +100,13 @@ export class VersionService {
|
||||
throw new NotFoundError(`Version ${tag} not found for repository ${repositoryId}`);
|
||||
}
|
||||
|
||||
this.db
|
||||
.prepare(`DELETE FROM repository_versions WHERE repository_id = ? AND tag = ?`)
|
||||
.run(repositoryId, tag);
|
||||
const sqliteVecStore = new SqliteVecStore(this.db);
|
||||
this.db.transaction(() => {
|
||||
sqliteVecStore.deleteEmbeddingsForVersion(repositoryId, version.id);
|
||||
this.db
|
||||
.prepare(`DELETE FROM repository_versions WHERE repository_id = ? AND tag = ?`)
|
||||
.run(repositoryId, tag);
|
||||
})();
|
||||
}
|
||||
|
||||
/**
|
||||
|
||||
Reference in New Issue
Block a user