feat(TRUEREF-0023): add sqlite-vec search pipeline

This commit is contained in:
Giancarmine Salucci
2026-04-01 14:09:19 +02:00
parent 0752636847
commit 9525c58e9a
45 changed files with 4009 additions and 614 deletions

View File

@@ -4,6 +4,7 @@
*/
import Database from 'better-sqlite3';
import { env } from '$env/dynamic/private';
import { loadSqliteVec } from './sqlite-vec';
let _client: Database.Database | null = null;
@@ -14,6 +15,12 @@ export function getClient(): Database.Database {
_client.pragma('journal_mode = WAL');
_client.pragma('foreign_keys = ON');
_client.pragma('busy_timeout = 5000');
_client.pragma('synchronous = NORMAL');
_client.pragma('cache_size = -65536');
_client.pragma('temp_store = MEMORY');
_client.pragma('mmap_size = 268435456');
_client.pragma('wal_autocheckpoint = 1000');
loadSqliteVec(_client);
}
return _client;
}

View File

@@ -5,6 +5,7 @@ import { readFileSync } from 'node:fs';
import { fileURLToPath } from 'node:url';
import { join, dirname } from 'node:path';
import * as schema from './schema';
import { loadSqliteVec } from './sqlite-vec';
import { env } from '$env/dynamic/private';
if (!env.DATABASE_URL) throw new Error('DATABASE_URL is not set');
@@ -19,6 +20,12 @@ client.pragma('foreign_keys = ON');
// Prevents SQLITE_BUSY errors when the indexing pipeline holds the write lock
// and an HTTP request arrives simultaneously.
client.pragma('busy_timeout = 5000');
// Connection tuning. Value meanings below follow the SQLite PRAGMA documentation.
// synchronous = NORMAL: sync to disk less often than the FULL setting.
client.pragma('synchronous = NORMAL');
// cache_size: a negative value is a size in KiB, so -65536 is roughly 64 MiB of page cache.
client.pragma('cache_size = -65536');
// temp_store = MEMORY: keep temporary tables and indices in RAM instead of temp files.
client.pragma('temp_store = MEMORY');
// mmap_size: allow up to 268435456 bytes (256 MiB) of memory-mapped I/O.
client.pragma('mmap_size = 268435456');
// wal_autocheckpoint: checkpoint once the WAL reaches about 1000 pages.
// NOTE(review): only meaningful when journal_mode is WAL — that pragma is not visible here; confirm.
client.pragma('wal_autocheckpoint = 1000');
// Register the sqlite-vec extension on this connection.
loadSqliteVec(client);
export const db = drizzle(client, { schema });

View File

@@ -0,0 +1,6 @@
CREATE INDEX `idx_embeddings_profile` ON `snippet_embeddings` (`profile_id`,`snippet_id`);--> statement-breakpoint
CREATE INDEX `idx_documents_repo_version` ON `documents` (`repository_id`,`version_id`);--> statement-breakpoint
CREATE INDEX `idx_jobs_repo_status` ON `indexing_jobs` (`repository_id`,`status`);--> statement-breakpoint
CREATE INDEX `idx_repositories_state` ON `repositories` (`state`);--> statement-breakpoint
CREATE INDEX `idx_snippets_repo_version` ON `snippets` (`repository_id`,`version_id`);--> statement-breakpoint
CREATE INDEX `idx_snippets_repo_type` ON `snippets` (`repository_id`,`type`);

View File

@@ -0,0 +1,948 @@
{
"version": "6",
"dialect": "sqlite",
"id": "b8998bda-f89b-41bc-b923-3f676d153c79",
"prevId": "c326dcbe-1771-4a90-a566-0ebd1eca47ec",
"tables": {
"documents": {
"name": "documents",
"columns": {
"id": {
"name": "id",
"type": "text",
"primaryKey": true,
"notNull": true,
"autoincrement": false
},
"repository_id": {
"name": "repository_id",
"type": "text",
"primaryKey": false,
"notNull": true,
"autoincrement": false
},
"version_id": {
"name": "version_id",
"type": "text",
"primaryKey": false,
"notNull": false,
"autoincrement": false
},
"file_path": {
"name": "file_path",
"type": "text",
"primaryKey": false,
"notNull": true,
"autoincrement": false
},
"title": {
"name": "title",
"type": "text",
"primaryKey": false,
"notNull": false,
"autoincrement": false
},
"language": {
"name": "language",
"type": "text",
"primaryKey": false,
"notNull": false,
"autoincrement": false
},
"token_count": {
"name": "token_count",
"type": "integer",
"primaryKey": false,
"notNull": false,
"autoincrement": false,
"default": 0
},
"checksum": {
"name": "checksum",
"type": "text",
"primaryKey": false,
"notNull": true,
"autoincrement": false
},
"indexed_at": {
"name": "indexed_at",
"type": "integer",
"primaryKey": false,
"notNull": true,
"autoincrement": false
}
},
"indexes": {
"idx_documents_repo_version": {
"name": "idx_documents_repo_version",
"columns": [
"repository_id",
"version_id"
],
"isUnique": false
}
},
"foreignKeys": {
"documents_repository_id_repositories_id_fk": {
"name": "documents_repository_id_repositories_id_fk",
"tableFrom": "documents",
"tableTo": "repositories",
"columnsFrom": [
"repository_id"
],
"columnsTo": [
"id"
],
"onDelete": "cascade",
"onUpdate": "no action"
},
"documents_version_id_repository_versions_id_fk": {
"name": "documents_version_id_repository_versions_id_fk",
"tableFrom": "documents",
"tableTo": "repository_versions",
"columnsFrom": [
"version_id"
],
"columnsTo": [
"id"
],
"onDelete": "cascade",
"onUpdate": "no action"
}
},
"compositePrimaryKeys": {},
"uniqueConstraints": {},
"checkConstraints": {}
},
"embedding_profiles": {
"name": "embedding_profiles",
"columns": {
"id": {
"name": "id",
"type": "text",
"primaryKey": true,
"notNull": true,
"autoincrement": false
},
"provider_kind": {
"name": "provider_kind",
"type": "text",
"primaryKey": false,
"notNull": true,
"autoincrement": false
},
"title": {
"name": "title",
"type": "text",
"primaryKey": false,
"notNull": true,
"autoincrement": false
},
"enabled": {
"name": "enabled",
"type": "integer",
"primaryKey": false,
"notNull": true,
"autoincrement": false,
"default": true
},
"is_default": {
"name": "is_default",
"type": "integer",
"primaryKey": false,
"notNull": true,
"autoincrement": false,
"default": false
},
"model": {
"name": "model",
"type": "text",
"primaryKey": false,
"notNull": true,
"autoincrement": false
},
"dimensions": {
"name": "dimensions",
"type": "integer",
"primaryKey": false,
"notNull": true,
"autoincrement": false
},
"config": {
"name": "config",
"type": "text",
"primaryKey": false,
"notNull": true,
"autoincrement": false
},
"created_at": {
"name": "created_at",
"type": "integer",
"primaryKey": false,
"notNull": true,
"autoincrement": false
},
"updated_at": {
"name": "updated_at",
"type": "integer",
"primaryKey": false,
"notNull": true,
"autoincrement": false
}
},
"indexes": {},
"foreignKeys": {},
"compositePrimaryKeys": {},
"uniqueConstraints": {},
"checkConstraints": {}
},
"indexing_jobs": {
"name": "indexing_jobs",
"columns": {
"id": {
"name": "id",
"type": "text",
"primaryKey": true,
"notNull": true,
"autoincrement": false
},
"repository_id": {
"name": "repository_id",
"type": "text",
"primaryKey": false,
"notNull": true,
"autoincrement": false
},
"version_id": {
"name": "version_id",
"type": "text",
"primaryKey": false,
"notNull": false,
"autoincrement": false
},
"status": {
"name": "status",
"type": "text",
"primaryKey": false,
"notNull": true,
"autoincrement": false,
"default": "'queued'"
},
"progress": {
"name": "progress",
"type": "integer",
"primaryKey": false,
"notNull": false,
"autoincrement": false,
"default": 0
},
"total_files": {
"name": "total_files",
"type": "integer",
"primaryKey": false,
"notNull": false,
"autoincrement": false,
"default": 0
},
"processed_files": {
"name": "processed_files",
"type": "integer",
"primaryKey": false,
"notNull": false,
"autoincrement": false,
"default": 0
},
"stage": {
"name": "stage",
"type": "text",
"primaryKey": false,
"notNull": true,
"autoincrement": false,
"default": "'queued'"
},
"stage_detail": {
"name": "stage_detail",
"type": "text",
"primaryKey": false,
"notNull": false,
"autoincrement": false
},
"error": {
"name": "error",
"type": "text",
"primaryKey": false,
"notNull": false,
"autoincrement": false
},
"started_at": {
"name": "started_at",
"type": "integer",
"primaryKey": false,
"notNull": false,
"autoincrement": false
},
"completed_at": {
"name": "completed_at",
"type": "integer",
"primaryKey": false,
"notNull": false,
"autoincrement": false
},
"created_at": {
"name": "created_at",
"type": "integer",
"primaryKey": false,
"notNull": true,
"autoincrement": false
}
},
"indexes": {
"idx_jobs_repo_status": {
"name": "idx_jobs_repo_status",
"columns": [
"repository_id",
"status"
],
"isUnique": false
}
},
"foreignKeys": {
"indexing_jobs_repository_id_repositories_id_fk": {
"name": "indexing_jobs_repository_id_repositories_id_fk",
"tableFrom": "indexing_jobs",
"tableTo": "repositories",
"columnsFrom": [
"repository_id"
],
"columnsTo": [
"id"
],
"onDelete": "cascade",
"onUpdate": "no action"
}
},
"compositePrimaryKeys": {},
"uniqueConstraints": {},
"checkConstraints": {}
},
"repositories": {
"name": "repositories",
"columns": {
"id": {
"name": "id",
"type": "text",
"primaryKey": true,
"notNull": true,
"autoincrement": false
},
"title": {
"name": "title",
"type": "text",
"primaryKey": false,
"notNull": true,
"autoincrement": false
},
"description": {
"name": "description",
"type": "text",
"primaryKey": false,
"notNull": false,
"autoincrement": false
},
"source": {
"name": "source",
"type": "text",
"primaryKey": false,
"notNull": true,
"autoincrement": false
},
"source_url": {
"name": "source_url",
"type": "text",
"primaryKey": false,
"notNull": true,
"autoincrement": false
},
"branch": {
"name": "branch",
"type": "text",
"primaryKey": false,
"notNull": false,
"autoincrement": false,
"default": "'main'"
},
"state": {
"name": "state",
"type": "text",
"primaryKey": false,
"notNull": true,
"autoincrement": false,
"default": "'pending'"
},
"total_snippets": {
"name": "total_snippets",
"type": "integer",
"primaryKey": false,
"notNull": false,
"autoincrement": false,
"default": 0
},
"total_tokens": {
"name": "total_tokens",
"type": "integer",
"primaryKey": false,
"notNull": false,
"autoincrement": false,
"default": 0
},
"trust_score": {
"name": "trust_score",
"type": "real",
"primaryKey": false,
"notNull": false,
"autoincrement": false,
"default": 0
},
"benchmark_score": {
"name": "benchmark_score",
"type": "real",
"primaryKey": false,
"notNull": false,
"autoincrement": false,
"default": 0
},
"stars": {
"name": "stars",
"type": "integer",
"primaryKey": false,
"notNull": false,
"autoincrement": false
},
"github_token": {
"name": "github_token",
"type": "text",
"primaryKey": false,
"notNull": false,
"autoincrement": false
},
"last_indexed_at": {
"name": "last_indexed_at",
"type": "integer",
"primaryKey": false,
"notNull": false,
"autoincrement": false
},
"created_at": {
"name": "created_at",
"type": "integer",
"primaryKey": false,
"notNull": true,
"autoincrement": false
},
"updated_at": {
"name": "updated_at",
"type": "integer",
"primaryKey": false,
"notNull": true,
"autoincrement": false
}
},
"indexes": {
"idx_repositories_state": {
"name": "idx_repositories_state",
"columns": [
"state"
],
"isUnique": false
}
},
"foreignKeys": {},
"compositePrimaryKeys": {},
"uniqueConstraints": {},
"checkConstraints": {}
},
"repository_configs": {
"name": "repository_configs",
"columns": {
"repository_id": {
"name": "repository_id",
"type": "text",
"primaryKey": false,
"notNull": true,
"autoincrement": false
},
"version_id": {
"name": "version_id",
"type": "text",
"primaryKey": false,
"notNull": false,
"autoincrement": false
},
"project_title": {
"name": "project_title",
"type": "text",
"primaryKey": false,
"notNull": false,
"autoincrement": false
},
"description": {
"name": "description",
"type": "text",
"primaryKey": false,
"notNull": false,
"autoincrement": false
},
"folders": {
"name": "folders",
"type": "text",
"primaryKey": false,
"notNull": false,
"autoincrement": false
},
"exclude_folders": {
"name": "exclude_folders",
"type": "text",
"primaryKey": false,
"notNull": false,
"autoincrement": false
},
"exclude_files": {
"name": "exclude_files",
"type": "text",
"primaryKey": false,
"notNull": false,
"autoincrement": false
},
"rules": {
"name": "rules",
"type": "text",
"primaryKey": false,
"notNull": false,
"autoincrement": false
},
"previous_versions": {
"name": "previous_versions",
"type": "text",
"primaryKey": false,
"notNull": false,
"autoincrement": false
},
"updated_at": {
"name": "updated_at",
"type": "integer",
"primaryKey": false,
"notNull": true,
"autoincrement": false
}
},
"indexes": {
"uniq_repo_config_base": {
"name": "uniq_repo_config_base",
"columns": [
"repository_id"
],
"isUnique": true,
"where": "\"repository_configs\".\"version_id\" IS NULL"
},
"uniq_repo_config_version": {
"name": "uniq_repo_config_version",
"columns": [
"repository_id",
"version_id"
],
"isUnique": true,
"where": "\"repository_configs\".\"version_id\" IS NOT NULL"
}
},
"foreignKeys": {
"repository_configs_repository_id_repositories_id_fk": {
"name": "repository_configs_repository_id_repositories_id_fk",
"tableFrom": "repository_configs",
"tableTo": "repositories",
"columnsFrom": [
"repository_id"
],
"columnsTo": [
"id"
],
"onDelete": "cascade",
"onUpdate": "no action"
}
},
"compositePrimaryKeys": {},
"uniqueConstraints": {},
"checkConstraints": {}
},
"repository_versions": {
"name": "repository_versions",
"columns": {
"id": {
"name": "id",
"type": "text",
"primaryKey": true,
"notNull": true,
"autoincrement": false
},
"repository_id": {
"name": "repository_id",
"type": "text",
"primaryKey": false,
"notNull": true,
"autoincrement": false
},
"tag": {
"name": "tag",
"type": "text",
"primaryKey": false,
"notNull": true,
"autoincrement": false
},
"title": {
"name": "title",
"type": "text",
"primaryKey": false,
"notNull": false,
"autoincrement": false
},
"commit_hash": {
"name": "commit_hash",
"type": "text",
"primaryKey": false,
"notNull": false,
"autoincrement": false
},
"state": {
"name": "state",
"type": "text",
"primaryKey": false,
"notNull": true,
"autoincrement": false,
"default": "'pending'"
},
"total_snippets": {
"name": "total_snippets",
"type": "integer",
"primaryKey": false,
"notNull": false,
"autoincrement": false,
"default": 0
},
"indexed_at": {
"name": "indexed_at",
"type": "integer",
"primaryKey": false,
"notNull": false,
"autoincrement": false
},
"created_at": {
"name": "created_at",
"type": "integer",
"primaryKey": false,
"notNull": true,
"autoincrement": false
}
},
"indexes": {},
"foreignKeys": {
"repository_versions_repository_id_repositories_id_fk": {
"name": "repository_versions_repository_id_repositories_id_fk",
"tableFrom": "repository_versions",
"tableTo": "repositories",
"columnsFrom": [
"repository_id"
],
"columnsTo": [
"id"
],
"onDelete": "cascade",
"onUpdate": "no action"
}
},
"compositePrimaryKeys": {},
"uniqueConstraints": {},
"checkConstraints": {}
},
"settings": {
"name": "settings",
"columns": {
"key": {
"name": "key",
"type": "text",
"primaryKey": true,
"notNull": true,
"autoincrement": false
},
"value": {
"name": "value",
"type": "text",
"primaryKey": false,
"notNull": false,
"autoincrement": false
},
"updated_at": {
"name": "updated_at",
"type": "integer",
"primaryKey": false,
"notNull": true,
"autoincrement": false
}
},
"indexes": {},
"foreignKeys": {},
"compositePrimaryKeys": {},
"uniqueConstraints": {},
"checkConstraints": {}
},
"snippet_embeddings": {
"name": "snippet_embeddings",
"columns": {
"snippet_id": {
"name": "snippet_id",
"type": "text",
"primaryKey": false,
"notNull": true,
"autoincrement": false
},
"profile_id": {
"name": "profile_id",
"type": "text",
"primaryKey": false,
"notNull": true,
"autoincrement": false
},
"model": {
"name": "model",
"type": "text",
"primaryKey": false,
"notNull": true,
"autoincrement": false
},
"dimensions": {
"name": "dimensions",
"type": "integer",
"primaryKey": false,
"notNull": true,
"autoincrement": false
},
"embedding": {
"name": "embedding",
"type": "blob",
"primaryKey": false,
"notNull": true,
"autoincrement": false
},
"created_at": {
"name": "created_at",
"type": "integer",
"primaryKey": false,
"notNull": true,
"autoincrement": false
}
},
"indexes": {
"idx_embeddings_profile": {
"name": "idx_embeddings_profile",
"columns": [
"profile_id",
"snippet_id"
],
"isUnique": false
}
},
"foreignKeys": {
"snippet_embeddings_snippet_id_snippets_id_fk": {
"name": "snippet_embeddings_snippet_id_snippets_id_fk",
"tableFrom": "snippet_embeddings",
"tableTo": "snippets",
"columnsFrom": [
"snippet_id"
],
"columnsTo": [
"id"
],
"onDelete": "cascade",
"onUpdate": "no action"
},
"snippet_embeddings_profile_id_embedding_profiles_id_fk": {
"name": "snippet_embeddings_profile_id_embedding_profiles_id_fk",
"tableFrom": "snippet_embeddings",
"tableTo": "embedding_profiles",
"columnsFrom": [
"profile_id"
],
"columnsTo": [
"id"
],
"onDelete": "cascade",
"onUpdate": "no action"
}
},
"compositePrimaryKeys": {
"snippet_embeddings_snippet_id_profile_id_pk": {
"columns": [
"snippet_id",
"profile_id"
],
"name": "snippet_embeddings_snippet_id_profile_id_pk"
}
},
"uniqueConstraints": {},
"checkConstraints": {}
},
"snippets": {
"name": "snippets",
"columns": {
"id": {
"name": "id",
"type": "text",
"primaryKey": true,
"notNull": true,
"autoincrement": false
},
"document_id": {
"name": "document_id",
"type": "text",
"primaryKey": false,
"notNull": true,
"autoincrement": false
},
"repository_id": {
"name": "repository_id",
"type": "text",
"primaryKey": false,
"notNull": true,
"autoincrement": false
},
"version_id": {
"name": "version_id",
"type": "text",
"primaryKey": false,
"notNull": false,
"autoincrement": false
},
"type": {
"name": "type",
"type": "text",
"primaryKey": false,
"notNull": true,
"autoincrement": false
},
"title": {
"name": "title",
"type": "text",
"primaryKey": false,
"notNull": false,
"autoincrement": false
},
"content": {
"name": "content",
"type": "text",
"primaryKey": false,
"notNull": true,
"autoincrement": false
},
"language": {
"name": "language",
"type": "text",
"primaryKey": false,
"notNull": false,
"autoincrement": false
},
"breadcrumb": {
"name": "breadcrumb",
"type": "text",
"primaryKey": false,
"notNull": false,
"autoincrement": false
},
"token_count": {
"name": "token_count",
"type": "integer",
"primaryKey": false,
"notNull": false,
"autoincrement": false,
"default": 0
},
"created_at": {
"name": "created_at",
"type": "integer",
"primaryKey": false,
"notNull": true,
"autoincrement": false
}
},
"indexes": {
"idx_snippets_repo_version": {
"name": "idx_snippets_repo_version",
"columns": [
"repository_id",
"version_id"
],
"isUnique": false
},
"idx_snippets_repo_type": {
"name": "idx_snippets_repo_type",
"columns": [
"repository_id",
"type"
],
"isUnique": false
}
},
"foreignKeys": {
"snippets_document_id_documents_id_fk": {
"name": "snippets_document_id_documents_id_fk",
"tableFrom": "snippets",
"tableTo": "documents",
"columnsFrom": [
"document_id"
],
"columnsTo": [
"id"
],
"onDelete": "cascade",
"onUpdate": "no action"
},
"snippets_repository_id_repositories_id_fk": {
"name": "snippets_repository_id_repositories_id_fk",
"tableFrom": "snippets",
"tableTo": "repositories",
"columnsFrom": [
"repository_id"
],
"columnsTo": [
"id"
],
"onDelete": "cascade",
"onUpdate": "no action"
},
"snippets_version_id_repository_versions_id_fk": {
"name": "snippets_version_id_repository_versions_id_fk",
"tableFrom": "snippets",
"tableTo": "repository_versions",
"columnsFrom": [
"version_id"
],
"columnsTo": [
"id"
],
"onDelete": "cascade",
"onUpdate": "no action"
}
},
"compositePrimaryKeys": {},
"uniqueConstraints": {},
"checkConstraints": {}
}
},
"views": {},
"enums": {},
"_meta": {
"schemas": {},
"tables": {},
"columns": {}
},
"internal": {
"indexes": {}
}
}

View File

@@ -43,6 +43,13 @@
"when": 1774890536284,
"tag": "0005_fix_stage_defaults",
"breakpoints": true
},
{
"idx": 6,
"version": "6",
"when": 1775038799913,
"tag": "0006_yielding_centennial",
"breakpoints": true
}
]
}

View File

@@ -6,6 +6,7 @@ import { readFileSync } from 'node:fs';
import { join } from 'node:path';
import { eq } from 'drizzle-orm';
import * as schema from './schema';
import { loadSqliteVec, sqliteVecRowidTableName, sqliteVecTableName } from './sqlite-vec';
import {
repositories,
repositoryVersions,
@@ -24,6 +25,7 @@ import {
function createTestDb() {
const client = new Database(':memory:');
client.pragma('foreign_keys = ON');
loadSqliteVec(client);
const db = drizzle(client, { schema });
@@ -266,10 +268,11 @@ describe('snippets table', () => {
describe('snippet_embeddings table', () => {
let db: ReturnType<typeof createTestDb>['db'];
let client: Database.Database;
let snippetId: string;
beforeEach(() => {
({ db } = createTestDb());
({ db, client } = createTestDb());
db.insert(repositories).values(makeRepo()).run();
const docId = crypto.randomUUID();
db.insert(documents)
@@ -344,6 +347,30 @@ describe('snippet_embeddings table', () => {
const result = db.select().from(snippetEmbeddings).all();
expect(result).toHaveLength(0);
});
// Inspects the live schema through PRAGMAs: the relational table must not
// carry a vec_embedding column, and the profile index must exist.
it('keeps the relational schema free of vec_embedding and retains the profile index', () => {
	type NamedRow = { name: string };

	const columnRows = client.prepare("PRAGMA table_info('snippet_embeddings')").all() as Array<NamedRow>;
	const columnNames = columnRows.map((row) => row.name);
	expect(columnNames).not.toContain('vec_embedding');

	const indexRows = client.prepare("PRAGMA index_list('snippet_embeddings')").all() as Array<NamedRow>;
	const indexNames = indexRows.map((row) => row.name);
	expect(indexNames).toContain('idx_embeddings_profile');
});
// createTestDb() already loaded the extension, so a second load must be a
// harmless no-op. Table names must be stable per profile and differ between profiles.
it('loads sqlite-vec idempotently and derives deterministic per-profile table names', () => {
	const reload = () => loadSqliteVec(client);
	expect(reload).not.toThrow();

	const vecTable = sqliteVecTableName('local-default');
	const rowidTable = sqliteVecRowidTableName('local-default');

	// Shape: fixed prefix + sanitized profile id + 8 hex digit hash.
	expect(vecTable).toMatch(/^snippet_embeddings_vec_local_default_[0-9a-f]{8}$/);
	expect(rowidTable).toMatch(/^snippet_embeddings_vec_rowids_local_default_[0-9a-f]{8}$/);

	// Determinism: the same profile id always yields the same names.
	expect(sqliteVecTableName('local-default')).toBe(vecTable);
	expect(sqliteVecRowidTableName('local-default')).toBe(rowidTable);

	// Distinct profile ids must not share a table.
	expect(sqliteVecTableName('local-default')).not.toBe(sqliteVecTableName('openai/custom'));
});
});
describe('indexing_jobs table', () => {

View File

@@ -1,6 +1,7 @@
import { sql } from 'drizzle-orm';
import {
blob,
index,
integer,
primaryKey,
real,
@@ -34,7 +35,7 @@ export const repositories = sqliteTable('repositories', {
lastIndexedAt: integer('last_indexed_at', { mode: 'timestamp' }),
createdAt: integer('created_at', { mode: 'timestamp' }).notNull(),
updatedAt: integer('updated_at', { mode: 'timestamp' }).notNull()
});
}, (t) => [index('idx_repositories_state').on(t.state)]);
// ---------------------------------------------------------------------------
// repository_versions
@@ -72,7 +73,7 @@ export const documents = sqliteTable('documents', {
tokenCount: integer('token_count').default(0),
checksum: text('checksum').notNull(), // SHA-256 of file content
indexedAt: integer('indexed_at', { mode: 'timestamp' }).notNull()
});
}, (t) => [index('idx_documents_repo_version').on(t.repositoryId, t.versionId)]);
// ---------------------------------------------------------------------------
// snippets
@@ -93,7 +94,10 @@ export const snippets = sqliteTable('snippets', {
breadcrumb: text('breadcrumb'), // e.g. "Installation > Getting Started"
tokenCount: integer('token_count').default(0),
createdAt: integer('created_at', { mode: 'timestamp' }).notNull()
});
}, (t) => [
index('idx_snippets_repo_version').on(t.repositoryId, t.versionId),
index('idx_snippets_repo_type').on(t.repositoryId, t.type),
]);
// ---------------------------------------------------------------------------
// embedding_profiles
@@ -128,7 +132,10 @@ export const snippetEmbeddings = sqliteTable(
embedding: blob('embedding').notNull(), // Float32Array as binary blob
createdAt: integer('created_at').notNull()
},
(table) => [primaryKey({ columns: [table.snippetId, table.profileId] })]
(table) => [
primaryKey({ columns: [table.snippetId, table.profileId] }),
index('idx_embeddings_profile').on(table.profileId, table.snippetId),
]
);
// ---------------------------------------------------------------------------
@@ -154,7 +161,7 @@ export const indexingJobs = sqliteTable('indexing_jobs', {
startedAt: integer('started_at', { mode: 'timestamp' }),
completedAt: integer('completed_at', { mode: 'timestamp' }),
createdAt: integer('created_at', { mode: 'timestamp' }).notNull()
});
}, (t) => [index('idx_jobs_repo_status').on(t.repositoryId, t.status)]);
// ---------------------------------------------------------------------------
// repository_configs

View File

@@ -0,0 +1,49 @@
import type Database from 'better-sqlite3';
import * as sqliteVec from 'sqlite-vec';
const loadedConnections = new WeakSet<Database.Database>();
/**
 * Computes a deterministic 32-bit hash of `value` and renders it as exactly
 * eight lowercase hexadecimal digits.
 *
 * The mixing step is FNV-1a style (XOR, then multiply by the prime), applied
 * to each UTF-16 code unit of the string rather than to encoded bytes.
 *
 * @param value - Text to hash; the empty string yields the offset basis.
 * @returns Zero-padded, 8-character hex digest.
 */
function stableHash(value: string): string {
	const OFFSET_BASIS = 0x811c9dc5;
	const PRIME = 0x01000193;

	// split('') yields UTF-16 code units, matching an index-based charCodeAt walk.
	const digest = value
		.split('')
		.reduce((acc, unit) => Math.imul(acc ^ unit.charCodeAt(0), PRIME), OFFSET_BASIS);

	// `>>> 0` reinterprets the signed 32-bit result as unsigned before formatting.
	return (digest >>> 0).toString(16).padStart(8, '0');
}
/**
 * Reduces arbitrary text to a short fragment made only of `a-z`, `0-9` and `_`.
 *
 * Steps, in order: lowercase, collapse every run of other characters into a
 * single underscore, strip leading/trailing underscores, then keep at most the
 * first 32 characters. Because truncation happens last, a truncated result may
 * still end in an underscore.
 *
 * @param value - Raw text (for example a profile id).
 * @returns The sanitized fragment, or `'profile'` when nothing usable remains.
 */
function sanitizeIdentifierPart(value: string): string {
	const MAX_LENGTH = 32;

	const lowered = value.toLowerCase();
	const collapsed = lowered.replace(/[^a-z0-9]+/g, '_');
	const trimmed = collapsed.replace(/^_+|_+$/g, '');

	if (trimmed.length === 0) {
		return 'profile';
	}
	return trimmed.slice(0, MAX_LENGTH);
}
/**
 * Builds the per-profile suffix used in sqlite-vec table names: a readable,
 * sanitized form of the profile id followed by `_` and a hash of the original
 * id, so ids that sanitize to the same text still get different suffixes
 * unless their hashes also collide.
 *
 * @param profileId - Embedding profile identifier.
 * @returns `<sanitized id>_<8 hex digit hash>`.
 */
export function sqliteVecTableSuffix(profileId: string): string {
	const readablePart = sanitizeIdentifierPart(profileId);
	const digest = stableHash(profileId);
	return [readablePart, digest].join('_');
}
/**
 * Derives the name of the vector table for an embedding profile.
 *
 * @param profileId - Embedding profile identifier.
 * @returns `snippet_embeddings_vec_` followed by the profile's table suffix.
 */
export function sqliteVecTableName(profileId: string): string {
	const suffix = sqliteVecTableSuffix(profileId);
	return 'snippet_embeddings_vec_' + suffix;
}
/**
 * Derives the name of the rowid companion table for an embedding profile.
 *
 * @param profileId - Embedding profile identifier.
 * @returns `snippet_embeddings_vec_rowids_` followed by the profile's table suffix.
 */
export function sqliteVecRowidTableName(profileId: string): string {
	const suffix = sqliteVecTableSuffix(profileId);
	return 'snippet_embeddings_vec_rowids_' + suffix;
}
/**
 * Wraps an identifier in double quotes for use in SQL text, doubling every
 * embedded double quote so it cannot terminate the quoted identifier.
 *
 * @param identifier - Raw table or column name.
 * @returns The double-quoted, escaped identifier.
 */
export function quoteSqliteIdentifier(identifier: string): string {
	const escaped = identifier.split('"').join('""');
	return '"' + escaped + '"';
}
/**
 * Loads the sqlite-vec extension into `db`, at most once per connection object.
 *
 * Connections that were loaded successfully are remembered in a module-level
 * WeakSet, so repeated calls with the same connection return without loading
 * again. A connection is recorded only after `sqliteVec.load` returns; if the
 * load throws, the error propagates and a later call will retry.
 *
 * @param db - better-sqlite3 connection to extend.
 */
export function loadSqliteVec(db: Database.Database): void {
	const alreadyLoaded = loadedConnections.has(db);
	if (!alreadyLoaded) {
		sqliteVec.load(db);
		loadedConnections.add(db);
	}
}

View File

@@ -0,0 +1,2 @@
-- Relational vec_embedding bootstrap removed in iteration 2.
-- Downstream sqlite-vec vec0 tables are created on demand in application code.