From f57b6225059299d00d12dd8da7cd62fd3dcd0160 Mon Sep 17 00:00:00 2001 From: Giancarmine Salucci Date: Sun, 22 Mar 2026 17:18:01 +0100 Subject: [PATCH] feat(TRUEREF-0001): implement complete database schema and core data models Define all SQLite tables via Drizzle ORM (repositories, repository_versions, documents, snippets, snippet_embeddings, indexing_jobs, repository_configs, settings), generate the initial migration, create FTS5 virtual table and sync triggers in fts.sql, add shared TypeScript types in src/lib/types.ts, and write 21 unit tests covering insertions, cascade deletes, FK constraints, blob storage, JSON fields, and FTS5 trigger behaviour. Co-Authored-By: Claude Sonnet 4.6 --- drizzle.config.ts | 1 + src/lib/server/db/fts.sql | 30 + src/lib/server/db/index.ts | 29 + .../db/migrations/0000_large_master_chief.sql | 104 +++ .../db/migrations/meta/0000_snapshot.json | 739 ++++++++++++++++++ .../server/db/migrations/meta/_journal.json | 13 + src/lib/server/db/schema.test.ts | 490 ++++++++++++ src/lib/server/db/schema.ts | 180 ++++- src/lib/types.ts | 96 +++ 9 files changed, 1676 insertions(+), 6 deletions(-) create mode 100644 src/lib/server/db/fts.sql create mode 100644 src/lib/server/db/migrations/0000_large_master_chief.sql create mode 100644 src/lib/server/db/migrations/meta/0000_snapshot.json create mode 100644 src/lib/server/db/migrations/meta/_journal.json create mode 100644 src/lib/server/db/schema.test.ts create mode 100644 src/lib/types.ts diff --git a/drizzle.config.ts b/drizzle.config.ts index 317f310..6a1d787 100644 --- a/drizzle.config.ts +++ b/drizzle.config.ts @@ -4,6 +4,7 @@ if (!process.env.DATABASE_URL) throw new Error('DATABASE_URL is not set'); export default defineConfig({ schema: './src/lib/server/db/schema.ts', + out: './src/lib/server/db/migrations', dialect: 'sqlite', dbCredentials: { url: process.env.DATABASE_URL }, verbose: true, diff --git a/src/lib/server/db/fts.sql b/src/lib/server/db/fts.sql new file mode 100644 index 
0000000..8a78527 --- /dev/null +++ b/src/lib/server/db/fts.sql @@ -0,0 +1,30 @@ +-- FTS5 virtual table for full-text search on snippets. +-- This cannot be expressed in Drizzle ORM DDL; applied manually during DB init. + +CREATE VIRTUAL TABLE IF NOT EXISTS snippets_fts USING fts5( + content, + title, + breadcrumb, + content='snippets', + content_rowid='rowid', + tokenize='porter unicode61' +); + +-- Keep FTS index in sync with the snippets table via triggers. + +CREATE TRIGGER IF NOT EXISTS snippets_ai AFTER INSERT ON snippets BEGIN + INSERT INTO snippets_fts(rowid, content, title, breadcrumb) + VALUES (new.rowid, new.content, new.title, new.breadcrumb); +END; + +CREATE TRIGGER IF NOT EXISTS snippets_ad AFTER DELETE ON snippets BEGIN + INSERT INTO snippets_fts(snippets_fts, rowid, content, title, breadcrumb) + VALUES ('delete', old.rowid, old.content, old.title, old.breadcrumb); +END; + +CREATE TRIGGER IF NOT EXISTS snippets_au AFTER UPDATE ON snippets BEGIN + INSERT INTO snippets_fts(snippets_fts, rowid, content, title, breadcrumb) + VALUES ('delete', old.rowid, old.content, old.title, old.breadcrumb); + INSERT INTO snippets_fts(rowid, content, title, breadcrumb) + VALUES (new.rowid, new.content, new.title, new.breadcrumb); +END; diff --git a/src/lib/server/db/index.ts b/src/lib/server/db/index.ts index b3c877b..c196249 100644 --- a/src/lib/server/db/index.ts +++ b/src/lib/server/db/index.ts @@ -1,5 +1,9 @@ import { drizzle } from 'drizzle-orm/better-sqlite3'; +import { migrate } from 'drizzle-orm/better-sqlite3/migrator'; import Database from 'better-sqlite3'; +import { readFileSync } from 'node:fs'; +import { fileURLToPath } from 'node:url'; +import { join, dirname } from 'node:path'; import * as schema from './schema'; import { env } from '$env/dynamic/private'; @@ -7,4 +11,29 @@ if (!env.DATABASE_URL) throw new Error('DATABASE_URL is not set'); const client = new Database(env.DATABASE_URL); +// Enable WAL mode for better concurrent read performance. 
+client.pragma('journal_mode = WAL'); +// Enforce foreign key constraints. +client.pragma('foreign_keys = ON'); + export const db = drizzle(client, { schema }); + +// --------------------------------------------------------------------------- +// Database initialisation — run on startup +// --------------------------------------------------------------------------- + +const __dirname = dirname(fileURLToPath(import.meta.url)); + +/** + * Run Drizzle migrations then apply the FTS5 virtual table and triggers. + * Safe to call multiple times — the FTS DDL uses IF NOT EXISTS guards and migrate() is expected to skip already-applied migrations. + */ +export function initializeDatabase(): void { + const migrationsFolder = join(__dirname, 'migrations'); + migrate(db, { migrationsFolder }); + + // Apply FTS5 virtual table and trigger DDL (not expressible via Drizzle). + // exec() handles multi-statement SQL with embedded comments correctly. + const ftsSql = readFileSync(join(__dirname, 'fts.sql'), 'utf-8'); + client.exec(ftsSql); +} diff --git a/src/lib/server/db/migrations/0000_large_master_chief.sql b/src/lib/server/db/migrations/0000_large_master_chief.sql new file mode 100644 index 0000000..076309f --- /dev/null +++ b/src/lib/server/db/migrations/0000_large_master_chief.sql @@ -0,0 +1,104 @@ +CREATE TABLE `documents` ( + `id` text PRIMARY KEY NOT NULL, + `repository_id` text NOT NULL, + `version_id` text, + `file_path` text NOT NULL, + `title` text, + `language` text, + `token_count` integer DEFAULT 0, + `checksum` text NOT NULL, + `indexed_at` integer NOT NULL, + FOREIGN KEY (`repository_id`) REFERENCES `repositories`(`id`) ON UPDATE no action ON DELETE cascade, + FOREIGN KEY (`version_id`) REFERENCES `repository_versions`(`id`) ON UPDATE no action ON DELETE cascade +); +--> statement-breakpoint +CREATE TABLE `indexing_jobs` ( + `id` text PRIMARY KEY NOT NULL, + `repository_id` text NOT NULL, + `version_id` text, + `status` text DEFAULT 'queued' NOT NULL, + `progress` integer DEFAULT 0, + `total_files` integer DEFAULT 0, + 
`processed_files` integer DEFAULT 0, + `error` text, + `started_at` integer, + `completed_at` integer, + `created_at` integer NOT NULL, + FOREIGN KEY (`repository_id`) REFERENCES `repositories`(`id`) ON UPDATE no action ON DELETE cascade +); +--> statement-breakpoint +CREATE TABLE `repositories` ( + `id` text PRIMARY KEY NOT NULL, + `title` text NOT NULL, + `description` text, + `source` text NOT NULL, + `source_url` text NOT NULL, + `branch` text DEFAULT 'main', + `state` text DEFAULT 'pending' NOT NULL, + `total_snippets` integer DEFAULT 0, + `total_tokens` integer DEFAULT 0, + `trust_score` real DEFAULT 0, + `benchmark_score` real DEFAULT 0, + `stars` integer, + `github_token` text, + `last_indexed_at` integer, + `created_at` integer NOT NULL, + `updated_at` integer NOT NULL +); +--> statement-breakpoint +CREATE TABLE `repository_configs` ( + `repository_id` text PRIMARY KEY NOT NULL, + `project_title` text, + `description` text, + `folders` text, + `exclude_folders` text, + `exclude_files` text, + `rules` text, + `previous_versions` text, + `updated_at` integer NOT NULL, + FOREIGN KEY (`repository_id`) REFERENCES `repositories`(`id`) ON UPDATE no action ON DELETE cascade +); +--> statement-breakpoint +CREATE TABLE `repository_versions` ( + `id` text PRIMARY KEY NOT NULL, + `repository_id` text NOT NULL, + `tag` text NOT NULL, + `title` text, + `state` text DEFAULT 'pending' NOT NULL, + `total_snippets` integer DEFAULT 0, + `indexed_at` integer, + `created_at` integer NOT NULL, + FOREIGN KEY (`repository_id`) REFERENCES `repositories`(`id`) ON UPDATE no action ON DELETE cascade +); +--> statement-breakpoint +CREATE TABLE `settings` ( + `key` text PRIMARY KEY NOT NULL, + `value` text, + `updated_at` integer NOT NULL +); +--> statement-breakpoint +CREATE TABLE `snippet_embeddings` ( + `snippet_id` text PRIMARY KEY NOT NULL, + `model` text NOT NULL, + `dimensions` integer NOT NULL, + `embedding` blob NOT NULL, + `created_at` integer NOT NULL, + FOREIGN KEY 
(`snippet_id`) REFERENCES `snippets`(`id`) ON UPDATE no action ON DELETE cascade +); +--> statement-breakpoint +CREATE TABLE `snippets` ( + `id` text PRIMARY KEY NOT NULL, + `document_id` text NOT NULL, + `repository_id` text NOT NULL, + `version_id` text, + `type` text NOT NULL, + `title` text, + `content` text NOT NULL, + `language` text, + `breadcrumb` text, + `token_count` integer DEFAULT 0, + `created_at` integer NOT NULL, + FOREIGN KEY (`document_id`) REFERENCES `documents`(`id`) ON UPDATE no action ON DELETE cascade, + FOREIGN KEY (`repository_id`) REFERENCES `repositories`(`id`) ON UPDATE no action ON DELETE cascade, + FOREIGN KEY (`version_id`) REFERENCES `repository_versions`(`id`) ON UPDATE no action ON DELETE cascade +); diff --git a/src/lib/server/db/migrations/meta/0000_snapshot.json b/src/lib/server/db/migrations/meta/0000_snapshot.json new file mode 100644 index 0000000..9e2b5c1 --- /dev/null +++ b/src/lib/server/db/migrations/meta/0000_snapshot.json @@ -0,0 +1,739 @@ +{ + "version": "6", + "dialect": "sqlite", + "id": "9dec55ea-0c03-4c98-99a6-dd143b336791", + "prevId": "00000000-0000-0000-0000-000000000000", + "tables": { + "documents": { + "name": "documents", + "columns": { + "id": { + "name": "id", + "type": "text", + "primaryKey": true, + "notNull": true, + "autoincrement": false + }, + "repository_id": { + "name": "repository_id", + "type": "text", + "primaryKey": false, + "notNull": true, + "autoincrement": false + }, + "version_id": { + "name": "version_id", + "type": "text", + "primaryKey": false, + "notNull": false, + "autoincrement": false + }, + "file_path": { + "name": "file_path", + "type": "text", + "primaryKey": false, + "notNull": true, + "autoincrement": false + }, + "title": { + "name": "title", + "type": "text", + "primaryKey": false, + "notNull": false, + "autoincrement": false + }, + "language": { + "name": "language", + "type": "text", + "primaryKey": false, + "notNull": false, + "autoincrement": false + }, + "token_count": { 
+ "name": "token_count", + "type": "integer", + "primaryKey": false, + "notNull": false, + "autoincrement": false, + "default": 0 + }, + "checksum": { + "name": "checksum", + "type": "text", + "primaryKey": false, + "notNull": true, + "autoincrement": false + }, + "indexed_at": { + "name": "indexed_at", + "type": "integer", + "primaryKey": false, + "notNull": true, + "autoincrement": false + } + }, + "indexes": {}, + "foreignKeys": { + "documents_repository_id_repositories_id_fk": { + "name": "documents_repository_id_repositories_id_fk", + "tableFrom": "documents", + "tableTo": "repositories", + "columnsFrom": [ + "repository_id" + ], + "columnsTo": [ + "id" + ], + "onDelete": "cascade", + "onUpdate": "no action" + }, + "documents_version_id_repository_versions_id_fk": { + "name": "documents_version_id_repository_versions_id_fk", + "tableFrom": "documents", + "tableTo": "repository_versions", + "columnsFrom": [ + "version_id" + ], + "columnsTo": [ + "id" + ], + "onDelete": "cascade", + "onUpdate": "no action" + } + }, + "compositePrimaryKeys": {}, + "uniqueConstraints": {}, + "checkConstraints": {} + }, + "indexing_jobs": { + "name": "indexing_jobs", + "columns": { + "id": { + "name": "id", + "type": "text", + "primaryKey": true, + "notNull": true, + "autoincrement": false + }, + "repository_id": { + "name": "repository_id", + "type": "text", + "primaryKey": false, + "notNull": true, + "autoincrement": false + }, + "version_id": { + "name": "version_id", + "type": "text", + "primaryKey": false, + "notNull": false, + "autoincrement": false + }, + "status": { + "name": "status", + "type": "text", + "primaryKey": false, + "notNull": true, + "autoincrement": false, + "default": "'queued'" + }, + "progress": { + "name": "progress", + "type": "integer", + "primaryKey": false, + "notNull": false, + "autoincrement": false, + "default": 0 + }, + "total_files": { + "name": "total_files", + "type": "integer", + "primaryKey": false, + "notNull": false, + "autoincrement": 
false, + "default": 0 + }, + "processed_files": { + "name": "processed_files", + "type": "integer", + "primaryKey": false, + "notNull": false, + "autoincrement": false, + "default": 0 + }, + "error": { + "name": "error", + "type": "text", + "primaryKey": false, + "notNull": false, + "autoincrement": false + }, + "started_at": { + "name": "started_at", + "type": "integer", + "primaryKey": false, + "notNull": false, + "autoincrement": false + }, + "completed_at": { + "name": "completed_at", + "type": "integer", + "primaryKey": false, + "notNull": false, + "autoincrement": false + }, + "created_at": { + "name": "created_at", + "type": "integer", + "primaryKey": false, + "notNull": true, + "autoincrement": false + } + }, + "indexes": {}, + "foreignKeys": { + "indexing_jobs_repository_id_repositories_id_fk": { + "name": "indexing_jobs_repository_id_repositories_id_fk", + "tableFrom": "indexing_jobs", + "tableTo": "repositories", + "columnsFrom": [ + "repository_id" + ], + "columnsTo": [ + "id" + ], + "onDelete": "cascade", + "onUpdate": "no action" + } + }, + "compositePrimaryKeys": {}, + "uniqueConstraints": {}, + "checkConstraints": {} + }, + "repositories": { + "name": "repositories", + "columns": { + "id": { + "name": "id", + "type": "text", + "primaryKey": true, + "notNull": true, + "autoincrement": false + }, + "title": { + "name": "title", + "type": "text", + "primaryKey": false, + "notNull": true, + "autoincrement": false + }, + "description": { + "name": "description", + "type": "text", + "primaryKey": false, + "notNull": false, + "autoincrement": false + }, + "source": { + "name": "source", + "type": "text", + "primaryKey": false, + "notNull": true, + "autoincrement": false + }, + "source_url": { + "name": "source_url", + "type": "text", + "primaryKey": false, + "notNull": true, + "autoincrement": false + }, + "branch": { + "name": "branch", + "type": "text", + "primaryKey": false, + "notNull": false, + "autoincrement": false, + "default": "'main'" + }, + 
"state": { + "name": "state", + "type": "text", + "primaryKey": false, + "notNull": true, + "autoincrement": false, + "default": "'pending'" + }, + "total_snippets": { + "name": "total_snippets", + "type": "integer", + "primaryKey": false, + "notNull": false, + "autoincrement": false, + "default": 0 + }, + "total_tokens": { + "name": "total_tokens", + "type": "integer", + "primaryKey": false, + "notNull": false, + "autoincrement": false, + "default": 0 + }, + "trust_score": { + "name": "trust_score", + "type": "real", + "primaryKey": false, + "notNull": false, + "autoincrement": false, + "default": 0 + }, + "benchmark_score": { + "name": "benchmark_score", + "type": "real", + "primaryKey": false, + "notNull": false, + "autoincrement": false, + "default": 0 + }, + "stars": { + "name": "stars", + "type": "integer", + "primaryKey": false, + "notNull": false, + "autoincrement": false + }, + "github_token": { + "name": "github_token", + "type": "text", + "primaryKey": false, + "notNull": false, + "autoincrement": false + }, + "last_indexed_at": { + "name": "last_indexed_at", + "type": "integer", + "primaryKey": false, + "notNull": false, + "autoincrement": false + }, + "created_at": { + "name": "created_at", + "type": "integer", + "primaryKey": false, + "notNull": true, + "autoincrement": false + }, + "updated_at": { + "name": "updated_at", + "type": "integer", + "primaryKey": false, + "notNull": true, + "autoincrement": false + } + }, + "indexes": {}, + "foreignKeys": {}, + "compositePrimaryKeys": {}, + "uniqueConstraints": {}, + "checkConstraints": {} + }, + "repository_configs": { + "name": "repository_configs", + "columns": { + "repository_id": { + "name": "repository_id", + "type": "text", + "primaryKey": true, + "notNull": true, + "autoincrement": false + }, + "project_title": { + "name": "project_title", + "type": "text", + "primaryKey": false, + "notNull": false, + "autoincrement": false + }, + "description": { + "name": "description", + "type": "text", + 
"primaryKey": false, + "notNull": false, + "autoincrement": false + }, + "folders": { + "name": "folders", + "type": "text", + "primaryKey": false, + "notNull": false, + "autoincrement": false + }, + "exclude_folders": { + "name": "exclude_folders", + "type": "text", + "primaryKey": false, + "notNull": false, + "autoincrement": false + }, + "exclude_files": { + "name": "exclude_files", + "type": "text", + "primaryKey": false, + "notNull": false, + "autoincrement": false + }, + "rules": { + "name": "rules", + "type": "text", + "primaryKey": false, + "notNull": false, + "autoincrement": false + }, + "previous_versions": { + "name": "previous_versions", + "type": "text", + "primaryKey": false, + "notNull": false, + "autoincrement": false + }, + "updated_at": { + "name": "updated_at", + "type": "integer", + "primaryKey": false, + "notNull": true, + "autoincrement": false + } + }, + "indexes": {}, + "foreignKeys": { + "repository_configs_repository_id_repositories_id_fk": { + "name": "repository_configs_repository_id_repositories_id_fk", + "tableFrom": "repository_configs", + "tableTo": "repositories", + "columnsFrom": [ + "repository_id" + ], + "columnsTo": [ + "id" + ], + "onDelete": "cascade", + "onUpdate": "no action" + } + }, + "compositePrimaryKeys": {}, + "uniqueConstraints": {}, + "checkConstraints": {} + }, + "repository_versions": { + "name": "repository_versions", + "columns": { + "id": { + "name": "id", + "type": "text", + "primaryKey": true, + "notNull": true, + "autoincrement": false + }, + "repository_id": { + "name": "repository_id", + "type": "text", + "primaryKey": false, + "notNull": true, + "autoincrement": false + }, + "tag": { + "name": "tag", + "type": "text", + "primaryKey": false, + "notNull": true, + "autoincrement": false + }, + "title": { + "name": "title", + "type": "text", + "primaryKey": false, + "notNull": false, + "autoincrement": false + }, + "state": { + "name": "state", + "type": "text", + "primaryKey": false, + "notNull": true, + 
"autoincrement": false, + "default": "'pending'" + }, + "total_snippets": { + "name": "total_snippets", + "type": "integer", + "primaryKey": false, + "notNull": false, + "autoincrement": false, + "default": 0 + }, + "indexed_at": { + "name": "indexed_at", + "type": "integer", + "primaryKey": false, + "notNull": false, + "autoincrement": false + }, + "created_at": { + "name": "created_at", + "type": "integer", + "primaryKey": false, + "notNull": true, + "autoincrement": false + } + }, + "indexes": {}, + "foreignKeys": { + "repository_versions_repository_id_repositories_id_fk": { + "name": "repository_versions_repository_id_repositories_id_fk", + "tableFrom": "repository_versions", + "tableTo": "repositories", + "columnsFrom": [ + "repository_id" + ], + "columnsTo": [ + "id" + ], + "onDelete": "cascade", + "onUpdate": "no action" + } + }, + "compositePrimaryKeys": {}, + "uniqueConstraints": {}, + "checkConstraints": {} + }, + "settings": { + "name": "settings", + "columns": { + "key": { + "name": "key", + "type": "text", + "primaryKey": true, + "notNull": true, + "autoincrement": false + }, + "value": { + "name": "value", + "type": "text", + "primaryKey": false, + "notNull": false, + "autoincrement": false + }, + "updated_at": { + "name": "updated_at", + "type": "integer", + "primaryKey": false, + "notNull": true, + "autoincrement": false + } + }, + "indexes": {}, + "foreignKeys": {}, + "compositePrimaryKeys": {}, + "uniqueConstraints": {}, + "checkConstraints": {} + }, + "snippet_embeddings": { + "name": "snippet_embeddings", + "columns": { + "snippet_id": { + "name": "snippet_id", + "type": "text", + "primaryKey": true, + "notNull": true, + "autoincrement": false + }, + "model": { + "name": "model", + "type": "text", + "primaryKey": false, + "notNull": true, + "autoincrement": false + }, + "dimensions": { + "name": "dimensions", + "type": "integer", + "primaryKey": false, + "notNull": true, + "autoincrement": false + }, + "embedding": { + "name": "embedding", + 
"type": "blob", + "primaryKey": false, + "notNull": true, + "autoincrement": false + }, + "created_at": { + "name": "created_at", + "type": "integer", + "primaryKey": false, + "notNull": true, + "autoincrement": false + } + }, + "indexes": {}, + "foreignKeys": { + "snippet_embeddings_snippet_id_snippets_id_fk": { + "name": "snippet_embeddings_snippet_id_snippets_id_fk", + "tableFrom": "snippet_embeddings", + "tableTo": "snippets", + "columnsFrom": [ + "snippet_id" + ], + "columnsTo": [ + "id" + ], + "onDelete": "cascade", + "onUpdate": "no action" + } + }, + "compositePrimaryKeys": {}, + "uniqueConstraints": {}, + "checkConstraints": {} + }, + "snippets": { + "name": "snippets", + "columns": { + "id": { + "name": "id", + "type": "text", + "primaryKey": true, + "notNull": true, + "autoincrement": false + }, + "document_id": { + "name": "document_id", + "type": "text", + "primaryKey": false, + "notNull": true, + "autoincrement": false + }, + "repository_id": { + "name": "repository_id", + "type": "text", + "primaryKey": false, + "notNull": true, + "autoincrement": false + }, + "version_id": { + "name": "version_id", + "type": "text", + "primaryKey": false, + "notNull": false, + "autoincrement": false + }, + "type": { + "name": "type", + "type": "text", + "primaryKey": false, + "notNull": true, + "autoincrement": false + }, + "title": { + "name": "title", + "type": "text", + "primaryKey": false, + "notNull": false, + "autoincrement": false + }, + "content": { + "name": "content", + "type": "text", + "primaryKey": false, + "notNull": true, + "autoincrement": false + }, + "language": { + "name": "language", + "type": "text", + "primaryKey": false, + "notNull": false, + "autoincrement": false + }, + "breadcrumb": { + "name": "breadcrumb", + "type": "text", + "primaryKey": false, + "notNull": false, + "autoincrement": false + }, + "token_count": { + "name": "token_count", + "type": "integer", + "primaryKey": false, + "notNull": false, + "autoincrement": false, + 
"default": 0 + }, + "created_at": { + "name": "created_at", + "type": "integer", + "primaryKey": false, + "notNull": true, + "autoincrement": false + } + }, + "indexes": {}, + "foreignKeys": { + "snippets_document_id_documents_id_fk": { + "name": "snippets_document_id_documents_id_fk", + "tableFrom": "snippets", + "tableTo": "documents", + "columnsFrom": [ + "document_id" + ], + "columnsTo": [ + "id" + ], + "onDelete": "cascade", + "onUpdate": "no action" + }, + "snippets_repository_id_repositories_id_fk": { + "name": "snippets_repository_id_repositories_id_fk", + "tableFrom": "snippets", + "tableTo": "repositories", + "columnsFrom": [ + "repository_id" + ], + "columnsTo": [ + "id" + ], + "onDelete": "cascade", + "onUpdate": "no action" + }, + "snippets_version_id_repository_versions_id_fk": { + "name": "snippets_version_id_repository_versions_id_fk", + "tableFrom": "snippets", + "tableTo": "repository_versions", + "columnsFrom": [ + "version_id" + ], + "columnsTo": [ + "id" + ], + "onDelete": "cascade", + "onUpdate": "no action" + } + }, + "compositePrimaryKeys": {}, + "uniqueConstraints": {}, + "checkConstraints": {} + } + }, + "views": {}, + "enums": {}, + "_meta": { + "schemas": {}, + "tables": {}, + "columns": {} + }, + "internal": { + "indexes": {} + } +} \ No newline at end of file diff --git a/src/lib/server/db/migrations/meta/_journal.json b/src/lib/server/db/migrations/meta/_journal.json new file mode 100644 index 0000000..cccc65c --- /dev/null +++ b/src/lib/server/db/migrations/meta/_journal.json @@ -0,0 +1,13 @@ +{ + "version": "7", + "dialect": "sqlite", + "entries": [ + { + "idx": 0, + "version": "6", + "when": 1774196053634, + "tag": "0000_large_master_chief", + "breakpoints": true + } + ] +} \ No newline at end of file diff --git a/src/lib/server/db/schema.test.ts b/src/lib/server/db/schema.test.ts new file mode 100644 index 0000000..6409599 --- /dev/null +++ b/src/lib/server/db/schema.test.ts @@ -0,0 +1,490 @@ +import { describe, it, expect, 
beforeEach } from 'vitest'; +import Database from 'better-sqlite3'; +import { drizzle } from 'drizzle-orm/better-sqlite3'; +import { migrate } from 'drizzle-orm/better-sqlite3/migrator'; +import { readFileSync } from 'node:fs'; +import { join } from 'node:path'; +import { eq } from 'drizzle-orm'; +import * as schema from './schema'; +import { + repositories, + repositoryVersions, + documents, + snippets, + snippetEmbeddings, + indexingJobs, + repositoryConfigs, + settings +} from './schema'; + +// --------------------------------------------------------------------------- +// Test helpers +// --------------------------------------------------------------------------- + +function createTestDb() { + const client = new Database(':memory:'); + client.pragma('foreign_keys = ON'); + + const db = drizzle(client, { schema }); + + // Run migrations from the generated migration folder. + const migrationsFolder = join(import.meta.dirname, 'migrations'); + migrate(db, { migrationsFolder }); + + // Apply FTS5 DDL using exec() which handles multi-statement SQL with comments. 
+ const ftsSql = readFileSync(join(import.meta.dirname, 'fts.sql'), 'utf-8'); + client.exec(ftsSql); + + return { db, client }; +} + +const now = new Date(); + +function makeRepo(overrides: Partial<schema.NewRepository> = {}): schema.NewRepository { + return { + id: '/test/repo', + title: 'Test Repo', + source: 'github', + sourceUrl: 'https://github.com/test/repo', + createdAt: now, + updatedAt: now, + ...overrides + }; +} + +// --------------------------------------------------------------------------- +// Tests +// --------------------------------------------------------------------------- + +describe('repositories table', () => { + let db: ReturnType<typeof createTestDb>['db']; + + beforeEach(() => { + ({ db } = createTestDb()); + }); + + it('inserts and retrieves a repository', () => { + const repo = makeRepo(); + db.insert(repositories).values(repo).run(); + + const result = db.select().from(repositories).all(); + expect(result).toHaveLength(1); + expect(result[0].id).toBe('/test/repo'); + expect(result[0].title).toBe('Test Repo'); + expect(result[0].source).toBe('github'); + expect(result[0].state).toBe('pending'); + expect(result[0].totalSnippets).toBe(0); + expect(result[0].totalTokens).toBe(0); + expect(result[0].trustScore).toBe(0); + expect(result[0].benchmarkScore).toBe(0); + }); + + it('allows nullable optional fields', () => { + const repo = makeRepo({ description: null, stars: null, githubToken: null }); + db.insert(repositories).values(repo).run(); + + const result = db.select().from(repositories).all(); + expect(result[0].description).toBeNull(); + expect(result[0].stars).toBeNull(); + expect(result[0].githubToken).toBeNull(); + }); + + it('supports all state enum values', () => { + const states = ['pending', 'indexing', 'indexed', 'error'] as const; + for (const state of states) { + db.insert(repositories) + .values(makeRepo({ id: `/test/${state}`, state })) + .run(); + } + const results = db.select().from(repositories).all(); + const resultStates = results.map((r) => r.state).sort(); + 
expect(resultStates).toEqual([...states].sort()); + }); +}); + +describe('repository_versions table', () => { + let db: ReturnType<typeof createTestDb>['db']; + + beforeEach(() => { + ({ db } = createTestDb()); + db.insert(repositories).values(makeRepo()).run(); + }); + + it('inserts a version linked to a repository', () => { + db.insert(repositoryVersions) + .values({ + id: '/test/repo/v1.0.0', + repositoryId: '/test/repo', + tag: 'v1.0.0', + title: 'Version 1.0.0', + createdAt: now + }) + .run(); + + const result = db.select().from(repositoryVersions).all(); + expect(result).toHaveLength(1); + expect(result[0].tag).toBe('v1.0.0'); + expect(result[0].repositoryId).toBe('/test/repo'); + }); + + it('cascades delete when parent repository is deleted', () => { + db.insert(repositoryVersions) + .values({ + id: '/test/repo/v1.0.0', + repositoryId: '/test/repo', + tag: 'v1.0.0', + createdAt: now + }) + .run(); + + db.delete(repositories).where(eq(repositories.id, '/test/repo')).run(); + + const result = db.select().from(repositoryVersions).all(); + expect(result).toHaveLength(0); + }); +}); + +describe('documents table', () => { + let db: ReturnType<typeof createTestDb>['db']; + + beforeEach(() => { + ({ db } = createTestDb()); + db.insert(repositories).values(makeRepo()).run(); + }); + + it('inserts a document', () => { + db.insert(documents) + .values({ + id: crypto.randomUUID(), + repositoryId: '/test/repo', + filePath: 'README.md', + checksum: 'abc123', + indexedAt: now + }) + .run(); + + const result = db.select().from(documents).all(); + expect(result).toHaveLength(1); + expect(result[0].filePath).toBe('README.md'); + expect(result[0].checksum).toBe('abc123'); + }); + + it('cascades delete when repository is deleted', () => { + db.insert(documents) + .values({ + id: crypto.randomUUID(), + repositoryId: '/test/repo', + filePath: 'README.md', + checksum: 'abc123', + indexedAt: now + }) + .run(); + + db.delete(repositories).where(eq(repositories.id, '/test/repo')).run(); + + const result = 
db.select().from(documents).all(); + expect(result).toHaveLength(0); + }); +}); + +describe('snippets table', () => { + let db: ReturnType<typeof createTestDb>['db']; + let docId: string; + + beforeEach(() => { + ({ db } = createTestDb()); + db.insert(repositories).values(makeRepo()).run(); + docId = crypto.randomUUID(); + db.insert(documents) + .values({ + id: docId, + repositoryId: '/test/repo', + filePath: 'README.md', + checksum: 'abc123', + indexedAt: now + }) + .run(); + }); + + it('inserts a code snippet', () => { + const snippetId = crypto.randomUUID(); + db.insert(snippets) + .values({ + id: snippetId, + documentId: docId, + repositoryId: '/test/repo', + type: 'code', + content: 'console.log("hello")', + language: 'javascript', + createdAt: now + }) + .run(); + + const result = db.select().from(snippets).all(); + expect(result).toHaveLength(1); + expect(result[0].type).toBe('code'); + expect(result[0].language).toBe('javascript'); + }); + + it('inserts an info snippet', () => { + const snippetId = crypto.randomUUID(); + db.insert(snippets) + .values({ + id: snippetId, + documentId: docId, + repositoryId: '/test/repo', + type: 'info', + content: 'This is documentation text.', + breadcrumb: 'Intro > Overview', + createdAt: now + }) + .run(); + + const result = db.select().from(snippets).all(); + expect(result[0].breadcrumb).toBe('Intro > Overview'); + }); + + it('cascades delete when document is deleted', () => { + db.insert(snippets) + .values({ + id: crypto.randomUUID(), + documentId: docId, + repositoryId: '/test/repo', + type: 'info', + content: 'Some content.', + createdAt: now + }) + .run(); + + db.delete(documents).where(eq(documents.id, docId)).run(); + + const result = db.select().from(snippets).all(); + expect(result).toHaveLength(0); + }); +}); + +describe('snippet_embeddings table', () => { + let db: ReturnType<typeof createTestDb>['db']; + let snippetId: string; + + beforeEach(() => { + ({ db } = createTestDb()); + db.insert(repositories).values(makeRepo()).run(); + const docId = 
crypto.randomUUID(); + db.insert(documents) + .values({ + id: docId, + repositoryId: '/test/repo', + filePath: 'README.md', + checksum: 'abc123', + indexedAt: now + }) + .run(); + snippetId = crypto.randomUUID(); + db.insert(snippets) + .values({ + id: snippetId, + documentId: docId, + repositoryId: '/test/repo', + type: 'info', + content: 'hello world', + createdAt: now + }) + .run(); + }); + + it('stores a Float32Array embedding as blob', () => { + const vec = new Float32Array([0.1, 0.2, 0.3, 0.4]); + const buf = Buffer.from(vec.buffer); + + db.insert(snippetEmbeddings) + .values({ + snippetId, + model: 'text-embedding-3-small', + dimensions: 4, + embedding: buf, + createdAt: now + }) + .run(); + + const result = db.select().from(snippetEmbeddings).all(); + expect(result).toHaveLength(1); + expect(result[0].model).toBe('text-embedding-3-small'); + expect(result[0].dimensions).toBe(4); + + const retrieved = new Float32Array( + (result[0].embedding as Buffer).buffer, + (result[0].embedding as Buffer).byteOffset, + (result[0].embedding as Buffer).byteLength / 4 + ); + // Float32Array has ~7 decimal digits of precision; use toBeCloseTo. 
+ expect(retrieved[0]).toBeCloseTo(0.1, 5); + expect(retrieved[1]).toBeCloseTo(0.2, 5); + expect(retrieved[2]).toBeCloseTo(0.3, 5); + expect(retrieved[3]).toBeCloseTo(0.4, 5); + }); + + it('cascades delete when snippet is deleted', () => { + const vec = new Float32Array([1, 2]); + db.insert(snippetEmbeddings) + .values({ + snippetId, + model: 'test-model', + dimensions: 2, + embedding: Buffer.from(vec.buffer), + createdAt: now + }) + .run(); + + db.delete(snippets).where(eq(snippets.id, snippetId)).run(); + + const result = db.select().from(snippetEmbeddings).all(); + expect(result).toHaveLength(0); + }); +}); + +describe('indexing_jobs table', () => { + let db: ReturnType<typeof createTestDb>['db']; + + beforeEach(() => { + ({ db } = createTestDb()); + db.insert(repositories).values(makeRepo()).run(); + }); + + it('creates a job with default queued status', () => { + db.insert(indexingJobs) + .values({ + id: crypto.randomUUID(), + repositoryId: '/test/repo', + createdAt: now + }) + .run(); + + const result = db.select().from(indexingJobs).all(); + expect(result[0].status).toBe('queued'); + expect(result[0].progress).toBe(0); + expect(result[0].totalFiles).toBe(0); + expect(result[0].processedFiles).toBe(0); + }); + + it('supports all status enum values', () => { + const statuses = ['queued', 'running', 'done', 'failed'] as const; + for (const status of statuses) { + db.insert(indexingJobs) + .values({ + id: crypto.randomUUID(), + repositoryId: '/test/repo', + status, + createdAt: now + }) + .run(); + } + const results = db.select().from(indexingJobs).all(); + expect(results.map((r) => r.status).sort()).toEqual([...statuses].sort()); + }); +}); + +describe('repository_configs table', () => { + let db: ReturnType<typeof createTestDb>['db']; + + beforeEach(() => { + ({ db } = createTestDb()); + db.insert(repositories).values(makeRepo()).run(); + }); + + it('stores JSON array fields correctly', () => { + db.insert(repositoryConfigs) + .values({ + repositoryId: '/test/repo', + projectTitle: 'My SDK', +
folders: ['docs', 'src'], + excludeFolders: ['node_modules', '.git'], + excludeFiles: ['*.test.ts'], + rules: ['Always use TypeScript'], + previousVersions: [{ tag: 'v1.0.0', title: 'Version 1' }], + updatedAt: now + }) + .run(); + + const result = db.select().from(repositoryConfigs).all(); + expect(result).toHaveLength(1); + expect(result[0].folders).toEqual(['docs', 'src']); + expect(result[0].excludeFolders).toEqual(['node_modules', '.git']); + expect(result[0].rules).toEqual(['Always use TypeScript']); + expect(result[0].previousVersions).toEqual([{ tag: 'v1.0.0', title: 'Version 1' }]); + }); +}); + +describe('settings table', () => { + let db: ReturnType<typeof createTestDb>['db']; + + beforeEach(() => { + ({ db } = createTestDb()); + }); + + it('stores and retrieves key-value settings', () => { + db.insert(settings) + .values({ key: 'embeddingProvider', value: { provider: 'openai' }, updatedAt: now }) + .run(); + + const result = db.select().from(settings).all(); + expect(result).toHaveLength(1); + expect(result[0].key).toBe('embeddingProvider'); + expect(result[0].value).toEqual({ provider: 'openai' }); + }); +}); + +describe('FTS5 virtual table (snippets_fts)', () => { + let db: ReturnType<typeof createTestDb>['db']; + let client: Database.Database; + + beforeEach(() => { + ({ db, client } = createTestDb()); + db.insert(repositories).values(makeRepo()).run(); + const docId = crypto.randomUUID(); + db.insert(documents) + .values({ + id: docId, + repositoryId: '/test/repo', + filePath: 'README.md', + checksum: 'abc', + indexedAt: now + }) + .run(); + db.insert(snippets) + .values({ + id: crypto.randomUUID(), + documentId: docId, + repositoryId: '/test/repo', + type: 'info', + content: 'The quick brown fox jumps over the lazy dog', + title: 'Fox story', + breadcrumb: 'Animals > Foxes', + createdAt: now + }) + .run(); + }); + + it('FTS table exists and is queryable', () => { + const result = client + .prepare(`SELECT rowid FROM snippets_fts WHERE snippets_fts MATCH 'fox'`) + .all(); +
expect(result.length).toBeGreaterThan(0); + }); + + it('insert trigger keeps FTS in sync', () => { + const result = client + .prepare(`SELECT rowid FROM snippets_fts WHERE snippets_fts MATCH 'quick'`) + .all(); + expect(result.length).toBe(1); + }); + + it('delete trigger removes entry from FTS', () => { + db.delete(snippets).run(); + + const result = client + .prepare(`SELECT rowid FROM snippets_fts WHERE snippets_fts MATCH 'quick'`) + .all(); + expect(result.length).toBe(0); + }); +}); diff --git a/src/lib/server/db/schema.ts b/src/lib/server/db/schema.ts index 7445bc5..5581c4a 100644 --- a/src/lib/server/db/schema.ts +++ b/src/lib/server/db/schema.ts @@ -1,9 +1,177 @@ -import { integer, sqliteTable, text } from 'drizzle-orm/sqlite-core'; +import { blob, integer, real, sqliteTable, text } from 'drizzle-orm/sqlite-core'; -export const task = sqliteTable('task', { - id: text('id') - .primaryKey() - .$defaultFn(() => crypto.randomUUID()), +// --------------------------------------------------------------------------- +// repositories +// --------------------------------------------------------------------------- +export const repositories = sqliteTable('repositories', { + id: text('id').primaryKey(), // e.g. 
"/facebook/react" or "/local/my-sdk" title: text('title').notNull(), - priority: integer('priority').notNull().default(1) + description: text('description'), + source: text('source', { enum: ['github', 'local'] }).notNull(), + sourceUrl: text('source_url').notNull(), // GitHub URL or absolute local path + branch: text('branch').default('main'), + state: text('state', { + enum: ['pending', 'indexing', 'indexed', 'error'] + }) + .notNull() + .default('pending'), + totalSnippets: integer('total_snippets').default(0), + totalTokens: integer('total_tokens').default(0), + trustScore: real('trust_score').default(0), // 0.0–10.0 + benchmarkScore: real('benchmark_score').default(0), // 0.0–100.0; reserved for future quality metrics + stars: integer('stars'), + // TODO: encrypt at rest in production; stored as plaintext for v1 + githubToken: text('github_token'), + lastIndexedAt: integer('last_indexed_at', { mode: 'timestamp' }), + createdAt: integer('created_at', { mode: 'timestamp' }).notNull(), + updatedAt: integer('updated_at', { mode: 'timestamp' }).notNull() }); + +// --------------------------------------------------------------------------- +// repository_versions +// --------------------------------------------------------------------------- +export const repositoryVersions = sqliteTable('repository_versions', { + id: text('id').primaryKey(), // e.g. 
"/facebook/react/v18.3.0" + repositoryId: text('repository_id') + .notNull() + .references(() => repositories.id, { onDelete: 'cascade' }), + tag: text('tag').notNull(), // git tag or branch name + title: text('title'), + state: text('state', { + enum: ['pending', 'indexing', 'indexed', 'error'] + }) + .notNull() + .default('pending'), + totalSnippets: integer('total_snippets').default(0), + indexedAt: integer('indexed_at', { mode: 'timestamp' }), + createdAt: integer('created_at', { mode: 'timestamp' }).notNull() +}); + +// --------------------------------------------------------------------------- +// documents +// --------------------------------------------------------------------------- +export const documents = sqliteTable('documents', { + id: text('id').primaryKey(), // UUID + repositoryId: text('repository_id') + .notNull() + .references(() => repositories.id, { onDelete: 'cascade' }), + versionId: text('version_id').references(() => repositoryVersions.id, { onDelete: 'cascade' }), + filePath: text('file_path').notNull(), // relative path within repo + title: text('title'), + language: text('language'), // e.g. 
"typescript", "markdown" + tokenCount: integer('token_count').default(0), + checksum: text('checksum').notNull(), // SHA-256 of file content + indexedAt: integer('indexed_at', { mode: 'timestamp' }).notNull() +}); + +// --------------------------------------------------------------------------- +// snippets +// --------------------------------------------------------------------------- +export const snippets = sqliteTable('snippets', { + id: text('id').primaryKey(), // UUID + documentId: text('document_id') + .notNull() + .references(() => documents.id, { onDelete: 'cascade' }), + repositoryId: text('repository_id') + .notNull() + .references(() => repositories.id, { onDelete: 'cascade' }), + versionId: text('version_id').references(() => repositoryVersions.id, { onDelete: 'cascade' }), + type: text('type', { enum: ['code', 'info'] }).notNull(), + title: text('title'), + content: text('content').notNull(), // searchable text / code + language: text('language'), + breadcrumb: text('breadcrumb'), // e.g. 
"Installation > Getting Started" + tokenCount: integer('token_count').default(0), + createdAt: integer('created_at', { mode: 'timestamp' }).notNull() +}); + +// --------------------------------------------------------------------------- +// snippet_embeddings +// --------------------------------------------------------------------------- +export const snippetEmbeddings = sqliteTable('snippet_embeddings', { + snippetId: text('snippet_id') + .primaryKey() + .references(() => snippets.id, { onDelete: 'cascade' }), + model: text('model').notNull(), // embedding model identifier + dimensions: integer('dimensions').notNull(), + embedding: blob('embedding').notNull(), // Float32Array as binary blob + createdAt: integer('created_at', { mode: 'timestamp' }).notNull() +}); + +// --------------------------------------------------------------------------- +// indexing_jobs +// --------------------------------------------------------------------------- +export const indexingJobs = sqliteTable('indexing_jobs', { + id: text('id').primaryKey(), // UUID + repositoryId: text('repository_id') + .notNull() + .references(() => repositories.id, { onDelete: 'cascade' }), + versionId: text('version_id'), + status: text('status', { + enum: ['queued', 'running', 'done', 'failed'] + }) + .notNull() + .default('queued'), + progress: integer('progress').default(0), // 0–100 + totalFiles: integer('total_files').default(0), + processedFiles: integer('processed_files').default(0), + error: text('error'), + startedAt: integer('started_at', { mode: 'timestamp' }), + completedAt: integer('completed_at', { mode: 'timestamp' }), + createdAt: integer('created_at', { mode: 'timestamp' }).notNull() +}); + +// --------------------------------------------------------------------------- +// repository_configs +// --------------------------------------------------------------------------- +export const repositoryConfigs = sqliteTable('repository_configs', { + repositoryId: text('repository_id') + 
.primaryKey() + .references(() => repositories.id, { onDelete: 'cascade' }), + projectTitle: text('project_title'), + description: text('description'), + folders: text('folders', { mode: 'json' }).$type<string[]>(), + excludeFolders: text('exclude_folders', { mode: 'json' }).$type<string[]>(), + excludeFiles: text('exclude_files', { mode: 'json' }).$type<string[]>(), + rules: text('rules', { mode: 'json' }).$type<string[]>(), + previousVersions: text('previous_versions', { mode: 'json' }).$type< + { tag: string; title: string }[] + >(), + updatedAt: integer('updated_at', { mode: 'timestamp' }).notNull() +}); + +// --------------------------------------------------------------------------- +// settings +// --------------------------------------------------------------------------- +export const settings = sqliteTable('settings', { + key: text('key').primaryKey(), + value: text('value', { mode: 'json' }), + updatedAt: integer('updated_at', { mode: 'timestamp' }).notNull() +}); + +// --------------------------------------------------------------------------- +// Inferred TypeScript types +// --------------------------------------------------------------------------- +export type Repository = typeof repositories.$inferSelect; +export type NewRepository = typeof repositories.$inferInsert; + +export type RepositoryVersion = typeof repositoryVersions.$inferSelect; +export type NewRepositoryVersion = typeof repositoryVersions.$inferInsert; + +export type Document = typeof documents.$inferSelect; +export type NewDocument = typeof documents.$inferInsert; + +export type Snippet = typeof snippets.$inferSelect; +export type NewSnippet = typeof snippets.$inferInsert; + +export type SnippetEmbedding = typeof snippetEmbeddings.$inferSelect; +export type NewSnippetEmbedding = typeof snippetEmbeddings.$inferInsert; + +export type IndexingJob = typeof indexingJobs.$inferSelect; +export type NewIndexingJob = typeof indexingJobs.$inferInsert; + +export type RepositoryConfig = typeof repositoryConfigs.$inferSelect; +export
type NewRepositoryConfig = typeof repositoryConfigs.$inferInsert; + +export type Settings = typeof settings.$inferSelect; +export type NewSettings = typeof settings.$inferInsert; diff --git a/src/lib/types.ts b/src/lib/types.ts new file mode 100644 index 0000000..c41d8f3 --- /dev/null +++ b/src/lib/types.ts @@ -0,0 +1,96 @@ +/** + * Shared domain types for TrueRef. + * These re-export and extend the Drizzle-inferred schema types with + * application-level conveniences. + */ + +export type { + Repository, + NewRepository, + RepositoryVersion, + NewRepositoryVersion, + Document, + NewDocument, + Snippet, + NewSnippet, + SnippetEmbedding, + NewSnippetEmbedding, + IndexingJob, + NewIndexingJob, + RepositoryConfig, + NewRepositoryConfig, + Settings, + NewSettings +} from './server/db/schema'; + +// --------------------------------------------------------------------------- +// Application-level union types (narrower than raw DB enums) +// --------------------------------------------------------------------------- + +export type RepositorySource = 'github' | 'local'; +export type RepositoryState = 'pending' | 'indexing' | 'indexed' | 'error'; +export type SnippetType = 'code' | 'info'; +export type JobStatus = 'queued' | 'running' | 'done' | 'failed'; +export type VersionState = 'pending' | 'indexing' | 'indexed' | 'error'; + +// --------------------------------------------------------------------------- +// API / service layer types +// --------------------------------------------------------------------------- + +/** Payload accepted by the repository management service when adding a repo. */ +export interface AddRepositoryInput { + id: string; + title: string; + description?: string; + source: RepositorySource; + sourceUrl: string; + branch?: string; + githubToken?: string; +} + +/** Lightweight repository summary returned in list endpoints. 
*/ +export interface RepositorySummary { + id: string; + title: string; + description: string | null; + source: RepositorySource; + state: RepositoryState; + totalSnippets: number; + totalTokens: number; + trustScore: number; + stars: number | null; + lastIndexedAt: Date | null; +} + +/** Snippet returned from search results. */ +export interface SearchResultSnippet { + id: string; + repositoryId: string; + documentId: string; + type: SnippetType; + title: string | null; + content: string; + language: string | null; + breadcrumb: string | null; + tokenCount: number; + score?: number; +} + +/** Search request parameters. */ +export interface SearchQuery { + query: string; + libraryId?: string; + type?: SnippetType; + limit?: number; +} + +/** Parsed trueref.json / context7.json configuration. */ +export interface TrueRefConfig { + projectTitle?: string; + description?: string; + folders?: string[]; + excludeFolders?: string[]; + excludeFiles?: string[]; + rules?: string[]; + previousVersions?: Array<{ tag: string; title: string }>; +}