9.2 KiB
TRUEREF-0001 — Database Schema & Core Data Models
Priority: P0 | Status: Pending | Depends On: — | Blocks: All other features
Overview
Define and implement the complete SQLite database schema using Drizzle ORM. This is the foundation of all data persistence in TrueRef. Every other feature depends on these tables and types being in place.
Acceptance Criteria
- All tables defined in `src/lib/server/db/schema.ts` using Drizzle ORM syntax
- All TypeScript types exported and usable across the codebase
- Migration generated via `drizzle-kit generate` and applied via `drizzle-kit migrate`
- Schema validates via `drizzle-kit push` in dev mode
- Unit tests covering insertions, queries, and foreign key constraints
Schema Specification
Table: repositories
Represents an indexed library source (GitHub repo or local directory).
/**
 * An indexed library source: either a GitHub repository or a local directory.
 */
export const repositories = sqliteTable('repositories', {
  /** Path-style identifier, e.g. "/facebook/react" or "/local/my-sdk". */
  id: text('id').primaryKey(),

  title: text('title').notNull(),
  description: text('description'),

  source: text('source', { enum: ['github', 'local'] }).notNull(),
  /** GitHub URL or absolute local path, depending on `source`. */
  sourceUrl: text('source_url').notNull(),
  branch: text('branch').default('main'),

  state: text('state', { enum: ['pending', 'indexing', 'indexed', 'error'] })
    .notNull()
    .default('pending'),

  totalSnippets: integer('total_snippets').default(0),
  totalTokens: integer('total_tokens').default(0),

  /** Range 0.0–10.0. */
  trustScore: real('trust_score').default(0),
  /** Range 0.0–100.0. */
  benchmarkScore: real('benchmark_score').default(0),
  stars: integer('stars'),

  /**
   * Personal access token for private repos; meant to be encrypted at rest.
   * TODO(v1): currently stored as plaintext.
   */
  githubToken: text('github_token'),

  lastIndexedAt: integer('last_indexed_at', { mode: 'timestamp' }),
  createdAt: integer('created_at', { mode: 'timestamp' }).notNull(),
  updatedAt: integer('updated_at', { mode: 'timestamp' }).notNull(),
});
Table: repository_versions
Tracks indexed git tags/branches beyond the default branch.
/**
 * An indexed git tag or branch of a repository, other than its default branch.
 * Rows are removed together with their parent repository.
 */
export const repositoryVersions = sqliteTable('repository_versions', {
  /** Path-style identifier, e.g. "/facebook/react/v18.3.0". */
  id: text('id').primaryKey(),

  repositoryId: text('repository_id')
    .notNull()
    .references(() => repositories.id, { onDelete: 'cascade' }),

  /** Git tag or branch name. */
  tag: text('tag').notNull(),
  title: text('title'),

  state: text('state', { enum: ['pending', 'indexing', 'indexed', 'error'] })
    .notNull()
    .default('pending'),

  totalSnippets: integer('total_snippets').default(0),
  indexedAt: integer('indexed_at', { mode: 'timestamp' }),
  createdAt: integer('created_at', { mode: 'timestamp' }).notNull(),
});
Table: documents
A parsed source file within a repository.
/**
 * A parsed source file inside a repository.
 * Deleting the owning repository (or version, when set) deletes the document.
 */
export const documents = sqliteTable('documents', {
  /** UUID. */
  id: text('id').primaryKey(),

  repositoryId: text('repository_id')
    .notNull()
    .references(() => repositories.id, { onDelete: 'cascade' }),
  // Nullable: a document is not required to belong to a specific version row.
  versionId: text('version_id')
    .references(() => repositoryVersions.id, { onDelete: 'cascade' }),

  /** Path relative to the repository root. */
  filePath: text('file_path').notNull(),
  title: text('title'),
  /** e.g. "typescript", "markdown". */
  language: text('language'),
  tokenCount: integer('token_count').default(0),
  /** SHA-256 of the file content. */
  checksum: text('checksum').notNull(),
  indexedAt: integer('indexed_at', { mode: 'timestamp' }).notNull(),
});
Table: snippets
An indexed chunk of content, the atomic unit of search.
/**
 * An indexed chunk of content — the atomic unit of search.
 * Cascades on deletion of its document, repository, or (when set) version.
 */
export const snippets = sqliteTable('snippets', {
  /** UUID. */
  id: text('id').primaryKey(),

  documentId: text('document_id')
    .notNull()
    .references(() => documents.id, { onDelete: 'cascade' }),
  repositoryId: text('repository_id')
    .notNull()
    .references(() => repositories.id, { onDelete: 'cascade' }),
  // Nullable: a snippet is not required to belong to a specific version row.
  versionId: text('version_id')
    .references(() => repositoryVersions.id, { onDelete: 'cascade' }),

  type: text('type', { enum: ['code', 'info'] }).notNull(),
  title: text('title'),
  /** Searchable text or code. */
  content: text('content').notNull(),
  language: text('language'),
  /** Heading trail, e.g. "Installation > Getting Started". */
  breadcrumb: text('breadcrumb'),
  tokenCount: integer('token_count').default(0),
  createdAt: integer('created_at', { mode: 'timestamp' }).notNull(),
});
Table: snippet_embeddings
Stores vector embeddings separately to keep snippets table lean.
/**
 * Vector embedding for a snippet, kept in its own table so that `snippets`
 * stays lean. One row per snippet: the snippet id is the primary key.
 */
export const snippetEmbeddings = sqliteTable('snippet_embeddings', {
  snippetId: text('snippet_id')
    .primaryKey()
    .references(() => snippets.id, { onDelete: 'cascade' }),

  /** Identifier of the embedding model that produced the vector. */
  model: text('model').notNull(),
  dimensions: integer('dimensions').notNull(),
  /** Float32Array serialized as a binary blob. */
  embedding: blob('embedding').notNull(),
  createdAt: integer('created_at', { mode: 'timestamp' }).notNull(),
});
Table: indexing_jobs
Tracks asynchronous indexing operations.
/**
 * Tracks asynchronous indexing operations.
 * Jobs are removed together with their repository, and with their version
 * when one is set.
 */
export const indexingJobs = sqliteTable('indexing_jobs', {
  /** UUID. */
  id: text('id').primaryKey(),

  repositoryId: text('repository_id')
    .notNull()
    .references(() => repositories.id, { onDelete: 'cascade' }),
  // Nullable (presumably null means the repository's default branch — confirm).
  // Declared as a foreign key, like documents.versionId and snippets.versionId,
  // so a job can never point at a version row that no longer exists.
  versionId: text('version_id')
    .references(() => repositoryVersions.id, { onDelete: 'cascade' }),

  status: text('status', { enum: ['queued', 'running', 'done', 'failed'] })
    .notNull()
    .default('queued'),

  /** Percentage, 0–100. */
  progress: integer('progress').default(0),
  totalFiles: integer('total_files').default(0),
  processedFiles: integer('processed_files').default(0),
  error: text('error'),

  startedAt: integer('started_at', { mode: 'timestamp' }),
  completedAt: integer('completed_at', { mode: 'timestamp' }),
  createdAt: integer('created_at', { mode: 'timestamp' }).notNull(),
});
Table: repository_configs
Stores parsed trueref.json / context7.json configuration.
/**
 * Parsed `trueref.json` / `context7.json` configuration for a repository.
 * At most one row per repository: the repository id is the primary key.
 */
export const repositoryConfigs = sqliteTable('repository_configs', {
  repositoryId: text('repository_id')
    .primaryKey()
    .references(() => repositories.id, { onDelete: 'cascade' }),

  projectTitle: text('project_title'),
  description: text('description'),

  // The list-valued settings below are stored as JSON text.
  folders: text('folders', { mode: 'json' }).$type<string[]>(),
  excludeFolders: text('exclude_folders', { mode: 'json' }).$type<string[]>(),
  excludeFiles: text('exclude_files', { mode: 'json' }).$type<string[]>(),
  rules: text('rules', { mode: 'json' }).$type<string[]>(),
  previousVersions: text('previous_versions', { mode: 'json' })
    .$type<{ tag: string; title: string }[]>(),

  updatedAt: integer('updated_at', { mode: 'timestamp' }).notNull(),
});
Table: settings
Key-value store for global application settings.
/**
 * Key-value store for global application settings.
 */
export const settings = sqliteTable('settings', {
  key: text('key').primaryKey(),
  /** Arbitrary setting payload, stored as JSON text; may be null. */
  value: text('value', { mode: 'json' }),
  updatedAt: integer('updated_at', { mode: 'timestamp' }).notNull(),
});
FTS5 Virtual Tables
Full-text search indexes created via raw SQL (not Drizzle, which doesn't support FTS5 DDL):
-- Content-based FTS5 table pointing to snippets.
-- Indexes the content, title and breadcrumb columns; content='snippets' makes
-- this an external-content table whose rows are matched to snippets by rowid.
-- NOTE(review): snippets declares a TEXT primary key, so the rowid used here is
-- the implicit one. SQLite documents that VACUUM may renumber implicit rowids;
-- confirm the index is rebuilt after any VACUUM, e.g. with
--   INSERT INTO snippets_fts(snippets_fts) VALUES ('rebuild');
CREATE VIRTUAL TABLE IF NOT EXISTS snippets_fts USING fts5(
content,
title,
breadcrumb,
content='snippets',
content_rowid='rowid',
tokenize='porter unicode61'
);
-- Triggers to keep FTS in sync with the snippets table.
-- Each trigger uses IF NOT EXISTS, like the virtual table it feeds, so the
-- script can be executed again without failing on an existing trigger.

-- After insert: index the new row.
CREATE TRIGGER IF NOT EXISTS snippets_ai AFTER INSERT ON snippets BEGIN
  INSERT INTO snippets_fts(rowid, content, title, breadcrumb)
  VALUES (new.rowid, new.content, new.title, new.breadcrumb);
END;

-- After delete: remove the old row from the index. The special 'delete'
-- command must be given the values that were originally indexed.
CREATE TRIGGER IF NOT EXISTS snippets_ad AFTER DELETE ON snippets BEGIN
  INSERT INTO snippets_fts(snippets_fts, rowid, content, title, breadcrumb)
  VALUES ('delete', old.rowid, old.content, old.title, old.breadcrumb);
END;

-- After update: drop the old index entry, then index the new values.
CREATE TRIGGER IF NOT EXISTS snippets_au AFTER UPDATE ON snippets BEGIN
  INSERT INTO snippets_fts(snippets_fts, rowid, content, title, breadcrumb)
  VALUES ('delete', old.rowid, old.content, old.title, old.breadcrumb);
  INSERT INTO snippets_fts(rowid, content, title, breadcrumb)
  VALUES (new.rowid, new.content, new.title, new.breadcrumb);
END;
TypeScript Types
Export inferred types alongside the schema:
// Row types inferred from the table definitions. `X` is the shape returned by
// a select; `NewX` is the shape accepted by an insert, where nullable and
// defaulted columns are optional.
export type Repository = typeof repositories.$inferSelect;
export type NewRepository = typeof repositories.$inferInsert;
export type RepositoryVersion = typeof repositoryVersions.$inferSelect;
export type NewRepositoryVersion = typeof repositoryVersions.$inferInsert;
export type Document = typeof documents.$inferSelect;
export type NewDocument = typeof documents.$inferInsert;
export type Snippet = typeof snippets.$inferSelect;
export type NewSnippet = typeof snippets.$inferInsert;
export type SnippetEmbedding = typeof snippetEmbeddings.$inferSelect;
export type NewSnippetEmbedding = typeof snippetEmbeddings.$inferInsert;
export type IndexingJob = typeof indexingJobs.$inferSelect;
export type NewIndexingJob = typeof indexingJobs.$inferInsert;
export type RepositoryConfig = typeof repositoryConfigs.$inferSelect;
export type NewRepositoryConfig = typeof repositoryConfigs.$inferInsert;
export type Setting = typeof settings.$inferSelect;
export type NewSetting = typeof settings.$inferInsert;
Implementation Notes
- Use `crypto.randomUUID()` for all UUID primary keys.
- `trustScore` is computed from: stars (normalized), snippet count, successful indexing history.
- `benchmarkScore` is reserved for future quality metrics; default to 0.
- The `githubToken` field should be encrypted at rest in production; for v1 store as plaintext with a TODO comment.
- FTS5 triggers must be created in the initial migration SQL file alongside the Drizzle-generated schema.
- Database initialization should happen in `src/lib/server/db/index.ts`, running migrations on startup.
Files to Create/Modify
- `src/lib/server/db/schema.ts` — complete schema definition
- `src/lib/server/db/index.ts` — database connection + migration runner
- `src/lib/server/db/migrations/` — generated migration files
- `src/lib/server/db/fts.sql` — raw SQL for FTS5 virtual tables and triggers
- `src/lib/types.ts` — shared domain types