chore: initial project scaffold
This commit is contained in:
261
docs/features/TRUEREF-0001.md
Normal file
261
docs/features/TRUEREF-0001.md
Normal file
@@ -0,0 +1,261 @@
|
||||
# TRUEREF-0001 — Database Schema & Core Data Models
|
||||
|
||||
**Priority:** P0
|
||||
**Status:** Pending
|
||||
**Depends On:** —
|
||||
**Blocks:** All other features
|
||||
|
||||
---
|
||||
|
||||
## Overview
|
||||
|
||||
Define and implement the complete SQLite database schema using Drizzle ORM. This is the foundation of all data persistence in TrueRef. Every other feature depends on these tables and types being in place.
|
||||
|
||||
---
|
||||
|
||||
## Acceptance Criteria
|
||||
|
||||
- [ ] All tables defined in `src/lib/server/db/schema.ts` using Drizzle ORM syntax
|
||||
- [ ] All TypeScript types exported and usable across the codebase
|
||||
- [ ] Migration generated via `drizzle-kit generate` and applied via `drizzle-kit migrate`
|
||||
- [ ] Schema validates via `drizzle-kit push` in dev mode
|
||||
- [ ] Unit tests covering insertions, queries, and foreign key constraints
|
||||
|
||||
---
|
||||
|
||||
## Schema Specification
|
||||
|
||||
### Table: `repositories`
|
||||
|
||||
Represents an indexed library source (GitHub repo or local directory).
|
||||
|
||||
```typescript
|
||||
export const repositories = sqliteTable('repositories', {
|
||||
id: text('id').primaryKey(), // e.g. "/facebook/react" or "/local/my-sdk"
|
||||
title: text('title').notNull(),
|
||||
description: text('description'),
|
||||
source: text('source', { enum: ['github', 'local'] }).notNull(),
|
||||
sourceUrl: text('source_url').notNull(), // GitHub URL or absolute local path
|
||||
branch: text('branch').default('main'),
|
||||
state: text('state', {
|
||||
enum: ['pending', 'indexing', 'indexed', 'error']
|
||||
}).notNull().default('pending'),
|
||||
totalSnippets: integer('total_snippets').default(0),
|
||||
totalTokens: integer('total_tokens').default(0),
|
||||
trustScore: real('trust_score').default(0), // 0.0–10.0
|
||||
benchmarkScore: real('benchmark_score').default(0), // 0.0–100.0
|
||||
stars: integer('stars'),
|
||||
githubToken: text('github_token'), // encrypted PAT for private repos
|
||||
lastIndexedAt: integer('last_indexed_at', { mode: 'timestamp' }),
|
||||
createdAt: integer('created_at', { mode: 'timestamp' }).notNull(),
|
||||
updatedAt: integer('updated_at', { mode: 'timestamp' }).notNull(),
|
||||
});
|
||||
```
|
||||
|
||||
### Table: `repository_versions`
|
||||
|
||||
Tracks indexed git tags/branches beyond the default branch.
|
||||
|
||||
```typescript
|
||||
export const repositoryVersions = sqliteTable('repository_versions', {
|
||||
id: text('id').primaryKey(), // e.g. "/facebook/react/v18.3.0"
|
||||
repositoryId: text('repository_id').notNull()
|
||||
.references(() => repositories.id, { onDelete: 'cascade' }),
|
||||
tag: text('tag').notNull(), // git tag or branch name
|
||||
title: text('title'),
|
||||
state: text('state', {
|
||||
enum: ['pending', 'indexing', 'indexed', 'error']
|
||||
}).notNull().default('pending'),
|
||||
totalSnippets: integer('total_snippets').default(0),
|
||||
indexedAt: integer('indexed_at', { mode: 'timestamp' }),
|
||||
createdAt: integer('created_at', { mode: 'timestamp' }).notNull(),
|
||||
});
|
||||
```
|
||||
|
||||
### Table: `documents`
|
||||
|
||||
A parsed source file within a repository.
|
||||
|
||||
```typescript
|
||||
export const documents = sqliteTable('documents', {
|
||||
id: text('id').primaryKey(), // UUID
|
||||
repositoryId: text('repository_id').notNull()
|
||||
.references(() => repositories.id, { onDelete: 'cascade' }),
|
||||
versionId: text('version_id')
|
||||
.references(() => repositoryVersions.id, { onDelete: 'cascade' }),
|
||||
filePath: text('file_path').notNull(), // relative path within repo
|
||||
title: text('title'),
|
||||
language: text('language'), // e.g. "typescript", "markdown"
|
||||
tokenCount: integer('token_count').default(0),
|
||||
checksum: text('checksum').notNull(), // SHA-256 of file content
|
||||
indexedAt: integer('indexed_at', { mode: 'timestamp' }).notNull(),
|
||||
});
|
||||
```
|
||||
|
||||
### Table: `snippets`
|
||||
|
||||
An indexed chunk of content, the atomic unit of search.
|
||||
|
||||
```typescript
|
||||
export const snippets = sqliteTable('snippets', {
|
||||
id: text('id').primaryKey(), // UUID
|
||||
documentId: text('document_id').notNull()
|
||||
.references(() => documents.id, { onDelete: 'cascade' }),
|
||||
repositoryId: text('repository_id').notNull()
|
||||
.references(() => repositories.id, { onDelete: 'cascade' }),
|
||||
versionId: text('version_id')
|
||||
.references(() => repositoryVersions.id, { onDelete: 'cascade' }),
|
||||
type: text('type', { enum: ['code', 'info'] }).notNull(),
|
||||
title: text('title'),
|
||||
content: text('content').notNull(), // searchable text / code
|
||||
language: text('language'),
|
||||
breadcrumb: text('breadcrumb'), // e.g. "Installation > Getting Started"
|
||||
tokenCount: integer('token_count').default(0),
|
||||
createdAt: integer('created_at', { mode: 'timestamp' }).notNull(),
|
||||
});
|
||||
```
|
||||
|
||||
### Table: `snippet_embeddings`
|
||||
|
||||
Stores vector embeddings in a separate table to keep the `snippets` table lean.
|
||||
|
||||
```typescript
|
||||
export const snippetEmbeddings = sqliteTable('snippet_embeddings', {
|
||||
snippetId: text('snippet_id').primaryKey()
|
||||
.references(() => snippets.id, { onDelete: 'cascade' }),
|
||||
model: text('model').notNull(), // embedding model identifier
|
||||
dimensions: integer('dimensions').notNull(),
|
||||
embedding: blob('embedding').notNull(), // Float32Array as binary blob
|
||||
createdAt: integer('created_at', { mode: 'timestamp' }).notNull(),
|
||||
});
|
||||
```
|
||||
|
||||
### Table: `indexing_jobs`
|
||||
|
||||
Tracks asynchronous indexing operations.
|
||||
|
||||
```typescript
|
||||
export const indexingJobs = sqliteTable('indexing_jobs', {
|
||||
id: text('id').primaryKey(), // UUID
|
||||
repositoryId: text('repository_id').notNull()
|
||||
.references(() => repositories.id, { onDelete: 'cascade' }),
|
||||
versionId: text('version_id'),
|
||||
status: text('status', {
|
||||
enum: ['queued', 'running', 'done', 'failed']
|
||||
}).notNull().default('queued'),
|
||||
progress: integer('progress').default(0), // 0–100
|
||||
totalFiles: integer('total_files').default(0),
|
||||
processedFiles: integer('processed_files').default(0),
|
||||
error: text('error'),
|
||||
startedAt: integer('started_at', { mode: 'timestamp' }),
|
||||
completedAt: integer('completed_at', { mode: 'timestamp' }),
|
||||
createdAt: integer('created_at', { mode: 'timestamp' }).notNull(),
|
||||
});
|
||||
```
|
||||
|
||||
### Table: `repository_configs`
|
||||
|
||||
Stores parsed `trueref.json` / `context7.json` configuration.
|
||||
|
||||
```typescript
|
||||
export const repositoryConfigs = sqliteTable('repository_configs', {
|
||||
repositoryId: text('repository_id').primaryKey()
|
||||
.references(() => repositories.id, { onDelete: 'cascade' }),
|
||||
projectTitle: text('project_title'),
|
||||
description: text('description'),
|
||||
folders: text('folders', { mode: 'json' }).$type<string[]>(),
|
||||
excludeFolders: text('exclude_folders', { mode: 'json' }).$type<string[]>(),
|
||||
excludeFiles: text('exclude_files', { mode: 'json' }).$type<string[]>(),
|
||||
rules: text('rules', { mode: 'json' }).$type<string[]>(),
|
||||
previousVersions: text('previous_versions', { mode: 'json' })
|
||||
.$type<{ tag: string; title: string }[]>(),
|
||||
updatedAt: integer('updated_at', { mode: 'timestamp' }).notNull(),
|
||||
});
|
||||
```
|
||||
|
||||
### Table: `settings`
|
||||
|
||||
Key-value store for global application settings.
|
||||
|
||||
```typescript
|
||||
export const settings = sqliteTable('settings', {
|
||||
key: text('key').primaryKey(),
|
||||
value: text('value', { mode: 'json' }),
|
||||
updatedAt: integer('updated_at', { mode: 'timestamp' }).notNull(),
|
||||
});
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## FTS5 Virtual Tables
|
||||
|
||||
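Full-text search indexes are created via raw SQL, because Drizzle cannot express FTS5 virtual-table DDL. Note that `snippets` has a text primary key, so `content_rowid='rowid'` relies on SQLite's implicit rowid; `VACUUM` may renumber implicit rowids, after which the index must be rebuilt with `INSERT INTO snippets_fts(snippets_fts) VALUES('rebuild')`: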
|
||||
|
||||
```sql
|
||||
-- External-content FTS5 table: indexes text stored in snippets, looked up by rowid
|
||||
CREATE VIRTUAL TABLE IF NOT EXISTS snippets_fts USING fts5(
|
||||
content,
|
||||
title,
|
||||
breadcrumb,
|
||||
content='snippets',
|
||||
content_rowid='rowid',
|
||||
tokenize='porter unicode61'
|
||||
);
|
||||
|
||||
-- Triggers to keep FTS in sync
|
||||
CREATE TRIGGER snippets_ai AFTER INSERT ON snippets BEGIN
|
||||
INSERT INTO snippets_fts(rowid, content, title, breadcrumb)
|
||||
VALUES (new.rowid, new.content, new.title, new.breadcrumb);
|
||||
END;
|
||||
|
||||
CREATE TRIGGER snippets_ad AFTER DELETE ON snippets BEGIN
|
||||
INSERT INTO snippets_fts(snippets_fts, rowid, content, title, breadcrumb)
|
||||
VALUES ('delete', old.rowid, old.content, old.title, old.breadcrumb);
|
||||
END;
|
||||
|
||||
CREATE TRIGGER snippets_au AFTER UPDATE ON snippets BEGIN
|
||||
INSERT INTO snippets_fts(snippets_fts, rowid, content, title, breadcrumb)
|
||||
VALUES ('delete', old.rowid, old.content, old.title, old.breadcrumb);
|
||||
INSERT INTO snippets_fts(rowid, content, title, breadcrumb)
|
||||
VALUES (new.rowid, new.content, new.title, new.breadcrumb);
|
||||
END;
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## TypeScript Types
|
||||
|
||||
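Export inferred types alongside the schema. The list below is representative rather than exhaustive; to satisfy the acceptance criteria, export the matching `$inferSelect` / `$inferInsert` types for every table, including `repositoryVersions`, `snippetEmbeddings`, and `settings`: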
|
||||
|
||||
```typescript
|
||||
export type Repository = typeof repositories.$inferSelect;
|
||||
export type NewRepository = typeof repositories.$inferInsert;
|
||||
export type Document = typeof documents.$inferSelect;
|
||||
export type NewDocument = typeof documents.$inferInsert;
|
||||
export type Snippet = typeof snippets.$inferSelect;
|
||||
export type NewSnippet = typeof snippets.$inferInsert;
|
||||
export type IndexingJob = typeof indexingJobs.$inferSelect;
|
||||
export type NewIndexingJob = typeof indexingJobs.$inferInsert;
|
||||
export type RepositoryConfig = typeof repositoryConfigs.$inferSelect;
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Implementation Notes
|
||||
|
||||
- Use `crypto.randomUUID()` for all UUID primary keys.
|
||||
- `trustScore` is computed from stars (normalized), snippet count, and successful indexing history.
|
||||
- `benchmarkScore` is reserved for future quality metrics; default to 0.
|
||||
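- The `githubToken` field should be encrypted at rest in production; v1 stores it as plaintext, so mark the column with a TODO comment. (The "encrypted PAT" comment in the schema describes the intended end state, not v1 behavior.)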
|
||||
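- The FTS5 virtual table and its triggers (the contents of `fts.sql`) must be created as part of the initial migration, alongside the Drizzle-generated schema. The `CREATE TRIGGER` statements above have no `IF NOT EXISTS` guard, so either run them exactly once or add the guard if `fts.sql` may be re-executed.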
|
||||
- Database initialization should happen in `src/lib/server/db/index.ts`, running migrations on startup.
|
||||
|
||||
---
|
||||
|
||||
## Files to Create/Modify
|
||||
|
||||
- `src/lib/server/db/schema.ts` — complete schema definition
|
||||
- `src/lib/server/db/index.ts` — database connection + migration runner
|
||||
- `src/lib/server/db/migrations/` — generated migration files
|
||||
- `src/lib/server/db/fts.sql` — raw SQL for FTS5 virtual tables and triggers
|
||||
- `src/lib/types.ts` — shared domain types
|
||||
Reference in New Issue
Block a user