chore: initial project scaffold
This commit is contained in:
261
docs/features/TRUEREF-0001.md
Normal file
261
docs/features/TRUEREF-0001.md
Normal file
@@ -0,0 +1,261 @@
|
||||
# TRUEREF-0001 — Database Schema & Core Data Models
|
||||
|
||||
**Priority:** P0
|
||||
**Status:** Pending
|
||||
**Depends On:** —
|
||||
**Blocks:** All other features
|
||||
|
||||
---
|
||||
|
||||
## Overview
|
||||
|
||||
Define and implement the complete SQLite database schema using Drizzle ORM. This is the foundation of all data persistence in TrueRef. Every other feature depends on these tables and types being in place.
|
||||
|
||||
---
|
||||
|
||||
## Acceptance Criteria
|
||||
|
||||
- [ ] All tables defined in `src/lib/server/db/schema.ts` using Drizzle ORM syntax
|
||||
- [ ] All TypeScript types exported and usable across the codebase
|
||||
- [ ] Migration generated via `drizzle-kit generate` and applied via `drizzle-kit migrate`
|
||||
- [ ] Schema validates via `drizzle-kit push` in dev mode
|
||||
- [ ] Unit tests covering insertions, queries, and foreign key constraints
|
||||
|
||||
---
|
||||
|
||||
## Schema Specification
|
||||
|
||||
### Table: `repositories`
|
||||
|
||||
Represents an indexed library source (GitHub repo or local directory).
|
||||
|
||||
```typescript
|
||||
export const repositories = sqliteTable('repositories', {
|
||||
id: text('id').primaryKey(), // e.g. "/facebook/react" or "/local/my-sdk"
|
||||
title: text('title').notNull(),
|
||||
description: text('description'),
|
||||
source: text('source', { enum: ['github', 'local'] }).notNull(),
|
||||
sourceUrl: text('source_url').notNull(), // GitHub URL or absolute local path
|
||||
branch: text('branch').default('main'),
|
||||
state: text('state', {
|
||||
enum: ['pending', 'indexing', 'indexed', 'error']
|
||||
}).notNull().default('pending'),
|
||||
totalSnippets: integer('total_snippets').default(0),
|
||||
totalTokens: integer('total_tokens').default(0),
|
||||
trustScore: real('trust_score').default(0), // 0.0–10.0
|
||||
benchmarkScore: real('benchmark_score').default(0), // 0.0–100.0
|
||||
stars: integer('stars'),
|
||||
githubToken: text('github_token'), // encrypted PAT for private repos
|
||||
lastIndexedAt: integer('last_indexed_at', { mode: 'timestamp' }),
|
||||
createdAt: integer('created_at', { mode: 'timestamp' }).notNull(),
|
||||
updatedAt: integer('updated_at', { mode: 'timestamp' }).notNull(),
|
||||
});
|
||||
```
|
||||
|
||||
### Table: `repository_versions`
|
||||
|
||||
Tracks indexed git tags/branches beyond the default branch.
|
||||
|
||||
```typescript
|
||||
export const repositoryVersions = sqliteTable('repository_versions', {
|
||||
id: text('id').primaryKey(), // e.g. "/facebook/react/v18.3.0"
|
||||
repositoryId: text('repository_id').notNull()
|
||||
.references(() => repositories.id, { onDelete: 'cascade' }),
|
||||
tag: text('tag').notNull(), // git tag or branch name
|
||||
title: text('title'),
|
||||
state: text('state', {
|
||||
enum: ['pending', 'indexing', 'indexed', 'error']
|
||||
}).notNull().default('pending'),
|
||||
totalSnippets: integer('total_snippets').default(0),
|
||||
indexedAt: integer('indexed_at', { mode: 'timestamp' }),
|
||||
createdAt: integer('created_at', { mode: 'timestamp' }).notNull(),
|
||||
});
|
||||
```
|
||||
|
||||
### Table: `documents`
|
||||
|
||||
A parsed source file within a repository.
|
||||
|
||||
```typescript
|
||||
export const documents = sqliteTable('documents', {
|
||||
id: text('id').primaryKey(), // UUID
|
||||
repositoryId: text('repository_id').notNull()
|
||||
.references(() => repositories.id, { onDelete: 'cascade' }),
|
||||
versionId: text('version_id')
|
||||
.references(() => repositoryVersions.id, { onDelete: 'cascade' }),
|
||||
filePath: text('file_path').notNull(), // relative path within repo
|
||||
title: text('title'),
|
||||
language: text('language'), // e.g. "typescript", "markdown"
|
||||
tokenCount: integer('token_count').default(0),
|
||||
checksum: text('checksum').notNull(), // SHA-256 of file content
|
||||
indexedAt: integer('indexed_at', { mode: 'timestamp' }).notNull(),
|
||||
});
|
||||
```
|
||||
|
||||
### Table: `snippets`
|
||||
|
||||
An indexed chunk of content, the atomic unit of search.
|
||||
|
||||
```typescript
|
||||
export const snippets = sqliteTable('snippets', {
|
||||
id: text('id').primaryKey(), // UUID
|
||||
documentId: text('document_id').notNull()
|
||||
.references(() => documents.id, { onDelete: 'cascade' }),
|
||||
repositoryId: text('repository_id').notNull()
|
||||
.references(() => repositories.id, { onDelete: 'cascade' }),
|
||||
versionId: text('version_id')
|
||||
.references(() => repositoryVersions.id, { onDelete: 'cascade' }),
|
||||
type: text('type', { enum: ['code', 'info'] }).notNull(),
|
||||
title: text('title'),
|
||||
content: text('content').notNull(), // searchable text / code
|
||||
language: text('language'),
|
||||
breadcrumb: text('breadcrumb'), // e.g. "Installation > Getting Started"
|
||||
tokenCount: integer('token_count').default(0),
|
||||
createdAt: integer('created_at', { mode: 'timestamp' }).notNull(),
|
||||
});
|
||||
```
|
||||
|
||||
### Table: `snippet_embeddings`
|
||||
|
||||
Stores vector embeddings separately to keep snippets table lean.
|
||||
|
||||
```typescript
|
||||
export const snippetEmbeddings = sqliteTable('snippet_embeddings', {
|
||||
snippetId: text('snippet_id').primaryKey()
|
||||
.references(() => snippets.id, { onDelete: 'cascade' }),
|
||||
model: text('model').notNull(), // embedding model identifier
|
||||
dimensions: integer('dimensions').notNull(),
|
||||
embedding: blob('embedding').notNull(), // Float32Array as binary blob
|
||||
createdAt: integer('created_at', { mode: 'timestamp' }).notNull(),
|
||||
});
|
||||
```
|
||||
|
||||
### Table: `indexing_jobs`
|
||||
|
||||
Tracks asynchronous indexing operations.
|
||||
|
||||
```typescript
|
||||
export const indexingJobs = sqliteTable('indexing_jobs', {
|
||||
id: text('id').primaryKey(), // UUID
|
||||
repositoryId: text('repository_id').notNull()
|
||||
.references(() => repositories.id, { onDelete: 'cascade' }),
|
||||
versionId: text('version_id'),
|
||||
status: text('status', {
|
||||
enum: ['queued', 'running', 'done', 'failed']
|
||||
}).notNull().default('queued'),
|
||||
progress: integer('progress').default(0), // 0–100
|
||||
totalFiles: integer('total_files').default(0),
|
||||
processedFiles: integer('processed_files').default(0),
|
||||
error: text('error'),
|
||||
startedAt: integer('started_at', { mode: 'timestamp' }),
|
||||
completedAt: integer('completed_at', { mode: 'timestamp' }),
|
||||
createdAt: integer('created_at', { mode: 'timestamp' }).notNull(),
|
||||
});
|
||||
```
|
||||
|
||||
### Table: `repository_configs`
|
||||
|
||||
Stores parsed `trueref.json` / `context7.json` configuration.
|
||||
|
||||
```typescript
|
||||
export const repositoryConfigs = sqliteTable('repository_configs', {
|
||||
repositoryId: text('repository_id').primaryKey()
|
||||
.references(() => repositories.id, { onDelete: 'cascade' }),
|
||||
projectTitle: text('project_title'),
|
||||
description: text('description'),
|
||||
folders: text('folders', { mode: 'json' }).$type<string[]>(),
|
||||
excludeFolders: text('exclude_folders', { mode: 'json' }).$type<string[]>(),
|
||||
excludeFiles: text('exclude_files', { mode: 'json' }).$type<string[]>(),
|
||||
rules: text('rules', { mode: 'json' }).$type<string[]>(),
|
||||
previousVersions: text('previous_versions', { mode: 'json' })
|
||||
.$type<{ tag: string; title: string }[]>(),
|
||||
updatedAt: integer('updated_at', { mode: 'timestamp' }).notNull(),
|
||||
});
|
||||
```
|
||||
|
||||
### Table: `settings`
|
||||
|
||||
Key-value store for global application settings.
|
||||
|
||||
```typescript
|
||||
export const settings = sqliteTable('settings', {
|
||||
key: text('key').primaryKey(),
|
||||
value: text('value', { mode: 'json' }),
|
||||
updatedAt: integer('updated_at', { mode: 'timestamp' }).notNull(),
|
||||
});
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## FTS5 Virtual Tables
|
||||
|
||||
Full-text search indexes created via raw SQL (not Drizzle, which doesn't support FTS5 DDL):
|
||||
|
||||
```sql
|
||||
-- Content-based FTS5 table pointing to snippets
|
||||
CREATE VIRTUAL TABLE IF NOT EXISTS snippets_fts USING fts5(
|
||||
content,
|
||||
title,
|
||||
breadcrumb,
|
||||
content='snippets',
|
||||
content_rowid='rowid',
|
||||
tokenize='porter unicode61'
|
||||
);
|
||||
|
||||
-- Triggers to keep FTS in sync
|
||||
CREATE TRIGGER snippets_ai AFTER INSERT ON snippets BEGIN
|
||||
INSERT INTO snippets_fts(rowid, content, title, breadcrumb)
|
||||
VALUES (new.rowid, new.content, new.title, new.breadcrumb);
|
||||
END;
|
||||
|
||||
CREATE TRIGGER snippets_ad AFTER DELETE ON snippets BEGIN
|
||||
INSERT INTO snippets_fts(snippets_fts, rowid, content, title, breadcrumb)
|
||||
VALUES ('delete', old.rowid, old.content, old.title, old.breadcrumb);
|
||||
END;
|
||||
|
||||
CREATE TRIGGER snippets_au AFTER UPDATE ON snippets BEGIN
|
||||
INSERT INTO snippets_fts(snippets_fts, rowid, content, title, breadcrumb)
|
||||
VALUES ('delete', old.rowid, old.content, old.title, old.breadcrumb);
|
||||
INSERT INTO snippets_fts(rowid, content, title, breadcrumb)
|
||||
VALUES (new.rowid, new.content, new.title, new.breadcrumb);
|
||||
END;
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## TypeScript Types
|
||||
|
||||
Export inferred types alongside the schema:
|
||||
|
||||
```typescript
|
||||
export type Repository = typeof repositories.$inferSelect;
|
||||
export type NewRepository = typeof repositories.$inferInsert;
|
||||
export type Document = typeof documents.$inferSelect;
|
||||
export type NewDocument = typeof documents.$inferInsert;
|
||||
export type Snippet = typeof snippets.$inferSelect;
|
||||
export type NewSnippet = typeof snippets.$inferInsert;
|
||||
export type IndexingJob = typeof indexingJobs.$inferSelect;
|
||||
export type NewIndexingJob = typeof indexingJobs.$inferInsert;
|
||||
export type RepositoryConfig = typeof repositoryConfigs.$inferSelect;
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Implementation Notes
|
||||
|
||||
- Use `crypto.randomUUID()` for all UUID primary keys.
|
||||
- `trustScore` is computed from: stars (normalized), snippet count, successful indexing history.
|
||||
- `benchmarkScore` is reserved for future quality metrics; default to 0.
|
||||
- The `githubToken` field should be encrypted at rest in production; for v1 store as plaintext with a TODO comment.
|
||||
- FTS5 triggers must be created in the initial migration SQL file alongside the Drizzle-generated schema.
|
||||
- Database initialization should happen in `src/lib/server/db/index.ts`, running migrations on startup.
|
||||
|
||||
---
|
||||
|
||||
## Files to Create/Modify
|
||||
|
||||
- `src/lib/server/db/schema.ts` — complete schema definition
|
||||
- `src/lib/server/db/index.ts` — database connection + migration runner
|
||||
- `src/lib/server/db/migrations/` — generated migration files
|
||||
- `src/lib/server/db/fts.sql` — raw SQL for FTS5 virtual tables and triggers
|
||||
- `src/lib/types.ts` — shared domain types
|
||||
252
docs/features/TRUEREF-0002.md
Normal file
252
docs/features/TRUEREF-0002.md
Normal file
@@ -0,0 +1,252 @@
|
||||
# TRUEREF-0002 — Repository Management Service & REST API
|
||||
|
||||
**Priority:** P0
|
||||
**Status:** Pending
|
||||
**Depends On:** TRUEREF-0001
|
||||
**Blocks:** TRUEREF-0009, TRUEREF-0010, TRUEREF-0015
|
||||
|
||||
---
|
||||
|
||||
## Overview
|
||||
|
||||
Implement the core `RepositoryService` that handles CRUD operations for repositories, and expose those operations via SvelteKit API routes. This feature establishes the management plane for all library sources.
|
||||
|
||||
---
|
||||
|
||||
## Acceptance Criteria
|
||||
|
||||
- [ ] `RepositoryService` class with full CRUD operations
|
||||
- [ ] `GET /api/v1/libs` — list all repositories with metadata
|
||||
- [ ] `POST /api/v1/libs` — add a new repository (GitHub URL or local path)
|
||||
- [ ] `GET /api/v1/libs/:id` — get single repository details
|
||||
- [ ] `PATCH /api/v1/libs/:id` — update repository metadata
|
||||
- [ ] `DELETE /api/v1/libs/:id` — delete repository and all associated data
|
||||
- [ ] `POST /api/v1/libs/:id/index` — trigger indexing job (queues job, returns job ID)
|
||||
- [ ] Input validation with descriptive error messages
|
||||
- [ ] All endpoints return JSON with consistent error shape
|
||||
- [ ] Unit tests for `RepositoryService` covering all operations
|
||||
|
||||
---
|
||||
|
||||
## Repository ID Generation
|
||||
|
||||
GitHub repositories:
|
||||
- Input URL: `https://github.com/facebook/react` or `github.com/facebook/react`
|
||||
- Generated ID: `/facebook/react`
|
||||
|
||||
Local repositories:
|
||||
- Input path: `/home/user/projects/my-sdk`
|
||||
- Generated ID: `/local/my-sdk` (basename of path, slugified)
|
||||
- Collision resolution: append `-2`, `-3`, etc.
|
||||
|
||||
Version-specific IDs: `/facebook/react/v18.3.0`
|
||||
|
||||
---
|
||||
|
||||
## Service Interface
|
||||
|
||||
```typescript
|
||||
// src/lib/server/services/repository.service.ts
|
||||
|
||||
export interface AddRepositoryInput {
|
||||
source: 'github' | 'local';
|
||||
sourceUrl: string; // GitHub URL or absolute local path
|
||||
title?: string; // override auto-detected title
|
||||
description?: string;
|
||||
branch?: string; // GitHub: default branch; Local: n/a
|
||||
githubToken?: string; // for private GitHub repos
|
||||
}
|
||||
|
||||
export interface UpdateRepositoryInput {
|
||||
title?: string;
|
||||
description?: string;
|
||||
branch?: string;
|
||||
githubToken?: string;
|
||||
}
|
||||
|
||||
export class RepositoryService {
|
||||
constructor(private db: BetterSQLite3.Database) {}
|
||||
|
||||
async list(options?: {
|
||||
state?: Repository['state'];
|
||||
limit?: number;
|
||||
offset?: number;
|
||||
}): Promise<Repository[]>
|
||||
|
||||
async get(id: string): Promise<Repository | null>
|
||||
|
||||
async add(input: AddRepositoryInput): Promise<Repository>
|
||||
|
||||
async update(id: string, input: UpdateRepositoryInput): Promise<Repository>
|
||||
|
||||
async remove(id: string): Promise<void>
|
||||
|
||||
async getStats(id: string): Promise<{
|
||||
totalSnippets: number;
|
||||
totalTokens: number;
|
||||
totalDocuments: number;
|
||||
lastIndexedAt: Date | null;
|
||||
}>
|
||||
}
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## API Route Specifications
|
||||
|
||||
### `GET /api/v1/libs`
|
||||
|
||||
Query parameters:
|
||||
- `state` (optional): filter by state (`pending`, `indexed`, `error`, etc.)
|
||||
- `limit` (optional, default 50): max results
|
||||
- `offset` (optional, default 0): pagination offset
|
||||
|
||||
Response `200`:
|
||||
```json
|
||||
{
|
||||
"libraries": [
|
||||
{
|
||||
"id": "/facebook/react",
|
||||
"title": "React",
|
||||
"description": "...",
|
||||
"source": "github",
|
||||
"state": "indexed",
|
||||
"totalSnippets": 1234,
|
||||
"totalTokens": 98000,
|
||||
"trustScore": 8.5,
|
||||
"stars": 228000,
|
||||
"lastIndexedAt": "2026-03-22T10:00:00Z",
|
||||
"versions": ["v18.3.0", "v17.0.2"]
|
||||
}
|
||||
],
|
||||
"total": 12,
|
||||
"limit": 50,
|
||||
"offset": 0
|
||||
}
|
||||
```
|
||||
|
||||
### `POST /api/v1/libs`
|
||||
|
||||
Request body:
|
||||
```json
|
||||
{
|
||||
"source": "github",
|
||||
"sourceUrl": "https://github.com/facebook/react",
|
||||
"branch": "main",
|
||||
"githubToken": "ghp_...",
|
||||
"autoIndex": true
|
||||
}
|
||||
```
|
||||
|
||||
Response `201`:
|
||||
```json
|
||||
{
|
||||
"library": { ...Repository },
|
||||
"job": { "id": "uuid", "status": "queued" }
|
||||
}
|
||||
```
|
||||
|
||||
`autoIndex: true` (default) immediately queues an indexing job.
|
||||
|
||||
Response `409` if repository already exists:
|
||||
```json
|
||||
{ "error": "Repository /facebook/react already exists" }
|
||||
```
|
||||
|
||||
### `GET /api/v1/libs/:id`
|
||||
|
||||
`:id` must be URL-encoded (e.g., `%2Ffacebook%2Freact` for `/facebook/react`).
|
||||
|
||||
Response `200`: single `Repository` object with versions array.
|
||||
Response `404`: `{ "error": "Repository not found" }`
|
||||
|
||||
### `PATCH /api/v1/libs/:id`
|
||||
|
||||
Request body: partial `UpdateRepositoryInput`.
|
||||
Response `200`: updated `Repository`.
|
||||
|
||||
### `DELETE /api/v1/libs/:id`
|
||||
|
||||
Cascades: deletes all documents, snippets, embeddings, jobs for this repository.
|
||||
Response `204`: no body.
|
||||
Response `404`: not found.
|
||||
|
||||
### `POST /api/v1/libs/:id/index`
|
||||
|
||||
Triggers a new indexing job. If a job is already running for this repo, returns the existing job.
|
||||
|
||||
Request body (optional):
|
||||
```json
|
||||
{ "version": "v18.3.0" }
|
||||
```
|
||||
|
||||
Response `202`:
|
||||
```json
|
||||
{
|
||||
"job": {
|
||||
"id": "uuid",
|
||||
"repositoryId": "/facebook/react",
|
||||
"status": "queued",
|
||||
"progress": 0,
|
||||
"createdAt": "2026-03-22T10:00:00Z"
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Error Response Shape
|
||||
|
||||
All error responses follow:
|
||||
```json
|
||||
{
|
||||
"error": "Human-readable message",
|
||||
"code": "MACHINE_READABLE_CODE",
|
||||
"details": {}
|
||||
}
|
||||
```
|
||||
|
||||
Error codes:
|
||||
- `NOT_FOUND`
|
||||
- `ALREADY_EXISTS`
|
||||
- `INVALID_INPUT`
|
||||
- `INVALID_URL`
|
||||
- `INDEXING_IN_PROGRESS`
|
||||
|
||||
---
|
||||
|
||||
## ID Resolution Logic
|
||||
|
||||
```typescript
|
||||
function resolveGitHubId(url: string): string {
|
||||
// Parse owner/repo from URL variants:
|
||||
// https://github.com/facebook/react
|
||||
// https://github.com/facebook/react.git
|
||||
// github.com/facebook/react
|
||||
const match = url.match(/github\.com\/([^/]+)\/([^/\s.]+)/);
|
||||
if (!match) throw new Error('Invalid GitHub URL');
|
||||
return `/${match[1]}/${match[2]}`;
|
||||
}
|
||||
|
||||
function resolveLocalId(path: string, existingIds: string[]): string {
|
||||
const base = slugify(path.split('/').at(-1)!);
|
||||
let id = `/local/${base}`;
|
||||
let counter = 2;
|
||||
while (existingIds.includes(id)) {
|
||||
id = `/local/${base}-${counter++}`;
|
||||
}
|
||||
return id;
|
||||
}
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Files to Create
|
||||
|
||||
- `src/lib/server/services/repository.service.ts`
|
||||
- `src/routes/api/v1/libs/+server.ts` — GET (list), POST (add)
|
||||
- `src/routes/api/v1/libs/[id]/+server.ts` — GET, PATCH, DELETE
|
||||
- `src/routes/api/v1/libs/[id]/index/+server.ts` — POST (trigger indexing)
|
||||
- `src/lib/server/utils/id-resolver.ts` — ID generation helpers
|
||||
- `src/lib/server/utils/validation.ts` — input validators
|
||||
- `src/lib/server/services/repository.service.test.ts`
|
||||
218
docs/features/TRUEREF-0003.md
Normal file
218
docs/features/TRUEREF-0003.md
Normal file
@@ -0,0 +1,218 @@
|
||||
# TRUEREF-0003 — GitHub Repository Crawler
|
||||
|
||||
**Priority:** P0
|
||||
**Status:** Pending
|
||||
**Depends On:** TRUEREF-0001
|
||||
**Blocks:** TRUEREF-0009, TRUEREF-0013
|
||||
|
||||
---
|
||||
|
||||
## Overview
|
||||
|
||||
Implement the GitHub crawler that fetches repository file trees and downloads file contents using the GitHub REST API. The crawler respects rate limits, supports private repos via PAT, and applies include/exclude filtering from `trueref.json` configuration.
|
||||
|
||||
---
|
||||
|
||||
## Acceptance Criteria
|
||||
|
||||
- [ ] Fetch complete file tree for a GitHub repo (default branch or specific tag/branch)
|
||||
- [ ] Filter files by extension (only index relevant file types)
|
||||
- [ ] Apply `trueref.json` folder/file include/exclude rules
|
||||
- [ ] Download file contents in parallel (with concurrency limit)
|
||||
- [ ] Handle GitHub API rate limiting (respect `X-RateLimit-*` headers, exponential backoff)
|
||||
- [ ] Support private repositories via GitHub Personal Access Token (PAT)
|
||||
- [ ] Return structured `CrawledFile` objects for each fetched file
|
||||
- [ ] Report progress via callback (for job tracking)
|
||||
- [ ] Unit tests with mocked GitHub API responses
|
||||
|
||||
---
|
||||
|
||||
## Indexable File Types
|
||||
|
||||
The crawler only downloads files with these extensions:
|
||||
|
||||
```typescript
|
||||
const INDEXABLE_EXTENSIONS = new Set([
|
||||
// Documentation
|
||||
'.md', '.mdx', '.txt', '.rst',
|
||||
// Code
|
||||
'.ts', '.tsx', '.js', '.jsx',
|
||||
'.py', '.rb', '.go', '.rs', '.java', '.cs', '.cpp', '.c', '.h',
|
||||
'.swift', '.kt', '.php', '.scala', '.clj', '.ex', '.exs',
|
||||
'.sh', '.bash', '.zsh', '.fish',
|
||||
// Config / data
|
||||
'.json', '.yaml', '.yml', '.toml',
|
||||
// Web
|
||||
'.html', '.css', '.svelte', '.vue',
|
||||
]);
|
||||
|
||||
const MAX_FILE_SIZE_BYTES = 500_000; // 500 KB — skip large generated files
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Data Types
|
||||
|
||||
```typescript
|
||||
export interface CrawledFile {
|
||||
path: string; // relative path within repo, e.g. "src/index.ts"
|
||||
content: string; // UTF-8 file content
|
||||
size: number; // bytes
|
||||
sha: string; // GitHub blob SHA (used as checksum)
|
||||
language: string; // detected from extension
|
||||
}
|
||||
|
||||
export interface CrawlResult {
|
||||
files: CrawledFile[];
|
||||
totalFiles: number; // files matching filters
|
||||
skippedFiles: number; // filtered out or too large
|
||||
branch: string; // branch/tag that was crawled
|
||||
commitSha: string; // HEAD commit SHA
|
||||
}
|
||||
|
||||
export interface CrawlOptions {
|
||||
owner: string;
|
||||
repo: string;
|
||||
ref?: string; // branch, tag, or commit SHA; defaults to repo default branch
|
||||
token?: string; // GitHub PAT for private repos
|
||||
config?: RepoConfig; // parsed trueref.json
|
||||
onProgress?: (processed: number, total: number) => void;
|
||||
}
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## GitHub API Usage
|
||||
|
||||
### Step 1: Get default branch (if ref not specified)
|
||||
```
|
||||
GET https://api.github.com/repos/{owner}/{repo}
|
||||
→ { default_branch: "main", stargazers_count: 12345 }
|
||||
```
|
||||
|
||||
### Step 2: Fetch file tree (recursive)
|
||||
```
|
||||
GET https://api.github.com/repos/{owner}/{repo}/git/trees/{ref}?recursive=1
|
||||
→ {
|
||||
tree: [
|
||||
{ path: "src/index.ts", type: "blob", size: 1234, sha: "abc123", url: "..." },
|
||||
...
|
||||
],
|
||||
truncated: false
|
||||
}
|
||||
```
|
||||
|
||||
If `truncated: true`, the tree exceeded GitHub's limits (100,000 entries or ~7 MB of response data). The Git Trees API offers no pagination for this case — fall back to non-recursive, directory-by-directory traversal (fetch each subtree individually), consistent with the 422 handling in the Error Handling table below.
|
||||
|
||||
### Step 3: Download file contents (parallel)
|
||||
```
|
||||
GET https://api.github.com/repos/{owner}/{repo}/contents/{path}?ref={ref}
|
||||
→ { content: "<base64>", encoding: "base64", size: 1234, sha: "abc123" }
|
||||
```
|
||||
|
||||
Alternative for large repos: use raw content URL:
|
||||
```
|
||||
GET https://raw.githubusercontent.com/{owner}/{repo}/{ref}/{path}
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Filtering Logic
|
||||
|
||||
```typescript
|
||||
function shouldIndexFile(
|
||||
filePath: string,
|
||||
fileSize: number,
|
||||
config?: RepoConfig
|
||||
): boolean {
|
||||
const ext = path.extname(filePath).toLowerCase();
|
||||
|
||||
// 1. Must have indexable extension
|
||||
if (!INDEXABLE_EXTENSIONS.has(ext)) return false;
|
||||
|
||||
// 2. Must not exceed size limit
|
||||
if (fileSize > MAX_FILE_SIZE_BYTES) return false;
|
||||
|
||||
// 3. Apply config excludeFiles (exact filename match)
|
||||
if (config?.excludeFiles?.includes(path.basename(filePath))) return false;
|
||||
|
||||
// 4. Apply config excludeFolders (regex or prefix match)
|
||||
if (config?.excludeFolders?.some(folder =>
|
||||
filePath.startsWith(folder) || new RegExp(folder).test(filePath)
|
||||
)) return false;
|
||||
|
||||
// 5. Apply config folders allowlist (if specified, only index those paths)
|
||||
if (config?.folders?.length) {
|
||||
const inAllowedFolder = config.folders.some(folder =>
|
||||
filePath.startsWith(folder) || new RegExp(folder).test(filePath)
|
||||
);
|
||||
if (!inAllowedFolder) return false;
|
||||
}
|
||||
|
||||
// 6. Default excludes: node_modules, .git, dist, build, coverage
|
||||
const defaultExcludes = [
|
||||
'node_modules/', '.git/', 'dist/', 'build/', 'coverage/',
|
||||
'.next/', '__pycache__/', 'vendor/', 'target/', '.cache/',
|
||||
];
|
||||
if (defaultExcludes.some(ex => filePath.startsWith(ex))) return false;
|
||||
|
||||
return true;
|
||||
}
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Rate Limiting
|
||||
|
||||
```typescript
|
||||
class GitHubRateLimiter {
|
||||
private remaining = 5000;
|
||||
private resetAt = Date.now();
|
||||
|
||||
updateFromHeaders(headers: Headers): void {
|
||||
this.remaining = parseInt(headers.get('X-RateLimit-Remaining') ?? '5000');
|
||||
this.resetAt = parseInt(headers.get('X-RateLimit-Reset') ?? '0') * 1000;
|
||||
}
|
||||
|
||||
async waitIfNeeded(): Promise<void> {
|
||||
if (this.remaining <= 10) {
|
||||
const waitMs = Math.max(0, this.resetAt - Date.now()) + 1000;
|
||||
await sleep(waitMs);
|
||||
}
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
Requests are made with a concurrency limit of 10 parallel downloads using a semaphore/pool pattern.
|
||||
|
||||
---
|
||||
|
||||
## Error Handling
|
||||
|
||||
| Scenario | Behavior |
|
||||
|----------|---------|
|
||||
| 404 Not Found | Throw `RepositoryNotFoundError` |
|
||||
| 401 Unauthorized | Throw `AuthenticationError` (invalid or missing token) |
|
||||
| 403 Forbidden | If `X-RateLimit-Remaining: 0`, wait and retry; else throw `PermissionError` |
|
||||
| 422 Unprocessable | Tree too large; switch to directory-by-directory traversal |
|
||||
| Network error | Retry up to 3 times with exponential backoff |
|
||||
| File content decode error | Skip file, log warning |
|
||||
|
||||
---
|
||||
|
||||
## Implementation Notes
|
||||
|
||||
- Prefer `raw.githubusercontent.com` for file downloads — it is faster and does not count against the REST API rate limit at all (raw downloads are throttled separately, and far less aggressively, than `api.github.com` requests).
|
||||
- Cache the file tree in memory during a single crawl run to avoid redundant requests.
|
||||
- The `sha` field from the tree response is the git blob SHA — a SHA-1 over the git object header plus content, not a plain hash of the file bytes. It changes whenever the content changes, so it is suitable as the document checksum for GitHub sources without recomputing a hash locally. NOTE(review): this diverges from the schema comment in TRUEREF-0001 ("SHA-256 of file content"); that is acceptable only if the checksum is compared purely for equality — confirm, or relax the schema comment to "content checksum".
|
||||
- Detect `trueref.json` / `context7.json` in the tree before downloading other files, so filtering rules apply to the rest of the crawl.
|
||||
|
||||
---
|
||||
|
||||
## Files to Create
|
||||
|
||||
- `src/lib/server/crawler/github.crawler.ts`
|
||||
- `src/lib/server/crawler/rate-limiter.ts`
|
||||
- `src/lib/server/crawler/file-filter.ts`
|
||||
- `src/lib/server/crawler/types.ts`
|
||||
- `src/lib/server/crawler/github.crawler.test.ts`
|
||||
130
docs/features/TRUEREF-0004.md
Normal file
130
docs/features/TRUEREF-0004.md
Normal file
@@ -0,0 +1,130 @@
|
||||
# TRUEREF-0004 — Local Filesystem Crawler
|
||||
|
||||
**Priority:** P1
|
||||
**Status:** Pending
|
||||
**Depends On:** TRUEREF-0001, TRUEREF-0003 (shares types and filter logic)
|
||||
**Blocks:** TRUEREF-0009
|
||||
|
||||
---
|
||||
|
||||
## Overview
|
||||
|
||||
Implement a local filesystem crawler that indexes repositories stored on disk. Uses the same file filtering logic as the GitHub crawler but reads from the local filesystem using Node.js `fs` APIs. Useful for private internal codebases, monorepos on disk, and offline development.
|
||||
|
||||
---
|
||||
|
||||
## Acceptance Criteria
|
||||
|
||||
- [ ] Walk a directory tree and enumerate all files
|
||||
- [ ] Apply the same extension and size filters as the GitHub crawler
|
||||
- [ ] Apply `trueref.json` include/exclude rules
|
||||
- [ ] Read file contents as UTF-8 strings
|
||||
- [ ] Compute SHA-256 checksum per file for change detection
|
||||
- [ ] Detect `trueref.json` / `context7.json` at the repo root before filtering other files
|
||||
- [ ] Report progress via callback
|
||||
- [ ] Skip symlinks, special files (devices, sockets, etc.)
|
||||
- [ ] Unit tests with temporary directory fixtures
|
||||
|
||||
---
|
||||
|
||||
## Data Types
|
||||
|
||||
Reuses `CrawledFile` and `CrawlResult` from TRUEREF-0003 crawler types:
|
||||
|
||||
```typescript
|
||||
export interface LocalCrawlOptions {
|
||||
rootPath: string; // absolute path to repository root
|
||||
config?: RepoConfig; // parsed trueref.json
|
||||
onProgress?: (processed: number, total: number) => void;
|
||||
}
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Implementation
|
||||
|
||||
```typescript
|
||||
export class LocalCrawler {
|
||||
async crawl(options: LocalCrawlOptions): Promise<CrawlResult> {
|
||||
// 1. Enumerate all files recursively
|
||||
const allFiles = await this.walkDirectory(options.rootPath);
|
||||
|
||||
// 2. Look for trueref.json / context7.json first
|
||||
const configFile = allFiles.find(f =>
|
||||
f === 'trueref.json' || f === 'context7.json'
|
||||
);
|
||||
let config = options.config;
|
||||
if (configFile && !config) {
|
||||
config = await this.parseConfigFile(
|
||||
path.join(options.rootPath, configFile)
|
||||
);
|
||||
}
|
||||
|
||||
// 3. Filter files
|
||||
const filteredFiles = allFiles.filter(relPath => {
|
||||
const stat = statCache.get(relPath);
|
||||
return shouldIndexFile(relPath, stat.size, config);
|
||||
});
|
||||
|
||||
// 4. Read and return file contents
|
||||
const crawledFiles: CrawledFile[] = [];
|
||||
for (const [i, relPath] of filteredFiles.entries()) {
|
||||
const absPath = path.join(options.rootPath, relPath);
|
||||
const content = await fs.readFile(absPath, 'utf-8');
|
||||
const sha = computeSHA256(content);
|
||||
crawledFiles.push({
|
||||
path: relPath,
|
||||
content,
|
||||
size: Buffer.byteLength(content, 'utf-8'),
|
||||
sha,
|
||||
language: detectLanguage(relPath),
|
||||
});
|
||||
options.onProgress?.(i + 1, filteredFiles.length);
|
||||
}
|
||||
|
||||
return {
|
||||
files: crawledFiles,
|
||||
totalFiles: filteredFiles.length,
|
||||
skippedFiles: allFiles.length - filteredFiles.length,
|
||||
branch: 'local',
|
||||
commitSha: computeSHA256(crawledFiles.map(f => f.sha).join('')),
|
||||
};
|
||||
}
|
||||
|
||||
private async walkDirectory(dir: string, rel = ''): Promise<string[]> {
|
||||
const entries = await fs.readdir(dir, { withFileTypes: true });
|
||||
const files: string[] = [];
|
||||
for (const entry of entries) {
|
||||
if (!entry.isFile() && !entry.isDirectory()) continue; // skip symlinks, devices
|
||||
const relPath = rel ? `${rel}/${entry.name}` : entry.name;
|
||||
if (entry.isDirectory()) {
|
||||
files.push(...await this.walkDirectory(
|
||||
path.join(dir, entry.name), relPath
|
||||
));
|
||||
} else {
|
||||
files.push(relPath);
|
||||
}
|
||||
}
|
||||
return files;
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Checksum Computation
|
||||
|
||||
```typescript
|
||||
import { createHash } from 'crypto';
|
||||
|
||||
function computeSHA256(content: string): string {
|
||||
return createHash('sha256').update(content, 'utf-8').digest('hex');
|
||||
}
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Files to Create
|
||||
|
||||
- `src/lib/server/crawler/local.crawler.ts`
|
||||
- `src/lib/server/crawler/local.crawler.test.ts`
|
||||
297
docs/features/TRUEREF-0005.md
Normal file
297
docs/features/TRUEREF-0005.md
Normal file
@@ -0,0 +1,297 @@
|
||||
# TRUEREF-0005 — Document Parser & Chunker
|
||||
|
||||
**Priority:** P0
|
||||
**Status:** Pending
|
||||
**Depends On:** TRUEREF-0001
|
||||
**Blocks:** TRUEREF-0006, TRUEREF-0007, TRUEREF-0009
|
||||
|
||||
---
|
||||
|
||||
## Overview
|
||||
|
||||
Implement the document parsing and chunking pipeline that transforms raw file contents (from the crawlers) into structured, searchable `Snippet` records. This is the core intellectual layer of TrueRef — the quality of the chunks directly determines the quality of documentation retrieval.
|
||||
|
||||
---
|
||||
|
||||
## Acceptance Criteria
|
||||
|
||||
- [ ] Parse Markdown files into heading-based sections (info snippets)
|
||||
- [ ] Extract fenced code blocks from Markdown as separate code snippets
|
||||
- [ ] Parse standalone code files into function/class-level chunks
|
||||
- [ ] Respect token limits per chunk (max 512 tokens, with 50-token overlap)
|
||||
- [ ] Assign breadcrumb paths based on heading hierarchy (Markdown) or file path (code)
|
||||
- [ ] Detect programming language from file extension
|
||||
- [ ] Produce both `code` and `info` type snippets
|
||||
- [ ] Calculate approximate token counts using character-based estimation
|
||||
- [ ] Skip empty or trivially short content (< 20 chars)
|
||||
- [ ] Unit tests with representative samples of each file type
|
||||
|
||||
---
|
||||
|
||||
## Supported File Types
|
||||
|
||||
| Extension | Parser Strategy |
|
||||
|-----------|----------------|
|
||||
| `.md`, `.mdx` | Heading-based section splitting + code block extraction |
|
||||
| `.txt`, `.rst` | Paragraph-based splitting |
|
||||
| `.ts`, `.tsx`, `.js`, `.jsx` | AST-free: function/class boundary detection via regex |
|
||||
| `.py` | `def`/`class` boundary detection |
|
||||
| `.go` | `func`/`type` boundary detection |
|
||||
| `.rs` | `fn`/`impl`/`struct` boundary detection |
|
||||
| `.java`, `.cs`, `.kt`, `.swift` | Class/method boundary detection |
|
||||
| `.rb` | `def`/`class` boundary detection |
|
||||
| `.json`, `.yaml`, `.yml`, `.toml` | Structural chunking (top-level keys) |
|
||||
| `.html`, `.svelte`, `.vue` | Text content extraction + script block splitting |
|
||||
| Other code | Line-count-based sliding window (200 lines per chunk) |
|
||||
|
||||
---
|
||||
|
||||
## Token Counting
|
||||
|
||||
Use a simple character-based approximation (no tokenizer library needed for v1):
|
||||
|
||||
```typescript
|
||||
function estimateTokens(text: string): number {
|
||||
// Empirically: ~4 chars per token for English prose
|
||||
// ~3 chars per token for code (more symbols)
|
||||
return Math.ceil(text.length / 3.5);
|
||||
}
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Markdown Parser
|
||||
|
||||
The Markdown parser is the most important one, since the majority of documentation is written in Markdown.
|
||||
|
||||
### Algorithm
|
||||
|
||||
1. Split the file into lines.
|
||||
2. Track current heading stack (H1 > H2 > H3 > H4).
|
||||
3. When a new heading is encountered, emit the accumulated content as an info snippet.
|
||||
4. Fenced code blocks (` ``` `) within sections are extracted as separate code snippets.
|
||||
5. The breadcrumb is built from the heading stack: `"Getting Started > Installation"`.
|
||||
|
||||
```typescript
|
||||
interface MarkdownSection {
|
||||
headings: string[]; // heading stack at this point
|
||||
content: string; // text content (sans code blocks)
|
||||
codeBlocks: { language: string; code: string }[];
|
||||
}
|
||||
|
||||
function parseMarkdown(content: string, filePath: string): Snippet[] {
|
||||
const sections = splitIntoSections(content);
|
||||
const snippets: Snippet[] = [];
|
||||
|
||||
for (const section of sections) {
|
||||
const breadcrumb = section.headings.join(' > ');
|
||||
const title = section.headings.at(-1) ?? path.basename(filePath);
|
||||
|
||||
// Emit info snippet for text content
|
||||
if (section.content.trim().length >= 20) {
|
||||
const chunks = chunkText(section.content, MAX_TOKENS, OVERLAP_TOKENS);
|
||||
for (const chunk of chunks) {
|
||||
snippets.push({
|
||||
type: 'info',
|
||||
title,
|
||||
content: chunk,
|
||||
breadcrumb,
|
||||
tokenCount: estimateTokens(chunk),
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
// Emit code snippets for each code block
|
||||
for (const block of section.codeBlocks) {
|
||||
if (block.code.trim().length >= 20) {
|
||||
snippets.push({
|
||||
type: 'code',
|
||||
title,
|
||||
content: block.code,
|
||||
language: block.language || detectLanguage(filePath),
|
||||
breadcrumb,
|
||||
tokenCount: estimateTokens(block.code),
|
||||
});
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return snippets;
|
||||
}
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Code File Parser
|
||||
|
||||
For non-Markdown code files, use regex-based function/class boundary detection.
|
||||
|
||||
### Algorithm
|
||||
|
||||
1. Detect language-specific top-level declaration patterns.
|
||||
2. Split the file at those boundaries.
|
||||
3. Each chunk: the declaration line(s) + body up to the next declaration.
|
||||
4. If a chunk exceeds `MAX_TOKENS`, apply sliding window splitting with overlap.
|
||||
|
||||
```typescript
|
||||
const BOUNDARY_PATTERNS: Record<string, RegExp> = {
|
||||
typescript: /^(export\s+)?(async\s+)?(function|class|interface|type|const|let|var)\s+\w+/m,
|
||||
python: /^(async\s+)?(def|class)\s+\w+/m,
|
||||
go: /^(func|type|var|const)\s+\w+/m,
|
||||
rust: /^(pub\s+)?(fn|impl|struct|enum|trait)\s+\w+/m,
|
||||
java: /^(public|private|protected|static).*?(class|interface|enum|void|\w+)\s+\w+\s*[({]/m,
|
||||
};
|
||||
|
||||
function parseCodeFile(
|
||||
content: string,
|
||||
filePath: string,
|
||||
language: string
|
||||
): Snippet[] {
|
||||
const pattern = BOUNDARY_PATTERNS[language];
|
||||
const breadcrumb = filePath;
|
||||
const title = path.basename(filePath);
|
||||
|
||||
if (!pattern) {
|
||||
// Fallback: sliding window
|
||||
return slidingWindowChunks(content, filePath, language);
|
||||
}
|
||||
|
||||
const chunks = splitAtBoundaries(content, pattern);
|
||||
return chunks
|
||||
.filter(chunk => chunk.trim().length >= 20)
|
||||
.flatMap(chunk => {
|
||||
if (estimateTokens(chunk) <= MAX_TOKENS) {
|
||||
return [{
|
||||
type: 'code' as const,
|
||||
title,
|
||||
content: chunk,
|
||||
language,
|
||||
breadcrumb,
|
||||
tokenCount: estimateTokens(chunk),
|
||||
}];
|
||||
}
|
||||
return slidingWindowChunks(chunk, filePath, language);
|
||||
});
|
||||
}
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Chunking Constants
|
||||
|
||||
```typescript
|
||||
const MAX_TOKENS = 512;
|
||||
const OVERLAP_TOKENS = 50;
|
||||
const MIN_CONTENT_LENGTH = 20; // characters
|
||||
```
|
||||
|
||||
### Sliding Window Chunker
|
||||
|
||||
```typescript
|
||||
function chunkText(
|
||||
text: string,
|
||||
maxTokens: number,
|
||||
overlapTokens: number
|
||||
): string[] {
|
||||
const words = text.split(/\s+/);
|
||||
const wordsPerToken = 0.75; // ~0.75 words per token
|
||||
const maxWords = Math.floor(maxTokens * wordsPerToken);
|
||||
const overlapWords = Math.floor(overlapTokens * wordsPerToken);
|
||||
|
||||
const chunks: string[] = [];
|
||||
let start = 0;
|
||||
|
||||
while (start < words.length) {
|
||||
const end = Math.min(start + maxWords, words.length);
|
||||
chunks.push(words.slice(start, end).join(' '));
|
||||
if (end === words.length) break;
|
||||
start = end - overlapWords;
|
||||
}
|
||||
|
||||
return chunks;
|
||||
}
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Language Detection
|
||||
|
||||
```typescript
|
||||
const LANGUAGE_MAP: Record<string, string> = {
|
||||
'.ts': 'typescript', '.tsx': 'typescript',
|
||||
'.js': 'javascript', '.jsx': 'javascript',
|
||||
'.py': 'python',
|
||||
'.rb': 'ruby',
|
||||
'.go': 'go',
|
||||
'.rs': 'rust',
|
||||
'.java': 'java',
|
||||
'.cs': 'csharp',
|
||||
'.cpp': 'cpp', '.c': 'c', '.h': 'c',
|
||||
'.swift': 'swift',
|
||||
'.kt': 'kotlin',
|
||||
'.php': 'php',
|
||||
'.scala': 'scala',
|
||||
'.sh': 'bash', '.bash': 'bash', '.zsh': 'bash',
|
||||
'.md': 'markdown', '.mdx': 'markdown',
|
||||
'.json': 'json',
|
||||
'.yaml': 'yaml', '.yml': 'yaml',
|
||||
'.toml': 'toml',
|
||||
'.html': 'html',
|
||||
'.css': 'css',
|
||||
'.svelte': 'svelte',
|
||||
'.vue': 'vue',
|
||||
'.sql': 'sql',
|
||||
};
|
||||
|
||||
function detectLanguage(filePath: string): string {
|
||||
const ext = path.extname(filePath).toLowerCase();
|
||||
return LANGUAGE_MAP[ext] ?? 'text';
|
||||
}
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Main Entry Point
|
||||
|
||||
```typescript
|
||||
export interface ParseOptions {
|
||||
repositoryId: string;
|
||||
documentId: string;
|
||||
versionId?: string;
|
||||
}
|
||||
|
||||
export function parseFile(
|
||||
file: CrawledFile,
|
||||
options: ParseOptions
|
||||
): NewSnippet[] {
|
||||
const language = detectLanguage(file.path);
|
||||
let rawSnippets: Omit<NewSnippet, 'id' | 'repositoryId' | 'documentId' | 'versionId' | 'createdAt'>[];
|
||||
|
||||
if (language === 'markdown') {
|
||||
rawSnippets = parseMarkdown(file.content, file.path);
|
||||
} else {
|
||||
rawSnippets = parseCodeFile(file.content, file.path, language);
|
||||
}
|
||||
|
||||
return rawSnippets.map(s => ({
|
||||
...s,
|
||||
id: crypto.randomUUID(),
|
||||
repositoryId: options.repositoryId,
|
||||
documentId: options.documentId,
|
||||
versionId: options.versionId ?? null,
|
||||
createdAt: new Date(),
|
||||
}));
|
||||
}
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Files to Create
|
||||
|
||||
- `src/lib/server/parser/markdown.parser.ts`
|
||||
- `src/lib/server/parser/code.parser.ts`
|
||||
- `src/lib/server/parser/chunker.ts`
|
||||
- `src/lib/server/parser/language.ts`
|
||||
- `src/lib/server/parser/index.ts` — exports `parseFile`
|
||||
- `src/lib/server/parser/markdown.parser.test.ts`
|
||||
- `src/lib/server/parser/code.parser.test.ts`
|
||||
269
docs/features/TRUEREF-0006.md
Normal file
269
docs/features/TRUEREF-0006.md
Normal file
@@ -0,0 +1,269 @@
|
||||
# TRUEREF-0006 — SQLite FTS5 Full-Text Search
|
||||
|
||||
**Priority:** P0
|
||||
**Status:** Pending
|
||||
**Depends On:** TRUEREF-0001, TRUEREF-0005
|
||||
**Blocks:** TRUEREF-0010
|
||||
|
||||
---
|
||||
|
||||
## Overview
|
||||
|
||||
Implement the full-text search engine using SQLite's built-in FTS5 extension. This provides keyword-based BM25 search over all indexed snippets without requiring any external search service. It serves as both the primary search backend (when embeddings are not configured) and the keyword component of the hybrid search engine (TRUEREF-0008).
|
||||
|
||||
---
|
||||
|
||||
## Acceptance Criteria
|
||||
|
||||
- [ ] FTS5 virtual table created and kept in sync via triggers (defined in TRUEREF-0001)
|
||||
- [ ] `SearchService.searchSnippets(query, repositoryId, options)` method implemented
|
||||
- [ ] `SearchService.searchRepositories(libraryName, query)` method implemented
|
||||
- [ ] Results ranked by BM25 relevance score
|
||||
- [ ] Filter by `repositoryId` (required), `type` (optional), `versionId` (optional)
|
||||
- [ ] Limit and offset support for pagination
|
||||
- [ ] Query preprocessing: tokenization, stop-word handling, wildcard expansion
|
||||
- [ ] Library search matches on title, description, and snippet content
|
||||
- [ ] Unit tests with seeded test data
|
||||
|
||||
---
|
||||
|
||||
## Search Service Interface
|
||||
|
||||
```typescript
|
||||
// src/lib/server/search/search.service.ts
|
||||
|
||||
export interface SnippetSearchOptions {
|
||||
repositoryId: string;
|
||||
versionId?: string;
|
||||
type?: 'code' | 'info';
|
||||
limit?: number; // default: 20
|
||||
offset?: number; // default: 0
|
||||
}
|
||||
|
||||
export interface SnippetSearchResult {
|
||||
snippet: Snippet;
|
||||
score: number; // BM25 rank (negative, lower = better)
|
||||
repository: Pick<Repository, 'id' | 'title'>;
|
||||
}
|
||||
|
||||
export interface LibrarySearchOptions {
|
||||
libraryName: string;
|
||||
query?: string; // semantic relevance hint
|
||||
limit?: number; // default: 10
|
||||
}
|
||||
|
||||
export interface LibrarySearchResult {
|
||||
repository: Repository;
|
||||
versions: RepositoryVersion[];
|
||||
score: number; // composite relevance score
|
||||
}
|
||||
|
||||
export class SearchService {
|
||||
constructor(private db: BetterSQLite3.Database) {}
|
||||
|
||||
searchSnippets(
|
||||
query: string,
|
||||
options: SnippetSearchOptions
|
||||
): SnippetSearchResult[]
|
||||
|
||||
searchRepositories(
|
||||
options: LibrarySearchOptions
|
||||
): LibrarySearchResult[]
|
||||
}
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## FTS5 Snippet Search Query
|
||||
|
||||
```sql
|
||||
SELECT
|
||||
s.*,
|
||||
r.id AS repo_id,
|
||||
r.title AS repo_title,
|
||||
bm25(snippets_fts) AS score
|
||||
FROM snippets_fts
|
||||
JOIN snippets s ON s.rowid = snippets_fts.rowid
|
||||
JOIN repositories r ON r.id = s.repository_id
|
||||
WHERE snippets_fts MATCH ?
|
||||
AND s.repository_id = ?
|
||||
[AND s.version_id = ?]
|
||||
[AND s.type = ?]
|
||||
ORDER BY score ASC -- bm25() returns negative values; lower = more relevant
|
||||
LIMIT ? OFFSET ?;
|
||||
```
|
||||
|
||||
The FTS5 MATCH query uses the porter stemmer and unicode61 tokenizer (configured in the virtual table definition).
|
||||
|
||||
---
|
||||
|
||||
## Query Preprocessing
|
||||
|
||||
```typescript
|
||||
function preprocessQuery(raw: string): string {
|
||||
// 1. Trim and normalize whitespace
|
||||
let q = raw.trim().replace(/\s+/g, ' ');
|
||||
|
||||
// 2. Escape FTS5 special characters that aren't intended as operators
|
||||
// Keep: * (prefix), " " (phrase), AND, OR, NOT
|
||||
q = q.replace(/[()]/g, ' ');
|
||||
|
||||
// 3. Add prefix wildcard to last token for "typing as you go" feel
|
||||
const tokens = q.split(' ');
|
||||
const lastToken = tokens.at(-1) ?? '';
|
||||
if (lastToken.length >= 3 && !lastToken.endsWith('*')) {
|
||||
tokens[tokens.length - 1] = lastToken + '*';
|
||||
}
|
||||
|
||||
return tokens.join(' ');
|
||||
}
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Library Search
|
||||
|
||||
Library search operates on the `repositories` table (not FTS5) since it's matching library names and descriptions:
|
||||
|
||||
```typescript
|
||||
searchRepositories(options: LibrarySearchOptions): LibrarySearchResult[] {
|
||||
const { libraryName, query, limit = 10 } = options;
|
||||
|
||||
// Simple LIKE-based search on name and description
|
||||
// Enhanced with scoring:
|
||||
const rows = this.db.prepare(`
|
||||
SELECT r.*,
|
||||
-- Score components
|
||||
CASE WHEN LOWER(r.title) = LOWER(?) THEN 100 ELSE 0 END AS exact_match,
|
||||
CASE WHEN LOWER(r.title) LIKE LOWER(?) THEN 50 ELSE 0 END AS prefix_match,
|
||||
CASE WHEN LOWER(r.description) LIKE LOWER(?) THEN 20 ELSE 0 END AS desc_match,
|
||||
(r.total_snippets / 100.0) AS snippet_score,
|
||||
COALESCE(r.trust_score, 0) * 10 AS trust_component
|
||||
FROM repositories r
|
||||
WHERE r.state = 'indexed'
|
||||
AND (
|
||||
LOWER(r.title) LIKE LOWER(?)
|
||||
OR LOWER(r.id) LIKE LOWER(?)
|
||||
OR LOWER(r.description) LIKE LOWER(?)
|
||||
)
|
||||
ORDER BY (exact_match + prefix_match + desc_match + snippet_score + trust_component) DESC
|
||||
LIMIT ?
|
||||
`).all(
|
||||
libraryName,
|
||||
`${libraryName}%`,
|
||||
`%${libraryName}%`,
|
||||
`%${libraryName}%`,
|
||||
`%${libraryName}%`,
|
||||
`%${libraryName}%`,
|
||||
limit
|
||||
);
|
||||
|
||||
return rows.map(row => ({
|
||||
repository: row as Repository,
|
||||
versions: this.getVersions(row.id),
|
||||
score: row.exact_match + row.prefix_match + row.desc_match +
|
||||
row.snippet_score + row.trust_component,
|
||||
}));
|
||||
}
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Response Formatting
|
||||
|
||||
The search results must be formatted for the REST API and MCP tool responses:
|
||||
|
||||
### Library search response (for `resolve-library-id`):
|
||||
```typescript
|
||||
function formatLibraryResults(results: LibrarySearchResult[]): string {
|
||||
if (results.length === 0) {
|
||||
return 'No libraries found matching your search.';
|
||||
}
|
||||
|
||||
return results.map((r, i) => {
|
||||
const repo = r.repository;
|
||||
const versions = r.versions.map(v => v.tag).join(', ') || 'default branch';
|
||||
return [
|
||||
`${i + 1}. ${repo.title}`,
|
||||
` Library ID: ${repo.id}`,
|
||||
` Description: ${repo.description ?? 'No description'}`,
|
||||
`   Snippets: ${repo.totalSnippets ?? 0} | Trust Score: ${(repo.trustScore ?? 0).toFixed(1)}/10`,
|
||||
` Available Versions: ${versions}`,
|
||||
].join('\n');
|
||||
}).join('\n\n');
|
||||
}
|
||||
```
|
||||
|
||||
### Snippet search response (for `query-docs`):
|
||||
```typescript
|
||||
function formatSnippetResults(
|
||||
results: SnippetSearchResult[],
|
||||
rules?: string[]
|
||||
): string {
|
||||
const parts: string[] = [];
|
||||
|
||||
// Prepend repository rules if present
|
||||
if (rules?.length) {
|
||||
parts.push('## Library Rules\n' + rules.map(r => `- ${r}`).join('\n'));
|
||||
}
|
||||
|
||||
for (const { snippet } of results) {
|
||||
if (snippet.type === 'code') {
|
||||
parts.push([
|
||||
snippet.title ? `### ${snippet.title}` : '',
|
||||
snippet.breadcrumb ? `*${snippet.breadcrumb}*` : '',
|
||||
`\`\`\`${snippet.language ?? ''}\n${snippet.content}\n\`\`\``,
|
||||
].filter(Boolean).join('\n'));
|
||||
} else {
|
||||
parts.push([
|
||||
snippet.title ? `### ${snippet.title}` : '',
|
||||
snippet.breadcrumb ? `*${snippet.breadcrumb}*` : '',
|
||||
snippet.content,
|
||||
].filter(Boolean).join('\n'));
|
||||
}
|
||||
}
|
||||
|
||||
return parts.join('\n\n---\n\n');
|
||||
}
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Trust Score Computation
|
||||
|
||||
Compute `trustScore` (0–10) when a repository is first indexed:
|
||||
|
||||
```typescript
|
||||
function computeTrustScore(repo: Repository): number {
|
||||
let score = 0;
|
||||
|
||||
// Stars (up to 4 points): log scale, 10k stars = 4 pts
|
||||
if (repo.stars) {
|
||||
score += Math.min(4, Math.log10(repo.stars + 1));
|
||||
}
|
||||
|
||||
// Documentation coverage (up to 3 points)
|
||||
score += Math.min(3, (repo.totalSnippets ?? 0) / 500);
|
||||
|
||||
// Source type (1 point for GitHub, 0 for local)
|
||||
if (repo.source === 'github') score += 1;
|
||||
|
||||
// Successful indexing (1 point)
|
||||
if (repo.state === 'indexed') score += 1;
|
||||
|
||||
// Has description (1 point)
|
||||
if (repo.description) score += 1;
|
||||
|
||||
return Math.min(10, parseFloat(score.toFixed(1)));
|
||||
}
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Files to Create
|
||||
|
||||
- `src/lib/server/search/search.service.ts`
|
||||
- `src/lib/server/search/query-preprocessor.ts`
|
||||
- `src/lib/server/search/formatters.ts`
|
||||
- `src/lib/server/search/trust-score.ts`
|
||||
- `src/lib/server/search/search.service.test.ts`
|
||||
271
docs/features/TRUEREF-0007.md
Normal file
271
docs/features/TRUEREF-0007.md
Normal file
@@ -0,0 +1,271 @@
|
||||
# TRUEREF-0007 — Embedding Generation & Vector Storage
|
||||
|
||||
**Priority:** P1
|
||||
**Status:** Pending
|
||||
**Depends On:** TRUEREF-0001, TRUEREF-0005
|
||||
**Blocks:** TRUEREF-0008
|
||||
|
||||
---
|
||||
|
||||
## Overview
|
||||
|
||||
Implement a pluggable embedding generation system that produces vector representations of snippets and stores them in SQLite. Supports multiple embedding backends (OpenAI-compatible API, local ONNX models via transformers.js). When no embedding provider is configured, the system gracefully falls back to FTS5-only search.
|
||||
|
||||
---
|
||||
|
||||
## Acceptance Criteria
|
||||
|
||||
- [ ] Pluggable `EmbeddingProvider` interface with at least two implementations
|
||||
- [ ] `OpenAIEmbeddingProvider` — works with OpenAI, Azure OpenAI, Ollama, any OpenAI-compatible endpoint
|
||||
- [ ] `LocalEmbeddingProvider` — uses `@xenova/transformers` with a bundled ONNX model (optional dep)
|
||||
- [ ] `NoopEmbeddingProvider` — returns null, enables graceful FTS5-only mode
|
||||
- [ ] `EmbeddingService` that batches embedding requests and stores results
|
||||
- [ ] Embeddings stored as `Float32Array` blobs in `snippet_embeddings` table
|
||||
- [ ] Embedding provider configured via settings (stored in `settings` table)
|
||||
- [ ] `GET /api/v1/settings/embedding` — get current embedding config
|
||||
- [ ] `PUT /api/v1/settings/embedding` — set embedding provider configuration
|
||||
- [ ] Unit tests for provider abstraction and storage logic
|
||||
|
||||
---
|
||||
|
||||
## Provider Interface
|
||||
|
||||
```typescript
|
||||
// src/lib/server/embeddings/provider.ts
|
||||
|
||||
export interface EmbeddingVector {
|
||||
values: Float32Array;
|
||||
dimensions: number;
|
||||
model: string;
|
||||
}
|
||||
|
||||
export interface EmbeddingProvider {
|
||||
readonly name: string;
|
||||
readonly dimensions: number;
|
||||
readonly model: string;
|
||||
|
||||
embed(texts: string[]): Promise<EmbeddingVector[]>;
|
||||
isAvailable(): Promise<boolean>;
|
||||
}
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## OpenAI-Compatible Provider
|
||||
|
||||
```typescript
|
||||
export interface OpenAIProviderConfig {
|
||||
baseUrl: string; // e.g. "https://api.openai.com/v1" or "http://localhost:11434/v1"
|
||||
apiKey: string;
|
||||
model: string; // e.g. "text-embedding-3-small", "nomic-embed-text"
|
||||
dimensions?: number; // override for models that support it (e.g. text-embedding-3-small)
|
||||
maxBatchSize?: number; // default: 100
|
||||
}
|
||||
|
||||
export class OpenAIEmbeddingProvider implements EmbeddingProvider {
|
||||
constructor(private config: OpenAIProviderConfig) {}
|
||||
|
||||
async embed(texts: string[]): Promise<EmbeddingVector[]> {
|
||||
// Batch into groups of maxBatchSize
|
||||
const batches = chunk(texts, this.config.maxBatchSize ?? 100);
|
||||
const allEmbeddings: EmbeddingVector[] = [];
|
||||
|
||||
for (const batch of batches) {
|
||||
const response = await fetch(`${this.config.baseUrl}/embeddings`, {
|
||||
method: 'POST',
|
||||
headers: {
|
||||
'Authorization': `Bearer ${this.config.apiKey}`,
|
||||
'Content-Type': 'application/json',
|
||||
},
|
||||
body: JSON.stringify({
|
||||
model: this.config.model,
|
||||
input: batch,
|
||||
dimensions: this.config.dimensions,
|
||||
}),
|
||||
});
|
||||
|
||||
if (!response.ok) {
|
||||
throw new EmbeddingError(`API error: ${response.status}`);
|
||||
}
|
||||
|
||||
const data = await response.json();
|
||||
for (const item of data.data) {
|
||||
allEmbeddings.push({
|
||||
values: new Float32Array(item.embedding),
|
||||
dimensions: item.embedding.length,
|
||||
model: this.config.model,
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
return allEmbeddings;
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Local Provider (Optional Dependency)
|
||||
|
||||
```typescript
|
||||
// Uses @xenova/transformers — only loaded if installed
|
||||
export class LocalEmbeddingProvider implements EmbeddingProvider {
|
||||
private pipeline: unknown = null;
|
||||
|
||||
readonly name = 'local';
|
||||
readonly model = 'Xenova/all-MiniLM-L6-v2'; // 384-dim, fast, small
|
||||
readonly dimensions = 384;
|
||||
|
||||
async embed(texts: string[]): Promise<EmbeddingVector[]> {
|
||||
if (!this.pipeline) {
|
||||
const { pipeline } = await import('@xenova/transformers');
|
||||
this.pipeline = await pipeline('feature-extraction', this.model);
|
||||
}
|
||||
|
||||
const results: EmbeddingVector[] = [];
|
||||
for (const text of texts) {
|
||||
const output = await (this.pipeline as Function)(text, {
|
||||
pooling: 'mean',
|
||||
normalize: true,
|
||||
});
|
||||
results.push({
|
||||
values: new Float32Array(output.data),
|
||||
dimensions: this.dimensions,
|
||||
model: this.model,
|
||||
});
|
||||
}
|
||||
return results;
|
||||
}
|
||||
|
||||
async isAvailable(): Promise<boolean> {
|
||||
try {
|
||||
await import('@xenova/transformers');
|
||||
return true;
|
||||
} catch {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Embedding Service
|
||||
|
||||
```typescript
|
||||
export class EmbeddingService {
|
||||
constructor(
|
||||
private db: BetterSQLite3.Database,
|
||||
private provider: EmbeddingProvider
|
||||
) {}
|
||||
|
||||
async embedSnippets(
|
||||
snippetIds: string[],
|
||||
onProgress?: (done: number, total: number) => void
|
||||
): Promise<void> {
|
||||
const snippets = this.db.prepare(
|
||||
`SELECT id, content, type FROM snippets WHERE id IN (${snippetIds.map(() => '?').join(',')})`
|
||||
).all(...snippetIds) as Snippet[];
|
||||
|
||||
// Prepare text for embedding: combine title + content
|
||||
const texts = snippets.map(s =>
|
||||
[s.title, s.breadcrumb, s.content].filter(Boolean).join('\n').slice(0, 2048)
|
||||
);
|
||||
|
||||
const BATCH_SIZE = 50;
|
||||
const insert = this.db.prepare(`
|
||||
INSERT OR REPLACE INTO snippet_embeddings (snippet_id, model, dimensions, embedding, created_at)
|
||||
VALUES (?, ?, ?, ?, unixepoch())
|
||||
`);
|
||||
|
||||
for (let i = 0; i < snippets.length; i += BATCH_SIZE) {
|
||||
const batch = snippets.slice(i, i + BATCH_SIZE);
|
||||
const batchTexts = texts.slice(i, i + BATCH_SIZE);
|
||||
|
||||
const embeddings = await this.provider.embed(batchTexts);
|
||||
|
||||
const insertMany = this.db.transaction(() => {
|
||||
for (let j = 0; j < batch.length; j++) {
|
||||
const snippet = batch[j];
|
||||
const embedding = embeddings[j];
|
||||
insert.run(
|
||||
snippet.id,
|
||||
embedding.model,
|
||||
embedding.dimensions,
|
||||
Buffer.from(embedding.values.buffer, embedding.values.byteOffset, embedding.values.byteLength)
|
||||
);
|
||||
}
|
||||
});
|
||||
insertMany();
|
||||
|
||||
onProgress?.(Math.min(i + BATCH_SIZE, snippets.length), snippets.length);
|
||||
}
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Provider Configuration
|
||||
|
||||
Stored in the `settings` table as JSON:
|
||||
|
||||
```typescript
|
||||
export interface EmbeddingConfig {
|
||||
provider: 'openai' | 'local' | 'none';
|
||||
openai?: {
|
||||
baseUrl: string;
|
||||
apiKey: string;
|
||||
model: string;
|
||||
dimensions?: number;
|
||||
};
|
||||
}
|
||||
|
||||
// Settings key: 'embedding_config'
|
||||
```
|
||||
|
||||
### API Endpoints
|
||||
|
||||
`GET /api/v1/settings/embedding`
|
||||
```json
|
||||
{
|
||||
"provider": "openai",
|
||||
"openai": {
|
||||
"baseUrl": "https://api.openai.com/v1",
|
||||
"model": "text-embedding-3-small",
|
||||
"dimensions": 1536
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
`PUT /api/v1/settings/embedding` — same shape, validates provider connectivity before saving.
|
||||
|
||||
---
|
||||
|
||||
## Blob Storage Format
|
||||
|
||||
Embeddings are stored as raw `Float32Array` binary blobs:
|
||||
|
||||
```typescript
|
||||
// Store
|
||||
const buffer = Buffer.from(float32Array.buffer, float32Array.byteOffset, float32Array.byteLength);
|
||||
|
||||
// Retrieve
|
||||
const float32Array = new Float32Array(
|
||||
buffer.buffer,
|
||||
buffer.byteOffset,
|
||||
buffer.byteLength / 4
|
||||
);
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Files to Create
|
||||
|
||||
- `src/lib/server/embeddings/provider.ts` — interface + noop
|
||||
- `src/lib/server/embeddings/openai.provider.ts`
|
||||
- `src/lib/server/embeddings/local.provider.ts`
|
||||
- `src/lib/server/embeddings/embedding.service.ts`
|
||||
- `src/lib/server/embeddings/factory.ts` — create provider from config
|
||||
- `src/routes/api/v1/settings/embedding/+server.ts`
|
||||
- `src/lib/server/embeddings/embedding.service.test.ts`
|
||||
213
docs/features/TRUEREF-0008.md
Normal file
213
docs/features/TRUEREF-0008.md
Normal file
@@ -0,0 +1,213 @@
|
||||
# TRUEREF-0008 — Hybrid Semantic Search Engine
|
||||
|
||||
**Priority:** P1
|
||||
**Status:** Pending
|
||||
**Depends On:** TRUEREF-0006, TRUEREF-0007
|
||||
**Blocks:** TRUEREF-0010 (enhances it)
|
||||
|
||||
---
|
||||
|
||||
## Overview
|
||||
|
||||
Combine FTS5 BM25 keyword search with vector similarity search (cosine similarity over embeddings) using Reciprocal Rank Fusion (RRF) to produce a hybrid ranking that outperforms either approach alone. When embeddings are not available, the system transparently falls back to FTS5-only mode.
|
||||
|
||||
---
|
||||
|
||||
## Acceptance Criteria
|
||||
|
||||
- [ ] `HybridSearchService` that coordinates FTS5 and vector search
|
||||
- [ ] Cosine similarity search over stored embeddings
|
||||
- [ ] Reciprocal Rank Fusion for combining ranked lists
|
||||
- [ ] Graceful degradation to FTS5-only when embeddings unavailable
|
||||
- [ ] Query embedding generated at search time via the configured provider
|
||||
- [ ] Results deduplicated by snippet ID (same snippet may appear in both result sets)
|
||||
- [ ] Configurable `alpha` parameter: weight between FTS5 (0.0) and vector (1.0)
|
||||
- [ ] Performance: < 300ms for searches over 100k snippets
|
||||
- [ ] Unit tests with mock embedding provider
|
||||
|
||||
---
|
||||
|
||||
## Architecture
|
||||
|
||||
```
|
||||
query text
|
||||
│
|
||||
├──── FTS5 Search ──────────────┐
|
||||
│ (BM25 ranking) │
|
||||
│ │
|
||||
└──── Vector Search ────────────┤
|
||||
(cosine similarity) │
|
||||
(embed query first) │
|
||||
│
|
||||
RRF Fusion
|
||||
│
|
||||
Final ranked list
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Vector Search Implementation
|
||||
|
||||
SQLite does not natively support vector operations, so cosine similarity is computed in JavaScript after loading candidate embeddings:
|
||||
|
||||
```typescript
|
||||
function cosineSimilarity(a: Float32Array, b: Float32Array): number {
|
||||
let dot = 0, normA = 0, normB = 0;
|
||||
for (let i = 0; i < a.length; i++) {
|
||||
dot += a[i] * b[i];
|
||||
normA += a[i] * a[i];
|
||||
normB += b[i] * b[i];
|
||||
}
|
||||
return dot / (Math.sqrt(normA) * Math.sqrt(normB));
|
||||
}
|
||||
|
||||
async vectorSearch(
|
||||
queryEmbedding: Float32Array,
|
||||
repositoryId: string,
|
||||
limit: number = 50
|
||||
): Promise<Array<{ snippetId: string; score: number }>> {
|
||||
// Load all embeddings for the repository (filtered)
|
||||
const rows = this.db.prepare(`
|
||||
SELECT se.snippet_id, se.embedding
|
||||
FROM snippet_embeddings se
|
||||
JOIN snippets s ON s.id = se.snippet_id
|
||||
WHERE s.repository_id = ?
|
||||
`).all(repositoryId) as { snippet_id: string; embedding: Buffer }[];
|
||||
|
||||
// Compute cosine similarity for each
|
||||
const scored = rows.map(row => {
|
||||
const embedding = new Float32Array(
|
||||
row.embedding.buffer,
|
||||
row.embedding.byteOffset,
|
||||
row.embedding.byteLength / 4
|
||||
);
|
||||
return {
|
||||
snippetId: row.snippet_id,
|
||||
score: cosineSimilarity(queryEmbedding, embedding),
|
||||
};
|
||||
});
|
||||
|
||||
// Sort descending by score, return top-k
|
||||
return scored
|
||||
.sort((a, b) => b.score - a.score)
|
||||
.slice(0, limit);
|
||||
}
|
||||
```
|
||||
|
||||
**Performance note:** For repositories with > 50k snippets, pre-filtering by FTS5 candidates before computing cosine similarity is recommended. This is a future optimization — for v1, in-memory computation is acceptable.
|
||||
|
||||
---
|
||||
|
||||
## Reciprocal Rank Fusion
|
||||
|
||||
```typescript
|
||||
function reciprocalRankFusion(
|
||||
...rankings: Array<Array<{ id: string; score: number }>>
|
||||
): Array<{ id: string; rrfScore: number }> {
|
||||
const K = 60; // RRF constant (standard value)
|
||||
const scores = new Map<string, number>();
|
||||
|
||||
for (const ranking of rankings) {
|
||||
ranking.forEach(({ id }, rank) => {
|
||||
const current = scores.get(id) ?? 0;
|
||||
scores.set(id, current + 1 / (K + rank + 1));
|
||||
});
|
||||
}
|
||||
|
||||
return Array.from(scores.entries())
|
||||
.map(([id, rrfScore]) => ({ id, rrfScore }))
|
||||
.sort((a, b) => b.rrfScore - a.rrfScore);
|
||||
}
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Hybrid Search Service
|
||||
|
||||
```typescript
|
||||
export interface HybridSearchOptions {
|
||||
repositoryId: string;
|
||||
versionId?: string;
|
||||
type?: 'code' | 'info';
|
||||
limit?: number;
|
||||
alpha?: number; // 0.0 = FTS5 only, 1.0 = vector only, 0.5 = balanced
|
||||
}
|
||||
|
||||
export class HybridSearchService {
|
||||
constructor(
|
||||
private db: BetterSQLite3.Database,
|
||||
private searchService: SearchService,
|
||||
private embeddingProvider: EmbeddingProvider | null,
|
||||
) {}
|
||||
|
||||
async search(
|
||||
query: string,
|
||||
options: HybridSearchOptions
|
||||
): Promise<SnippetSearchResult[]> {
|
||||
const limit = options.limit ?? 20;
|
||||
const alpha = options.alpha ?? 0.5;
|
||||
|
||||
// Always run FTS5 search
|
||||
const ftsResults = this.searchService.searchSnippets(query, {
|
||||
repositoryId: options.repositoryId,
|
||||
versionId: options.versionId,
|
||||
type: options.type,
|
||||
limit: limit * 3, // get more candidates for fusion
|
||||
});
|
||||
|
||||
// If no embedding provider or alpha = 0, return FTS5 results directly
|
||||
if (!this.embeddingProvider || alpha === 0) {
|
||||
return ftsResults.slice(0, limit);
|
||||
}
|
||||
|
||||
// Embed the query and run vector search
|
||||
const [queryEmbedding] = await this.embeddingProvider.embed([query]);
|
||||
const vectorResults = await this.vectorSearch(
|
||||
queryEmbedding.values,
|
||||
options.repositoryId,
|
||||
limit * 3
|
||||
);
|
||||
|
||||
// Normalize result lists for RRF
|
||||
const ftsRanked = ftsResults.map((r, i) => ({
|
||||
id: r.snippet.id,
|
||||
score: i,
|
||||
}));
|
||||
const vecRanked = vectorResults.map((r, i) => ({
|
||||
id: r.snippetId,
|
||||
score: i,
|
||||
}));
|
||||
|
||||
// Apply RRF
|
||||
const fused = reciprocalRankFusion(ftsRanked, vecRanked);
|
||||
|
||||
// Fetch full snippet data for top results
|
||||
const topIds = fused.slice(0, limit).map(r => r.id);
|
||||
return this.fetchSnippetsByIds(topIds, options.repositoryId);
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Configuration
|
||||
|
||||
The hybrid search alpha value can be set per-request or globally via settings:
|
||||
|
||||
```typescript
|
||||
// Default config stored in settings table under key 'search_config'
|
||||
export interface SearchConfig {
|
||||
alpha: number; // 0.5 default
|
||||
maxResults: number; // 20 default
|
||||
enableHybrid: boolean; // true if embedding provider is configured
|
||||
}
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Files to Create
|
||||
|
||||
- `src/lib/server/search/hybrid.search.service.ts`
|
||||
- `src/lib/server/search/vector.search.ts`
|
||||
- `src/lib/server/search/rrf.ts`
|
||||
- `src/lib/server/search/hybrid.search.service.test.ts`
|
||||
330
docs/features/TRUEREF-0009.md
Normal file
330
docs/features/TRUEREF-0009.md
Normal file
@@ -0,0 +1,330 @@
|
||||
# TRUEREF-0009 — Indexing Pipeline & Job Queue
|
||||
|
||||
**Priority:** P0
|
||||
**Status:** Pending
|
||||
**Depends On:** TRUEREF-0003, TRUEREF-0004, TRUEREF-0005, TRUEREF-0001
|
||||
**Blocks:** TRUEREF-0010, TRUEREF-0015
|
||||
|
||||
---
|
||||
|
||||
## Overview
|
||||
|
||||
Implement the end-to-end indexing pipeline that orchestrates crawling, parsing, storage, and embedding generation for a repository. Uses a SQLite-backed job queue with sequential processing. Each indexing run is atomic — new data replaces old data only upon successful completion.
|
||||
|
||||
---
|
||||
|
||||
## Acceptance Criteria
|
||||
|
||||
- [ ] `IndexingPipeline` class that orchestrates the full indexing flow
|
||||
- [ ] SQLite-backed `JobQueue` (no external message broker)
|
||||
- [ ] Atomic snippet replacement: old snippets deleted only after new ones are stored successfully
|
||||
- [ ] Progress tracked in `indexing_jobs` table (processedFiles, totalFiles, progress 0-100)
|
||||
- [ ] `GET /api/v1/jobs/:id` — get job status and progress
|
||||
- [ ] `GET /api/v1/jobs` — list recent jobs (with filtering by repositoryId)
|
||||
- [ ] Jobs run sequentially (one at a time) to avoid SQLite write contention
|
||||
- [ ] Graceful error handling: job marked as failed with error message, existing data preserved
|
||||
- [ ] Server startup: recover any `running` jobs that were interrupted (mark them `failed` and reset their repositories — see Server Startup Recovery below)
|
||||
- [ ] Unit tests for pipeline stages and job queue
|
||||
|
||||
---
|
||||
|
||||
## Pipeline Stages
|
||||
|
||||
```
|
||||
1. Create IndexingJob (status: queued)
|
||||
2. Dequeue job (status: running, startedAt: now)
|
||||
3. Fetch trueref.json config (if exists)
|
||||
4. Crawl repository (GitHub or local)
|
||||
→ Update job.totalFiles
|
||||
5. For each file:
|
||||
a. Check checksum against existing document
|
||||
b. If unchanged, skip (reuse existing snippets)
|
||||
c. If changed/new: parse into snippets
|
||||
d. Buffer new snippets in memory
|
||||
e. Update job.processedFiles + job.progress
|
||||
6. Transaction: Delete old snippets/documents → Insert new ones
|
||||
7. Generate embeddings for new snippets (if provider configured)
|
||||
8. Update repository stats (totalSnippets, totalTokens, trustScore, lastIndexedAt)
|
||||
9. Mark job as done (status: done, completedAt: now)
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Job Queue Implementation
|
||||
|
||||
```typescript
|
||||
// src/lib/server/pipeline/job-queue.ts
|
||||
|
||||
export class JobQueue {
|
||||
private isRunning = false;
|
||||
|
||||
constructor(private db: BetterSQLite3.Database) {}
|
||||
|
||||
enqueue(repositoryId: string, versionId?: string): IndexingJob {
|
||||
const job: NewIndexingJob = {
|
||||
id: crypto.randomUUID(),
|
||||
repositoryId,
|
||||
versionId: versionId ?? null,
|
||||
status: 'queued',
|
||||
progress: 0,
|
||||
totalFiles: 0,
|
||||
processedFiles: 0,
|
||||
error: null,
|
||||
startedAt: null,
|
||||
completedAt: null,
|
||||
createdAt: new Date(),
|
||||
};
|
||||
|
||||
this.db.prepare(`
|
||||
INSERT INTO indexing_jobs VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
|
||||
`).run(...Object.values(job));
|
||||
|
||||
// Kick off processing if not already running
|
||||
if (!this.isRunning) {
|
||||
setImmediate(() => this.processNext());
|
||||
}
|
||||
|
||||
return job;
|
||||
}
|
||||
|
||||
private async processNext(): Promise<void> {
|
||||
if (this.isRunning) return;
|
||||
|
||||
const job = this.db.prepare(`
|
||||
SELECT * FROM indexing_jobs
|
||||
WHERE status = 'queued'
|
||||
ORDER BY created_at ASC
|
||||
LIMIT 1
|
||||
`).get() as IndexingJob | undefined;
|
||||
|
||||
if (!job) return;
|
||||
|
||||
this.isRunning = true;
|
||||
try {
|
||||
await this.pipeline.run(job);
|
||||
} finally {
|
||||
this.isRunning = false;
|
||||
// Check for next queued job
|
||||
const nextJob = this.db.prepare(
|
||||
`SELECT id FROM indexing_jobs WHERE status = 'queued' LIMIT 1`
|
||||
).get();
|
||||
if (nextJob) setImmediate(() => this.processNext());
|
||||
}
|
||||
}
|
||||
|
||||
getJob(id: string): IndexingJob | null {
|
||||
return this.db.prepare(
|
||||
`SELECT * FROM indexing_jobs WHERE id = ?`
|
||||
).get(id) as IndexingJob | null;
|
||||
}
|
||||
|
||||
listJobs(repositoryId?: string, limit = 20): IndexingJob[] {
|
||||
const query = repositoryId
|
||||
? `SELECT * FROM indexing_jobs WHERE repository_id = ? ORDER BY created_at DESC LIMIT ?`
|
||||
: `SELECT * FROM indexing_jobs ORDER BY created_at DESC LIMIT ?`;
|
||||
const params = repositoryId ? [repositoryId, limit] : [limit];
|
||||
return this.db.prepare(query).all(...params) as IndexingJob[];
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Indexing Pipeline
|
||||
|
||||
```typescript
|
||||
// src/lib/server/pipeline/indexing.pipeline.ts
|
||||
|
||||
export class IndexingPipeline {
|
||||
constructor(
|
||||
private db: BetterSQLite3.Database,
|
||||
private githubCrawler: GitHubCrawler,
|
||||
private localCrawler: LocalCrawler,
|
||||
private embeddingService: EmbeddingService | null,
|
||||
) {}
|
||||
|
||||
async run(job: IndexingJob): Promise<void> {
|
||||
this.updateJob(job.id, { status: 'running', startedAt: new Date() });
|
||||
|
||||
try {
|
||||
const repo = this.getRepository(job.repositoryId);
|
||||
if (!repo) throw new Error(`Repository ${job.repositoryId} not found`);
|
||||
|
||||
// Update repo state
|
||||
this.updateRepo(repo.id, { state: 'indexing' });
|
||||
|
||||
// Step 1: Crawl
|
||||
const crawlResult = await this.crawl(repo, job);
|
||||
|
||||
// Step 2: Parse and diff
|
||||
const { newSnippets, changedDocIds, newDocuments } =
|
||||
await this.parseAndDiff(crawlResult, repo, job);
|
||||
|
||||
// Step 3: Atomic replacement
|
||||
this.replaceSnippets(repo.id, changedDocIds, newDocuments, newSnippets);
|
||||
|
||||
// Step 4: Embeddings (async, non-blocking for job completion)
|
||||
if (this.embeddingService && newSnippets.length > 0) {
|
||||
await this.embeddingService.embedSnippets(
|
||||
newSnippets.map(s => s.id),
|
||||
(done, total) => {
|
||||
// Update job progress for embedding phase
|
||||
}
|
||||
);
|
||||
}
|
||||
|
||||
// Step 5: Update repo stats
|
||||
const stats = this.computeStats(repo.id);
|
||||
this.updateRepo(repo.id, {
|
||||
state: 'indexed',
|
||||
totalSnippets: stats.totalSnippets,
|
||||
totalTokens: stats.totalTokens,
|
||||
trustScore: computeTrustScore({ ...repo, ...stats }),
|
||||
lastIndexedAt: new Date(),
|
||||
});
|
||||
|
||||
this.updateJob(job.id, {
|
||||
status: 'done',
|
||||
progress: 100,
|
||||
completedAt: new Date(),
|
||||
});
|
||||
|
||||
} catch (error) {
|
||||
this.updateJob(job.id, {
|
||||
status: 'failed',
|
||||
error: (error as Error).message,
|
||||
completedAt: new Date(),
|
||||
});
|
||||
this.updateRepo(job.repositoryId, { state: 'error' });
|
||||
throw error;
|
||||
}
|
||||
}
|
||||
|
||||
private replaceSnippets(
|
||||
repositoryId: string,
|
||||
changedDocIds: string[],
|
||||
newDocuments: NewDocument[],
|
||||
newSnippets: NewSnippet[]
|
||||
): void {
|
||||
// Single transaction: delete old → insert new
|
||||
this.db.transaction(() => {
|
||||
if (changedDocIds.length > 0) {
|
||||
// Cascade deletes snippets via FK constraint
|
||||
this.db.prepare(
|
||||
`DELETE FROM documents WHERE id IN (${changedDocIds.map(() => '?').join(',')})`
|
||||
).run(...changedDocIds);
|
||||
}
|
||||
|
||||
for (const doc of newDocuments) {
|
||||
this.insertDocument(doc);
|
||||
}
|
||||
|
||||
for (const snippet of newSnippets) {
|
||||
this.insertSnippet(snippet);
|
||||
}
|
||||
})();
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Progress Calculation
|
||||
|
||||
```typescript
|
||||
function calculateProgress(
|
||||
processedFiles: number,
|
||||
totalFiles: number,
|
||||
embeddingsDone: number,
|
||||
embeddingsTotal: number,
|
||||
hasEmbeddings: boolean
|
||||
): number {
|
||||
if (totalFiles === 0) return 0;
|
||||
|
||||
if (!hasEmbeddings) {
|
||||
// Crawl + parse = 100%
|
||||
return Math.round((processedFiles / totalFiles) * 100);
|
||||
}
|
||||
|
||||
// Crawl+parse = 80%, embeddings = 20%
|
||||
const parseProgress = (processedFiles / totalFiles) * 80;
|
||||
const embedProgress = embeddingsTotal > 0
|
||||
? (embeddingsDone / embeddingsTotal) * 20
|
||||
: 0;
|
||||
|
||||
return Math.round(parseProgress + embedProgress);
|
||||
}
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## API Endpoints
|
||||
|
||||
### `GET /api/v1/jobs/:id`
|
||||
|
||||
Response `200`:
|
||||
```json
|
||||
{
|
||||
"job": {
|
||||
"id": "uuid",
|
||||
"repositoryId": "/facebook/react",
|
||||
"status": "running",
|
||||
"progress": 47,
|
||||
"totalFiles": 342,
|
||||
"processedFiles": 162,
|
||||
"error": null,
|
||||
"startedAt": "2026-03-22T10:00:00Z",
|
||||
"completedAt": null,
|
||||
"createdAt": "2026-03-22T09:59:55Z"
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
### `GET /api/v1/jobs`
|
||||
|
||||
Query params: `repositoryId` (optional), `status` (optional), `limit` (default 20).
|
||||
|
||||
Response `200`:
|
||||
```json
|
||||
{
|
||||
"jobs": [...],
|
||||
"total": 5
|
||||
}
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Server Startup Recovery
|
||||
|
||||
On application start, mark any jobs in `running` state as `failed` (they were interrupted by a process crash):
|
||||
|
||||
```typescript
|
||||
function recoverStaleJobs(db: BetterSQLite3.Database): void {
|
||||
db.prepare(`
|
||||
UPDATE indexing_jobs
|
||||
SET status = 'failed',
|
||||
error = 'Server restarted while job was running',
|
||||
completed_at = unixepoch()
|
||||
WHERE status = 'running'
|
||||
`).run();
|
||||
|
||||
// Also reset any repositories stuck in 'indexing' state
|
||||
db.prepare(`
|
||||
UPDATE repositories
|
||||
SET state = 'error'
|
||||
WHERE state = 'indexing'
|
||||
`).run();
|
||||
}
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Files to Create
|
||||
|
||||
- `src/lib/server/pipeline/indexing.pipeline.ts`
|
||||
- `src/lib/server/pipeline/job-queue.ts`
|
||||
- `src/lib/server/pipeline/startup.ts` — recovery + queue initialization
|
||||
- `src/routes/api/v1/jobs/+server.ts` — GET list
|
||||
- `src/routes/api/v1/jobs/[id]/+server.ts` — GET single
|
||||
- `src/hooks.server.ts` — call startup on server init
|
||||
- `src/lib/server/pipeline/indexing.pipeline.test.ts`
|
||||
261
docs/features/TRUEREF-0010.md
Normal file
261
docs/features/TRUEREF-0010.md
Normal file
@@ -0,0 +1,261 @@
|
||||
# TRUEREF-0010 — REST API (Search & Context Endpoints)
|
||||
|
||||
**Priority:** P0
|
||||
**Status:** Pending
|
||||
**Depends On:** TRUEREF-0006, TRUEREF-0009
|
||||
**Blocks:** TRUEREF-0011
|
||||
|
||||
---
|
||||
|
||||
## Overview
|
||||
|
||||
Implement the public-facing REST API endpoints that replicate context7's `/api/v2/libs/search` and `/api/v2/context` interfaces. These are the endpoints that the MCP server, CLI tools, and external integrations call. The response format is designed to be compatible with context7's API surface so that tools built for context7 can be pointed at TrueRef with minimal changes.
|
||||
|
||||
---
|
||||
|
||||
## Acceptance Criteria
|
||||
|
||||
- [ ] `GET /api/v1/libs/search` — search libraries by name (equivalent to context7's `/api/v2/libs/search`)
|
||||
- [ ] `GET /api/v1/context` — fetch documentation for a library (equivalent to context7's `/api/v2/context`)
|
||||
- [ ] Both endpoints support `type=json` (structured) and `type=txt` (plain text for LLMs)
|
||||
- [ ] `libraryId` supports `/owner/repo` (default branch) and `/owner/repo/version` (specific version)
|
||||
- [ ] Rules from `trueref.json` are prepended to context responses
|
||||
- [ ] CORS headers set (allow all origins) for browser-accessible usage
|
||||
- [ ] Response times < 500ms p99 for typical queries
|
||||
- [ ] Integration tests covering both endpoints
|
||||
|
||||
---
|
||||
|
||||
## Endpoint: `GET /api/v1/libs/search`
|
||||
|
||||
**Equivalent to:** `GET https://context7.com/api/v2/libs/search`
|
||||
|
||||
### Query Parameters
|
||||
|
||||
| Parameter | Type | Required | Description |
|
||||
|-----------|------|----------|-------------|
|
||||
| `libraryName` | string | Yes | Library name to search for |
|
||||
| `query` | string | No | User's question for relevance ranking |
|
||||
| `limit` | integer | No | Max results (default: 10, max: 50) |
|
||||
|
||||
### Response `200` (`type=json`, default):
|
||||
|
||||
```json
|
||||
{
|
||||
"results": [
|
||||
{
|
||||
"id": "/facebook/react",
|
||||
"title": "React",
|
||||
"description": "A JavaScript library for building user interfaces",
|
||||
"branch": "main",
|
||||
"lastUpdateDate": "2026-03-22T10:00:00Z",
|
||||
"state": "finalized",
|
||||
"totalTokens": 142000,
|
||||
"totalSnippets": 1247,
|
||||
"stars": 228000,
|
||||
"trustScore": 9.2,
|
||||
"benchmarkScore": 87,
|
||||
"versions": ["v18.3.0", "v17.0.2"],
|
||||
"source": "https://github.com/facebook/react"
|
||||
}
|
||||
]
|
||||
}
|
||||
```
|
||||
|
||||
Note: `state: "finalized"` maps from TrueRef's `state: "indexed"` for compatibility.
|
||||
|
||||
### State Mapping
|
||||
|
||||
| TrueRef state | context7 state |
|
||||
|---------------|---------------|
|
||||
| `pending` | `initial` |
|
||||
| `indexing` | `initial` |
|
||||
| `indexed` | `finalized` |
|
||||
| `error` | `error` |
|
||||
|
||||
---
|
||||
|
||||
## Endpoint: `GET /api/v1/context`
|
||||
|
||||
**Equivalent to:** `GET https://context7.com/api/v2/context`
|
||||
|
||||
### Query Parameters
|
||||
|
||||
| Parameter | Type | Required | Description |
|
||||
|-----------|------|----------|-------------|
|
||||
| `libraryId` | string | Yes | Library ID, e.g. `/facebook/react` or `/facebook/react/v18.3.0` |
|
||||
| `query` | string | Yes | Specific question about the library |
|
||||
| `type` | string | No | `json` (default) or `txt` (plain text for LLM injection) |
|
||||
| `tokens` | integer | No | Approximate max token count for response (default: 10000) |
|
||||
|
||||
### Response `200` (`type=json`):
|
||||
|
||||
```json
|
||||
{
|
||||
"snippets": [
|
||||
{
|
||||
"type": "code",
|
||||
"title": "Basic Component",
|
||||
"description": "Getting Started > Components",
|
||||
"language": "tsx",
|
||||
"codeList": [
|
||||
{
|
||||
"language": "tsx",
|
||||
"code": "function MyComponent() {\n return <div>Hello</div>;\n}"
|
||||
}
|
||||
],
|
||||
"id": "uuid",
|
||||
"tokenCount": 45,
|
||||
"pageTitle": "Getting Started"
|
||||
},
|
||||
{
|
||||
"type": "info",
|
||||
"text": "React components let you split the UI into independent...",
|
||||
"breadcrumb": "Core Concepts > Components",
|
||||
"pageId": "uuid",
|
||||
"tokenCount": 120
|
||||
}
|
||||
],
|
||||
"rules": ["Always use functional components", "..."],
|
||||
"totalTokens": 2840
|
||||
}
|
||||
```
|
||||
|
||||
### Response `200` (`type=txt`):
|
||||
|
||||
Plain text formatted for direct LLM context injection:
|
||||
|
||||
```
|
||||
## Library Rules
|
||||
- Always use functional components
|
||||
- Use hooks for state management
|
||||
|
||||
---
|
||||
|
||||
### Basic Component
|
||||
*Getting Started > Components*
|
||||
|
||||
```tsx
|
||||
function MyComponent() {
|
||||
return <div>Hello</div>;
|
||||
}
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
### React components let you split the UI...
|
||||
*Core Concepts > Components*
|
||||
|
||||
React components let you split the UI into independent, reusable pieces...
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## LibraryId Parsing
|
||||
|
||||
```typescript
|
||||
function parseLibraryId(libraryId: string): {
|
||||
repositoryId: string; // "/facebook/react"
|
||||
version?: string; // "v18.3.0"
|
||||
} {
|
||||
// Match: /owner/repo or /owner/repo/version
|
||||
const match = libraryId.match(/^(\/[^/]+\/[^/]+)(\/(.+))?$/);
|
||||
if (!match) throw new Error(`Invalid libraryId: ${libraryId}`);
|
||||
|
||||
return {
|
||||
repositoryId: match[1],
|
||||
version: match[3],
|
||||
};
|
||||
}
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Token Budget
|
||||
|
||||
The `tokens` parameter limits the total response size. Snippets are added greedily until the budget is exhausted:
|
||||
|
||||
```typescript
|
||||
function selectSnippetsWithinBudget(
|
||||
snippets: Snippet[],
|
||||
maxTokens: number
|
||||
): Snippet[] {
|
||||
const selected: Snippet[] = [];
|
||||
let usedTokens = 0;
|
||||
|
||||
for (const snippet of snippets) {
|
||||
if (usedTokens + (snippet.tokenCount ?? 0) > maxTokens) break;
|
||||
selected.push(snippet);
|
||||
usedTokens += snippet.tokenCount ?? 0;
|
||||
}
|
||||
|
||||
return selected;
|
||||
}
|
||||
```
|
||||
|
||||
Default token budget: 10,000 tokens (~7,500 words) — enough for ~20 medium snippets.
|
||||
|
||||
---
|
||||
|
||||
## Error Responses
|
||||
|
||||
```json
|
||||
// 400 Bad Request
|
||||
{ "error": "libraryId is required", "code": "MISSING_PARAMETER" }
|
||||
|
||||
// 404 Not Found
|
||||
{ "error": "Library /facebook/react not found or not yet indexed", "code": "LIBRARY_NOT_FOUND" }
|
||||
|
||||
// 503 Service Unavailable
|
||||
{ "error": "Library is currently being indexed, please try again shortly", "code": "INDEXING_IN_PROGRESS" }
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## CORS Configuration
|
||||
|
||||
All API routes include:
|
||||
```
|
||||
Access-Control-Allow-Origin: *
|
||||
Access-Control-Allow-Methods: GET, POST, PATCH, DELETE, OPTIONS
|
||||
Access-Control-Allow-Headers: Content-Type, Authorization
|
||||
```
|
||||
|
||||
Set via `src/hooks.server.ts` handle function.
|
||||
|
||||
---
|
||||
|
||||
## SvelteKit Route Structure
|
||||
|
||||
```
|
||||
src/routes/api/v1/
  libs/
    search/
      +server.ts — GET /api/v1/libs/search
    +server.ts — GET (list), POST (add)
    [id]/
      +server.ts — GET, PATCH, DELETE
      index/
        +server.ts — POST (trigger indexing)
  context/
    +server.ts — GET /api/v1/context
  jobs/
    +server.ts — GET (list jobs)
    [id]/
      +server.ts — GET (single job)
  settings/
    embedding/
      +server.ts — GET, PUT
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Files to Create
|
||||
|
||||
- `src/routes/api/v1/libs/search/+server.ts`
|
||||
- `src/routes/api/v1/context/+server.ts`
|
||||
- `src/lib/server/api/formatters.ts` — JSON ↔ txt response formatting
|
||||
- `src/lib/server/api/library-id.ts` — ID parsing
|
||||
- `src/lib/server/api/token-budget.ts`
|
||||
- `src/hooks.server.ts` — CORS + startup recovery
|
||||
- Integration tests in `src/lib/server/api/*.test.ts`
|
||||
320
docs/features/TRUEREF-0011.md
Normal file
320
docs/features/TRUEREF-0011.md
Normal file
@@ -0,0 +1,320 @@
|
||||
# TRUEREF-0011 — MCP Server (stdio Transport)
|
||||
|
||||
**Priority:** P0
|
||||
**Status:** Pending
|
||||
**Depends On:** TRUEREF-0010
|
||||
**Blocks:** TRUEREF-0012
|
||||
|
||||
---
|
||||
|
||||
## Overview
|
||||
|
||||
Implement a Model Context Protocol (MCP) server that exposes `resolve-library-id` and `query-docs` tools via stdio transport. This is the primary integration point for AI coding assistants (Claude Code, Cursor, Zed, etc.). The tool names, input schemas, and output formats are intentionally identical to context7's MCP tools to enable drop-in compatibility.
|
||||
|
||||
---
|
||||
|
||||
## Acceptance Criteria
|
||||
|
||||
- [ ] MCP server binary entry point (`src/mcp/index.ts`) runnable via `node` or `tsx`
|
||||
- [ ] `resolve-library-id` tool implemented with identical schema to context7
|
||||
- [ ] `query-docs` tool implemented with identical schema to context7
|
||||
- [ ] Both tools call the local TrueRef REST API (configurable base URL)
|
||||
- [ ] Server identifies as `io.github.trueref/trueref` to Claude Code
|
||||
- [ ] stdio transport via `@modelcontextprotocol/sdk`
|
||||
- [ ] `TRUEREF_API_URL` env var configures the base URL (default: `http://localhost:5173`)
|
||||
- [ ] npm script `mcp:start` in `package.json`
|
||||
- [ ] Instructions for adding to Claude Code `.mcp.json` in README
|
||||
- [ ] Integration test that starts the MCP server and exercises both tools
|
||||
|
||||
---
|
||||
|
||||
## Dependencies to Add
|
||||
|
||||
```json
|
||||
{
|
||||
"@modelcontextprotocol/sdk": "^1.25.1",
|
||||
"zod": "^4.3.4"
|
||||
}
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## MCP Server Implementation
|
||||
|
||||
```typescript
|
||||
// src/mcp/index.ts
|
||||
|
||||
import { Server } from '@modelcontextprotocol/sdk/server/index.js';
|
||||
import { StdioServerTransport } from '@modelcontextprotocol/sdk/server/stdio.js';
|
||||
import {
|
||||
CallToolRequestSchema,
|
||||
ListToolsRequestSchema,
|
||||
} from '@modelcontextprotocol/sdk/types.js';
|
||||
import { z } from 'zod';
|
||||
|
||||
const API_BASE = process.env.TRUEREF_API_URL ?? 'http://localhost:5173';
|
||||
|
||||
const server = new Server(
|
||||
{
|
||||
name: 'io.github.trueref/trueref',
|
||||
version: '1.0.0',
|
||||
},
|
||||
{
|
||||
capabilities: { tools: {} },
|
||||
}
|
||||
);
|
||||
|
||||
// Tool schemas — identical to context7 for drop-in compatibility
|
||||
const ResolveLibraryIdSchema = z.object({
|
||||
libraryName: z.string().describe(
|
||||
'Library name to search for and resolve to a TrueRef library ID'
|
||||
),
|
||||
query: z.string().describe(
|
||||
"The user's question or context to help rank results"
|
||||
),
|
||||
});
|
||||
|
||||
const QueryDocsSchema = z.object({
|
||||
libraryId: z.string().describe(
|
||||
'The TrueRef library ID obtained from resolve-library-id, e.g. /facebook/react'
|
||||
),
|
||||
query: z.string().describe(
|
||||
'Specific question about the library to retrieve relevant documentation'
|
||||
),
|
||||
tokens: z.number().optional().describe(
|
||||
'Maximum token budget for the response (default: 10000)'
|
||||
),
|
||||
});
|
||||
|
||||
server.setRequestHandler(ListToolsRequestSchema, async () => ({
|
||||
tools: [
|
||||
{
|
||||
name: 'resolve-library-id',
|
||||
description: [
|
||||
'Searches TrueRef to find a library matching the given name.',
|
||||
'Returns a list of matching libraries with their IDs.',
|
||||
'ALWAYS call this tool before query-docs to get the correct library ID.',
|
||||
'Call at most 3 times per user question.',
|
||||
].join(' '),
|
||||
inputSchema: {
|
||||
type: 'object',
|
||||
properties: {
|
||||
libraryName: {
|
||||
type: 'string',
|
||||
description: 'Library name to search for',
|
||||
},
|
||||
query: {
|
||||
type: 'string',
|
||||
description: "User's question for relevance ranking",
|
||||
},
|
||||
},
|
||||
required: ['libraryName', 'query'],
|
||||
},
|
||||
},
|
||||
{
|
||||
name: 'query-docs',
|
||||
description: [
|
||||
'Fetches documentation and code examples from TrueRef for a specific library.',
|
||||
'Requires a library ID obtained from resolve-library-id.',
|
||||
'Returns relevant snippets formatted for LLM consumption.',
|
||||
'Call at most 3 times per user question.',
|
||||
].join(' '),
|
||||
inputSchema: {
|
||||
type: 'object',
|
||||
properties: {
|
||||
libraryId: {
|
||||
type: 'string',
|
||||
description: 'TrueRef library ID, e.g. /facebook/react',
|
||||
},
|
||||
query: {
|
||||
type: 'string',
|
||||
description: 'Specific question about the library',
|
||||
},
|
||||
tokens: {
|
||||
type: 'number',
|
||||
description: 'Max token budget (default: 10000)',
|
||||
},
|
||||
},
|
||||
required: ['libraryId', 'query'],
|
||||
},
|
||||
},
|
||||
],
|
||||
}));
|
||||
|
||||
server.setRequestHandler(CallToolRequestSchema, async (request) => {
|
||||
const { name, arguments: args } = request.params;
|
||||
|
||||
if (name === 'resolve-library-id') {
|
||||
const { libraryName, query } = ResolveLibraryIdSchema.parse(args);
|
||||
|
||||
const url = new URL(`${API_BASE}/api/v1/libs/search`);
|
||||
url.searchParams.set('libraryName', libraryName);
|
||||
url.searchParams.set('query', query);
|
||||
url.searchParams.set('type', 'txt');
|
||||
|
||||
const response = await fetch(url.toString());
|
||||
if (!response.ok) {
|
||||
return {
|
||||
content: [{
|
||||
type: 'text',
|
||||
text: `Error searching libraries: ${response.status} ${response.statusText}`,
|
||||
}],
|
||||
isError: true,
|
||||
};
|
||||
}
|
||||
|
||||
const text = await response.text();
|
||||
return {
|
||||
content: [{ type: 'text', text }],
|
||||
};
|
||||
}
|
||||
|
||||
if (name === 'query-docs') {
|
||||
const { libraryId, query, tokens } = QueryDocsSchema.parse(args);
|
||||
|
||||
const url = new URL(`${API_BASE}/api/v1/context`);
|
||||
url.searchParams.set('libraryId', libraryId);
|
||||
url.searchParams.set('query', query);
|
||||
url.searchParams.set('type', 'txt');
|
||||
if (tokens) url.searchParams.set('tokens', String(tokens));
|
||||
|
||||
const response = await fetch(url.toString());
|
||||
if (!response.ok) {
|
||||
const status = response.status;
|
||||
if (status === 404) {
|
||||
return {
|
||||
content: [{
|
||||
type: 'text',
|
||||
text: `Library "${libraryId}" not found. Please run resolve-library-id first.`,
|
||||
}],
|
||||
isError: true,
|
||||
};
|
||||
}
|
||||
if (status === 503) {
|
||||
return {
|
||||
content: [{
|
||||
type: 'text',
|
||||
text: `Library "${libraryId}" is currently being indexed. Please try again in a moment.`,
|
||||
}],
|
||||
isError: true,
|
||||
};
|
||||
}
|
||||
return {
|
||||
content: [{
|
||||
type: 'text',
|
||||
text: `Error fetching documentation: ${response.status} ${response.statusText}`,
|
||||
}],
|
||||
isError: true,
|
||||
};
|
||||
}
|
||||
|
||||
const text = await response.text();
|
||||
return {
|
||||
content: [{ type: 'text', text }],
|
||||
};
|
||||
}
|
||||
|
||||
return {
|
||||
content: [{ type: 'text', text: `Unknown tool: ${name}` }],
|
||||
isError: true,
|
||||
};
|
||||
});
|
||||
|
||||
async function main() {
|
||||
const transport = new StdioServerTransport();
|
||||
await server.connect(transport);
|
||||
// Server runs until process exits
|
||||
}
|
||||
|
||||
main().catch((err) => {
|
||||
process.stderr.write(`MCP server error: ${err.message}\n`);
|
||||
process.exit(1);
|
||||
});
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Package.json Scripts
|
||||
|
||||
```json
|
||||
{
|
||||
"scripts": {
|
||||
"mcp:start": "node --experimental-vm-modules src/mcp/index.ts"
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
Or with `tsx` for TypeScript-direct execution:
|
||||
```json
|
||||
{
|
||||
"scripts": {
|
||||
"mcp:start": "tsx src/mcp/index.ts"
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Claude Code Integration
|
||||
|
||||
Users add to `.mcp.json`:
|
||||
|
||||
```json
|
||||
{
|
||||
"mcpServers": {
|
||||
"trueref": {
|
||||
"command": "node",
|
||||
"args": ["/path/to/trueref/dist/mcp/index.js"],
|
||||
"env": {
|
||||
"TRUEREF_API_URL": "http://localhost:5173"
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
Or with tsx for development:
|
||||
```json
|
||||
{
|
||||
"mcpServers": {
|
||||
"trueref": {
|
||||
"command": "npx",
|
||||
"args": ["tsx", "/path/to/trueref/src/mcp/index.ts"],
|
||||
"env": {
|
||||
"TRUEREF_API_URL": "http://localhost:5173"
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## System Prompt / Rules
|
||||
|
||||
The MCP server should include a `resources` list item (optional) or the library responses themselves prepend rules. Additionally, users should add a Claude rule file:
|
||||
|
||||
```markdown
|
||||
<!-- .claude/rules/trueref.md -->
|
||||
---
|
||||
description: Use TrueRef to retrieve documentation for indexed libraries
|
||||
alwaysApply: true
|
||||
---
|
||||
|
||||
When answering questions about indexed libraries, always use the TrueRef MCP tools:
|
||||
1. Call `resolve-library-id` with the library name and the user's question to get the library ID
|
||||
2. Call `query-docs` with the library ID and question to retrieve relevant documentation
|
||||
3. Use the returned documentation to answer the question accurately
|
||||
|
||||
Never rely on training data alone for library APIs that may have changed.
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Files to Create
|
||||
|
||||
- `src/mcp/index.ts` — MCP server entry point
|
||||
- `src/mcp/tools/resolve-library-id.ts` — tool handler
|
||||
- `src/mcp/tools/query-docs.ts` — tool handler
|
||||
- `src/mcp/client.ts` — HTTP client for TrueRef API
|
||||
- `.claude/rules/trueref.md` — Claude Code rule file
|
||||
148
docs/features/TRUEREF-0012.md
Normal file
148
docs/features/TRUEREF-0012.md
Normal file
@@ -0,0 +1,148 @@
|
||||
# TRUEREF-0012 — MCP Server (HTTP Transport)
|
||||
|
||||
**Priority:** P1
|
||||
**Status:** Pending
|
||||
**Depends On:** TRUEREF-0011
|
||||
**Blocks:** —
|
||||
|
||||
---
|
||||
|
||||
## Overview
|
||||
|
||||
Extend the MCP server to support an HTTP/SSE transport in addition to the stdio transport from TRUEREF-0011. This allows TrueRef to be used as a remote MCP server (similar to `mcp.context7.com`) without requiring local installation of the MCP binary on the client machine.
|
||||
|
||||
---
|
||||
|
||||
## Acceptance Criteria
|
||||
|
||||
- [ ] HTTP transport alongside existing stdio transport (flag-based selection)
|
||||
- [ ] `--transport http` and `--port <n>` CLI flags
|
||||
- [ ] Streamable HTTP transport via `@modelcontextprotocol/sdk` `StreamableHTTPServerTransport`
|
||||
- [ ] `/mcp` endpoint handles MCP protocol over HTTP
|
||||
- [ ] `/ping` health check endpoint returns `{ "ok": true }`
|
||||
- [ ] Default port 3001 (to avoid conflict with SvelteKit dev server on 5173)
|
||||
- [ ] CORS headers on `/mcp` for browser-based clients
|
||||
- [ ] `npm run mcp:http` script for convenience
|
||||
- [ ] Docker-friendly: reads `PORT` env var
|
||||
|
||||
---
|
||||
|
||||
## CLI Interface
|
||||
|
||||
```
|
||||
node dist/mcp/index.js [--transport stdio|http] [--port 3001]
|
||||
|
||||
Options:
|
||||
--transport Transport mode: 'stdio' (default) or 'http'
|
||||
--port Port for HTTP transport (default: 3001, env: PORT)
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Implementation
|
||||
|
||||
```typescript
|
||||
// src/mcp/index.ts (extended)
|
||||
|
||||
import { parseArgs } from 'node:util';
|
||||
import { createServer } from 'node:http';
|
||||
import { StreamableHTTPServerTransport } from '@modelcontextprotocol/sdk/server/streamableHttp.js';
|
||||
import { StdioServerTransport } from '@modelcontextprotocol/sdk/server/stdio.js';
|
||||
|
||||
const { values: args } = parseArgs({
|
||||
options: {
|
||||
transport: { type: 'string', default: 'stdio' },
|
||||
port: { type: 'string', default: process.env.PORT ?? '3001' },
|
||||
},
|
||||
});
|
||||
|
||||
async function startHttp(server: Server, port: number): Promise<void> {
|
||||
const httpServer = createServer(async (req, res) => {
|
||||
const url = new URL(req.url!, `http://localhost:${port}`);
|
||||
|
||||
// Health check
|
||||
if (url.pathname === '/ping') {
|
||||
res.writeHead(200, { 'Content-Type': 'application/json' });
|
||||
res.end(JSON.stringify({ ok: true }));
|
||||
return;
|
||||
}
|
||||
|
||||
// MCP endpoint
|
||||
if (url.pathname === '/mcp') {
|
||||
// CORS preflight
|
||||
res.setHeader('Access-Control-Allow-Origin', '*');
|
||||
res.setHeader('Access-Control-Allow-Methods', 'POST, GET, OPTIONS');
|
||||
res.setHeader('Access-Control-Allow-Headers', 'Content-Type, Accept');
|
||||
|
||||
if (req.method === 'OPTIONS') {
|
||||
res.writeHead(204);
|
||||
res.end();
|
||||
return;
|
||||
}
|
||||
|
||||
const transport = new StreamableHTTPServerTransport({
|
||||
sessionIdGenerator: () => crypto.randomUUID(),
|
||||
});
|
||||
|
||||
await server.connect(transport);
|
||||
await transport.handleRequest(req, res);
|
||||
return;
|
||||
}
|
||||
|
||||
res.writeHead(404);
|
||||
res.end('Not Found');
|
||||
});
|
||||
|
||||
httpServer.listen(port, () => {
|
||||
process.stderr.write(`TrueRef MCP server listening on http://localhost:${port}/mcp\n`);
|
||||
});
|
||||
}
|
||||
|
||||
async function main() {
|
||||
const mcpServer = createMcpServer(); // shared server creation
|
||||
|
||||
if (args.transport === 'http') {
|
||||
const port = parseInt(args.port!, 10);
|
||||
await startHttp(mcpServer, port);
|
||||
} else {
|
||||
const transport = new StdioServerTransport();
|
||||
await mcpServer.connect(transport);
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Package.json Scripts
|
||||
|
||||
```json
|
||||
{
|
||||
"scripts": {
|
||||
"mcp:start": "tsx src/mcp/index.ts",
|
||||
"mcp:http": "tsx src/mcp/index.ts --transport http --port 3001"
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Remote Integration (Claude Code)
|
||||
|
||||
For HTTP transport, users configure Claude Code with the remote URL:
|
||||
|
||||
```json
|
||||
{
|
||||
"mcpServers": {
|
||||
"trueref": {
|
||||
"type": "http",
|
||||
"url": "http://localhost:3001/mcp"
|
||||
}
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Files to Modify
|
||||
|
||||
- `src/mcp/index.ts` — add HTTP transport branch and CLI arg parsing
|
||||
274
docs/features/TRUEREF-0013.md
Normal file
274
docs/features/TRUEREF-0013.md
Normal file
@@ -0,0 +1,274 @@
|
||||
# TRUEREF-0013 — `trueref.json` Config File Support
|
||||
|
||||
**Priority:** P0
|
||||
**Status:** Pending
|
||||
**Depends On:** TRUEREF-0003
|
||||
**Blocks:** TRUEREF-0009
|
||||
|
||||
---
|
||||
|
||||
## Overview
|
||||
|
||||
Support `trueref.json` configuration files placed in the root of a repository. This file allows repository owners to control how their repo is indexed: which folders to include, which to exclude, best-practice rules to inject into LLM responses, and versioning information. Also support `context7.json` as an alias for backward compatibility.
|
||||
|
||||
---
|
||||
|
||||
## Acceptance Criteria
|
||||
|
||||
- [ ] Parser for `trueref.json` and `context7.json` (same schema, `trueref.json` takes precedence)
|
||||
- [ ] Config applied during crawling to filter files (TRUEREF-0003, TRUEREF-0004)
|
||||
- [ ] Config stored in `repository_configs` table (TRUEREF-0001)
|
||||
- [ ] Config validated with descriptive error messages (invalid paths, oversized rules, etc.)
|
||||
- [ ] `rules` injected at the top of every `query-docs` response
|
||||
- [ ] `previousVersions` registered as `RepositoryVersion` records
|
||||
- [ ] JSON Schema published at `/api/v1/schema/trueref-config.json`
|
||||
- [ ] Unit tests for parser and validator
|
||||
|
||||
---
|
||||
|
||||
## Config File Schema
|
||||
|
||||
```typescript
|
||||
// src/lib/server/config/trueref-config.schema.ts
|
||||
|
||||
export interface TrueRefConfig {
|
||||
/**
|
||||
* Override the display name for this library.
|
||||
* 1–100 characters.
|
||||
*/
|
||||
projectTitle?: string;
|
||||
|
||||
/**
|
||||
* Description of the library for search ranking.
|
||||
* 10–500 characters.
|
||||
*/
|
||||
description?: string;
|
||||
|
||||
/**
|
||||
* Folders to include in indexing (allowlist).
|
||||
* Each entry is a path prefix or regex string.
|
||||
* If empty/absent, all folders are included.
|
||||
* Examples: ["src/", "docs/", "^packages/core"]
|
||||
*/
|
||||
folders?: string[];
|
||||
|
||||
/**
|
||||
* Folders to exclude from indexing.
|
||||
* Applied after `folders` allowlist.
|
||||
* Examples: ["test/", "fixtures/", "__mocks__"]
|
||||
*/
|
||||
excludeFolders?: string[];
|
||||
|
||||
/**
|
||||
* Exact filenames to exclude (no path, no regex).
|
||||
* Examples: ["README.md", "CHANGELOG.md", "jest.config.ts"]
|
||||
*/
|
||||
excludeFiles?: string[];
|
||||
|
||||
/**
|
||||
* Best practices / rules to inject at the top of every query-docs response.
|
||||
* Each rule: 5–500 characters.
|
||||
* Maximum 20 rules.
|
||||
*/
|
||||
rules?: string[];
|
||||
|
||||
/**
|
||||
* Previously released versions to make available for versioned queries.
|
||||
*/
|
||||
previousVersions?: Array<{
|
||||
tag: string; // git tag (e.g. "v1.2.3")
|
||||
title: string; // human-readable (e.g. "Version 1.2.3")
|
||||
}>;
|
||||
}
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Validation Rules
|
||||
|
||||
```typescript
|
||||
const CONFIG_CONSTRAINTS = {
|
||||
projectTitle: { minLength: 1, maxLength: 100 },
|
||||
description: { minLength: 10, maxLength: 500 },
|
||||
folders: { maxItems: 50, maxLength: 200 }, // per entry
|
||||
excludeFolders: { maxItems: 50, maxLength: 200 },
|
||||
excludeFiles: { maxItems: 100, maxLength: 200 },
|
||||
rules: { maxItems: 20, minLength: 5, maxLength: 500 },
|
||||
previousVersions: { maxItems: 50 },
|
||||
versionTag: { pattern: /^v?\d+\.\d+(\.\d+)?(-.*)?$/ },
|
||||
};
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Parser Implementation
|
||||
|
||||
```typescript
|
||||
// src/lib/server/config/config-parser.ts
|
||||
|
||||
export interface ParsedConfig {
|
||||
config: TrueRefConfig;
|
||||
source: 'trueref.json' | 'context7.json';
|
||||
warnings: string[];
|
||||
}
|
||||
|
||||
export function parseConfigFile(content: string, filename: string): ParsedConfig {
|
||||
let raw: unknown;
|
||||
|
||||
try {
|
||||
raw = JSON.parse(content);
|
||||
} catch (e) {
|
||||
throw new ConfigParseError(`${filename} is not valid JSON: ${(e as Error).message}`);
|
||||
}
|
||||
|
||||
if (typeof raw !== 'object' || raw === null) {
|
||||
throw new ConfigParseError(`${filename} must be a JSON object`);
|
||||
}
|
||||
|
||||
const config = raw as Record<string, unknown>;
|
||||
const validated: TrueRefConfig = {};
|
||||
const warnings: string[] = [];
|
||||
|
||||
// projectTitle
|
||||
if (config.projectTitle !== undefined) {
|
||||
if (typeof config.projectTitle !== 'string') {
|
||||
warnings.push('projectTitle must be a string, ignoring');
|
||||
} else if (config.projectTitle.length > 100) {
|
||||
validated.projectTitle = config.projectTitle.slice(0, 100);
|
||||
warnings.push('projectTitle truncated to 100 characters');
|
||||
} else {
|
||||
validated.projectTitle = config.projectTitle;
|
||||
}
|
||||
}
|
||||
|
||||
// description
|
||||
if (config.description !== undefined) {
|
||||
if (typeof config.description === 'string') {
|
||||
validated.description = config.description.slice(0, 500);
|
||||
}
|
||||
}
|
||||
|
||||
// folders / excludeFolders / excludeFiles — validated as string arrays
|
||||
for (const field of ['folders', 'excludeFolders', 'excludeFiles'] as const) {
|
||||
if (config[field] !== undefined) {
|
||||
if (!Array.isArray(config[field])) {
|
||||
warnings.push(`${field} must be an array, ignoring`);
|
||||
} else {
|
||||
validated[field] = (config[field] as unknown[])
|
||||
.filter((item): item is string => {
|
||||
if (typeof item !== 'string') {
|
||||
warnings.push(`${field} entry must be a string, skipping: ${item}`);
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
})
|
||||
.slice(0, field === 'excludeFiles' ? 100 : 50);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// rules
|
||||
if (config.rules !== undefined) {
|
||||
if (Array.isArray(config.rules)) {
|
||||
validated.rules = (config.rules as unknown[])
|
||||
.filter((r): r is string => typeof r === 'string' && r.length >= 5)
|
||||
.map(r => r.slice(0, 500))
|
||||
.slice(0, 20);
|
||||
}
|
||||
}
|
||||
|
||||
// previousVersions
|
||||
if (config.previousVersions !== undefined) {
|
||||
if (Array.isArray(config.previousVersions)) {
|
||||
validated.previousVersions = (config.previousVersions as unknown[])
|
||||
.filter((v): v is { tag: string; title: string } =>
|
||||
typeof v === 'object' && v !== null &&
|
||||
typeof (v as Record<string, unknown>).tag === 'string' &&
|
||||
typeof (v as Record<string, unknown>).title === 'string'
|
||||
)
|
||||
.slice(0, 50);
|
||||
}
|
||||
}
|
||||
|
||||
return {
|
||||
config: validated,
|
||||
source: filename.startsWith('trueref') ? 'trueref.json' : 'context7.json',
|
||||
warnings,
|
||||
};
|
||||
}
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Integration with Crawlers
|
||||
|
||||
During crawl, the crawler:
|
||||
|
||||
1. Looks for `trueref.json` first, then `context7.json` in the root.
|
||||
2. Downloads and parses the config before downloading other files.
|
||||
3. Applies `folders`, `excludeFolders`, `excludeFiles` to the file list.
|
||||
4. Returns the parsed config alongside crawl results.
|
||||
|
||||
The `IndexingPipeline` stores the config in `repository_configs` table and registers `previousVersions` as `RepositoryVersion` records (with state `pending`, triggering additional indexing jobs if configured).
|
||||
|
||||
---
|
||||
|
||||
## Rules Injection
|
||||
|
||||
When `query-docs` returns results, `rules` from `repository_configs` are prepended:
|
||||
|
||||
```typescript
|
||||
// In formatters.ts
|
||||
function buildContextResponse(
|
||||
snippets: Snippet[],
|
||||
config: RepositoryConfig | null
|
||||
): string {
|
||||
const parts: string[] = [];
|
||||
|
||||
if (config?.rules?.length) {
|
||||
parts.push(
|
||||
'## Library Best Practices\n' +
|
||||
config.rules.map(r => `- ${r}`).join('\n')
|
||||
);
|
||||
}
|
||||
|
||||
// ... append snippet content
|
||||
return parts.join('\n\n---\n\n');
|
||||
}
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## JSON Schema Endpoint
|
||||
|
||||
`GET /api/v1/schema/trueref-config.json`
|
||||
|
||||
Returns the full JSON Schema for `trueref.json` so IDE validation (VS Code, etc.) can provide autocomplete:
|
||||
|
||||
```json
|
||||
{
|
||||
"$schema": "http://json-schema.org/draft-07/schema#",
|
||||
"title": "TrueRef Repository Configuration",
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"projectTitle": {
|
||||
"type": "string",
|
||||
"minLength": 1,
|
||||
"maxLength": 100,
|
||||
"description": "Override the display name for this library"
|
||||
},
|
||||
...
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Files to Create
|
||||
|
||||
- `src/lib/server/config/config-parser.ts`
|
||||
- `src/lib/server/config/config-validator.ts`
|
||||
- `src/lib/server/config/trueref-config.schema.ts` — TypeScript types
|
||||
- `src/lib/server/config/trueref-config.json` — JSON Schema
|
||||
- `src/routes/api/v1/schema/trueref-config.json/+server.ts`
|
||||
- `src/lib/server/config/config-parser.test.ts`
|
||||
185
docs/features/TRUEREF-0014.md
Normal file
185
docs/features/TRUEREF-0014.md
Normal file
@@ -0,0 +1,185 @@
|
||||
# TRUEREF-0014 — Repository Version Management
|
||||
|
||||
**Priority:** P1
|
||||
**Status:** Pending
|
||||
**Depends On:** TRUEREF-0003, TRUEREF-0013
|
||||
**Blocks:** —
|
||||
|
||||
---
|
||||
|
||||
## Overview
|
||||
|
||||
Support indexing specific git tags and branches as distinct versioned snapshots of a repository. Users can query documentation for a specific version using the `/owner/repo/version` library ID format. Versions are registered via `trueref.json`'s `previousVersions` field or manually via the API.
|
||||
|
||||
---
|
||||
|
||||
## Acceptance Criteria
|
||||
|
||||
- [ ] `GET /api/v1/libs/:id/versions` — list all indexed versions for a repository
|
||||
- [ ] `POST /api/v1/libs/:id/versions` — add a new version (tag or branch)
|
||||
- [ ] `DELETE /api/v1/libs/:id/versions/:versionTag` — remove a version and its snippets
|
||||
- [ ] `POST /api/v1/libs/:id/versions/:versionTag/index` — trigger indexing for a specific version
|
||||
- [ ] Version-specific queries: `/api/v1/context?libraryId=/facebook/react/v18.3.0`
|
||||
- [ ] Default branch queries: `/api/v1/context?libraryId=/facebook/react` (no version suffix)
|
||||
- [ ] `previousVersions` from `trueref.json` automatically registered during indexing (state: `pending`)
|
||||
- [ ] GitHub tag list endpoint used to validate tag existence before indexing
|
||||
- [ ] Version snippets stored with `versionId` FK; default branch snippets have `versionId = NULL`
|
||||
|
||||
---
|
||||
|
||||
## Version ID Convention
|
||||
|
||||
```
|
||||
Version ID format: {repositoryId}/{tag}
|
||||
|
||||
Examples:
|
||||
/facebook/react/v18.3.0
|
||||
/facebook/react/v17.0.2
|
||||
/vercel/next.js/v14.3.0-canary.1
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## API Endpoints
|
||||
|
||||
### `GET /api/v1/libs/:id/versions`
|
||||
|
||||
Response `200`:
|
||||
```json
|
||||
{
|
||||
"versions": [
|
||||
{
|
||||
"id": "/facebook/react/v18.3.0",
|
||||
"repositoryId": "/facebook/react",
|
||||
"tag": "v18.3.0",
|
||||
"title": "React v18.3.0",
|
||||
"state": "indexed",
|
||||
"totalSnippets": 892,
|
||||
"indexedAt": "2026-03-22T10:00:00Z"
|
||||
}
|
||||
]
|
||||
}
|
||||
```
|
||||
|
||||
### `POST /api/v1/libs/:id/versions`
|
||||
|
||||
Request body:
|
||||
```json
|
||||
{
|
||||
"tag": "v18.3.0",
|
||||
"title": "React v18.3.0",
|
||||
"autoIndex": true
|
||||
}
|
||||
```
|
||||
|
||||
Response `201`:
|
||||
```json
|
||||
{
|
||||
"version": { ...RepositoryVersion },
|
||||
"job": { "id": "uuid", "status": "queued" }
|
||||
}
|
||||
```
|
||||
|
||||
### `DELETE /api/v1/libs/:id/versions/:tag`
|
||||
|
||||
Deletes the version record and all associated documents/snippets via cascade.
|
||||
Response `204`.
|
||||
|
||||
### `POST /api/v1/libs/:id/versions/:tag/index`
|
||||
|
||||
Queues an indexing job for this specific version tag.
|
||||
Response `202` with job details.
|
||||
|
||||
---
|
||||
|
||||
## GitHub Tag Discovery
|
||||
|
||||
```typescript
|
||||
async function listGitHubTags(
|
||||
owner: string,
|
||||
repo: string,
|
||||
token?: string
|
||||
): Promise<Array<{ name: string; commit: { sha: string } }>> {
|
||||
const headers: Record<string, string> = {
|
||||
'Accept': 'application/vnd.github.v3+json',
|
||||
'User-Agent': 'TrueRef/1.0',
|
||||
};
|
||||
if (token) headers['Authorization'] = `Bearer ${token}`;
|
||||
|
||||
const response = await fetch(
|
||||
`https://api.github.com/repos/${owner}/${repo}/tags?per_page=100`,
|
||||
{ headers }
|
||||
);
|
||||
|
||||
if (!response.ok) throw new GitHubApiError(response.status);
|
||||
return response.json();
|
||||
}
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Query Routing
|
||||
|
||||
In the search/context endpoints, the `libraryId` is parsed to extract the optional version:
|
||||
|
||||
```typescript
|
||||
function resolveSearchTarget(libraryId: string): {
|
||||
repositoryId: string;
|
||||
versionId?: string;
|
||||
} {
|
||||
const { repositoryId, version } = parseLibraryId(libraryId);
|
||||
|
||||
if (!version) {
|
||||
// Query default branch: versionId = NULL
|
||||
return { repositoryId };
|
||||
}
|
||||
|
||||
// Look up versionId from tag
|
||||
const versionRecord = db.prepare(
|
||||
`SELECT id FROM repository_versions WHERE repository_id = ? AND tag = ?`
|
||||
).get(repositoryId, version) as { id: string } | undefined;
|
||||
|
||||
if (!versionRecord) {
|
||||
throw new NotFoundError(
|
||||
`Version "${version}" not found for library "${repositoryId}"`
|
||||
);
|
||||
}
|
||||
|
||||
return { repositoryId, versionId: versionRecord.id };
|
||||
}
|
||||
```
|
||||
|
||||
Snippets with `version_id IS NULL` belong to the default branch; snippets with a `version_id` belong to that specific version. Search queries filter by `version_id = ?` or `version_id IS NULL` accordingly.
|
||||
|
||||
---
|
||||
|
||||
## Version Service
|
||||
|
||||
```typescript
|
||||
export class VersionService {
|
||||
constructor(private db: BetterSQLite3.Database) {}
|
||||
|
||||
list(repositoryId: string): RepositoryVersion[]
|
||||
|
||||
add(repositoryId: string, tag: string, title?: string): RepositoryVersion
|
||||
|
||||
remove(repositoryId: string, tag: string): void
|
||||
|
||||
getByTag(repositoryId: string, tag: string): RepositoryVersion | null
|
||||
|
||||
registerFromConfig(
|
||||
repositoryId: string,
|
||||
previousVersions: { tag: string; title: string }[]
|
||||
): RepositoryVersion[]
|
||||
}
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Files to Create
|
||||
|
||||
- `src/lib/server/services/version.service.ts`
|
||||
- `src/routes/api/v1/libs/[id]/versions/+server.ts` — GET, POST
|
||||
- `src/routes/api/v1/libs/[id]/versions/[tag]/+server.ts` — DELETE
|
||||
- `src/routes/api/v1/libs/[id]/versions/[tag]/index/+server.ts` — POST
|
||||
- `src/lib/server/crawler/github-tags.ts`
|
||||
311
docs/features/TRUEREF-0015.md
Normal file
311
docs/features/TRUEREF-0015.md
Normal file
@@ -0,0 +1,311 @@
|
||||
# TRUEREF-0015 — Web UI: Repository Dashboard
|
||||
|
||||
**Priority:** P1
|
||||
**Status:** Pending
|
||||
**Depends On:** TRUEREF-0002, TRUEREF-0009
|
||||
**Blocks:** TRUEREF-0016
|
||||
|
||||
---
|
||||
|
||||
## Overview
|
||||
|
||||
Implement the main web interface for managing repositories. Built with SvelteKit and TailwindCSS v4. The dashboard lets users add repositories, view indexing status with live progress, trigger re-indexing, remove repositories, and view basic statistics.
|
||||
|
||||
---
|
||||
|
||||
## Acceptance Criteria
|
||||
|
||||
- [ ] Repository list page at `/` showing all repositories with status, snippet count, last indexed date
|
||||
- [ ] Add repository modal/form (GitHub URL or local path input)
|
||||
- [ ] Per-repository card with: title, description, state badge, stats, action buttons
|
||||
- [ ] Live indexing progress bar (polls `GET /api/v1/jobs/:id` every 2s while running)
|
||||
- [ ] Trigger re-index button
|
||||
- [ ] Delete repository (with confirmation dialog)
|
||||
- [ ] View indexed versions per repository
|
||||
- [ ] Error state display (show error message when state = `error`)
|
||||
- [ ] Empty state (no repositories yet) with clear call-to-action
|
||||
- [ ] Responsive layout (mobile + desktop)
|
||||
- [ ] No page reloads — all interactions via `fetch` with SvelteKit load functions
|
||||
|
||||
---
|
||||
|
||||
## Page Structure
|
||||
|
||||
```
|
||||
/ (root)
|
||||
├── Layout: navbar with TrueRef logo + nav links
|
||||
├── /
|
||||
│ └── Repository list + add button
|
||||
├── /repos/[id]
|
||||
│ └── Repository detail: versions, recent jobs, config
|
||||
└── /settings
|
||||
└── Embedding provider configuration
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Repository Card Component
|
||||
|
||||
```svelte
|
||||
<!-- src/lib/components/RepositoryCard.svelte -->
|
||||
<script lang="ts">
|
||||
import type { Repository } from '$lib/types';
|
||||
|
||||
let { repo, onReindex, onDelete } = $props<{
|
||||
repo: Repository;
|
||||
onReindex: (id: string) => void;
|
||||
onDelete: (id: string) => void;
|
||||
}>();
|
||||
|
||||
const stateColors = {
|
||||
pending: 'bg-gray-100 text-gray-600',
|
||||
indexing: 'bg-blue-100 text-blue-700',
|
||||
indexed: 'bg-green-100 text-green-700',
|
||||
error: 'bg-red-100 text-red-700',
|
||||
};
|
||||
|
||||
const stateLabels = {
|
||||
pending: 'Pending',
|
||||
indexing: 'Indexing...',
|
||||
indexed: 'Indexed',
|
||||
error: 'Error',
|
||||
};
|
||||
</script>
|
||||
|
||||
<div class="rounded-xl border border-gray-200 bg-white p-5 shadow-sm">
|
||||
<div class="flex items-start justify-between">
|
||||
<div>
|
||||
<h3 class="font-semibold text-gray-900">{repo.title}</h3>
|
||||
<p class="mt-0.5 font-mono text-sm text-gray-500">{repo.id}</p>
|
||||
</div>
|
||||
<span class="rounded-full px-2.5 py-0.5 text-xs font-medium {stateColors[repo.state]}">
|
||||
{stateLabels[repo.state]}
|
||||
</span>
|
||||
</div>
|
||||
|
||||
{#if repo.description}
|
||||
<p class="mt-2 line-clamp-2 text-sm text-gray-600">{repo.description}</p>
|
||||
{/if}
|
||||
|
||||
<div class="mt-4 flex gap-4 text-sm text-gray-500">
|
||||
<span>{repo.totalSnippets.toLocaleString()} snippets</span>
|
||||
<span>·</span>
|
||||
<span>Trust: {repo.trustScore.toFixed(1)}/10</span>
|
||||
{#if repo.stars}
|
||||
<span>·</span>
|
||||
<span>★ {repo.stars.toLocaleString()}</span>
|
||||
{/if}
|
||||
</div>
|
||||
|
||||
{#if repo.state === 'error'}
|
||||
<p class="mt-2 text-xs text-red-600">Indexing failed. Check jobs for details.</p>
|
||||
{/if}
|
||||
|
||||
<div class="mt-4 flex gap-2">
|
||||
<button
|
||||
onclick={() => onReindex(repo.id)}
|
||||
class="rounded-lg bg-blue-600 px-3 py-1.5 text-sm text-white hover:bg-blue-700"
|
||||
disabled={repo.state === 'indexing'}
|
||||
>
|
||||
{repo.state === 'indexing' ? 'Indexing...' : 'Re-index'}
|
||||
</button>
|
||||
<a
|
||||
href="/repos/{encodeURIComponent(repo.id)}"
|
||||
class="rounded-lg border border-gray-200 px-3 py-1.5 text-sm text-gray-700 hover:bg-gray-50"
|
||||
>
|
||||
Details
|
||||
</a>
|
||||
<button
|
||||
onclick={() => onDelete(repo.id)}
|
||||
class="ml-auto rounded-lg px-3 py-1.5 text-sm text-red-600 hover:bg-red-50"
|
||||
>
|
||||
Delete
|
||||
</button>
|
||||
</div>
|
||||
</div>
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Add Repository Modal
|
||||
|
||||
```svelte
|
||||
<!-- src/lib/components/AddRepositoryModal.svelte -->
|
||||
<script lang="ts">
|
||||
let { onClose, onAdded } = $props<{
|
||||
onClose: () => void;
|
||||
onAdded: () => void;
|
||||
}>();
|
||||
|
||||
let source = $state<'github' | 'local'>('github');
|
||||
let sourceUrl = $state('');
|
||||
let githubToken = $state('');
|
||||
let loading = $state(false);
|
||||
let error = $state<string | null>(null);
|
||||
|
||||
async function handleSubmit() {
|
||||
loading = true;
|
||||
error = null;
|
||||
try {
|
||||
const res = await fetch('/api/v1/libs', {
|
||||
method: 'POST',
|
||||
headers: { 'Content-Type': 'application/json' },
|
||||
body: JSON.stringify({ source, sourceUrl, githubToken: githubToken || undefined }),
|
||||
});
|
||||
if (!res.ok) {
|
||||
const data = await res.json();
|
||||
throw new Error(data.error ?? 'Failed to add repository');
|
||||
}
|
||||
onAdded();
|
||||
onClose();
|
||||
} catch (e) {
|
||||
error = (e as Error).message;
|
||||
} finally {
|
||||
loading = false;
|
||||
}
|
||||
}
|
||||
</script>
|
||||
|
||||
<dialog class="modal" open>
|
||||
<div class="modal-box max-w-md">
|
||||
<h2 class="mb-4 text-lg font-semibold">Add Repository</h2>
|
||||
|
||||
<div class="mb-4 flex gap-2">
|
||||
<button
|
||||
class="flex-1 rounded-lg py-2 text-sm {source === 'github' ? 'bg-blue-600 text-white' : 'border border-gray-200 text-gray-700'}"
|
||||
onclick={() => source = 'github'}
|
||||
>GitHub</button>
|
||||
<button
|
||||
class="flex-1 rounded-lg py-2 text-sm {source === 'local' ? 'bg-blue-600 text-white' : 'border border-gray-200 text-gray-700'}"
|
||||
onclick={() => source = 'local'}
|
||||
>Local Path</button>
|
||||
</div>
|
||||
|
||||
<label class="block">
|
||||
<span class="text-sm font-medium text-gray-700">
|
||||
{source === 'github' ? 'GitHub URL' : 'Absolute Path'}
|
||||
</span>
|
||||
<input
|
||||
type="text"
|
||||
bind:value={sourceUrl}
|
||||
placeholder={source === 'github'
|
||||
? 'https://github.com/facebook/react'
|
||||
: '/home/user/projects/my-sdk'}
|
||||
class="mt-1 w-full rounded-lg border border-gray-300 px-3 py-2 text-sm"
|
||||
/>
|
||||
</label>
|
||||
|
||||
{#if source === 'github'}
|
||||
<label class="mt-3 block">
|
||||
<span class="text-sm font-medium text-gray-700">GitHub Token (optional, for private repos)</span>
|
||||
<input
|
||||
type="password"
|
||||
bind:value={githubToken}
|
||||
placeholder="ghp_..."
|
||||
class="mt-1 w-full rounded-lg border border-gray-300 px-3 py-2 text-sm"
|
||||
/>
|
||||
</label>
|
||||
{/if}
|
||||
|
||||
{#if error}
|
||||
<p class="mt-3 text-sm text-red-600">{error}</p>
|
||||
{/if}
|
||||
|
||||
<div class="mt-6 flex justify-end gap-3">
|
||||
<button onclick={onClose} class="rounded-lg border border-gray-200 px-4 py-2 text-sm">
|
||||
Cancel
|
||||
</button>
|
||||
<button
|
||||
onclick={handleSubmit}
|
||||
disabled={loading || !sourceUrl}
|
||||
class="rounded-lg bg-blue-600 px-4 py-2 text-sm text-white disabled:opacity-50"
|
||||
>
|
||||
{loading ? 'Adding...' : 'Add & Index'}
|
||||
</button>
|
||||
</div>
|
||||
</div>
|
||||
</dialog>
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Live Progress Component
|
||||
|
||||
```svelte
|
||||
<!-- src/lib/components/IndexingProgress.svelte -->
|
||||
<script lang="ts">
|
||||
import { onMount, onDestroy } from 'svelte';
|
||||
import type { IndexingJob } from '$lib/types';
|
||||
|
||||
let { jobId } = $props<{ jobId: string }>();
|
||||
let job = $state<IndexingJob | null>(null);
|
||||
let interval: ReturnType<typeof setInterval>;
|
||||
|
||||
async function pollJob() {
|
||||
const res = await fetch(`/api/v1/jobs/${jobId}`);
|
||||
if (res.ok) {
|
||||
const data = await res.json();
|
||||
job = data.job;
|
||||
if (job?.status === 'done' || job?.status === 'failed') {
|
||||
clearInterval(interval);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
onMount(() => {
|
||||
pollJob();
|
||||
interval = setInterval(pollJob, 2000);
|
||||
});
|
||||
|
||||
onDestroy(() => clearInterval(interval));
|
||||
</script>
|
||||
|
||||
{#if job}
|
||||
<div class="mt-2">
|
||||
<div class="flex justify-between text-xs text-gray-500">
|
||||
<span>{job.processedFiles} / {job.totalFiles} files</span>
|
||||
<span>{job.progress}%</span>
|
||||
</div>
|
||||
<div class="mt-1 h-1.5 w-full rounded-full bg-gray-200">
|
||||
<div
|
||||
class="h-1.5 rounded-full bg-blue-600 transition-all"
|
||||
style="width: {job.progress}%"
|
||||
></div>
|
||||
</div>
|
||||
{#if job.status === 'failed'}
|
||||
<p class="mt-1 text-xs text-red-600">{job.error}</p>
|
||||
{/if}
|
||||
</div>
|
||||
{/if}
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Main Page Data Loading
|
||||
|
||||
```typescript
|
||||
// src/routes/+page.server.ts
|
||||
import type { PageServerLoad } from './$types';
|
||||
|
||||
export const load: PageServerLoad = async ({ fetch }) => {
|
||||
const res = await fetch('/api/v1/libs');
|
||||
const data = await res.json();
|
||||
return { repositories: data.libraries };
|
||||
};
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Files to Create
|
||||
|
||||
- `src/routes/+page.svelte` — repository list
|
||||
- `src/routes/+page.server.ts` — load function
|
||||
- `src/routes/repos/[id]/+page.svelte` — repository detail
|
||||
- `src/routes/repos/[id]/+page.server.ts` — load function
|
||||
- `src/routes/settings/+page.svelte` — settings page
|
||||
- `src/lib/components/RepositoryCard.svelte`
|
||||
- `src/lib/components/AddRepositoryModal.svelte`
|
||||
- `src/lib/components/IndexingProgress.svelte`
|
||||
- `src/lib/components/ConfirmDialog.svelte`
|
||||
- `src/lib/components/StatBadge.svelte`
|
||||
- `src/routes/+layout.svelte` — nav + global layout
|
||||
194
docs/features/TRUEREF-0016.md
Normal file
194
docs/features/TRUEREF-0016.md
Normal file
@@ -0,0 +1,194 @@
|
||||
# TRUEREF-0016 — Web UI: Search Explorer
|
||||
|
||||
**Priority:** P2
|
||||
**Status:** Pending
|
||||
**Depends On:** TRUEREF-0010, TRUEREF-0015
|
||||
**Blocks:** —
|
||||
|
||||
---
|
||||
|
||||
## Overview
|
||||
|
||||
An interactive search interface within the web UI that lets users test the documentation retrieval system directly from the browser. Mirrors the two-step context7 flow: first resolve a library ID, then query documentation. Results are displayed with syntax highlighting. Useful for validating indexing quality and demonstrating the system to stakeholders.
|
||||
|
||||
---
|
||||
|
||||
## Acceptance Criteria
|
||||
|
||||
- [ ] Search page at `/search` with two-step workflow
|
||||
- [ ] Step 1: Library name input → displays matching libraries with IDs and scores
|
||||
- [ ] Step 2: Click library → query input → displays ranked snippets
|
||||
- [ ] Syntax-highlighted code blocks (using a lightweight highlighter)
|
||||
- [ ] Snippet type badge (code vs info)
|
||||
- [ ] Breadcrumb display per snippet
|
||||
- [ ] Token count per snippet and total
|
||||
- [ ] "Copy as Markdown" button for the full response
|
||||
- [ ] Library switcher (return to step 1 without full page reload)
|
||||
- [ ] URL reflects current state (`/search?lib=/facebook/react&q=useState`)
|
||||
- [ ] No server-side rendering needed for this page (can be client-side)
|
||||
|
||||
---
|
||||
|
||||
## Page Layout
|
||||
|
||||
```
|
||||
/search
|
||||
├── Header: "Search Documentation"
|
||||
├── Step 1: Library Search
|
||||
│ ├── Input: "Library name..." + Search button
|
||||
│ └── Results list: library cards with ID, description, snippet count, trust score
|
||||
│ └── [Click to select]
|
||||
├── Step 2: Documentation Query (shown after library selected)
|
||||
│ ├── Selected library badge + "Change" button
|
||||
│ ├── Input: "What would you like to know?" + Search button
|
||||
│ └── Results:
|
||||
│ ├── Token count summary
|
||||
│ ├── "Copy as Markdown" button
|
||||
│ └── Snippet list:
|
||||
│ ├── Code snippets: syntax-highlighted code block
|
||||
│ └── Info snippets: formatted markdown
|
||||
└── (loading states + empty states throughout)
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Component: LibrarySearchResult
|
||||
|
||||
```svelte
|
||||
<!-- src/lib/components/search/LibraryResult.svelte -->
<script lang="ts">
	// One clickable card per step-1 library match. Selecting it hands the
	// library ID back to the parent, which then switches to step 2.
	// description is nullable to match the repositories.description column
	// (and the {#if result.description} guard below, which would otherwise
	// be dead code against a required string).
	let { result, onSelect } = $props<{
		result: {
			id: string;
			title: string;
			description: string | null;
			totalSnippets: number;
			trustScore: number;
		};
		onSelect: (id: string) => void;
	}>();
</script>

<button
	onclick={() => onSelect(result.id)}
	class="w-full rounded-xl border border-gray-200 bg-white p-4 text-left shadow-sm hover:border-blue-300 hover:shadow-md transition-all"
>
	<div class="flex items-center justify-between">
		<span class="font-semibold text-gray-900">{result.title}</span>
		<span class="text-xs text-gray-400">Trust {result.trustScore.toFixed(1)}/10</span>
	</div>
	<p class="font-mono text-xs text-gray-400">{result.id}</p>
	{#if result.description}
		<p class="mt-1.5 text-sm text-gray-600 line-clamp-2">{result.description}</p>
	{/if}
	<p class="mt-2 text-xs text-gray-400">{result.totalSnippets.toLocaleString()} snippets</p>
</button>
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Component: SnippetCard
|
||||
|
||||
```svelte
|
||||
<!-- src/lib/components/search/SnippetCard.svelte -->
<script lang="ts">
import type { Snippet } from '$lib/types';

// Renders a single retrieved documentation snippet as a card:
// header (type badge, optional title, token count), optional breadcrumb,
// then the content body.
let { snippet } = $props<{ snippet: Snippet }>();
</script>

<div class="rounded-xl border border-gray-200 bg-white overflow-hidden">
<!-- Header row: snippet type badge + title on the left, token count on the right -->
<div class="flex items-center justify-between border-b border-gray-100 px-4 py-2.5">
<div class="flex items-center gap-2">
{#if snippet.type === 'code'}
<span class="rounded bg-purple-100 px-1.5 py-0.5 text-xs text-purple-700">code</span>
{:else}
<span class="rounded bg-blue-100 px-1.5 py-0.5 text-xs text-blue-700">info</span>
{/if}
{#if snippet.title}
<span class="text-sm font-medium text-gray-800">{snippet.title}</span>
{/if}
</div>
<span class="text-xs text-gray-400">{snippet.tokenCount} tokens</span>
</div>

<!-- Breadcrumb: the snippet's location within its source document -->
{#if snippet.breadcrumb}
<p class="bg-gray-50 px-4 py-1.5 text-xs text-gray-500 italic">{snippet.breadcrumb}</p>
{/if}

<div class="p-4">
{#if snippet.type === 'code'}
<pre class="overflow-x-auto rounded bg-gray-950 p-4 text-sm text-gray-100"><code>{snippet.content}</code></pre>
{:else}
<!-- NOTE(review): content is interpolated as plain text here, but the
     acceptance criteria describe info snippets as "formatted markdown" —
     confirm whether a markdown renderer is planned for v1. -->
<div class="prose prose-sm max-w-none text-gray-700">{snippet.content}</div>
{/if}
</div>
</div>
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Search Page Logic
|
||||
|
||||
```svelte
|
||||
<!-- src/routes/search/+page.svelte -->
|
||||
<script lang="ts">
	// Client-side search page implementing the two-step context7-style flow:
	// step 1 resolves a library ID by name, step 2 queries its documentation.
	import { page } from '$app/stores';
	import { goto } from '$app/navigation';
	// NOTE(review): these types were referenced but not imported in the original
	// sketch. Snippet lives in $lib/types (see SnippetCard.svelte); confirm the
	// home of LibrarySearchResult.
	import type { LibrarySearchResult, Snippet } from '$lib/types';

	let libraryName = $state('');
	let selectedLibraryId = $state<string | null>(null);
	let query = $state('');
	let libraryResults = $state<LibrarySearchResult[]>([]);
	let snippets = $state<Snippet[]>([]);
	let loadingLibraries = $state(false);
	let loadingSnippets = $state(false);

	/** Step 1: resolve candidate libraries by name. */
	async function searchLibraries() {
		loadingLibraries = true;
		try {
			// The current doc query is forwarded too — presumably used for
			// relevance-aware ranking; confirm against the API contract.
			const res = await fetch(
				`/api/v1/libs/search?libraryName=${encodeURIComponent(libraryName)}&query=${encodeURIComponent(query)}`
			);
			if (!res.ok) throw new Error(`Library search failed: HTTP ${res.status}`);
			const data = await res.json();
			libraryResults = data.results;
		} finally {
			// Always clear the spinner, even when the request throws, so the UI
			// never gets stuck in a loading state.
			loadingLibraries = false;
		}
	}

	/** Step 2: fetch ranked snippets for the selected library. */
	async function searchDocs() {
		if (!selectedLibraryId) return;
		loadingSnippets = true;
		try {
			const url = new URL('/api/v1/context', window.location.origin);
			url.searchParams.set('libraryId', selectedLibraryId);
			url.searchParams.set('query', query);
			const res = await fetch(url);
			if (!res.ok) throw new Error(`Context fetch failed: HTTP ${res.status}`);
			const data = await res.json();
			snippets = data.snippets;
		} finally {
			loadingSnippets = false;
		}

		// Reflect the current search in the URL so it can be shared/reloaded
		// (acceptance criterion: /search?lib=...&q=...).
		goto(`/search?lib=${encodeURIComponent(selectedLibraryId)}&q=${encodeURIComponent(query)}`, {
			replaceState: true,
			keepFocus: true,
		});
	}
</script>
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Syntax Highlighting
|
||||
|
||||
Use a minimal, zero-dependency approach for v1 — wrap code blocks in `<pre><code>` with a CSS-based theme. Optionally integrate `highlight.js` (lightweight) as an enhancement:
|
||||
|
||||
```typescript
|
||||
// Optional: lazy-load highlight.js only when code snippets are present
|
||||
async function highlightCode(code: string, language: string): Promise<string> {
|
||||
const hljs = await import('highlight.js/lib/core');
|
||||
// Register only needed languages
|
||||
return hljs.highlight(code, { language }).value;
|
||||
}
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Files to Create
|
||||
|
||||
- `src/routes/search/+page.svelte`
|
||||
- `src/lib/components/search/LibraryResult.svelte`
|
||||
- `src/lib/components/search/SnippetCard.svelte`
|
||||
- `src/lib/components/search/SearchInput.svelte`
|
||||
- `src/lib/utils/copy-to-clipboard.ts`
|
||||
136
docs/features/TRUEREF-0017.md
Normal file
136
docs/features/TRUEREF-0017.md
Normal file
@@ -0,0 +1,136 @@
|
||||
# TRUEREF-0017 — Incremental Re-indexing (Checksum Diff)
|
||||
|
||||
**Priority:** P1
|
||||
**Status:** Pending
|
||||
**Depends On:** TRUEREF-0009
|
||||
**Blocks:** —
|
||||
|
||||
---
|
||||
|
||||
## Overview
|
||||
|
||||
Optimize re-indexing by skipping files that haven't changed since the last indexing run. Uses file checksums (SHA-256) to detect changes. Only modified, added, or deleted files trigger parser/embedding work. This dramatically reduces re-indexing time for large repositories.
|
||||
|
||||
---
|
||||
|
||||
## Acceptance Criteria
|
||||
|
||||
- [ ] Checksum comparison before parsing each file
|
||||
- [ ] Unchanged files reuse existing `Document` and `Snippet` records (no re-parse, no re-embed)
|
||||
- [ ] New files: full parse + embed
|
||||
- [ ] Modified files: delete old snippets, parse new ones, re-embed
|
||||
- [ ] Deleted files (present in DB but not in new crawl): delete documents and snippets
|
||||
- [ ] Job progress reflects total files (including skipped), not just processed
|
||||
- [ ] Statistics updated correctly after incremental run
|
||||
- [ ] Integration test covering unchanged, modified, added, and deleted files
|
||||
|
||||
---
|
||||
|
||||
## Diff Algorithm
|
||||
|
||||
```typescript
|
||||
interface FileDiff {
|
||||
added: CrawledFile[]; // new files not in DB
|
||||
modified: CrawledFile[]; // files with changed checksum
|
||||
deleted: string[]; // file paths in DB but not in crawl
|
||||
unchanged: string[]; // file paths with matching checksum
|
||||
}
|
||||
|
||||
function computeDiff(
|
||||
crawledFiles: CrawledFile[],
|
||||
existingDocs: Document[] // documents currently in DB for this repo
|
||||
): FileDiff {
|
||||
const existingMap = new Map(existingDocs.map(d => [d.filePath, d]));
|
||||
const crawledMap = new Map(crawledFiles.map(f => [f.path, f]));
|
||||
|
||||
const added: CrawledFile[] = [];
|
||||
const modified: CrawledFile[] = [];
|
||||
const unchanged: string[] = [];
|
||||
|
||||
for (const file of crawledFiles) {
|
||||
const existing = existingMap.get(file.path);
|
||||
if (!existing) {
|
||||
added.push(file);
|
||||
} else if (existing.checksum !== file.sha) {
|
||||
modified.push(file);
|
||||
} else {
|
||||
unchanged.push(file.path);
|
||||
}
|
||||
}
|
||||
|
||||
const deleted = existingDocs
|
||||
.filter(doc => !crawledMap.has(doc.filePath))
|
||||
.map(doc => doc.filePath);
|
||||
|
||||
return { added, modified, deleted, unchanged };
|
||||
}
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Integration with IndexingPipeline
|
||||
|
||||
```typescript
|
||||
// In IndexingPipeline.run(), after crawling:
|
||||
|
||||
const existingDocs = this.getExistingDocuments(repo.id, job.versionId);
|
||||
const diff = computeDiff(crawledResult.files, existingDocs);
|
||||
|
||||
// Log diff summary
|
||||
this.updateJob(job.id, {
|
||||
totalFiles: crawledResult.files.length,
|
||||
});
|
||||
|
||||
// Process only changed/new files
|
||||
const filesToProcess = [...diff.added, ...diff.modified];
|
||||
const newSnippets: NewSnippet[] = [];
|
||||
const newDocuments: NewDocument[] = [];
|
||||
const docIdsToDelete: string[] = [];
|
||||
|
||||
// Map modified files to their existing document IDs for deletion
|
||||
for (const file of diff.modified) {
|
||||
const existing = existingDocs.find(d => d.filePath === file.path);
|
||||
if (existing) docIdsToDelete.push(existing.id);
|
||||
}
|
||||
|
||||
// Map deleted file paths to document IDs
|
||||
for (const filePath of diff.deleted) {
|
||||
const existing = existingDocs.find(d => d.filePath === filePath);
|
||||
if (existing) docIdsToDelete.push(existing.id);
|
||||
}
|
||||
|
||||
// Parse new/modified files
|
||||
for (const [i, file] of filesToProcess.entries()) {
|
||||
const docId = crypto.randomUUID();
|
||||
newDocuments.push({ id: docId, ...buildDocument(file, repo.id, job.versionId) });
|
||||
newSnippets.push(...parseFile(file, { repositoryId: repo.id, documentId: docId }));
|
||||
|
||||
// Count ALL files (including skipped) in progress
|
||||
const totalProcessed = diff.unchanged.length + i + 1;
|
||||
const progress = Math.round((totalProcessed / crawledResult.files.length) * 80);
|
||||
this.updateJob(job.id, {
|
||||
processedFiles: totalProcessed,
|
||||
progress,
|
||||
});
|
||||
}
|
||||
|
||||
// Atomic replacement of only changed documents
|
||||
this.replaceSnippets(repo.id, docIdsToDelete, newDocuments, newSnippets);
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Performance Impact
|
||||
|
||||
For a typical repository with 1,000 files where 50 changed:
|
||||
- **Without incremental**: 1,000 files parsed + 1,000 embed batches
|
||||
- **With incremental**: 50 files parsed + 50 embed batches
|
||||
- Estimated speedup: ~20x on parse/embed work for re-indexing (crawling and checksum comparison still touch all 1,000 files)
|
||||
|
||||
---
|
||||
|
||||
## Files to Modify
|
||||
|
||||
- `src/lib/server/pipeline/indexing.pipeline.ts` — add diff computation
|
||||
- `src/lib/server/pipeline/diff.ts` — `computeDiff` function (new file)
|
||||
- `src/lib/server/pipeline/diff.test.ts` — unit tests
|
||||
213
docs/features/TRUEREF-0018.md
Normal file
213
docs/features/TRUEREF-0018.md
Normal file
@@ -0,0 +1,213 @@
|
||||
# TRUEREF-0018 — Embedding Provider Configuration UI
|
||||
|
||||
**Priority:** P2
|
||||
**Status:** Pending
|
||||
**Depends On:** TRUEREF-0007, TRUEREF-0015
|
||||
**Blocks:** —
|
||||
|
||||
---
|
||||
|
||||
## Overview
|
||||
|
||||
A settings page within the web UI that allows users to configure the embedding provider without editing environment variables or config files. Supports switching between "None" (FTS5-only), OpenAI-compatible API, and local model (if available). Includes a live connectivity test before saving.
|
||||
|
||||
---
|
||||
|
||||
## Acceptance Criteria
|
||||
|
||||
- [ ] Settings page at `/settings` with embedding provider section
|
||||
- [ ] Provider selector: None / OpenAI-compatible / Local model
|
||||
- [ ] OpenAI provider form: base URL, API key (masked), model name, dimensions
|
||||
- [ ] "Test Connection" button that validates the API key and model before saving
|
||||
- [ ] Success/error feedback from connection test
|
||||
- [ ] Save configuration (calls `PUT /api/v1/settings/embedding`)
|
||||
- [ ] Current configuration loaded from `GET /api/v1/settings/embedding`
|
||||
- [ ] Warning shown when "None" is selected (search will be FTS5-only, lower quality)
|
||||
- [ ] Local model option shows whether `@xenova/transformers` is installed
|
||||
- [ ] Preset buttons for common providers (OpenAI, Ollama, Azure OpenAI)
|
||||
|
||||
---
|
||||
|
||||
## Provider Presets
|
||||
|
||||
```typescript
|
||||
const PROVIDER_PRESETS = [
|
||||
{
|
||||
name: 'OpenAI',
|
||||
baseUrl: 'https://api.openai.com/v1',
|
||||
model: 'text-embedding-3-small',
|
||||
dimensions: 1536,
|
||||
},
|
||||
{
|
||||
name: 'Ollama (local)',
|
||||
baseUrl: 'http://localhost:11434/v1',
|
||||
model: 'nomic-embed-text',
|
||||
dimensions: 768,
|
||||
},
|
||||
{
|
||||
name: 'Azure OpenAI',
|
||||
baseUrl: 'https://{resource}.openai.azure.com/openai/deployments/{deployment}/v1',
|
||||
model: 'text-embedding-3-small',
|
||||
dimensions: 1536,
|
||||
},
|
||||
];
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Settings Page Component
|
||||
|
||||
```svelte
|
||||
<!-- src/routes/settings/+page.svelte -->
|
||||
<script lang="ts">
	// Embedding-provider settings form: pick a provider, fill in the
	// OpenAI-compatible fields, optionally test connectivity, then save.
	// NOTE(review): the template below references PROVIDER_PRESETS; it must be
	// declared in (or imported into) this module — confirm its final home.

	let provider = $state<'none' | 'openai' | 'local'>('none');
	let baseUrl = $state('https://api.openai.com/v1');
	let apiKey = $state('');
	let model = $state('text-embedding-3-small');
	let dimensions = $state<number | undefined>(1536);
	let testStatus = $state<'idle' | 'testing' | 'ok' | 'error'>('idle');
	let testError = $state<string | null>(null);
	let saving = $state(false);

	/** POST the candidate configuration to the test endpoint before saving. */
	async function testConnection() {
		testStatus = 'testing';
		testError = null;
		try {
			const res = await fetch('/api/v1/settings/embedding/test', {
				method: 'POST',
				headers: { 'Content-Type': 'application/json' },
				body: JSON.stringify({ provider, openai: { baseUrl, apiKey, model, dimensions } }),
			});
			if (res.ok) {
				testStatus = 'ok';
			} else {
				const data = await res.json();
				testStatus = 'error';
				testError = data.error;
			}
		} catch (e) {
			testStatus = 'error';
			testError = (e as Error).message;
		}
	}

	/** Persist the configuration via PUT /api/v1/settings/embedding. */
	async function save() {
		saving = true;
		try {
			await fetch('/api/v1/settings/embedding', {
				method: 'PUT',
				headers: { 'Content-Type': 'application/json' },
				body: JSON.stringify({ provider, openai: { baseUrl, apiKey, model, dimensions } }),
			});
		} finally {
			// Mirror testConnection's error discipline: never leave the button
			// stuck on "Saving..." if the request throws.
			saving = false;
		}
	}
</script>
||||
|
||||
<div class="mx-auto max-w-2xl py-8">
|
||||
<h1 class="mb-6 text-2xl font-bold text-gray-900">Settings</h1>
|
||||
|
||||
<section class="rounded-xl border border-gray-200 bg-white p-6">
|
||||
<h2 class="mb-1 text-lg font-semibold">Embedding Provider</h2>
|
||||
<p class="mb-4 text-sm text-gray-500">
|
||||
Embeddings enable semantic search. Without them, only keyword search (FTS5) is used.
|
||||
</p>
|
||||
|
||||
<div class="mb-4 flex gap-2">
|
||||
{#each ['none', 'openai', 'local'] as p}
|
||||
<button
|
||||
onclick={() => provider = p}
|
||||
class="rounded-lg px-4 py-2 text-sm {provider === p
|
||||
? 'bg-blue-600 text-white'
|
||||
: 'border border-gray-200 text-gray-700 hover:bg-gray-50'}"
|
||||
>
|
||||
{p === 'none' ? 'None (FTS5 only)' : p === 'openai' ? 'OpenAI-compatible' : 'Local Model'}
|
||||
</button>
|
||||
{/each}
|
||||
</div>
|
||||
|
||||
{#if provider === 'none'}
|
||||
<div class="rounded-lg bg-amber-50 border border-amber-200 p-3 text-sm text-amber-700">
|
||||
Search will use keyword matching only. Results may be less relevant for complex questions.
|
||||
</div>
|
||||
{/if}
|
||||
|
||||
{#if provider === 'openai'}
|
||||
<div class="space-y-3">
|
||||
<!-- Preset buttons -->
|
||||
<div class="flex gap-2 flex-wrap">
|
||||
{#each PROVIDER_PRESETS as preset}
|
||||
<button
|
||||
onclick={() => { baseUrl = preset.baseUrl; model = preset.model; dimensions = preset.dimensions; }}
|
||||
class="rounded border border-gray-200 px-2.5 py-1 text-xs text-gray-600 hover:bg-gray-50"
|
||||
>
|
||||
{preset.name}
|
||||
</button>
|
||||
{/each}
|
||||
</div>
|
||||
|
||||
<label class="block">
|
||||
<span class="text-sm font-medium">Base URL</span>
|
||||
<input type="text" bind:value={baseUrl} class="mt-1 w-full rounded-lg border px-3 py-2 text-sm" />
|
||||
</label>
|
||||
|
||||
<label class="block">
|
||||
<span class="text-sm font-medium">API Key</span>
|
||||
<input type="password" bind:value={apiKey} class="mt-1 w-full rounded-lg border px-3 py-2 text-sm" placeholder="sk-..." />
|
||||
</label>
|
||||
|
||||
<label class="block">
|
||||
<span class="text-sm font-medium">Model</span>
|
||||
<input type="text" bind:value={model} class="mt-1 w-full rounded-lg border px-3 py-2 text-sm" />
|
||||
</label>
|
||||
|
||||
<label class="block">
|
||||
<span class="text-sm font-medium">Dimensions (optional override)</span>
|
||||
<input type="number" bind:value={dimensions} class="mt-1 w-full rounded-lg border px-3 py-2 text-sm" />
|
||||
</label>
|
||||
|
||||
<div class="flex items-center gap-3">
|
||||
<button onclick={testConnection} class="rounded-lg border border-gray-300 px-3 py-1.5 text-sm">
|
||||
{testStatus === 'testing' ? 'Testing...' : 'Test Connection'}
|
||||
</button>
|
||||
{#if testStatus === 'ok'}
|
||||
<span class="text-sm text-green-600">✓ Connection successful</span>
|
||||
{:else if testStatus === 'error'}
|
||||
<span class="text-sm text-red-600">✗ {testError}</span>
|
||||
{/if}
|
||||
</div>
|
||||
</div>
|
||||
{/if}
|
||||
|
||||
<div class="mt-6 flex justify-end">
|
||||
<button
|
||||
onclick={save}
|
||||
disabled={saving}
|
||||
class="rounded-lg bg-blue-600 px-4 py-2 text-sm text-white disabled:opacity-50"
|
||||
>
|
||||
{saving ? 'Saving...' : 'Save Settings'}
|
||||
</button>
|
||||
</div>
|
||||
</section>
|
||||
</div>
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Test Connection Endpoint
|
||||
|
||||
`POST /api/v1/settings/embedding/test`
|
||||
|
||||
Request body: same as `PUT /api/v1/settings/embedding`
|
||||
Action: create a provider instance, call `embed(['test'])`, return success/failure
|
||||
Response `200`: `{ "ok": true, "dimensions": 1536 }`
|
||||
Response `400`: `{ "error": "API key is invalid" }`
|
||||
|
||||
---
|
||||
|
||||
## Files to Create
|
||||
|
||||
- `src/routes/settings/+page.svelte`
|
||||
- `src/routes/api/v1/settings/embedding/test/+server.ts`
|
||||
|
||||
## Files to Modify
|
||||
|
||||
- `src/routes/api/v1/settings/embedding/+server.ts` — already defined in TRUEREF-0007
|
||||
Reference in New Issue
Block a user