chore: initial project scaffold

This commit is contained in:
Giancarmine Salucci
2026-03-22 17:08:15 +01:00
commit 18437dfa7c
53 changed files with 12002 additions and 0 deletions

View File

@@ -0,0 +1,274 @@
# TRUEREF-0013 — `trueref.json` Config File Support
**Priority:** P0
**Status:** Pending
**Depends On:** TRUEREF-0003
**Blocks:** TRUEREF-0009
---
## Overview
Support `trueref.json` configuration files placed in the root of a repository. This file allows repository owners to control how their repo is indexed: which folders to include, which to exclude, best-practice rules to inject into LLM responses, and versioning information. Also support `context7.json` as an alias for backward compatibility.
---
## Acceptance Criteria
- [ ] Parser for `trueref.json` and `context7.json` (same schema, `trueref.json` takes precedence)
- [ ] Config applied during crawling to filter files (TRUEREF-0003, TRUEREF-0004)
- [ ] Config stored in `repository_configs` table (TRUEREF-0001)
- [ ] Config validated with descriptive error messages (invalid paths, oversized rules, etc.)
- [ ] `rules` injected at the top of every `query-docs` response
- [ ] `previousVersions` registered as `RepositoryVersion` records
- [ ] JSON Schema published at `/api/v1/schema/trueref-config.json`
- [ ] Unit tests for parser and validator
---
## Config File Schema
```typescript
// src/lib/server/config/trueref-config.schema.ts
/**
 * Shape of a repository-level `trueref.json` (or `context7.json`) file.
 * Every field is optional; an empty object is a valid configuration.
 */
export interface TrueRefConfig {
/**
 * Override the display name for this library.
 * 1–100 characters.
 */
projectTitle?: string;
/**
 * Description of the library for search ranking.
 * 10–500 characters.
 */
description?: string;
/**
 * Folders to include in indexing (allowlist).
 * Each entry is a path prefix or regex string.
 * If empty/absent, all folders are included.
 * At most 50 entries, each up to 200 characters.
 * Examples: ["src/", "docs/", "^packages/core"]
 */
folders?: string[];
/**
 * Folders to exclude from indexing.
 * Applied after the `folders` allowlist.
 * At most 50 entries, each up to 200 characters.
 * Examples: ["test/", "fixtures/", "__mocks__"]
 */
excludeFolders?: string[];
/**
 * Exact filenames to exclude (no path, no regex).
 * At most 100 entries, each up to 200 characters.
 * Examples: ["README.md", "CHANGELOG.md", "jest.config.ts"]
 */
excludeFiles?: string[];
/**
 * Best practices / rules to inject at the top of every query-docs response.
 * Each rule: 5–500 characters.
 * Maximum 20 rules.
 */
rules?: string[];
/**
 * Previously released versions to make available for versioned queries.
 * At most 50 entries.
 */
previousVersions?: Array<{
tag: string; // git tag (e.g. "v1.2.3")
title: string; // human-readable (e.g. "Version 1.2.3")
}>;
}
```
---
## Validation Rules
```typescript
/**
 * Limits applied to each field of a repository config file.
 * `minLength` / `maxLength` are measured in characters; for array fields they
 * apply to every individual entry, while `maxItems` caps the array itself.
 */
const CONFIG_CONSTRAINTS = {
  // Display-name override.
  projectTitle: {
    minLength: 1,
    maxLength: 100,
  },
  // Free-text description used for search ranking.
  description: {
    minLength: 10,
    maxLength: 500,
  },
  // Include allowlist; maxLength is per entry.
  folders: {
    maxItems: 50,
    maxLength: 200,
  },
  // Exclude list for folders; maxLength is per entry.
  excludeFolders: {
    maxItems: 50,
    maxLength: 200,
  },
  // Exclude list for exact filenames; maxLength is per entry.
  excludeFiles: {
    maxItems: 100,
    maxLength: 200,
  },
  // Best-practice rules; length bounds are per rule.
  rules: {
    maxItems: 20,
    minLength: 5,
    maxLength: 500,
  },
  // Previously released versions.
  previousVersions: {
    maxItems: 50,
  },
  // Accepted tag shapes: optional "v", MAJOR.MINOR, optional .PATCH, optional "-suffix".
  versionTag: {
    pattern: /^v?\d+\.\d+(\.\d+)?(-.*)?$/,
  },
};
```
---
## Parser Implementation
```typescript
// src/lib/server/config/config-parser.ts
/**
 * Result of parsing a repository config file.
 */
export interface ParsedConfig {
/** The fields that survived validation. */
config: TrueRefConfig;
/** Which of the two supported config files the content is attributed to. */
source: 'trueref.json' | 'context7.json';
/** Human-readable notes about values that were ignored or adjusted during parsing. */
warnings: string[];
}
/**
 * Parse and sanitize the text of a `trueref.json` / `context7.json` file.
 *
 * Parsing is lenient: a field that is malformed or out of bounds is dropped
 * or truncated and a message is appended to `warnings`, so that one bad field
 * never discards the rest of the config. Only content that is not a JSON
 * object at all is treated as a hard error.
 *
 * Enforced limits:
 * - `projectTitle`: 1–100 characters (longer values are truncated).
 * - `description`: 10–500 characters (longer values are truncated).
 * - `folders` / `excludeFolders`: at most 50 entries, 1–200 characters each.
 * - `excludeFiles`: at most 100 entries, 1–200 characters each.
 * - `rules`: at most 20 entries, 5–500 characters each (longer rules are truncated).
 * - `previousVersions`: at most 50 entries; `tag` must look like
 *   `v1.2`, `1.2.3` or `v1.2.3-suffix`. Only `tag` and `title` are kept.
 *
 * @param content  Raw text of the config file.
 * @param filename Name or path of the file; used in error messages and to
 *                 decide the reported `source`.
 * @returns The sanitized config, the detected source file and all warnings.
 * @throws ConfigParseError If `content` is not valid JSON, or if the
 *         top-level JSON value is not an object (arrays included).
 */
export function parseConfigFile(content: string, filename: string): ParsedConfig {
  let raw: unknown;
  try {
    raw = JSON.parse(content);
  } catch (e) {
    const reason = e instanceof Error ? e.message : String(e);
    throw new ConfigParseError(`${filename} is not valid JSON: ${reason}`);
  }
  // Arrays also report typeof 'object', so they must be rejected explicitly.
  if (typeof raw !== 'object' || raw === null || Array.isArray(raw)) {
    throw new ConfigParseError(`${filename} must be a JSON object`);
  }

  const config = raw as Record<string, unknown>;
  const validated: TrueRefConfig = {};
  const warnings: string[] = [];
  const versionTagPattern = /^v?\d+\.\d+(\.\d+)?(-.*)?$/;

  // Render an offending value for a warning. JSON keeps objects readable
  // (instead of "[object Object]"); the cap keeps messages short.
  const show = (value: unknown): string => {
    const text = JSON.stringify(value) ?? String(value);
    return text.length > 80 ? `${text.slice(0, 77)}...` : text;
  };

  // projectTitle
  const rawTitle = config.projectTitle;
  if (rawTitle !== undefined) {
    if (typeof rawTitle !== 'string') {
      warnings.push('projectTitle must be a string, ignoring');
    } else if (rawTitle.length < 1) {
      warnings.push('projectTitle must not be empty, ignoring');
    } else if (rawTitle.length > 100) {
      validated.projectTitle = rawTitle.slice(0, 100);
      warnings.push('projectTitle truncated to 100 characters');
    } else {
      validated.projectTitle = rawTitle;
    }
  }

  // description
  const rawDescription = config.description;
  if (rawDescription !== undefined) {
    if (typeof rawDescription !== 'string') {
      warnings.push('description must be a string, ignoring');
    } else if (rawDescription.length < 10) {
      warnings.push('description must be at least 10 characters, ignoring');
    } else if (rawDescription.length > 500) {
      validated.description = rawDescription.slice(0, 500);
      warnings.push('description truncated to 500 characters');
    } else {
      validated.description = rawDescription;
    }
  }

  // folders / excludeFolders / excludeFiles — validated as string arrays
  for (const field of ['folders', 'excludeFolders', 'excludeFiles'] as const) {
    const rawList = config[field];
    if (rawList === undefined) {
      continue;
    }
    if (!Array.isArray(rawList)) {
      warnings.push(`${field} must be an array, ignoring`);
      continue;
    }
    const maxItems = field === 'excludeFiles' ? 100 : 50;
    const items: unknown[] = rawList;
    const accepted: string[] = [];
    for (const item of items) {
      if (typeof item !== 'string') {
        warnings.push(`${field} entry must be a string, skipping: ${show(item)}`);
      } else if (item.length === 0 || item.length > 200) {
        // A path filter cannot be truncated without changing what it matches,
        // and an empty prefix would match everything, so both are skipped.
        warnings.push(`${field} entry must be 1-200 characters, skipping: ${show(item)}`);
      } else {
        accepted.push(item);
      }
    }
    if (accepted.length > maxItems) {
      warnings.push(
        `${field} limited to ${maxItems} entries, dropping ${accepted.length - maxItems}`
      );
    }
    validated[field] = accepted.slice(0, maxItems);
  }

  // rules
  const rawRules = config.rules;
  if (rawRules !== undefined) {
    if (!Array.isArray(rawRules)) {
      warnings.push('rules must be an array, ignoring');
    } else {
      const candidates: unknown[] = rawRules;
      const rules: string[] = [];
      for (const rule of candidates) {
        if (typeof rule !== 'string') {
          warnings.push(`rules entry must be a string, skipping: ${show(rule)}`);
        } else if (rule.length < 5) {
          warnings.push(`rules entry must be at least 5 characters, skipping: ${show(rule)}`);
        } else if (rule.length > 500) {
          rules.push(rule.slice(0, 500));
          warnings.push('rules entry truncated to 500 characters');
        } else {
          rules.push(rule);
        }
      }
      if (rules.length > 20) {
        warnings.push(`rules limited to 20 entries, dropping ${rules.length - 20}`);
      }
      validated.rules = rules.slice(0, 20);
    }
  }

  // previousVersions
  const rawVersions = config.previousVersions;
  if (rawVersions !== undefined) {
    if (!Array.isArray(rawVersions)) {
      warnings.push('previousVersions must be an array, ignoring');
    } else {
      const entries: unknown[] = rawVersions;
      const versions: Array<{ tag: string; title: string }> = [];
      for (const entry of entries) {
        if (typeof entry !== 'object' || entry === null) {
          warnings.push(`previousVersions entry must be an object, skipping: ${show(entry)}`);
          continue;
        }
        const { tag, title } = entry as Record<string, unknown>;
        if (typeof tag !== 'string' || typeof title !== 'string') {
          warnings.push(
            `previousVersions entry needs string "tag" and "title", skipping: ${show(entry)}`
          );
          continue;
        }
        if (!versionTagPattern.test(tag)) {
          warnings.push(`previousVersions tag is not a valid version tag, skipping: ${show(tag)}`);
          continue;
        }
        // Copy only the known keys so unvalidated extras are not carried along.
        versions.push({ tag, title });
      }
      if (versions.length > 50) {
        warnings.push(`previousVersions limited to 50 entries, dropping ${versions.length - 50}`);
      }
      validated.previousVersions = versions.slice(0, 50);
    }
  }

  // Decide the source from the final path segment so that callers may pass
  // either a bare filename or a repository-relative path.
  const baseName = filename.split(/[\\/]/).pop() ?? filename;
  return {
    config: validated,
    source: baseName.startsWith('trueref') ? 'trueref.json' : 'context7.json',
    warnings,
  };
}
```
---
## Integration with Crawlers
During crawl, the crawler:
1. Looks for `trueref.json` first, then `context7.json` in the root.
2. Downloads and parses the config before downloading other files.
3. Applies `folders`, `excludeFolders`, `excludeFiles` to the file list.
4. Returns the parsed config alongside crawl results.
The `IndexingPipeline` stores the config in the `repository_configs` table and registers each entry of `previousVersions` as a `RepositoryVersion` record in state `pending`; where version indexing is enabled, each pending record triggers an additional indexing job.
---
## Rules Injection
When `query-docs` returns results, `rules` from `repository_configs` are prepended:
```typescript
// In formatters.ts
/**
 * Assemble the text returned by `query-docs`.
 *
 * When the repository config carries at least one rule, a
 * "Library Best Practices" bullet list becomes the first section.
 * Sections are separated by a horizontal-rule line.
 *
 * @param snippets Snippets to render after the rules section.
 * @param config   Stored repository config, or `null` when there is none.
 * @returns The joined sections; an empty string when there are none.
 */
function buildContextResponse(
  snippets: Snippet[],
  config: RepositoryConfig | null
): string {
  const sections: string[] = [];
  const rules = config?.rules;
  if (rules && rules.length > 0) {
    const bullets = rules.map((rule) => `- ${rule}`).join('\n');
    sections.push('## Library Best Practices\n' + bullets);
  }
  // ... append snippet content
  return sections.join('\n\n---\n\n');
}
```
---
## JSON Schema Endpoint
`GET /api/v1/schema/trueref-config.json`
Returns the full JSON Schema for `trueref.json` so that editors and IDEs (VS Code, etc.) can validate the file and offer autocomplete. For example:
```json
{
"$schema": "http://json-schema.org/draft-07/schema#",
"title": "TrueRef Repository Configuration",
"type": "object",
"properties": {
"projectTitle": {
"type": "string",
"minLength": 1,
"maxLength": 100,
"description": "Override the display name for this library"
},
...
}
}
```
---
## Files to Create
- `src/lib/server/config/config-parser.ts`
- `src/lib/server/config/config-validator.ts`
- `src/lib/server/config/trueref-config.schema.ts` — TypeScript types
- `src/lib/server/config/trueref-config.json` — JSON Schema
- `src/routes/api/v1/schema/trueref-config.json/+server.ts`
- `src/lib/server/config/config-parser.test.ts`