feat(TRUEREF-0009-0010): implement indexing pipeline job queue and public REST API

- SQLite-backed job queue with sequential processing and startup recovery
- Atomic snippet replacement in single transaction
- context7-compatible GET /api/v1/libs/search and GET /api/v1/context
- Token budget limiting and JSON/txt response format support
- CORS headers on all API routes via SvelteKit handle hook
- Library ID parser supporting /owner/repo and /owner/repo/version

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
Giancarmine Salucci
2026-03-23 09:06:35 +01:00
parent d3d577a2e2
commit 21f6acbfa3
9 changed files with 1007 additions and 2 deletions

View File

@@ -0,0 +1,277 @@
/**
* Unit tests for API formatters (TRUEREF-0010).
*
* Covers state mapping, library search JSON formatting, and context
* JSON/txt response formatting.
*/
import { describe, it, expect } from 'vitest';
import {
mapState,
formatLibrarySearchJson,
formatContextJson,
formatContextTxt
} from './formatters';
import type { LibrarySearchResult } from '$lib/server/search/search.service';
import type { SnippetSearchResult } from '$lib/server/search/search.service';
import type { Repository, RepositoryVersion, Snippet } from '$lib/types';
// ---------------------------------------------------------------------------
// Helpers
// ---------------------------------------------------------------------------
/**
 * Build a fully populated `Repository` fixture describing /facebook/react.
 *
 * @param overrides - Fields that should replace the fixture defaults.
 * @returns The default fixture with `overrides` applied on top.
 */
function makeRepo(overrides: Partial<Repository> = {}): Repository {
  const defaults: Repository = {
    id: '/facebook/react',
    title: 'React',
    description: 'A JavaScript library for building user interfaces',
    source: 'github',
    sourceUrl: 'https://github.com/facebook/react',
    branch: 'main',
    state: 'indexed',
    totalSnippets: 1247,
    totalTokens: 142000,
    trustScore: 9.2,
    benchmarkScore: 87,
    stars: 228000,
    githubToken: null,
    lastIndexedAt: new Date('2026-03-22T10:00:00Z'),
    createdAt: new Date('2024-01-01T00:00:00Z'),
    updatedAt: new Date('2026-03-22T10:00:00Z')
  };

  // Later spreads win, so any override replaces the matching default.
  return { ...defaults, ...overrides };
}
/**
 * Build an indexed `RepositoryVersion` fixture of /facebook/react.
 *
 * @param tag - Version tag; also used as the last segment of the version id.
 */
function makeVersion(tag: string): RepositoryVersion {
  const version: RepositoryVersion = {
    id: `/facebook/react/${tag}`,
    repositoryId: '/facebook/react',
    tag,
    title: null,
    state: 'indexed',
    totalSnippets: 100,
    indexedAt: new Date(),
    createdAt: new Date()
  };
  return version;
}
/**
 * Build a `Snippet` fixture: by default a small TSX code snippet belonging
 * to /facebook/react.
 *
 * @param overrides - Fields that should replace the fixture defaults.
 */
function makeSnippet(overrides: Partial<Snippet> = {}): Snippet {
  const defaults: Snippet = {
    id: 'snippet-1',
    documentId: 'doc-1',
    repositoryId: '/facebook/react',
    versionId: null,
    type: 'code',
    title: 'Basic Component',
    content: 'function MyComponent() {\n return <div>Hello</div>;\n}',
    language: 'tsx',
    breadcrumb: 'Getting Started > Components',
    tokenCount: 45,
    createdAt: new Date()
  };

  // Later spreads win, so any override replaces the matching default.
  return { ...defaults, ...overrides };
}
/**
 * Wrap a snippet in a `SnippetSearchResult` with a fixed score of -1.5 and a
 * repository stub titled "React" whose id is the snippet's `repositoryId`.
 */
function makeSnippetResult(snippet: Snippet): SnippetSearchResult {
  const { repositoryId } = snippet;
  const result: SnippetSearchResult = {
    snippet,
    score: -1.5,
    repository: { id: repositoryId, title: 'React' }
  };
  return result;
}
// ---------------------------------------------------------------------------
// mapState
// ---------------------------------------------------------------------------
describe('mapState', () => {
  // Each row is [TrueRef state, expected context7 state].
  const mappings = [
    ['indexed', 'finalized'],
    ['pending', 'initial'],
    ['indexing', 'initial'],
    ['error', 'error']
  ] as const;

  for (const [trueRefState, context7State] of mappings) {
    it(`maps ${trueRefState} → ${context7State}`, () => {
      expect(mapState(trueRefState)).toBe(context7State);
    });
  }
});
// ---------------------------------------------------------------------------
// formatLibrarySearchJson
// ---------------------------------------------------------------------------
describe('formatLibrarySearchJson', () => {
  it('returns results array with correct shape', () => {
    const input: LibrarySearchResult[] = [
      {
        repository: makeRepo(),
        versions: ['v18.3.0', 'v17.0.2'].map((tag) => makeVersion(tag)),
        score: 150
      }
    ];

    const { results: formatted } = formatLibrarySearchJson(input);

    expect(formatted).toHaveLength(1);
    const library = formatted[0];
    expect(library.id).toBe('/facebook/react');
    expect(library.title).toBe('React');
    expect(library.state).toBe('finalized');
    expect(library.totalTokens).toBe(142000);
    expect(library.totalSnippets).toBe(1247);
    expect(library.versions).toEqual(['v18.3.0', 'v17.0.2']);
    expect(library.stars).toBe(228000);
    expect(library.lastUpdateDate).toBe('2026-03-22T10:00:00.000Z');
    expect(library.source).toBe('https://github.com/facebook/react');
  });

  it('returns empty results array when no results', () => {
    const { results: formatted } = formatLibrarySearchJson([]);
    expect(formatted).toEqual([]);
  });

  it('maps non-indexed state to initial', () => {
    const pendingRepo = makeRepo({ state: 'pending' });
    const input: LibrarySearchResult[] = [{ repository: pendingRepo, versions: [], score: 0 }];

    const { results: formatted } = formatLibrarySearchJson(input);

    expect(formatted[0].state).toBe('initial');
  });

  it('handles null lastIndexedAt', () => {
    const neverIndexedRepo = makeRepo({ lastIndexedAt: null });
    const input: LibrarySearchResult[] = [
      { repository: neverIndexedRepo, versions: [], score: 0 }
    ];

    const { results: formatted } = formatLibrarySearchJson(input);

    expect(formatted[0].lastUpdateDate).toBeNull();
  });
});
// ---------------------------------------------------------------------------
// formatContextJson
// ---------------------------------------------------------------------------
describe('formatContextJson', () => {
  /** Format a single snippet fixture with no rules attached. */
  const formatOne = (snippet: Snippet) => formatContextJson([makeSnippetResult(snippet)], []);

  it('formats code snippets correctly', () => {
    const body = formatOne(makeSnippet({ type: 'code' }));

    expect(body.snippets).toHaveLength(1);
    const first = body.snippets[0];
    expect(first.type).toBe('code');
    // Narrow the union so the code-only fields are accessible.
    if (first.type === 'code') {
      expect(first.title).toBe('Basic Component');
      expect(first.language).toBe('tsx');
      expect(first.codeList).toHaveLength(1);
      expect(first.codeList[0].code).toContain('MyComponent');
      expect(first.pageTitle).toBe('Getting Started');
    }
  });

  it('formats info snippets correctly', () => {
    const infoSnippet = makeSnippet({
      id: 'info-1',
      type: 'info',
      title: null,
      content: 'React components let you split the UI...',
      language: null,
      breadcrumb: 'Core Concepts > Components'
    });

    const first = formatOne(infoSnippet).snippets[0];

    expect(first.type).toBe('info');
    // Narrow the union so the info-only fields are accessible.
    if (first.type === 'info') {
      expect(first.text).toContain('React components');
      expect(first.breadcrumb).toBe('Core Concepts > Components');
      expect(first.pageId).toBe('info-1');
    }
  });

  it('includes rules in response', () => {
    const libraryRules = ['Always use functional components', 'Use hooks for state management'];
    const body = formatContextJson([], libraryRules);
    expect(body.rules).toEqual(libraryRules);
  });

  it('computes totalTokens correctly', () => {
    const ranked = [
      makeSnippetResult(makeSnippet({ id: 'a', tokenCount: 100 })),
      makeSnippetResult(makeSnippet({ id: 'b', tokenCount: 200 }))
    ];
    const body = formatContextJson(ranked, []);
    expect(body.totalTokens).toBe(300);
  });

  it('handles null tokenCount in totalTokens sum', () => {
    const body = formatOne(makeSnippet({ tokenCount: null }));
    expect(body.totalTokens).toBe(0);
  });
});
// ---------------------------------------------------------------------------
// formatContextTxt
// ---------------------------------------------------------------------------
describe('formatContextTxt', () => {
  /** Render a single snippet fixture (built from `overrides`) without rules. */
  const renderSnippet = (overrides: Partial<Snippet> = {}): string =>
    formatContextTxt([makeSnippetResult(makeSnippet(overrides))], []);

  it('prepends rules section when rules are present', () => {
    const output = formatContextTxt([], ['Always use functional components']);
    expect(output).toContain('## Library Rules');
    expect(output).toContain('- Always use functional components');
  });

  it('omits rules section when no rules', () => {
    const output = renderSnippet();
    expect(output).not.toContain('## Library Rules');
  });

  it('formats code snippets with fenced code block', () => {
    const output = renderSnippet({ type: 'code', language: 'tsx' });
    expect(output).toContain('```tsx');
    expect(output).toContain('MyComponent');
    expect(output).toContain('```');
  });

  it('formats info snippets as plain text', () => {
    const output = renderSnippet({
      type: 'info',
      content: 'React is a UI library.',
      language: null
    });
    expect(output).toContain('React is a UI library.');
    expect(output).not.toContain('```');
  });

  it('includes breadcrumb as italic line', () => {
    const output = renderSnippet({ breadcrumb: 'Getting Started > Components' });
    expect(output).toContain('*Getting Started > Components*');
  });

  it('separates snippets with ---', () => {
    const codeResult = makeSnippetResult(makeSnippet({ id: 'a' }));
    const infoResult = makeSnippetResult(makeSnippet({ id: 'b', type: 'info', content: 'hello' }));
    const output = formatContextTxt([codeResult, infoResult], []);
    expect(output).toContain('---');
  });

  it('returns empty string for empty inputs with no rules', () => {
    expect(formatContextTxt([], [])).toBe('');
  });
});

View File

@@ -0,0 +1,237 @@
/**
* Response formatters for the context7-compatible REST API.
*
* Provides two output shapes for each endpoint:
* - JSON (`type=json`, default): structured data for programmatic consumers.
* - Text (`type=txt`): plain Markdown formatted for direct LLM injection.
*
* State mapping (TrueRef → context7):
* pending → initial
* indexing → initial
* indexed → finalized
* error → error
*/
import type { Repository, RepositoryVersion, Snippet } from '$lib/types';
import type { LibrarySearchResult } from '$lib/server/search/search.service';
import type { SnippetSearchResult } from '$lib/server/search/search.service';
// ---------------------------------------------------------------------------
// State mapping
// ---------------------------------------------------------------------------
/** Indexing states used internally by TrueRef. */
type TrueRefState = 'pending' | 'indexing' | 'indexed' | 'error';

/** Library states exposed through the context7-compatible API. */
type Context7State = 'initial' | 'finalized' | 'error';

/**
 * Translate a TrueRef indexing state into its context7 equivalent.
 *
 * @param state - Internal repository state.
 * @returns `'finalized'` for `'indexed'`, `'error'` for `'error'`, and
 *          `'initial'` for every other value.
 */
export function mapState(state: TrueRefState): Context7State {
  if (state === 'indexed') return 'finalized';
  if (state === 'error') return 'error';
  // 'pending', 'indexing' and any unexpected runtime value all count as not ready.
  return 'initial';
}
// ---------------------------------------------------------------------------
// CORS headers
// ---------------------------------------------------------------------------
/**
 * CORS response headers exported for the API layer.
 *
 * - Any origin is allowed (`*`).
 * - Allowed methods: GET, POST, PATCH, DELETE and OPTIONS.
 * - Allowed request headers: `Content-Type` and `Authorization`.
 *
 * Declared `as const` so the keys and values keep their literal types.
 * NOTE(review): where these headers are attached to responses is not visible
 * in this module — confirm against the code that imports the constant.
 */
export const CORS_HEADERS = {
'Access-Control-Allow-Origin': '*',
'Access-Control-Allow-Methods': 'GET, POST, PATCH, DELETE, OPTIONS',
'Access-Control-Allow-Headers': 'Content-Type, Authorization'
} as const;
// ---------------------------------------------------------------------------
// /api/v1/libs/search — JSON response shape
// ---------------------------------------------------------------------------
/**
 * One library entry in the `/api/v1/libs/search` JSON response.
 *
 * Built from a repository and its versions by `formatSingleLibraryJson`;
 * nullable fields are `null` when the repository has no value for them.
 */
export interface LibrarySearchJsonResult {
/** Repository id, e.g. "/facebook/react". */
id: string;
/** Repository title. */
title: string;
/** Repository description, or `null` when missing. */
description: string | null;
/** Repository branch, or `null` when missing. */
branch: string | null;
/** ISO-8601 string of the repository's `lastIndexedAt`, or `null` if it is not set. */
lastUpdateDate: string | null;
/** Repository state translated to the context7 vocabulary via `mapState`. */
state: Context7State;
/** Total token count recorded for the repository, or `null`. */
totalTokens: number | null;
/** Total snippet count recorded for the repository, or `null`. */
totalSnippets: number | null;
/** Star count recorded for the repository, or `null`. */
stars: number | null;
/** Trust score recorded for the repository, or `null`. */
trustScore: number | null;
/** Benchmark score recorded for the repository, or `null`. */
benchmarkScore: number | null;
/** Tags of the repository's versions, in the order they were supplied. */
versions: string[];
/** The repository's `sourceUrl`. */
source: string;
}
/** JSON body of the `/api/v1/libs/search` endpoint. */
export interface LibrarySearchJsonResponse {
/** One entry per search hit, in the order of the input results. */
results: LibrarySearchJsonResult[];
}
/**
 * Build the context7-compatible JSON body for a library search.
 *
 * @param results - Internal search hits, in the order they should be returned.
 * @returns A response whose `results` has one formatted entry per hit;
 *          the hit's `score` is not part of the output.
 */
export function formatLibrarySearchJson(results: LibrarySearchResult[]): LibrarySearchJsonResponse {
  const formatted: LibrarySearchJsonResult[] = [];
  for (const hit of results) {
    formatted.push(formatSingleLibraryJson(hit.repository, hit.versions));
  }
  return { results: formatted };
}
/**
 * Convert one repository and its versions into a context7-compatible
 * library entry.
 *
 * @param repository - Repository to describe.
 * @param versions - Versions of the repository; only their tags are emitted.
 * @returns The library entry, with `null` for optional fields that have no
 *          value and the state translated via `mapState`.
 */
export function formatSingleLibraryJson(
  repository: Repository,
  versions: RepositoryVersion[]
): LibrarySearchJsonResult {
  const { lastIndexedAt } = repository;

  const versionTags: string[] = [];
  for (const version of versions) {
    versionTags.push(version.tag);
  }

  const entry: LibrarySearchJsonResult = {
    id: repository.id,
    title: repository.title,
    description: repository.description ?? null,
    branch: repository.branch ?? null,
    lastUpdateDate: lastIndexedAt ? lastIndexedAt.toISOString() : null,
    state: mapState(repository.state as TrueRefState),
    totalTokens: repository.totalTokens ?? null,
    totalSnippets: repository.totalSnippets ?? null,
    stars: repository.stars ?? null,
    trustScore: repository.trustScore ?? null,
    benchmarkScore: repository.benchmarkScore ?? null,
    versions: versionTags,
    source: repository.sourceUrl
  };
  return entry;
}
// ---------------------------------------------------------------------------
// /api/v1/context — JSON response shapes
// ---------------------------------------------------------------------------
/** A single code listing inside a code snippet. */
export interface CodeListItem {
/** Language of the listing; `formatContextJson` uses an empty string when the snippet has none. */
language: string;
/** The code itself (the snippet's content). */
code: string;
}
/** JSON shape of a snippet whose type is `'code'`. */
export interface CodeSnippetJson {
/** Discriminant of the `SnippetJson` union. */
type: 'code';
/** Snippet title, or `null`. */
title: string | null;
/** Filled by `formatContextJson` with the snippet's breadcrumb, or `null`. */
description: string | null;
/** Snippet language, or `null`. */
language: string | null;
/** Code listings; `formatContextJson` emits exactly one per snippet. */
codeList: CodeListItem[];
/** Snippet id. */
id: string;
/** Token count of the snippet, or `null` when unknown. */
tokenCount: number | null;
/** First `>`-separated segment of the breadcrumb, or `null`. */
pageTitle: string | null;
}
/** JSON shape of every snippet whose type is not `'code'`. */
export interface InfoSnippetJson {
/** Discriminant of the `SnippetJson` union. */
type: 'info';
/** The snippet's content. */
text: string;
/** Snippet breadcrumb, or `null`. */
breadcrumb: string | null;
/** Filled by `formatContextJson` with the snippet's id. */
pageId: string;
/** Token count of the snippet, or `null` when unknown. */
tokenCount: number | null;
}
/** Any snippet in a context response; discriminate on `type`. */
export type SnippetJson = CodeSnippetJson | InfoSnippetJson;
/** JSON body of the `/api/v1/context` endpoint. */
export interface ContextJsonResponse {
/** Formatted snippets, in the order of the input results. */
snippets: SnippetJson[];
/** Rules passed through unchanged. */
rules: string[];
/** Sum of the snippets' token counts, counting `null` as 0. */
totalTokens: number;
}
/**
 * Build the context7-compatible JSON body for a context query.
 *
 * Snippets of type `'code'` become `CodeSnippetJson` entries; every other
 * snippet becomes an `InfoSnippetJson` entry. Input order is preserved.
 *
 * @param snippets - Ranked snippet search results (already token-budget trimmed).
 * @param rules - Rules from `trueref.json` / `repository_configs`, passed through as-is.
 * @returns The formatted snippets, the rules, and the sum of all snippet
 *          token counts (a `null` count contributes 0).
 */
export function formatContextJson(
  snippets: SnippetSearchResult[],
  rules: string[]
): ContextJsonResponse {
  const formatted: SnippetJson[] = [];
  let tokenSum = 0;

  for (const { snippet } of snippets) {
    tokenSum += snippet.tokenCount ?? 0;

    if (snippet.type !== 'code') {
      formatted.push({
        type: 'info',
        text: snippet.content,
        breadcrumb: snippet.breadcrumb ?? null,
        pageId: snippet.id,
        tokenCount: snippet.tokenCount ?? null
      });
      continue;
    }

    formatted.push({
      type: 'code',
      title: snippet.title ?? null,
      // The breadcrumb doubles as the human-readable description.
      description: snippet.breadcrumb ?? null,
      language: snippet.language ?? null,
      codeList: [{ language: snippet.language ?? '', code: snippet.content }],
      id: snippet.id,
      tokenCount: snippet.tokenCount ?? null,
      pageTitle: extractPageTitle(snippet.breadcrumb)
    });
  }

  return { snippets: formatted, rules, totalTokens: tokenSum };
}
/**
 * Return the first `>`-separated segment of a breadcrumb, trimmed.
 * e.g. "Getting Started > Components" → "Getting Started"
 *
 * @param breadcrumb - Breadcrumb string; may be missing.
 * @returns The trimmed leading segment, or `null` when the breadcrumb is
 *          missing/empty or the segment is blank.
 */
function extractPageTitle(breadcrumb: string | null | undefined): string | null {
  if (breadcrumb === null || breadcrumb === undefined || breadcrumb === '') {
    return null;
  }

  const separatorAt = breadcrumb.indexOf('>');
  const leadingSegment = separatorAt === -1 ? breadcrumb : breadcrumb.slice(0, separatorAt);
  const title = leadingSegment.trim();

  return title === '' ? null : title;
}
// ---------------------------------------------------------------------------
// /api/v1/context — txt response
// ---------------------------------------------------------------------------
/**
 * Pick a Markdown code fence that cannot be closed by the content itself:
 * at least three backticks and one more than the longest backtick run inside.
 */
function codeFenceFor(content: string): string {
  let longestRun = 0;
  for (const run of content.match(/`+/g) ?? []) {
    longestRun = Math.max(longestRun, run.length);
  }
  return '`'.repeat(Math.max(3, longestRun + 1));
}

/**
 * Format snippets as plain Markdown text suitable for direct LLM injection.
 *
 * Layout:
 * - An optional `## Library Rules` bullet list when `rules` is non-empty.
 * - One section per snippet: optional `### title`, optional `*breadcrumb*`,
 *   then the content (inside a fenced code block for `'code'` snippets).
 * - Sections are separated by a `---` line surrounded by blank lines.
 *
 * Code fences grow beyond three backticks when the content itself contains
 * backtick runs (e.g. Markdown documentation), so embedded fences cannot
 * terminate the block early. Snippets that would render as nothing at all
 * (no title, no breadcrumb, empty content) are skipped instead of leaving an
 * empty section between two separators.
 *
 * @param snippets - Ranked snippet search results (already token-budget trimmed).
 * @param rules - Rules from `trueref.json` / `repository_configs`.
 * @returns The Markdown document, or an empty string when there is nothing to render.
 */
export function formatContextTxt(snippets: SnippetSearchResult[], rules: string[]): string {
  const sections: string[] = [];

  if (rules.length > 0) {
    sections.push('## Library Rules\n' + rules.map((r) => `- ${r}`).join('\n'));
  }

  for (const { snippet } of snippets) {
    const lines: string[] = [];
    if (snippet.title) lines.push(`### ${snippet.title}`);
    if (snippet.breadcrumb) lines.push(`*${snippet.breadcrumb}*`);

    if (snippet.type === 'code') {
      const fence = codeFenceFor(snippet.content);
      lines.push(`${fence}${snippet.language ?? ''}\n${snippet.content}\n${fence}`);
    } else if (snippet.content) {
      lines.push(snippet.content);
    }

    // Nothing to show for this snippet: do not emit an empty section.
    if (lines.length > 0) sections.push(lines.join('\n'));
  }

  // Joining with the separator avoids having to strip a trailing '---'.
  return sections.join('\n\n---\n\n');
}

View File

@@ -0,0 +1,48 @@
/**
* Unit tests for parseLibraryId (TRUEREF-0010).
*/
import { describe, it, expect } from 'vitest';
import { parseLibraryId } from './library-id';
describe('parseLibraryId', () => {
  it('parses /owner/repo (default branch)', () => {
    const { repositoryId, version } = parseLibraryId('/facebook/react');
    expect(repositoryId).toBe('/facebook/react');
    expect(version).toBeUndefined();
  });

  // Each row is [test title, input, expected repositoryId, expected version].
  const versionedCases = [
    ['parses /owner/repo/version', '/facebook/react/v18.3.0', '/facebook/react', 'v18.3.0'],
    [
      'parses /owner/repo/version with dot-separated version',
      '/sveltejs/svelte/4.0.0',
      '/sveltejs/svelte',
      '4.0.0'
    ],
    ['parses /owner/repo/branch-name as version', '/vercel/next.js/canary', '/vercel/next.js', 'canary']
  ] as const;

  for (const [title, input, expectedRepositoryId, expectedVersion] of versionedCases) {
    it(title, () => {
      const parsed = parseLibraryId(input);
      expect(parsed.repositoryId).toBe(expectedRepositoryId);
      expect(parsed.version).toBe(expectedVersion);
    });
  }

  // Each row is [test title, invalid input].
  const invalidCases = [
    ['throws on missing leading slash', 'facebook/react'],
    ['throws on empty string', ''],
    ['throws on single-segment path', '/react'],
    ['throws on path with only a slash', '/']
  ] as const;

  for (const [title, input] of invalidCases) {
    it(title, () => {
      expect(() => parseLibraryId(input)).toThrow('Invalid libraryId');
    });
  }
});

View File

@@ -0,0 +1,32 @@
/**
* Library ID parsing utilities.
*
* Parses the `libraryId` query parameter used by the context7-compatible API.
* Supports two formats:
* - /owner/repo (default branch)
* - /owner/repo/version (specific version tag)
*/
/**
 * Result of splitting a `libraryId` into repository and optional version.
 */
export interface ParsedLibraryId {
/**
 * The canonical repository ID: the first two path segments including the
 * leading slash, e.g. "/facebook/react".
 */
repositoryId: string;
/**
 * The version tag, e.g. "v18.3.0": everything after the third slash, so it
 * may itself contain slashes. Absent for default branch queries.
 */
version?: string;
}
/**
 * Split a `libraryId` into its repository ID and optional version.
 *
 * Accepted shapes are `/owner/repo` and `/owner/repo/version`; anything
 * after the third slash is treated as the version.
 *
 * @param libraryId - Raw identifier, e.g. "/facebook/react/v18.3.0".
 * @returns The repository ID plus the version, which is `undefined` when the
 *          identifier has no third segment.
 * @throws Error when the string does not match the expected pattern.
 */
export function parseLibraryId(libraryId: string): ParsedLibraryId {
  const parsed = /^(\/[^/]+\/[^/]+)(\/(.+))?$/.exec(libraryId);
  if (parsed === null) {
    throw new Error(`Invalid libraryId: ${libraryId}`);
  }

  // Group 1 is "/owner/repo"; group 2 (skipped) is "/version"; group 3 is "version".
  const [, repositoryId, , version] = parsed;
  return { repositoryId, version };
}

View File

@@ -0,0 +1,75 @@
/**
* Unit tests for selectSnippetsWithinBudget (TRUEREF-0010).
*/
import { describe, it, expect } from 'vitest';
import { selectSnippetsWithinBudget, DEFAULT_TOKEN_BUDGET } from './token-budget';
import type { Snippet } from '$lib/types';
// ---------------------------------------------------------------------------
// Helpers
// ---------------------------------------------------------------------------
/**
 * Build a minimal info `Snippet` fixture for budget tests.
 *
 * @param id - Snippet id, used by the tests to identify what was selected.
 * @param tokenCount - Token cost of the snippet; `null` means unknown.
 */
function makeSnippet(id: string, tokenCount: number | null): Snippet {
  const fixture: Snippet = {
    id,
    documentId: 'doc-1',
    repositoryId: '/test/repo',
    versionId: null,
    type: 'info',
    title: null,
    content: 'content',
    language: null,
    breadcrumb: null,
    tokenCount,
    createdAt: new Date()
  };
  return fixture;
}
// ---------------------------------------------------------------------------
// Tests
// ---------------------------------------------------------------------------
describe('selectSnippetsWithinBudget', () => {
  /** Run the selection and return only the ids of the chosen snippets. */
  const selectedIds = (candidates: Snippet[], budget: number): string[] =>
    selectSnippetsWithinBudget(candidates, budget).map((snippet) => snippet.id);

  it('returns all snippets when total tokens are within budget', () => {
    const candidates = [makeSnippet('a', 100), makeSnippet('b', 200), makeSnippet('c', 300)];
    expect(selectedIds(candidates, 1000)).toEqual(['a', 'b', 'c']);
  });

  it('stops adding when next snippet exceeds the budget', () => {
    const candidates = [makeSnippet('a', 100), makeSnippet('b', 500), makeSnippet('c', 200)];
    // With a budget of 550, a (100) fits but a + b (600) does not. Selection
    // halts at b, so c is never considered even though it would still fit.
    expect(selectedIds(candidates, 550)).toEqual(['a']);
  });

  it('includes exactly one snippet when it fits the budget precisely', () => {
    expect(selectedIds([makeSnippet('a', 100)], 100)).toEqual(['a']);
  });

  it('returns empty array when first snippet already exceeds budget', () => {
    const candidates = [makeSnippet('a', 200), makeSnippet('b', 50)];
    const selection = selectSnippetsWithinBudget(candidates, 100);
    expect(selection).toHaveLength(0);
  });

  it('treats null tokenCount as 0', () => {
    const candidates = [makeSnippet('a', null), makeSnippet('b', null), makeSnippet('c', null)];
    // Every snippet costs 0, and a running total of 0 never exceeds a budget of 0.
    expect(selectedIds(candidates, 0)).toEqual(['a', 'b', 'c']);
  });

  it('returns empty array for empty input', () => {
    const selection = selectSnippetsWithinBudget([], 10_000);
    expect(selection).toHaveLength(0);
  });

  it('DEFAULT_TOKEN_BUDGET is 10000', () => {
    expect(DEFAULT_TOKEN_BUDGET).toBe(10_000);
  });
});

View File

@@ -0,0 +1,36 @@
/**
* Token budget selection for context responses.
*
* Implements a greedy selection algorithm: snippets are added in ranked order
* until adding the next snippet would exceed the token budget.
*/
import type { Snippet } from '$lib/types';
/**
 * Take the longest leading run of a ranked snippet list that fits a token budget.
 *
 * Snippets are visited best-first while a running token total is kept; a
 * `null` token count costs 0. Selection ends at the first snippet that would
 * push the total past `maxTokens`, and later snippets are not considered
 * even if they would still fit.
 *
 * @param snippets - Ranked list of snippets (best first).
 * @param maxTokens - Inclusive upper bound on total token count.
 * @returns A new array holding the largest prefix of `snippets` whose
 *          combined token count does not exceed `maxTokens`.
 */
export function selectSnippetsWithinBudget(snippets: Snippet[], maxTokens: number): Snippet[] {
  let runningTotal = 0;

  // Index of the first snippet whose inclusion would break the budget.
  const firstOverBudget = snippets.findIndex((candidate) => {
    runningTotal += candidate.tokenCount ?? 0;
    return runningTotal > maxTokens;
  });

  return firstOverBudget === -1 ? snippets.slice() : snippets.slice(0, firstOverBudget);
}
/**
 * Default token budget (10,000 tokens) when the caller does not specify
 * `tokens`. Intended as the `maxTokens` argument of
 * `selectSnippetsWithinBudget`.
 */
export const DEFAULT_TOKEN_BUDGET = 10_000;