Merge branch 'fix/MULTIVERSION-0001-trueref-config-crawl-result'

This commit is contained in:
Giancarmine Salucci
2026-03-29 12:44:47 +02:00
13 changed files with 1158 additions and 53 deletions

View File

@@ -24,7 +24,12 @@ import type { Handle } from '@sveltejs/kit';
try {
initializeDatabase();
} catch (err) {
console.error('[hooks.server] FATAL: database initialisation failed:', err);
process.exit(1);
}
try {
const db = getClient();
const activeProfileRow = db
.prepare<[], EmbeddingProfileEntityProps>(
@@ -46,7 +51,8 @@ try {
console.log('[hooks.server] Indexing pipeline initialised.');
} catch (err) {
console.error(
`[hooks.server] Failed to initialise server: ${err instanceof Error ? err.message : String(err)}`
'[hooks.server] Failed to initialise pipeline:',
err instanceof Error ? err.message : String(err)
);
}

View File

@@ -1,13 +1,14 @@
<script lang="ts">
import type { IndexingJob } from '$lib/types';
let { jobId }: { jobId: string } = $props();
let { jobId, oncomplete }: { jobId: string; oncomplete?: () => void } = $props();
let job = $state<IndexingJob | null>(null);
$effect(() => {
job = null;
let stopped = false;
let completeFired = false;
async function poll() {
if (stopped) return;
@@ -16,6 +17,10 @@
if (res.ok) {
const data = await res.json();
job = data.job;
if (!completeFired && (job?.status === 'done' || job?.status === 'failed')) {
completeFired = true;
oncomplete?.();
}
}
} catch {
// ignore transient errors

View File

@@ -5,7 +5,7 @@ import RepositoryCard from './RepositoryCard.svelte';
describe('RepositoryCard.svelte', () => {
it('encodes slash-bearing repository ids in the details href', async () => {
render(RepositoryCard, {
const { container } = await render(RepositoryCard, {
repo: {
id: '/facebook/react',
title: 'React',
@@ -26,7 +26,8 @@ describe('RepositoryCard.svelte', () => {
.element(page.getByRole('link', { name: 'Details' }))
.toHaveAttribute('href', '/repos/%2Ffacebook%2Freact');
await expect.element(page.getByText('1,200 embeddings')).toBeInTheDocument();
await expect.element(page.getByText('Indexed: main, v18.3.0')).toBeInTheDocument();
const text = container.textContent ?? '';
expect(text).toMatch(/1[,.\u00a0\u202f]?200 embeddings/);
expect(text).toContain('Indexed: main, v18.3.0');
});
});

View File

@@ -413,6 +413,59 @@ describe('LocalCrawler.crawl() — config file detection', () => {
const result = await crawlRoot();
expect(result.files.some((f) => f.path === 'src/index.ts')).toBe(true);
});
it('populates CrawlResult.config with the parsed trueref.json even when folders allowlist excludes the root', async () => {
	// MULTIVERSION-0001 regression guard.
	// With folders: ["src/"], the root-level trueref.json is filtered out of
	// files[] by shouldIndexFile(). The parsed config must nevertheless be
	// exposed on CrawlResult.config so the indexing pipeline can persist rules.
	const trueRefConfig = {
		folders: ['src/'],
		rules: ['Always document public APIs.']
	};
	root = await makeTempRepo({
		'trueref.json': JSON.stringify(trueRefConfig),
		'src/index.ts': 'export {};',
		'docs/guide.md': '# Guide'
	});

	const { files, config } = await crawlRoot();
	const crawledPaths = files.map((f) => f.path);

	// The config file itself is excluded by the folders allowlist.
	expect(crawledPaths.includes('trueref.json')).toBe(false);
	// Anything outside src/ is excluded as well.
	expect(crawledPaths.includes('docs/guide.md')).toBe(false);
	// Files under src/ are kept.
	expect(crawledPaths.includes('src/index.ts')).toBe(true);

	// The parsed config travels on the result regardless of the file filter.
	expect(config).toBeDefined();
	expect(config?.rules).toEqual(['Always document public APIs.']);
});
it('populates CrawlResult.config with the parsed context7.json', async () => {
	// Same expectation as for trueref.json, but the repo only ships a
	// context7.json config file.
	root = await makeTempRepo({
		'context7.json': JSON.stringify({ rules: ['Rule from context7.'] }),
		'src/index.ts': 'export {};'
	});

	const { config } = await crawlRoot();

	expect(config).toBeDefined();
	expect(config?.rules).toEqual(['Rule from context7.']);
});
it('CrawlResult.config is undefined when no config file is present', async () => {
	// The temp repo contains a single source file and no config file at all.
	root = await makeTempRepo({ 'src/index.ts': 'export {};' });

	const { config } = await crawlRoot();

	expect(config).toBeUndefined();
});
it('CrawlResult.config carries the caller-supplied config when one is provided (caller-provided takes precedence, no auto-detect)', async () => {
	// The repo ships its own trueref.json, but the caller also passes a config.
	root = await makeTempRepo({
		'trueref.json': JSON.stringify({ rules: ['From file.'] }),
		'src/index.ts': 'export {};'
	});
	// A caller-supplied config prevents auto-detection, so CrawlResult.config
	// must carry the caller's config rather than the content of the file.
	// (The previous title claimed the field was "undefined" here, which
	// contradicted this assertion.)
	const result = await crawlRoot({ config: { rules: ['From caller.'] } });
	expect(result.config?.rules).toEqual(['From caller.']);
});
});
// ---------------------------------------------------------------------------

View File

@@ -230,7 +230,11 @@ export class LocalCrawler {
totalFiles: filteredPaths.length,
skippedFiles: allRelPaths.length - filteredPaths.length,
branch,
commitSha
commitSha,
// Surface the pre-parsed config so the indexing pipeline can read rules
// without needing to find trueref.json inside crawledFiles (which fails
// when a `folders` allowlist excludes the repo root).
config: config ?? undefined
};
}

View File

@@ -35,6 +35,13 @@ export interface CrawlResult {
branch: string;
/** HEAD commit SHA */
commitSha: string;
/**
* Pre-parsed trueref.json / context7.json configuration found at the repo
* root during crawling. Carried here so the indexing pipeline can consume it
* directly without having to locate the config file in `files` — which fails
* when a `folders` allowlist excludes the repo root.
*/
config?: RepoConfig;
}
export interface CrawlOptions {

View File

@@ -30,6 +30,7 @@ const __dirname = dirname(fileURLToPath(import.meta.url));
*/
export function initializeDatabase(): void {
const migrationsFolder = join(__dirname, 'migrations');
console.log(`[db] Running migrations from ${migrationsFolder}...`);
migrate(db, { migrationsFolder });
// Apply FTS5 virtual table and trigger DDL (not expressible via Drizzle).

View File

@@ -0,0 +1,835 @@
{
"version": "6",
"dialect": "sqlite",
"id": "a7c2e4f8-3b1d-4e9a-8f0c-6d5e2a1b9c7f",
"prevId": "31531dab-a199-4fc5-a889-1884940039cd",
"tables": {
"documents": {
"name": "documents",
"columns": {
"id": {
"name": "id",
"type": "text",
"primaryKey": true,
"notNull": true,
"autoincrement": false
},
"repository_id": {
"name": "repository_id",
"type": "text",
"primaryKey": false,
"notNull": true,
"autoincrement": false
},
"version_id": {
"name": "version_id",
"type": "text",
"primaryKey": false,
"notNull": false,
"autoincrement": false
},
"file_path": {
"name": "file_path",
"type": "text",
"primaryKey": false,
"notNull": true,
"autoincrement": false
},
"title": {
"name": "title",
"type": "text",
"primaryKey": false,
"notNull": false,
"autoincrement": false
},
"language": {
"name": "language",
"type": "text",
"primaryKey": false,
"notNull": false,
"autoincrement": false
},
"token_count": {
"name": "token_count",
"type": "integer",
"primaryKey": false,
"notNull": false,
"autoincrement": false,
"default": 0
},
"checksum": {
"name": "checksum",
"type": "text",
"primaryKey": false,
"notNull": true,
"autoincrement": false
},
"indexed_at": {
"name": "indexed_at",
"type": "integer",
"primaryKey": false,
"notNull": true,
"autoincrement": false
}
},
"indexes": {},
"foreignKeys": {
"documents_repository_id_repositories_id_fk": {
"name": "documents_repository_id_repositories_id_fk",
"tableFrom": "documents",
"tableTo": "repositories",
"columnsFrom": ["repository_id"],
"columnsTo": ["id"],
"onDelete": "cascade",
"onUpdate": "no action"
},
"documents_version_id_repository_versions_id_fk": {
"name": "documents_version_id_repository_versions_id_fk",
"tableFrom": "documents",
"tableTo": "repository_versions",
"columnsFrom": ["version_id"],
"columnsTo": ["id"],
"onDelete": "cascade",
"onUpdate": "no action"
}
},
"compositePrimaryKeys": {},
"uniqueConstraints": {},
"checkConstraints": {}
},
"embedding_profiles": {
"name": "embedding_profiles",
"columns": {
"id": {
"name": "id",
"type": "text",
"primaryKey": true,
"notNull": true,
"autoincrement": false
},
"provider_kind": {
"name": "provider_kind",
"type": "text",
"primaryKey": false,
"notNull": true,
"autoincrement": false
},
"title": {
"name": "title",
"type": "text",
"primaryKey": false,
"notNull": true,
"autoincrement": false
},
"enabled": {
"name": "enabled",
"type": "integer",
"primaryKey": false,
"notNull": true,
"autoincrement": false,
"default": true
},
"is_default": {
"name": "is_default",
"type": "integer",
"primaryKey": false,
"notNull": true,
"autoincrement": false,
"default": false
},
"model": {
"name": "model",
"type": "text",
"primaryKey": false,
"notNull": true,
"autoincrement": false
},
"dimensions": {
"name": "dimensions",
"type": "integer",
"primaryKey": false,
"notNull": true,
"autoincrement": false
},
"config": {
"name": "config",
"type": "text",
"primaryKey": false,
"notNull": true,
"autoincrement": false
},
"created_at": {
"name": "created_at",
"type": "integer",
"primaryKey": false,
"notNull": true,
"autoincrement": false
},
"updated_at": {
"name": "updated_at",
"type": "integer",
"primaryKey": false,
"notNull": true,
"autoincrement": false
}
},
"indexes": {},
"foreignKeys": {},
"compositePrimaryKeys": {},
"uniqueConstraints": {},
"checkConstraints": {}
},
"indexing_jobs": {
"name": "indexing_jobs",
"columns": {
"id": {
"name": "id",
"type": "text",
"primaryKey": true,
"notNull": true,
"autoincrement": false
},
"repository_id": {
"name": "repository_id",
"type": "text",
"primaryKey": false,
"notNull": true,
"autoincrement": false
},
"version_id": {
"name": "version_id",
"type": "text",
"primaryKey": false,
"notNull": false,
"autoincrement": false
},
"status": {
"name": "status",
"type": "text",
"primaryKey": false,
"notNull": true,
"autoincrement": false,
"default": "'queued'"
},
"progress": {
"name": "progress",
"type": "integer",
"primaryKey": false,
"notNull": false,
"autoincrement": false,
"default": 0
},
"total_files": {
"name": "total_files",
"type": "integer",
"primaryKey": false,
"notNull": false,
"autoincrement": false,
"default": 0
},
"processed_files": {
"name": "processed_files",
"type": "integer",
"primaryKey": false,
"notNull": false,
"autoincrement": false,
"default": 0
},
"error": {
"name": "error",
"type": "text",
"primaryKey": false,
"notNull": false,
"autoincrement": false
},
"started_at": {
"name": "started_at",
"type": "integer",
"primaryKey": false,
"notNull": false,
"autoincrement": false
},
"completed_at": {
"name": "completed_at",
"type": "integer",
"primaryKey": false,
"notNull": false,
"autoincrement": false
},
"created_at": {
"name": "created_at",
"type": "integer",
"primaryKey": false,
"notNull": true,
"autoincrement": false
}
},
"indexes": {},
"foreignKeys": {
"indexing_jobs_repository_id_repositories_id_fk": {
"name": "indexing_jobs_repository_id_repositories_id_fk",
"tableFrom": "indexing_jobs",
"tableTo": "repositories",
"columnsFrom": ["repository_id"],
"columnsTo": ["id"],
"onDelete": "cascade",
"onUpdate": "no action"
}
},
"compositePrimaryKeys": {},
"uniqueConstraints": {},
"checkConstraints": {}
},
"repositories": {
"name": "repositories",
"columns": {
"id": {
"name": "id",
"type": "text",
"primaryKey": true,
"notNull": true,
"autoincrement": false
},
"title": {
"name": "title",
"type": "text",
"primaryKey": false,
"notNull": true,
"autoincrement": false
},
"description": {
"name": "description",
"type": "text",
"primaryKey": false,
"notNull": false,
"autoincrement": false
},
"source": {
"name": "source",
"type": "text",
"primaryKey": false,
"notNull": true,
"autoincrement": false
},
"source_url": {
"name": "source_url",
"type": "text",
"primaryKey": false,
"notNull": true,
"autoincrement": false
},
"branch": {
"name": "branch",
"type": "text",
"primaryKey": false,
"notNull": false,
"autoincrement": false,
"default": "'main'"
},
"state": {
"name": "state",
"type": "text",
"primaryKey": false,
"notNull": true,
"autoincrement": false,
"default": "'pending'"
},
"total_snippets": {
"name": "total_snippets",
"type": "integer",
"primaryKey": false,
"notNull": false,
"autoincrement": false,
"default": 0
},
"total_tokens": {
"name": "total_tokens",
"type": "integer",
"primaryKey": false,
"notNull": false,
"autoincrement": false,
"default": 0
},
"trust_score": {
"name": "trust_score",
"type": "real",
"primaryKey": false,
"notNull": false,
"autoincrement": false,
"default": 0
},
"benchmark_score": {
"name": "benchmark_score",
"type": "real",
"primaryKey": false,
"notNull": false,
"autoincrement": false,
"default": 0
},
"stars": {
"name": "stars",
"type": "integer",
"primaryKey": false,
"notNull": false,
"autoincrement": false
},
"github_token": {
"name": "github_token",
"type": "text",
"primaryKey": false,
"notNull": false,
"autoincrement": false
},
"last_indexed_at": {
"name": "last_indexed_at",
"type": "integer",
"primaryKey": false,
"notNull": false,
"autoincrement": false
},
"created_at": {
"name": "created_at",
"type": "integer",
"primaryKey": false,
"notNull": true,
"autoincrement": false
},
"updated_at": {
"name": "updated_at",
"type": "integer",
"primaryKey": false,
"notNull": true,
"autoincrement": false
}
},
"indexes": {},
"foreignKeys": {},
"compositePrimaryKeys": {},
"uniqueConstraints": {},
"checkConstraints": {}
},
"repository_configs": {
"name": "repository_configs",
"columns": {
"repository_id": {
"name": "repository_id",
"type": "text",
"primaryKey": false,
"notNull": true,
"autoincrement": false
},
"version_id": {
"name": "version_id",
"type": "text",
"primaryKey": false,
"notNull": false,
"autoincrement": false
},
"project_title": {
"name": "project_title",
"type": "text",
"primaryKey": false,
"notNull": false,
"autoincrement": false
},
"description": {
"name": "description",
"type": "text",
"primaryKey": false,
"notNull": false,
"autoincrement": false
},
"folders": {
"name": "folders",
"type": "text",
"primaryKey": false,
"notNull": false,
"autoincrement": false
},
"exclude_folders": {
"name": "exclude_folders",
"type": "text",
"primaryKey": false,
"notNull": false,
"autoincrement": false
},
"exclude_files": {
"name": "exclude_files",
"type": "text",
"primaryKey": false,
"notNull": false,
"autoincrement": false
},
"rules": {
"name": "rules",
"type": "text",
"primaryKey": false,
"notNull": false,
"autoincrement": false
},
"previous_versions": {
"name": "previous_versions",
"type": "text",
"primaryKey": false,
"notNull": false,
"autoincrement": false
},
"updated_at": {
"name": "updated_at",
"type": "integer",
"primaryKey": false,
"notNull": true,
"autoincrement": false
}
},
"indexes": {
"uniq_repo_config_base": {
"name": "uniq_repo_config_base",
"columns": ["repository_id"],
"isUnique": true,
"where": "`version_id` IS NULL"
},
"uniq_repo_config_version": {
"name": "uniq_repo_config_version",
"columns": ["repository_id", "version_id"],
"isUnique": true,
"where": "`version_id` IS NOT NULL"
}
},
"foreignKeys": {
"repository_configs_repository_id_repositories_id_fk": {
"name": "repository_configs_repository_id_repositories_id_fk",
"tableFrom": "repository_configs",
"tableTo": "repositories",
"columnsFrom": ["repository_id"],
"columnsTo": ["id"],
"onDelete": "cascade",
"onUpdate": "no action"
}
},
"compositePrimaryKeys": {},
"uniqueConstraints": {},
"checkConstraints": {}
},
"repository_versions": {
"name": "repository_versions",
"columns": {
"id": {
"name": "id",
"type": "text",
"primaryKey": true,
"notNull": true,
"autoincrement": false
},
"repository_id": {
"name": "repository_id",
"type": "text",
"primaryKey": false,
"notNull": true,
"autoincrement": false
},
"tag": {
"name": "tag",
"type": "text",
"primaryKey": false,
"notNull": true,
"autoincrement": false
},
"title": {
"name": "title",
"type": "text",
"primaryKey": false,
"notNull": false,
"autoincrement": false
},
"commit_hash": {
"name": "commit_hash",
"type": "text",
"primaryKey": false,
"notNull": false,
"autoincrement": false
},
"state": {
"name": "state",
"type": "text",
"primaryKey": false,
"notNull": true,
"autoincrement": false,
"default": "'pending'"
},
"total_snippets": {
"name": "total_snippets",
"type": "integer",
"primaryKey": false,
"notNull": false,
"autoincrement": false,
"default": 0
},
"indexed_at": {
"name": "indexed_at",
"type": "integer",
"primaryKey": false,
"notNull": false,
"autoincrement": false
},
"created_at": {
"name": "created_at",
"type": "integer",
"primaryKey": false,
"notNull": true,
"autoincrement": false
}
},
"indexes": {},
"foreignKeys": {
"repository_versions_repository_id_repositories_id_fk": {
"name": "repository_versions_repository_id_repositories_id_fk",
"tableFrom": "repository_versions",
"tableTo": "repositories",
"columnsFrom": ["repository_id"],
"columnsTo": ["id"],
"onDelete": "cascade",
"onUpdate": "no action"
}
},
"compositePrimaryKeys": {},
"uniqueConstraints": {},
"checkConstraints": {}
},
"settings": {
"name": "settings",
"columns": {
"key": {
"name": "key",
"type": "text",
"primaryKey": true,
"notNull": true,
"autoincrement": false
},
"value": {
"name": "value",
"type": "text",
"primaryKey": false,
"notNull": false,
"autoincrement": false
},
"updated_at": {
"name": "updated_at",
"type": "integer",
"primaryKey": false,
"notNull": true,
"autoincrement": false
}
},
"indexes": {},
"foreignKeys": {},
"compositePrimaryKeys": {},
"uniqueConstraints": {},
"checkConstraints": {}
},
"snippet_embeddings": {
"name": "snippet_embeddings",
"columns": {
"snippet_id": {
"name": "snippet_id",
"type": "text",
"primaryKey": false,
"notNull": true,
"autoincrement": false
},
"profile_id": {
"name": "profile_id",
"type": "text",
"primaryKey": false,
"notNull": true,
"autoincrement": false
},
"model": {
"name": "model",
"type": "text",
"primaryKey": false,
"notNull": true,
"autoincrement": false
},
"dimensions": {
"name": "dimensions",
"type": "integer",
"primaryKey": false,
"notNull": true,
"autoincrement": false
},
"embedding": {
"name": "embedding",
"type": "blob",
"primaryKey": false,
"notNull": true,
"autoincrement": false
},
"created_at": {
"name": "created_at",
"type": "integer",
"primaryKey": false,
"notNull": true,
"autoincrement": false
}
},
"indexes": {},
"foreignKeys": {
"snippet_embeddings_snippet_id_snippets_id_fk": {
"name": "snippet_embeddings_snippet_id_snippets_id_fk",
"tableFrom": "snippet_embeddings",
"tableTo": "snippets",
"columnsFrom": ["snippet_id"],
"columnsTo": ["id"],
"onDelete": "cascade",
"onUpdate": "no action"
},
"snippet_embeddings_profile_id_embedding_profiles_id_fk": {
"name": "snippet_embeddings_profile_id_embedding_profiles_id_fk",
"tableFrom": "snippet_embeddings",
"tableTo": "embedding_profiles",
"columnsFrom": ["profile_id"],
"columnsTo": ["id"],
"onDelete": "cascade",
"onUpdate": "no action"
}
},
"compositePrimaryKeys": {
"snippet_embeddings_snippet_id_profile_id_pk": {
"columns": ["snippet_id", "profile_id"],
"name": "snippet_embeddings_snippet_id_profile_id_pk"
}
},
"uniqueConstraints": {},
"checkConstraints": {}
},
"snippets": {
"name": "snippets",
"columns": {
"id": {
"name": "id",
"type": "text",
"primaryKey": true,
"notNull": true,
"autoincrement": false
},
"document_id": {
"name": "document_id",
"type": "text",
"primaryKey": false,
"notNull": true,
"autoincrement": false
},
"repository_id": {
"name": "repository_id",
"type": "text",
"primaryKey": false,
"notNull": true,
"autoincrement": false
},
"version_id": {
"name": "version_id",
"type": "text",
"primaryKey": false,
"notNull": false,
"autoincrement": false
},
"type": {
"name": "type",
"type": "text",
"primaryKey": false,
"notNull": true,
"autoincrement": false
},
"title": {
"name": "title",
"type": "text",
"primaryKey": false,
"notNull": false,
"autoincrement": false
},
"content": {
"name": "content",
"type": "text",
"primaryKey": false,
"notNull": true,
"autoincrement": false
},
"language": {
"name": "language",
"type": "text",
"primaryKey": false,
"notNull": false,
"autoincrement": false
},
"breadcrumb": {
"name": "breadcrumb",
"type": "text",
"primaryKey": false,
"notNull": false,
"autoincrement": false
},
"token_count": {
"name": "token_count",
"type": "integer",
"primaryKey": false,
"notNull": false,
"autoincrement": false,
"default": 0
},
"created_at": {
"name": "created_at",
"type": "integer",
"primaryKey": false,
"notNull": true,
"autoincrement": false
}
},
"indexes": {},
"foreignKeys": {
"snippets_document_id_documents_id_fk": {
"name": "snippets_document_id_documents_id_fk",
"tableFrom": "snippets",
"tableTo": "documents",
"columnsFrom": ["document_id"],
"columnsTo": ["id"],
"onDelete": "cascade",
"onUpdate": "no action"
},
"snippets_repository_id_repositories_id_fk": {
"name": "snippets_repository_id_repositories_id_fk",
"tableFrom": "snippets",
"tableTo": "repositories",
"columnsFrom": ["repository_id"],
"columnsTo": ["id"],
"onDelete": "cascade",
"onUpdate": "no action"
},
"snippets_version_id_repository_versions_id_fk": {
"name": "snippets_version_id_repository_versions_id_fk",
"tableFrom": "snippets",
"tableTo": "repository_versions",
"columnsFrom": ["version_id"],
"columnsTo": ["id"],
"onDelete": "cascade",
"onUpdate": "no action"
}
},
"compositePrimaryKeys": {},
"uniqueConstraints": {},
"checkConstraints": {}
}
},
"views": {},
"enums": {},
"schemas": {},
"sequences": {},
"_meta": {
"schemas": {},
"tables": {},
"columns": {}
},
"internal": {
"indexes": {}
}
}

View File

@@ -268,6 +268,8 @@ describe('IndexingPipeline', () => {
crawlResult: {
files: Array<{ path: string; content: string; sha: string; language: string }>;
totalFiles: number;
/** Optional pre-parsed config — simulates LocalCrawler returning CrawlResult.config. */
config?: Record<string, unknown>;
} = { files: [], totalFiles: 0 },
embeddingService: EmbeddingService | null = null
) {
@@ -867,15 +869,17 @@ describe('IndexingPipeline', () => {
await pipeline.run(job as never);
// Repo-wide row (version_id IS NULL) must exist.
// Repo-wide row (version_id IS NULL) must NOT be written by a version job —
// writing it here would contaminate the NULL entry with version-specific rules
// (Bug 5b regression guard).
const repoRow = db
.prepare(
`SELECT rules FROM repository_configs WHERE repository_id = '/test/repo' AND version_id IS NULL`
)
.get() as { rules: string } | undefined;
expect(repoRow).toBeDefined();
expect(repoRow).toBeUndefined();
// Version-specific row must also exist.
// Version-specific row must exist with the correct rules.
const versionRow = db
.prepare(
`SELECT rules FROM repository_configs WHERE repository_id = '/test/repo' AND version_id = ?`
@@ -885,4 +889,133 @@ describe('IndexingPipeline', () => {
const rules = JSON.parse(versionRow!.rules);
expect(rules).toEqual(['This is v2. Use the new Builder API.']);
});
it('regression(Bug5b): version job does not overwrite the repo-wide NULL rules entry', async () => {
	// Builds a crawl result that contains nothing but a trueref.json with the given rules.
	const configOnlyCrawl = (rules: string[], sha: string) => ({
		files: [
			{
				path: 'trueref.json',
				content: JSON.stringify({ rules }),
				sha,
				language: 'json'
			}
		],
		totalFiles: 1
	});
	// Reads the repo-wide (version_id IS NULL) rules row for the test repository.
	const readRepoWideRow = () =>
		db
			.prepare(
				`SELECT rules FROM repository_configs WHERE repository_id = '/test/repo' AND version_id IS NULL`
			)
			.get() as { rules: string } | undefined;

	// Arrange: a main-branch job (no versionId) establishes the repo-wide entry.
	const mainBranchRules = ['Always use TypeScript strict mode.'];
	const mainPipeline = makePipeline(configOnlyCrawl(mainBranchRules, 'sha-main-config'));
	const mainJob = makeJob('/test/repo');
	await mainPipeline.run(mainJob as never);

	// Sanity check: the repo-wide entry now holds the main-branch rules.
	const repoWideBefore = readRepoWideRow();
	expect(repoWideBefore).toBeDefined();
	expect(JSON.parse(repoWideBefore!.rules)).toEqual(mainBranchRules);

	// Act: index a version whose config carries different rules.
	const versionId = insertVersion(db, { tag: 'v3.0.0', state: 'pending' });
	const versionRules = ['v3 only: use the streaming API.'];
	const versionPipeline = makePipeline(configOnlyCrawl(versionRules, 'sha-v3-config'));
	const versionJob = makeJob('/test/repo', versionId);
	await versionPipeline.run(versionJob as never);

	// Assert: the repo-wide NULL entry is untouched by the version job.
	const repoWideAfter = readRepoWideRow();
	expect(repoWideAfter).toBeDefined();
	expect(JSON.parse(repoWideAfter!.rules)).toEqual(mainBranchRules);

	// Assert: the version-specific row holds the version's own rules.
	const versionRow = db
		.prepare(
			`SELECT rules FROM repository_configs WHERE repository_id = '/test/repo' AND version_id = ?`
		)
		.get(versionId) as { rules: string } | undefined;
	expect(versionRow).toBeDefined();
	expect(JSON.parse(versionRow!.rules)).toEqual(versionRules);
});
it('persists rules from CrawlResult.config even when trueref.json is absent from files (folders allowlist bug)', async () => {
	// MULTIVERSION-0001 regression guard.
	// A `folders` allowlist such as ["src/"] makes shouldIndexFile() drop the
	// root-level trueref.json, so the file never shows up in crawlResult.files[].
	// LocalCrawler therefore hands over the pre-parsed config through
	// CrawlResult.config, and the pipeline must persist rules from there.
	const expectedRules = ['Use strict TypeScript.', 'Avoid any.'];

	// Only a source file is crawled — trueref.json is deliberately missing,
	// as if the folders allowlist had excluded it.
	const sourceOnlyFiles = [
		{
			path: 'src/index.ts',
			content: 'export const x = 1;',
			sha: 'sha-src',
			language: 'typescript'
		}
	];
	const pipeline = makePipeline({
		files: sourceOnlyFiles,
		totalFiles: 1,
		// Pre-parsed config, as LocalCrawler would supply it.
		config: { rules: [...expectedRules] }
	});

	await pipeline.run(makeJob() as never);

	const repoWideRow = db
		.prepare(
			`SELECT rules FROM repository_configs WHERE repository_id = '/test/repo' AND version_id IS NULL`
		)
		.get() as { rules: string } | undefined;
	expect(repoWideRow).toBeDefined();
	expect(JSON.parse(repoWideRow!.rules)).toEqual(expectedRules);
});
it('persists version-specific rules from CrawlResult.config when trueref.json is excluded by folders allowlist', async () => {
	// Version-job counterpart of the folders-allowlist case: the rules arrive
	// only through CrawlResult.config and must land in the version's own row.
	const expectedRules = ['v3: use the streaming API.'];
	const versionId = insertVersion(db, { tag: 'v3.0.0', state: 'pending' });

	// No trueref.json among the crawled files — only a source file.
	const sourceOnlyFiles = [
		{
			path: 'src/index.ts',
			content: 'export const x = 1;',
			sha: 'sha-src',
			language: 'typescript'
		}
	];
	const pipeline = makePipeline({
		files: sourceOnlyFiles,
		totalFiles: 1,
		config: { rules: [...expectedRules] }
	});

	await pipeline.run(makeJob('/test/repo', versionId) as never);

	const versionRow = db
		.prepare(
			`SELECT rules FROM repository_configs WHERE repository_id = '/test/repo' AND version_id = ?`
		)
		.get(versionId) as { rules: string } | undefined;
	expect(versionRow).toBeDefined();
	expect(JSON.parse(versionRow!.rules)).toEqual(expectedRules);
});
});

View File

@@ -15,14 +15,14 @@
import { createHash, randomUUID } from 'node:crypto';
import type Database from 'better-sqlite3';
import type { Document, NewDocument, NewSnippet } from '$lib/types';
import type { Document, NewDocument, NewSnippet, TrueRefConfig } from '$lib/types';
import type { crawl as GithubCrawlFn } from '$lib/server/crawler/github.crawler.js';
import type { LocalCrawler } from '$lib/server/crawler/local.crawler.js';
import type { EmbeddingService } from '$lib/server/embeddings/embedding.service.js';
import { RepositoryMapper } from '$lib/server/mappers/repository.mapper.js';
import { IndexingJob } from '$lib/server/models/indexing-job.js';
import { Repository, RepositoryEntity } from '$lib/server/models/repository.js';
import { resolveConfig } from '$lib/server/config/config-parser.js';
import { resolveConfig, type ParsedConfig } from '$lib/server/config/config-parser.js';
import { parseFile } from '$lib/server/parser/index.js';
import { computeTrustScore } from '$lib/server/search/trust-score.js';
import { computeDiff } from './diff.js';
@@ -101,13 +101,25 @@ export class IndexingPipeline {
: undefined;
const crawlResult = await this.crawl(repo, versionTag);
// Parse trueref.json / context7.json if present in the crawl results.
// Resolve trueref.json / context7.json configuration.
// Prefer the pre-parsed config carried in the CrawlResult (set by
// LocalCrawler so it is available even when a `folders` allowlist
// excludes the repo root and trueref.json never appears in files[]).
// Fall back to locating the file in crawlResult.files for GitHub crawls
// which do not yet populate CrawlResult.config.
let parsedConfig: ReturnType<typeof resolveConfig> | null = null;
if (crawlResult.config) {
// Config was pre-parsed by the crawler — wrap it in a ParsedConfig
// shell so the rest of the pipeline can use it uniformly.
parsedConfig = { config: crawlResult.config, source: 'trueref.json', warnings: [] } satisfies ParsedConfig;
} else {
const configFile = crawlResult.files.find(
(f) => f.path === 'trueref.json' || f.path === 'context7.json'
);
const parsedConfig = configFile
parsedConfig = configFile
? resolveConfig([{ filename: configFile.path, content: configFile.content }])
: null;
}
const excludeFiles: string[] = parsedConfig?.config.excludeFiles ?? [];
// Filter out excluded files before diff computation.
@@ -264,10 +276,13 @@ export class IndexingPipeline {
// ---- Stage 6: Persist rules from config ----------------------------
if (parsedConfig?.config.rules?.length) {
// Repo-wide rules (versionId = null).
if (!normJob.versionId) {
// Main-branch job: write the repo-wide entry only.
this.upsertRepoConfig(repo.id, null, parsedConfig.config.rules);
// Version-specific rules stored separately when indexing a version.
if (normJob.versionId) {
} else {
// Version job: write only the version-specific entry.
// Writing to the NULL row here would overwrite repo-wide rules
// with whatever the last-indexed version happened to carry.
this.upsertRepoConfig(repo.id, normJob.versionId, parsedConfig.config.rules);
}
}
@@ -304,6 +319,8 @@ export class IndexingPipeline {
private async crawl(repo: Repository, ref?: string): Promise<{
files: Array<{ path: string; content: string; sha: string; size: number; language: string }>;
totalFiles: number;
/** Pre-parsed trueref.json / context7.json, or undefined when absent. */
config?: TrueRefConfig;
}> {
if (repo.source === 'github') {
// Parse owner/repo from the canonical ID: "/owner/repo"
@@ -330,7 +347,7 @@ export class IndexingPipeline {
ref: ref ?? (repo.branch !== 'main' ? (repo.branch ?? undefined) : undefined)
});
return { files: result.files, totalFiles: result.totalFiles };
return { files: result.files, totalFiles: result.totalFiles, config: result.config };
}
}

View File

@@ -446,7 +446,11 @@ describe('API contract integration', () => {
const repositoryId = seedRepo(db);
const versionId = seedVersion(db, repositoryId, 'v18.3.0');
const documentId = seedDocument(db, repositoryId, versionId);
seedRules(db, repositoryId, ['Prefer hooks over classes']);
// Insert version-specific rules (versioned queries no longer inherit the NULL row).
db.prepare(
`INSERT INTO repository_configs (repository_id, version_id, rules, updated_at)
VALUES (?, ?, ?, ?)`
).run(repositoryId, versionId, JSON.stringify(['Prefer hooks over classes']), NOW_S);
seedSnippet(db, {
documentId,
repositoryId,
@@ -497,12 +501,12 @@ describe('API contract integration', () => {
});
});
it('GET /api/v1/context returns merged repo-wide and version-specific rules', async () => {
it('GET /api/v1/context returns only version-specific rules for versioned queries (no NULL row contamination)', async () => {
const repositoryId = seedRepo(db);
const versionId = seedVersion(db, repositoryId, 'v2.0.0');
const documentId = seedDocument(db, repositoryId, versionId);
// Insert repo-wide rules (version_id IS NULL).
// Insert repo-wide rules (version_id IS NULL) — these must NOT appear in versioned queries.
db.prepare(
`INSERT INTO repository_configs (repository_id, version_id, rules, updated_at)
VALUES (?, NULL, ?, ?)`
@@ -529,8 +533,8 @@ describe('API contract integration', () => {
expect(response.status).toBe(200);
const body = await response.json();
// Both repo-wide and version-specific rules should appear (deduped).
expect(body.rules).toEqual(['Repo-wide rule', 'Version-specific rule']);
// Only the version-specific rule should appear — NULL row must not contaminate.
expect(body.rules).toEqual(['Version-specific rule']);
});
it('GET /api/v1/context returns only repo-wide rules when no version is requested', async () => {
@@ -556,12 +560,13 @@ describe('API contract integration', () => {
expect(body.rules).toEqual(['Repo-wide rule only']);
});
it('GET /api/v1/context deduplicates rules that appear in both repo-wide and version config', async () => {
it('GET /api/v1/context versioned query returns only the version-specific rules row', async () => {
const repositoryId = seedRepo(db);
const versionId = seedVersion(db, repositoryId, 'v3.0.0');
const documentId = seedDocument(db, repositoryId, versionId);
const sharedRule = 'Use TypeScript strict mode';
// Insert repo-wide NULL row — must NOT bleed into versioned query results.
db.prepare(
`INSERT INTO repository_configs (repository_id, version_id, rules, updated_at)
VALUES (?, NULL, ?, ?)`
@@ -582,10 +587,35 @@ describe('API contract integration', () => {
expect(response.status).toBe(200);
const body = await response.json();
// sharedRule appears once, version-only rule appended.
// Returns only the version-specific row as stored — no NULL row merge.
expect(body.rules).toEqual([sharedRule, 'Version-only rule']);
});
it('GET /api/v1/context versioned query returns empty rules when only NULL row exists (no NULL contamination)', async () => {
	// Arrange: one repository with a single indexed version, v1.0.0.
	const repoId = seedRepo(db);
	const v1Id = seedVersion(db, repoId, 'v1.0.0');
	const docId = seedDocument(db, repoId, v1Id);

	// The only config row is the repo-wide one (version_id IS NULL);
	// no row is stored for v1.0.0 itself.
	const insertRepoWideConfig = db.prepare(
		`INSERT INTO repository_configs (repository_id, version_id, rules, updated_at)
VALUES (?, NULL, ?, ?)`
	);
	const repoWideRules = JSON.stringify(['HEAD rules that must not contaminate v1']);
	insertRepoWideConfig.run(repoId, repoWideRules, NOW_S);

	seedSnippet(db, {
		documentId: docId,
		repositoryId: repoId,
		versionId: v1Id,
		content: 'v1 content'
	});

	// Act: request context for the explicit v1.0.0 library id.
	const libraryIdParam = encodeURIComponent(`${repoId}/v1.0.0`);
	const queryParam = encodeURIComponent('v1 content');
	const requestUrl = new URL(
		`http://test/api/v1/context?libraryId=${libraryIdParam}&query=${queryParam}`
	);
	const response = await getContext({ url: requestUrl } as never);

	// Assert: the request succeeds, and because v1.0.0 has no config row of its
	// own the rules list is empty — the NULL row's rules must not be returned.
	expect(response.status).toBe(200);
	const { rules } = await response.json();
	expect(rules).toEqual([]);
});
it('GET /api/v1/context returns 404 with VERSION_NOT_FOUND when version does not exist', async () => {
const repositoryId = seedRepo(db);

View File

@@ -69,35 +69,25 @@ function getRules(
repositoryId: string,
versionId?: string
): string[] {
// Repo-wide rules (version_id IS NULL).
const repoRow = db
if (!versionId) {
// Unversioned query: return repo-wide (HEAD) rules only.
const row = db
.prepare<
[string],
RawRepoConfig
>(`SELECT rules FROM repository_configs WHERE repository_id = ? AND version_id IS NULL`)
.get(repositoryId);
return parseRulesJson(row?.rules);
}
const repoRules = parseRulesJson(repoRow?.rules);
if (!versionId) return repoRules;
// Version-specific rules.
const versionRow = db
// Versioned query: return only version-specific rules (no NULL row merge).
const row = db
.prepare<
[string, string],
RawRepoConfig
>(`SELECT rules FROM repository_configs WHERE repository_id = ? AND version_id = ?`)
.get(repositoryId, versionId);
const versionRules = parseRulesJson(versionRow?.rules);
// Merge: repo-wide first, then version-specific (deduped by content).
const seen = new Set(repoRules);
const merged = [...repoRules];
for (const r of versionRules) {
if (!seen.has(r)) merged.push(r);
}
return merged;
return parseRulesJson(row?.rules);
}
interface RawRepoState {

View File

@@ -524,6 +524,29 @@
</button>
</div>
</div>
<!--
	Per-version metadata line. Rendered only when at least one of the following
	is available: a positive snippet count, a commit hash, or an indexed-at value.
	`metaParts` collects the available pieces in a fixed order (snippet count,
	first 8 characters of the commit hash, indexed-at passed through formatDate)
	and filters out the missing ones; `mono` marks a part that gets the
	`font-mono` class.
-->
{#if version.totalSnippets > 0 || version.commitHash || version.indexedAt}
{@const metaParts = (
[
version.totalSnippets > 0
? { text: `${version.totalSnippets} snippets`, mono: false }
: null,
version.commitHash
? { text: version.commitHash.slice(0, 8), mono: true }
: null,
version.indexedAt
? { text: formatDate(version.indexedAt), mono: false }
: null
] as Array<{ text: string; mono: boolean } | null>
).filter((p): p is { text: string; mono: boolean } => p !== null)}
<div class="mt-1 flex items-center gap-1.5">
{#each metaParts as part, i (i)}
<!-- A "·" separator goes between parts, never before the first one. -->
{#if i > 0}
<span class="text-xs text-gray-300">·</span>
{/if}
<span class="text-xs text-gray-400{part.mono ? ' font-mono' : ''}">{part.text}</span>
{/each}
</div>
{/if}
{#if !!activeVersionJobs[version.tag]}
<IndexingProgress
jobId={activeVersionJobs[version.tag]!}