Merge branch 'fix/MULTIVERSION-0001-trueref-config-crawl-result'
This commit is contained in:
@@ -24,7 +24,12 @@ import type { Handle } from '@sveltejs/kit';
|
|||||||
|
|
||||||
try {
|
try {
|
||||||
initializeDatabase();
|
initializeDatabase();
|
||||||
|
} catch (err) {
|
||||||
|
console.error('[hooks.server] FATAL: database initialisation failed:', err);
|
||||||
|
process.exit(1);
|
||||||
|
}
|
||||||
|
|
||||||
|
try {
|
||||||
const db = getClient();
|
const db = getClient();
|
||||||
const activeProfileRow = db
|
const activeProfileRow = db
|
||||||
.prepare<[], EmbeddingProfileEntityProps>(
|
.prepare<[], EmbeddingProfileEntityProps>(
|
||||||
@@ -46,7 +51,8 @@ try {
|
|||||||
console.log('[hooks.server] Indexing pipeline initialised.');
|
console.log('[hooks.server] Indexing pipeline initialised.');
|
||||||
} catch (err) {
|
} catch (err) {
|
||||||
console.error(
|
console.error(
|
||||||
`[hooks.server] Failed to initialise server: ${err instanceof Error ? err.message : String(err)}`
|
'[hooks.server] Failed to initialise pipeline:',
|
||||||
|
err instanceof Error ? err.message : String(err)
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -1,13 +1,14 @@
|
|||||||
<script lang="ts">
|
<script lang="ts">
|
||||||
import type { IndexingJob } from '$lib/types';
|
import type { IndexingJob } from '$lib/types';
|
||||||
|
|
||||||
let { jobId }: { jobId: string } = $props();
|
let { jobId, oncomplete }: { jobId: string; oncomplete?: () => void } = $props();
|
||||||
|
|
||||||
let job = $state<IndexingJob | null>(null);
|
let job = $state<IndexingJob | null>(null);
|
||||||
|
|
||||||
$effect(() => {
|
$effect(() => {
|
||||||
job = null;
|
job = null;
|
||||||
let stopped = false;
|
let stopped = false;
|
||||||
|
let completeFired = false;
|
||||||
|
|
||||||
async function poll() {
|
async function poll() {
|
||||||
if (stopped) return;
|
if (stopped) return;
|
||||||
@@ -16,6 +17,10 @@
|
|||||||
if (res.ok) {
|
if (res.ok) {
|
||||||
const data = await res.json();
|
const data = await res.json();
|
||||||
job = data.job;
|
job = data.job;
|
||||||
|
if (!completeFired && (job?.status === 'done' || job?.status === 'failed')) {
|
||||||
|
completeFired = true;
|
||||||
|
oncomplete?.();
|
||||||
|
}
|
||||||
}
|
}
|
||||||
} catch {
|
} catch {
|
||||||
// ignore transient errors
|
// ignore transient errors
|
||||||
|
|||||||
@@ -5,7 +5,7 @@ import RepositoryCard from './RepositoryCard.svelte';
|
|||||||
|
|
||||||
describe('RepositoryCard.svelte', () => {
|
describe('RepositoryCard.svelte', () => {
|
||||||
it('encodes slash-bearing repository ids in the details href', async () => {
|
it('encodes slash-bearing repository ids in the details href', async () => {
|
||||||
render(RepositoryCard, {
|
const { container } = await render(RepositoryCard, {
|
||||||
repo: {
|
repo: {
|
||||||
id: '/facebook/react',
|
id: '/facebook/react',
|
||||||
title: 'React',
|
title: 'React',
|
||||||
@@ -26,7 +26,8 @@ describe('RepositoryCard.svelte', () => {
|
|||||||
.element(page.getByRole('link', { name: 'Details' }))
|
.element(page.getByRole('link', { name: 'Details' }))
|
||||||
.toHaveAttribute('href', '/repos/%2Ffacebook%2Freact');
|
.toHaveAttribute('href', '/repos/%2Ffacebook%2Freact');
|
||||||
|
|
||||||
await expect.element(page.getByText('1,200 embeddings')).toBeInTheDocument();
|
const text = container.textContent ?? '';
|
||||||
await expect.element(page.getByText('Indexed: main, v18.3.0')).toBeInTheDocument();
|
expect(text).toMatch(/1[,.\u00a0\u202f]?200 embeddings/);
|
||||||
|
expect(text).toContain('Indexed: main, v18.3.0');
|
||||||
});
|
});
|
||||||
});
|
});
|
||||||
@@ -413,6 +413,59 @@ describe('LocalCrawler.crawl() — config file detection', () => {
|
|||||||
const result = await crawlRoot();
|
const result = await crawlRoot();
|
||||||
expect(result.files.some((f) => f.path === 'src/index.ts')).toBe(true);
|
expect(result.files.some((f) => f.path === 'src/index.ts')).toBe(true);
|
||||||
});
|
});
|
||||||
|
|
||||||
|
it('populates CrawlResult.config with the parsed trueref.json even when folders allowlist excludes the root', async () => {
|
||||||
|
// Regression test for MULTIVERSION-0001:
|
||||||
|
// When folders: ["src/"] is set, trueref.json at the root is excluded from
|
||||||
|
// files[] by shouldIndexFile(). The config must still be returned in
|
||||||
|
// CrawlResult.config so the indexing pipeline can persist rules.
|
||||||
|
root = await makeTempRepo({
|
||||||
|
'trueref.json': JSON.stringify({
|
||||||
|
folders: ['src/'],
|
||||||
|
rules: ['Always document public APIs.']
|
||||||
|
}),
|
||||||
|
'src/index.ts': 'export {};',
|
||||||
|
'docs/guide.md': '# Guide'
|
||||||
|
});
|
||||||
|
const result = await crawlRoot();
|
||||||
|
|
||||||
|
// trueref.json must NOT appear in files (excluded by folders allowlist).
|
||||||
|
expect(result.files.some((f) => f.path === 'trueref.json')).toBe(false);
|
||||||
|
// docs/guide.md must NOT appear (outside src/).
|
||||||
|
expect(result.files.some((f) => f.path === 'docs/guide.md')).toBe(false);
|
||||||
|
// src/index.ts must appear (inside src/).
|
||||||
|
expect(result.files.some((f) => f.path === 'src/index.ts')).toBe(true);
|
||||||
|
// CrawlResult.config must carry the parsed config.
|
||||||
|
expect(result.config).toBeDefined();
|
||||||
|
expect(result.config?.rules).toEqual(['Always document public APIs.']);
|
||||||
|
});
|
||||||
|
|
||||||
|
it('populates CrawlResult.config with the parsed context7.json', async () => {
|
||||||
|
root = await makeTempRepo({
|
||||||
|
'context7.json': JSON.stringify({ rules: ['Rule from context7.'] }),
|
||||||
|
'src/index.ts': 'export {};'
|
||||||
|
});
|
||||||
|
const result = await crawlRoot();
|
||||||
|
expect(result.config).toBeDefined();
|
||||||
|
expect(result.config?.rules).toEqual(['Rule from context7.']);
|
||||||
|
});
|
||||||
|
|
||||||
|
it('CrawlResult.config is undefined when no config file is present', async () => {
|
||||||
|
root = await makeTempRepo({ 'src/index.ts': 'export {};' });
|
||||||
|
const result = await crawlRoot();
|
||||||
|
expect(result.config).toBeUndefined();
|
||||||
|
});
|
||||||
|
|
||||||
|
it('CrawlResult.config is undefined when caller supplies config (caller-provided takes precedence, no auto-detect)', async () => {
|
||||||
|
root = await makeTempRepo({
|
||||||
|
'trueref.json': JSON.stringify({ rules: ['From file.'] }),
|
||||||
|
'src/index.ts': 'export {};'
|
||||||
|
});
|
||||||
|
// Caller-supplied config prevents auto-detection; CrawlResult.config
|
||||||
|
// should carry the caller config (not the file content).
|
||||||
|
const result = await crawlRoot({ config: { rules: ['From caller.'] } });
|
||||||
|
expect(result.config?.rules).toEqual(['From caller.']);
|
||||||
|
});
|
||||||
});
|
});
|
||||||
|
|
||||||
// ---------------------------------------------------------------------------
|
// ---------------------------------------------------------------------------
|
||||||
|
|||||||
@@ -230,7 +230,11 @@ export class LocalCrawler {
|
|||||||
totalFiles: filteredPaths.length,
|
totalFiles: filteredPaths.length,
|
||||||
skippedFiles: allRelPaths.length - filteredPaths.length,
|
skippedFiles: allRelPaths.length - filteredPaths.length,
|
||||||
branch,
|
branch,
|
||||||
commitSha
|
commitSha,
|
||||||
|
// Surface the pre-parsed config so the indexing pipeline can read rules
|
||||||
|
// without needing to find trueref.json inside crawledFiles (which fails
|
||||||
|
// when a `folders` allowlist excludes the repo root).
|
||||||
|
config: config ?? undefined
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -35,6 +35,13 @@ export interface CrawlResult {
|
|||||||
branch: string;
|
branch: string;
|
||||||
/** HEAD commit SHA */
|
/** HEAD commit SHA */
|
||||||
commitSha: string;
|
commitSha: string;
|
||||||
|
/**
|
||||||
|
* Pre-parsed trueref.json / context7.json configuration found at the repo
|
||||||
|
* root during crawling. Carried here so the indexing pipeline can consume it
|
||||||
|
* directly without having to locate the config file in `files` — which fails
|
||||||
|
* when a `folders` allowlist excludes the repo root.
|
||||||
|
*/
|
||||||
|
config?: RepoConfig;
|
||||||
}
|
}
|
||||||
|
|
||||||
export interface CrawlOptions {
|
export interface CrawlOptions {
|
||||||
|
|||||||
@@ -30,6 +30,7 @@ const __dirname = dirname(fileURLToPath(import.meta.url));
|
|||||||
*/
|
*/
|
||||||
export function initializeDatabase(): void {
|
export function initializeDatabase(): void {
|
||||||
const migrationsFolder = join(__dirname, 'migrations');
|
const migrationsFolder = join(__dirname, 'migrations');
|
||||||
|
console.log(`[db] Running migrations from ${migrationsFolder}...`);
|
||||||
migrate(db, { migrationsFolder });
|
migrate(db, { migrationsFolder });
|
||||||
|
|
||||||
// Apply FTS5 virtual table and trigger DDL (not expressible via Drizzle).
|
// Apply FTS5 virtual table and trigger DDL (not expressible via Drizzle).
|
||||||
|
|||||||
835
src/lib/server/db/migrations/meta/0003_snapshot.json
Normal file
835
src/lib/server/db/migrations/meta/0003_snapshot.json
Normal file
@@ -0,0 +1,835 @@
|
|||||||
|
{
|
||||||
|
"version": "6",
|
||||||
|
"dialect": "sqlite",
|
||||||
|
"id": "a7c2e4f8-3b1d-4e9a-8f0c-6d5e2a1b9c7f",
|
||||||
|
"prevId": "31531dab-a199-4fc5-a889-1884940039cd",
|
||||||
|
"tables": {
|
||||||
|
"documents": {
|
||||||
|
"name": "documents",
|
||||||
|
"columns": {
|
||||||
|
"id": {
|
||||||
|
"name": "id",
|
||||||
|
"type": "text",
|
||||||
|
"primaryKey": true,
|
||||||
|
"notNull": true,
|
||||||
|
"autoincrement": false
|
||||||
|
},
|
||||||
|
"repository_id": {
|
||||||
|
"name": "repository_id",
|
||||||
|
"type": "text",
|
||||||
|
"primaryKey": false,
|
||||||
|
"notNull": true,
|
||||||
|
"autoincrement": false
|
||||||
|
},
|
||||||
|
"version_id": {
|
||||||
|
"name": "version_id",
|
||||||
|
"type": "text",
|
||||||
|
"primaryKey": false,
|
||||||
|
"notNull": false,
|
||||||
|
"autoincrement": false
|
||||||
|
},
|
||||||
|
"file_path": {
|
||||||
|
"name": "file_path",
|
||||||
|
"type": "text",
|
||||||
|
"primaryKey": false,
|
||||||
|
"notNull": true,
|
||||||
|
"autoincrement": false
|
||||||
|
},
|
||||||
|
"title": {
|
||||||
|
"name": "title",
|
||||||
|
"type": "text",
|
||||||
|
"primaryKey": false,
|
||||||
|
"notNull": false,
|
||||||
|
"autoincrement": false
|
||||||
|
},
|
||||||
|
"language": {
|
||||||
|
"name": "language",
|
||||||
|
"type": "text",
|
||||||
|
"primaryKey": false,
|
||||||
|
"notNull": false,
|
||||||
|
"autoincrement": false
|
||||||
|
},
|
||||||
|
"token_count": {
|
||||||
|
"name": "token_count",
|
||||||
|
"type": "integer",
|
||||||
|
"primaryKey": false,
|
||||||
|
"notNull": false,
|
||||||
|
"autoincrement": false,
|
||||||
|
"default": 0
|
||||||
|
},
|
||||||
|
"checksum": {
|
||||||
|
"name": "checksum",
|
||||||
|
"type": "text",
|
||||||
|
"primaryKey": false,
|
||||||
|
"notNull": true,
|
||||||
|
"autoincrement": false
|
||||||
|
},
|
||||||
|
"indexed_at": {
|
||||||
|
"name": "indexed_at",
|
||||||
|
"type": "integer",
|
||||||
|
"primaryKey": false,
|
||||||
|
"notNull": true,
|
||||||
|
"autoincrement": false
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"indexes": {},
|
||||||
|
"foreignKeys": {
|
||||||
|
"documents_repository_id_repositories_id_fk": {
|
||||||
|
"name": "documents_repository_id_repositories_id_fk",
|
||||||
|
"tableFrom": "documents",
|
||||||
|
"tableTo": "repositories",
|
||||||
|
"columnsFrom": ["repository_id"],
|
||||||
|
"columnsTo": ["id"],
|
||||||
|
"onDelete": "cascade",
|
||||||
|
"onUpdate": "no action"
|
||||||
|
},
|
||||||
|
"documents_version_id_repository_versions_id_fk": {
|
||||||
|
"name": "documents_version_id_repository_versions_id_fk",
|
||||||
|
"tableFrom": "documents",
|
||||||
|
"tableTo": "repository_versions",
|
||||||
|
"columnsFrom": ["version_id"],
|
||||||
|
"columnsTo": ["id"],
|
||||||
|
"onDelete": "cascade",
|
||||||
|
"onUpdate": "no action"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"compositePrimaryKeys": {},
|
||||||
|
"uniqueConstraints": {},
|
||||||
|
"checkConstraints": {}
|
||||||
|
},
|
||||||
|
"embedding_profiles": {
|
||||||
|
"name": "embedding_profiles",
|
||||||
|
"columns": {
|
||||||
|
"id": {
|
||||||
|
"name": "id",
|
||||||
|
"type": "text",
|
||||||
|
"primaryKey": true,
|
||||||
|
"notNull": true,
|
||||||
|
"autoincrement": false
|
||||||
|
},
|
||||||
|
"provider_kind": {
|
||||||
|
"name": "provider_kind",
|
||||||
|
"type": "text",
|
||||||
|
"primaryKey": false,
|
||||||
|
"notNull": true,
|
||||||
|
"autoincrement": false
|
||||||
|
},
|
||||||
|
"title": {
|
||||||
|
"name": "title",
|
||||||
|
"type": "text",
|
||||||
|
"primaryKey": false,
|
||||||
|
"notNull": true,
|
||||||
|
"autoincrement": false
|
||||||
|
},
|
||||||
|
"enabled": {
|
||||||
|
"name": "enabled",
|
||||||
|
"type": "integer",
|
||||||
|
"primaryKey": false,
|
||||||
|
"notNull": true,
|
||||||
|
"autoincrement": false,
|
||||||
|
"default": true
|
||||||
|
},
|
||||||
|
"is_default": {
|
||||||
|
"name": "is_default",
|
||||||
|
"type": "integer",
|
||||||
|
"primaryKey": false,
|
||||||
|
"notNull": true,
|
||||||
|
"autoincrement": false,
|
||||||
|
"default": false
|
||||||
|
},
|
||||||
|
"model": {
|
||||||
|
"name": "model",
|
||||||
|
"type": "text",
|
||||||
|
"primaryKey": false,
|
||||||
|
"notNull": true,
|
||||||
|
"autoincrement": false
|
||||||
|
},
|
||||||
|
"dimensions": {
|
||||||
|
"name": "dimensions",
|
||||||
|
"type": "integer",
|
||||||
|
"primaryKey": false,
|
||||||
|
"notNull": true,
|
||||||
|
"autoincrement": false
|
||||||
|
},
|
||||||
|
"config": {
|
||||||
|
"name": "config",
|
||||||
|
"type": "text",
|
||||||
|
"primaryKey": false,
|
||||||
|
"notNull": true,
|
||||||
|
"autoincrement": false
|
||||||
|
},
|
||||||
|
"created_at": {
|
||||||
|
"name": "created_at",
|
||||||
|
"type": "integer",
|
||||||
|
"primaryKey": false,
|
||||||
|
"notNull": true,
|
||||||
|
"autoincrement": false
|
||||||
|
},
|
||||||
|
"updated_at": {
|
||||||
|
"name": "updated_at",
|
||||||
|
"type": "integer",
|
||||||
|
"primaryKey": false,
|
||||||
|
"notNull": true,
|
||||||
|
"autoincrement": false
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"indexes": {},
|
||||||
|
"foreignKeys": {},
|
||||||
|
"compositePrimaryKeys": {},
|
||||||
|
"uniqueConstraints": {},
|
||||||
|
"checkConstraints": {}
|
||||||
|
},
|
||||||
|
"indexing_jobs": {
|
||||||
|
"name": "indexing_jobs",
|
||||||
|
"columns": {
|
||||||
|
"id": {
|
||||||
|
"name": "id",
|
||||||
|
"type": "text",
|
||||||
|
"primaryKey": true,
|
||||||
|
"notNull": true,
|
||||||
|
"autoincrement": false
|
||||||
|
},
|
||||||
|
"repository_id": {
|
||||||
|
"name": "repository_id",
|
||||||
|
"type": "text",
|
||||||
|
"primaryKey": false,
|
||||||
|
"notNull": true,
|
||||||
|
"autoincrement": false
|
||||||
|
},
|
||||||
|
"version_id": {
|
||||||
|
"name": "version_id",
|
||||||
|
"type": "text",
|
||||||
|
"primaryKey": false,
|
||||||
|
"notNull": false,
|
||||||
|
"autoincrement": false
|
||||||
|
},
|
||||||
|
"status": {
|
||||||
|
"name": "status",
|
||||||
|
"type": "text",
|
||||||
|
"primaryKey": false,
|
||||||
|
"notNull": true,
|
||||||
|
"autoincrement": false,
|
||||||
|
"default": "'queued'"
|
||||||
|
},
|
||||||
|
"progress": {
|
||||||
|
"name": "progress",
|
||||||
|
"type": "integer",
|
||||||
|
"primaryKey": false,
|
||||||
|
"notNull": false,
|
||||||
|
"autoincrement": false,
|
||||||
|
"default": 0
|
||||||
|
},
|
||||||
|
"total_files": {
|
||||||
|
"name": "total_files",
|
||||||
|
"type": "integer",
|
||||||
|
"primaryKey": false,
|
||||||
|
"notNull": false,
|
||||||
|
"autoincrement": false,
|
||||||
|
"default": 0
|
||||||
|
},
|
||||||
|
"processed_files": {
|
||||||
|
"name": "processed_files",
|
||||||
|
"type": "integer",
|
||||||
|
"primaryKey": false,
|
||||||
|
"notNull": false,
|
||||||
|
"autoincrement": false,
|
||||||
|
"default": 0
|
||||||
|
},
|
||||||
|
"error": {
|
||||||
|
"name": "error",
|
||||||
|
"type": "text",
|
||||||
|
"primaryKey": false,
|
||||||
|
"notNull": false,
|
||||||
|
"autoincrement": false
|
||||||
|
},
|
||||||
|
"started_at": {
|
||||||
|
"name": "started_at",
|
||||||
|
"type": "integer",
|
||||||
|
"primaryKey": false,
|
||||||
|
"notNull": false,
|
||||||
|
"autoincrement": false
|
||||||
|
},
|
||||||
|
"completed_at": {
|
||||||
|
"name": "completed_at",
|
||||||
|
"type": "integer",
|
||||||
|
"primaryKey": false,
|
||||||
|
"notNull": false,
|
||||||
|
"autoincrement": false
|
||||||
|
},
|
||||||
|
"created_at": {
|
||||||
|
"name": "created_at",
|
||||||
|
"type": "integer",
|
||||||
|
"primaryKey": false,
|
||||||
|
"notNull": true,
|
||||||
|
"autoincrement": false
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"indexes": {},
|
||||||
|
"foreignKeys": {
|
||||||
|
"indexing_jobs_repository_id_repositories_id_fk": {
|
||||||
|
"name": "indexing_jobs_repository_id_repositories_id_fk",
|
||||||
|
"tableFrom": "indexing_jobs",
|
||||||
|
"tableTo": "repositories",
|
||||||
|
"columnsFrom": ["repository_id"],
|
||||||
|
"columnsTo": ["id"],
|
||||||
|
"onDelete": "cascade",
|
||||||
|
"onUpdate": "no action"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"compositePrimaryKeys": {},
|
||||||
|
"uniqueConstraints": {},
|
||||||
|
"checkConstraints": {}
|
||||||
|
},
|
||||||
|
"repositories": {
|
||||||
|
"name": "repositories",
|
||||||
|
"columns": {
|
||||||
|
"id": {
|
||||||
|
"name": "id",
|
||||||
|
"type": "text",
|
||||||
|
"primaryKey": true,
|
||||||
|
"notNull": true,
|
||||||
|
"autoincrement": false
|
||||||
|
},
|
||||||
|
"title": {
|
||||||
|
"name": "title",
|
||||||
|
"type": "text",
|
||||||
|
"primaryKey": false,
|
||||||
|
"notNull": true,
|
||||||
|
"autoincrement": false
|
||||||
|
},
|
||||||
|
"description": {
|
||||||
|
"name": "description",
|
||||||
|
"type": "text",
|
||||||
|
"primaryKey": false,
|
||||||
|
"notNull": false,
|
||||||
|
"autoincrement": false
|
||||||
|
},
|
||||||
|
"source": {
|
||||||
|
"name": "source",
|
||||||
|
"type": "text",
|
||||||
|
"primaryKey": false,
|
||||||
|
"notNull": true,
|
||||||
|
"autoincrement": false
|
||||||
|
},
|
||||||
|
"source_url": {
|
||||||
|
"name": "source_url",
|
||||||
|
"type": "text",
|
||||||
|
"primaryKey": false,
|
||||||
|
"notNull": true,
|
||||||
|
"autoincrement": false
|
||||||
|
},
|
||||||
|
"branch": {
|
||||||
|
"name": "branch",
|
||||||
|
"type": "text",
|
||||||
|
"primaryKey": false,
|
||||||
|
"notNull": false,
|
||||||
|
"autoincrement": false,
|
||||||
|
"default": "'main'"
|
||||||
|
},
|
||||||
|
"state": {
|
||||||
|
"name": "state",
|
||||||
|
"type": "text",
|
||||||
|
"primaryKey": false,
|
||||||
|
"notNull": true,
|
||||||
|
"autoincrement": false,
|
||||||
|
"default": "'pending'"
|
||||||
|
},
|
||||||
|
"total_snippets": {
|
||||||
|
"name": "total_snippets",
|
||||||
|
"type": "integer",
|
||||||
|
"primaryKey": false,
|
||||||
|
"notNull": false,
|
||||||
|
"autoincrement": false,
|
||||||
|
"default": 0
|
||||||
|
},
|
||||||
|
"total_tokens": {
|
||||||
|
"name": "total_tokens",
|
||||||
|
"type": "integer",
|
||||||
|
"primaryKey": false,
|
||||||
|
"notNull": false,
|
||||||
|
"autoincrement": false,
|
||||||
|
"default": 0
|
||||||
|
},
|
||||||
|
"trust_score": {
|
||||||
|
"name": "trust_score",
|
||||||
|
"type": "real",
|
||||||
|
"primaryKey": false,
|
||||||
|
"notNull": false,
|
||||||
|
"autoincrement": false,
|
||||||
|
"default": 0
|
||||||
|
},
|
||||||
|
"benchmark_score": {
|
||||||
|
"name": "benchmark_score",
|
||||||
|
"type": "real",
|
||||||
|
"primaryKey": false,
|
||||||
|
"notNull": false,
|
||||||
|
"autoincrement": false,
|
||||||
|
"default": 0
|
||||||
|
},
|
||||||
|
"stars": {
|
||||||
|
"name": "stars",
|
||||||
|
"type": "integer",
|
||||||
|
"primaryKey": false,
|
||||||
|
"notNull": false,
|
||||||
|
"autoincrement": false
|
||||||
|
},
|
||||||
|
"github_token": {
|
||||||
|
"name": "github_token",
|
||||||
|
"type": "text",
|
||||||
|
"primaryKey": false,
|
||||||
|
"notNull": false,
|
||||||
|
"autoincrement": false
|
||||||
|
},
|
||||||
|
"last_indexed_at": {
|
||||||
|
"name": "last_indexed_at",
|
||||||
|
"type": "integer",
|
||||||
|
"primaryKey": false,
|
||||||
|
"notNull": false,
|
||||||
|
"autoincrement": false
|
||||||
|
},
|
||||||
|
"created_at": {
|
||||||
|
"name": "created_at",
|
||||||
|
"type": "integer",
|
||||||
|
"primaryKey": false,
|
||||||
|
"notNull": true,
|
||||||
|
"autoincrement": false
|
||||||
|
},
|
||||||
|
"updated_at": {
|
||||||
|
"name": "updated_at",
|
||||||
|
"type": "integer",
|
||||||
|
"primaryKey": false,
|
||||||
|
"notNull": true,
|
||||||
|
"autoincrement": false
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"indexes": {},
|
||||||
|
"foreignKeys": {},
|
||||||
|
"compositePrimaryKeys": {},
|
||||||
|
"uniqueConstraints": {},
|
||||||
|
"checkConstraints": {}
|
||||||
|
},
|
||||||
|
"repository_configs": {
|
||||||
|
"name": "repository_configs",
|
||||||
|
"columns": {
|
||||||
|
"repository_id": {
|
||||||
|
"name": "repository_id",
|
||||||
|
"type": "text",
|
||||||
|
"primaryKey": false,
|
||||||
|
"notNull": true,
|
||||||
|
"autoincrement": false
|
||||||
|
},
|
||||||
|
"version_id": {
|
||||||
|
"name": "version_id",
|
||||||
|
"type": "text",
|
||||||
|
"primaryKey": false,
|
||||||
|
"notNull": false,
|
||||||
|
"autoincrement": false
|
||||||
|
},
|
||||||
|
"project_title": {
|
||||||
|
"name": "project_title",
|
||||||
|
"type": "text",
|
||||||
|
"primaryKey": false,
|
||||||
|
"notNull": false,
|
||||||
|
"autoincrement": false
|
||||||
|
},
|
||||||
|
"description": {
|
||||||
|
"name": "description",
|
||||||
|
"type": "text",
|
||||||
|
"primaryKey": false,
|
||||||
|
"notNull": false,
|
||||||
|
"autoincrement": false
|
||||||
|
},
|
||||||
|
"folders": {
|
||||||
|
"name": "folders",
|
||||||
|
"type": "text",
|
||||||
|
"primaryKey": false,
|
||||||
|
"notNull": false,
|
||||||
|
"autoincrement": false
|
||||||
|
},
|
||||||
|
"exclude_folders": {
|
||||||
|
"name": "exclude_folders",
|
||||||
|
"type": "text",
|
||||||
|
"primaryKey": false,
|
||||||
|
"notNull": false,
|
||||||
|
"autoincrement": false
|
||||||
|
},
|
||||||
|
"exclude_files": {
|
||||||
|
"name": "exclude_files",
|
||||||
|
"type": "text",
|
||||||
|
"primaryKey": false,
|
||||||
|
"notNull": false,
|
||||||
|
"autoincrement": false
|
||||||
|
},
|
||||||
|
"rules": {
|
||||||
|
"name": "rules",
|
||||||
|
"type": "text",
|
||||||
|
"primaryKey": false,
|
||||||
|
"notNull": false,
|
||||||
|
"autoincrement": false
|
||||||
|
},
|
||||||
|
"previous_versions": {
|
||||||
|
"name": "previous_versions",
|
||||||
|
"type": "text",
|
||||||
|
"primaryKey": false,
|
||||||
|
"notNull": false,
|
||||||
|
"autoincrement": false
|
||||||
|
},
|
||||||
|
"updated_at": {
|
||||||
|
"name": "updated_at",
|
||||||
|
"type": "integer",
|
||||||
|
"primaryKey": false,
|
||||||
|
"notNull": true,
|
||||||
|
"autoincrement": false
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"indexes": {
|
||||||
|
"uniq_repo_config_base": {
|
||||||
|
"name": "uniq_repo_config_base",
|
||||||
|
"columns": ["repository_id"],
|
||||||
|
"isUnique": true,
|
||||||
|
"where": "`version_id` IS NULL"
|
||||||
|
},
|
||||||
|
"uniq_repo_config_version": {
|
||||||
|
"name": "uniq_repo_config_version",
|
||||||
|
"columns": ["repository_id", "version_id"],
|
||||||
|
"isUnique": true,
|
||||||
|
"where": "`version_id` IS NOT NULL"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"foreignKeys": {
|
||||||
|
"repository_configs_repository_id_repositories_id_fk": {
|
||||||
|
"name": "repository_configs_repository_id_repositories_id_fk",
|
||||||
|
"tableFrom": "repository_configs",
|
||||||
|
"tableTo": "repositories",
|
||||||
|
"columnsFrom": ["repository_id"],
|
||||||
|
"columnsTo": ["id"],
|
||||||
|
"onDelete": "cascade",
|
||||||
|
"onUpdate": "no action"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"compositePrimaryKeys": {},
|
||||||
|
"uniqueConstraints": {},
|
||||||
|
"checkConstraints": {}
|
||||||
|
},
|
||||||
|
"repository_versions": {
|
||||||
|
"name": "repository_versions",
|
||||||
|
"columns": {
|
||||||
|
"id": {
|
||||||
|
"name": "id",
|
||||||
|
"type": "text",
|
||||||
|
"primaryKey": true,
|
||||||
|
"notNull": true,
|
||||||
|
"autoincrement": false
|
||||||
|
},
|
||||||
|
"repository_id": {
|
||||||
|
"name": "repository_id",
|
||||||
|
"type": "text",
|
||||||
|
"primaryKey": false,
|
||||||
|
"notNull": true,
|
||||||
|
"autoincrement": false
|
||||||
|
},
|
||||||
|
"tag": {
|
||||||
|
"name": "tag",
|
||||||
|
"type": "text",
|
||||||
|
"primaryKey": false,
|
||||||
|
"notNull": true,
|
||||||
|
"autoincrement": false
|
||||||
|
},
|
||||||
|
"title": {
|
||||||
|
"name": "title",
|
||||||
|
"type": "text",
|
||||||
|
"primaryKey": false,
|
||||||
|
"notNull": false,
|
||||||
|
"autoincrement": false
|
||||||
|
},
|
||||||
|
"commit_hash": {
|
||||||
|
"name": "commit_hash",
|
||||||
|
"type": "text",
|
||||||
|
"primaryKey": false,
|
||||||
|
"notNull": false,
|
||||||
|
"autoincrement": false
|
||||||
|
},
|
||||||
|
"state": {
|
||||||
|
"name": "state",
|
||||||
|
"type": "text",
|
||||||
|
"primaryKey": false,
|
||||||
|
"notNull": true,
|
||||||
|
"autoincrement": false,
|
||||||
|
"default": "'pending'"
|
||||||
|
},
|
||||||
|
"total_snippets": {
|
||||||
|
"name": "total_snippets",
|
||||||
|
"type": "integer",
|
||||||
|
"primaryKey": false,
|
||||||
|
"notNull": false,
|
||||||
|
"autoincrement": false,
|
||||||
|
"default": 0
|
||||||
|
},
|
||||||
|
"indexed_at": {
|
||||||
|
"name": "indexed_at",
|
||||||
|
"type": "integer",
|
||||||
|
"primaryKey": false,
|
||||||
|
"notNull": false,
|
||||||
|
"autoincrement": false
|
||||||
|
},
|
||||||
|
"created_at": {
|
||||||
|
"name": "created_at",
|
||||||
|
"type": "integer",
|
||||||
|
"primaryKey": false,
|
||||||
|
"notNull": true,
|
||||||
|
"autoincrement": false
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"indexes": {},
|
||||||
|
"foreignKeys": {
|
||||||
|
"repository_versions_repository_id_repositories_id_fk": {
|
||||||
|
"name": "repository_versions_repository_id_repositories_id_fk",
|
||||||
|
"tableFrom": "repository_versions",
|
||||||
|
"tableTo": "repositories",
|
||||||
|
"columnsFrom": ["repository_id"],
|
||||||
|
"columnsTo": ["id"],
|
||||||
|
"onDelete": "cascade",
|
||||||
|
"onUpdate": "no action"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"compositePrimaryKeys": {},
|
||||||
|
"uniqueConstraints": {},
|
||||||
|
"checkConstraints": {}
|
||||||
|
},
|
||||||
|
"settings": {
|
||||||
|
"name": "settings",
|
||||||
|
"columns": {
|
||||||
|
"key": {
|
||||||
|
"name": "key",
|
||||||
|
"type": "text",
|
||||||
|
"primaryKey": true,
|
||||||
|
"notNull": true,
|
||||||
|
"autoincrement": false
|
||||||
|
},
|
||||||
|
"value": {
|
||||||
|
"name": "value",
|
||||||
|
"type": "text",
|
||||||
|
"primaryKey": false,
|
||||||
|
"notNull": false,
|
||||||
|
"autoincrement": false
|
||||||
|
},
|
||||||
|
"updated_at": {
|
||||||
|
"name": "updated_at",
|
||||||
|
"type": "integer",
|
||||||
|
"primaryKey": false,
|
||||||
|
"notNull": true,
|
||||||
|
"autoincrement": false
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"indexes": {},
|
||||||
|
"foreignKeys": {},
|
||||||
|
"compositePrimaryKeys": {},
|
||||||
|
"uniqueConstraints": {},
|
||||||
|
"checkConstraints": {}
|
||||||
|
},
|
||||||
|
"snippet_embeddings": {
|
||||||
|
"name": "snippet_embeddings",
|
||||||
|
"columns": {
|
||||||
|
"snippet_id": {
|
||||||
|
"name": "snippet_id",
|
||||||
|
"type": "text",
|
||||||
|
"primaryKey": false,
|
||||||
|
"notNull": true,
|
||||||
|
"autoincrement": false
|
||||||
|
},
|
||||||
|
"profile_id": {
|
||||||
|
"name": "profile_id",
|
||||||
|
"type": "text",
|
||||||
|
"primaryKey": false,
|
||||||
|
"notNull": true,
|
||||||
|
"autoincrement": false
|
||||||
|
},
|
||||||
|
"model": {
|
||||||
|
"name": "model",
|
||||||
|
"type": "text",
|
||||||
|
"primaryKey": false,
|
||||||
|
"notNull": true,
|
||||||
|
"autoincrement": false
|
||||||
|
},
|
||||||
|
"dimensions": {
|
||||||
|
"name": "dimensions",
|
||||||
|
"type": "integer",
|
||||||
|
"primaryKey": false,
|
||||||
|
"notNull": true,
|
||||||
|
"autoincrement": false
|
||||||
|
},
|
||||||
|
"embedding": {
|
||||||
|
"name": "embedding",
|
||||||
|
"type": "blob",
|
||||||
|
"primaryKey": false,
|
||||||
|
"notNull": true,
|
||||||
|
"autoincrement": false
|
||||||
|
},
|
||||||
|
"created_at": {
|
||||||
|
"name": "created_at",
|
||||||
|
"type": "integer",
|
||||||
|
"primaryKey": false,
|
||||||
|
"notNull": true,
|
||||||
|
"autoincrement": false
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"indexes": {},
|
||||||
|
"foreignKeys": {
|
||||||
|
"snippet_embeddings_snippet_id_snippets_id_fk": {
|
||||||
|
"name": "snippet_embeddings_snippet_id_snippets_id_fk",
|
||||||
|
"tableFrom": "snippet_embeddings",
|
||||||
|
"tableTo": "snippets",
|
||||||
|
"columnsFrom": ["snippet_id"],
|
||||||
|
"columnsTo": ["id"],
|
||||||
|
"onDelete": "cascade",
|
||||||
|
"onUpdate": "no action"
|
||||||
|
},
|
||||||
|
"snippet_embeddings_profile_id_embedding_profiles_id_fk": {
|
||||||
|
"name": "snippet_embeddings_profile_id_embedding_profiles_id_fk",
|
||||||
|
"tableFrom": "snippet_embeddings",
|
||||||
|
"tableTo": "embedding_profiles",
|
||||||
|
"columnsFrom": ["profile_id"],
|
||||||
|
"columnsTo": ["id"],
|
||||||
|
"onDelete": "cascade",
|
||||||
|
"onUpdate": "no action"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"compositePrimaryKeys": {
|
||||||
|
"snippet_embeddings_snippet_id_profile_id_pk": {
|
||||||
|
"columns": ["snippet_id", "profile_id"],
|
||||||
|
"name": "snippet_embeddings_snippet_id_profile_id_pk"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"uniqueConstraints": {},
|
||||||
|
"checkConstraints": {}
|
||||||
|
},
|
||||||
|
"snippets": {
|
||||||
|
"name": "snippets",
|
||||||
|
"columns": {
|
||||||
|
"id": {
|
||||||
|
"name": "id",
|
||||||
|
"type": "text",
|
||||||
|
"primaryKey": true,
|
||||||
|
"notNull": true,
|
||||||
|
"autoincrement": false
|
||||||
|
},
|
||||||
|
"document_id": {
|
||||||
|
"name": "document_id",
|
||||||
|
"type": "text",
|
||||||
|
"primaryKey": false,
|
||||||
|
"notNull": true,
|
||||||
|
"autoincrement": false
|
||||||
|
},
|
||||||
|
"repository_id": {
|
||||||
|
"name": "repository_id",
|
||||||
|
"type": "text",
|
||||||
|
"primaryKey": false,
|
||||||
|
"notNull": true,
|
||||||
|
"autoincrement": false
|
||||||
|
},
|
||||||
|
"version_id": {
|
||||||
|
"name": "version_id",
|
||||||
|
"type": "text",
|
||||||
|
"primaryKey": false,
|
||||||
|
"notNull": false,
|
||||||
|
"autoincrement": false
|
||||||
|
},
|
||||||
|
"type": {
|
||||||
|
"name": "type",
|
||||||
|
"type": "text",
|
||||||
|
"primaryKey": false,
|
||||||
|
"notNull": true,
|
||||||
|
"autoincrement": false
|
||||||
|
},
|
||||||
|
"title": {
|
||||||
|
"name": "title",
|
||||||
|
"type": "text",
|
||||||
|
"primaryKey": false,
|
||||||
|
"notNull": false,
|
||||||
|
"autoincrement": false
|
||||||
|
},
|
||||||
|
"content": {
|
||||||
|
"name": "content",
|
||||||
|
"type": "text",
|
||||||
|
"primaryKey": false,
|
||||||
|
"notNull": true,
|
||||||
|
"autoincrement": false
|
||||||
|
},
|
||||||
|
"language": {
|
||||||
|
"name": "language",
|
||||||
|
"type": "text",
|
||||||
|
"primaryKey": false,
|
||||||
|
"notNull": false,
|
||||||
|
"autoincrement": false
|
||||||
|
},
|
||||||
|
"breadcrumb": {
|
||||||
|
"name": "breadcrumb",
|
||||||
|
"type": "text",
|
||||||
|
"primaryKey": false,
|
||||||
|
"notNull": false,
|
||||||
|
"autoincrement": false
|
||||||
|
},
|
||||||
|
"token_count": {
|
||||||
|
"name": "token_count",
|
||||||
|
"type": "integer",
|
||||||
|
"primaryKey": false,
|
||||||
|
"notNull": false,
|
||||||
|
"autoincrement": false,
|
||||||
|
"default": 0
|
||||||
|
},
|
||||||
|
"created_at": {
|
||||||
|
"name": "created_at",
|
||||||
|
"type": "integer",
|
||||||
|
"primaryKey": false,
|
||||||
|
"notNull": true,
|
||||||
|
"autoincrement": false
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"indexes": {},
|
||||||
|
"foreignKeys": {
|
||||||
|
"snippets_document_id_documents_id_fk": {
|
||||||
|
"name": "snippets_document_id_documents_id_fk",
|
||||||
|
"tableFrom": "snippets",
|
||||||
|
"tableTo": "documents",
|
||||||
|
"columnsFrom": ["document_id"],
|
||||||
|
"columnsTo": ["id"],
|
||||||
|
"onDelete": "cascade",
|
||||||
|
"onUpdate": "no action"
|
||||||
|
},
|
||||||
|
"snippets_repository_id_repositories_id_fk": {
|
||||||
|
"name": "snippets_repository_id_repositories_id_fk",
|
||||||
|
"tableFrom": "snippets",
|
||||||
|
"tableTo": "repositories",
|
||||||
|
"columnsFrom": ["repository_id"],
|
||||||
|
"columnsTo": ["id"],
|
||||||
|
"onDelete": "cascade",
|
||||||
|
"onUpdate": "no action"
|
||||||
|
},
|
||||||
|
"snippets_version_id_repository_versions_id_fk": {
|
||||||
|
"name": "snippets_version_id_repository_versions_id_fk",
|
||||||
|
"tableFrom": "snippets",
|
||||||
|
"tableTo": "repository_versions",
|
||||||
|
"columnsFrom": ["version_id"],
|
||||||
|
"columnsTo": ["id"],
|
||||||
|
"onDelete": "cascade",
|
||||||
|
"onUpdate": "no action"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"compositePrimaryKeys": {},
|
||||||
|
"uniqueConstraints": {},
|
||||||
|
"checkConstraints": {}
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"views": {},
|
||||||
|
"enums": {},
|
||||||
|
"schemas": {},
|
||||||
|
"sequences": {},
|
||||||
|
"_meta": {
|
||||||
|
"schemas": {},
|
||||||
|
"tables": {},
|
||||||
|
"columns": {}
|
||||||
|
},
|
||||||
|
"internal": {
|
||||||
|
"indexes": {}
|
||||||
|
}
|
||||||
|
}
|
||||||
@@ -268,6 +268,8 @@ describe('IndexingPipeline', () => {
|
|||||||
crawlResult: {
|
crawlResult: {
|
||||||
files: Array<{ path: string; content: string; sha: string; language: string }>;
|
files: Array<{ path: string; content: string; sha: string; language: string }>;
|
||||||
totalFiles: number;
|
totalFiles: number;
|
||||||
|
/** Optional pre-parsed config — simulates LocalCrawler returning CrawlResult.config. */
|
||||||
|
config?: Record<string, unknown>;
|
||||||
} = { files: [], totalFiles: 0 },
|
} = { files: [], totalFiles: 0 },
|
||||||
embeddingService: EmbeddingService | null = null
|
embeddingService: EmbeddingService | null = null
|
||||||
) {
|
) {
|
||||||
@@ -867,15 +869,17 @@ describe('IndexingPipeline', () => {
|
|||||||
|
|
||||||
await pipeline.run(job as never);
|
await pipeline.run(job as never);
|
||||||
|
|
||||||
// Repo-wide row (version_id IS NULL) must exist.
|
// Repo-wide row (version_id IS NULL) must NOT be written by a version job —
|
||||||
|
// writing it here would contaminate the NULL entry with version-specific rules
|
||||||
|
// (Bug 5b regression guard).
|
||||||
const repoRow = db
|
const repoRow = db
|
||||||
.prepare(
|
.prepare(
|
||||||
`SELECT rules FROM repository_configs WHERE repository_id = '/test/repo' AND version_id IS NULL`
|
`SELECT rules FROM repository_configs WHERE repository_id = '/test/repo' AND version_id IS NULL`
|
||||||
)
|
)
|
||||||
.get() as { rules: string } | undefined;
|
.get() as { rules: string } | undefined;
|
||||||
expect(repoRow).toBeDefined();
|
expect(repoRow).toBeUndefined();
|
||||||
|
|
||||||
// Version-specific row must also exist.
|
// Version-specific row must exist with the correct rules.
|
||||||
const versionRow = db
|
const versionRow = db
|
||||||
.prepare(
|
.prepare(
|
||||||
`SELECT rules FROM repository_configs WHERE repository_id = '/test/repo' AND version_id = ?`
|
`SELECT rules FROM repository_configs WHERE repository_id = '/test/repo' AND version_id = ?`
|
||||||
@@ -885,4 +889,133 @@ describe('IndexingPipeline', () => {
|
|||||||
const rules = JSON.parse(versionRow!.rules);
|
const rules = JSON.parse(versionRow!.rules);
|
||||||
expect(rules).toEqual(['This is v2. Use the new Builder API.']);
|
expect(rules).toEqual(['This is v2. Use the new Builder API.']);
|
||||||
});
|
});
|
||||||
|
|
||||||
|
it('regression(Bug5b): version job does not overwrite the repo-wide NULL rules entry', async () => {
|
||||||
|
// Arrange: index the main branch first to establish a repo-wide rules entry.
|
||||||
|
const mainBranchRules = ['Always use TypeScript strict mode.'];
|
||||||
|
const mainPipeline = makePipeline({
|
||||||
|
files: [
|
||||||
|
{
|
||||||
|
path: 'trueref.json',
|
||||||
|
content: JSON.stringify({ rules: mainBranchRules }),
|
||||||
|
sha: 'sha-main-config',
|
||||||
|
language: 'json'
|
||||||
|
}
|
||||||
|
],
|
||||||
|
totalFiles: 1
|
||||||
|
});
|
||||||
|
const mainJob = makeJob('/test/repo'); // no versionId → main-branch job
|
||||||
|
await mainPipeline.run(mainJob as never);
|
||||||
|
|
||||||
|
// Confirm the repo-wide entry was written.
|
||||||
|
const afterMain = db
|
||||||
|
.prepare(
|
||||||
|
`SELECT rules FROM repository_configs WHERE repository_id = '/test/repo' AND version_id IS NULL`
|
||||||
|
)
|
||||||
|
.get() as { rules: string } | undefined;
|
||||||
|
expect(afterMain).toBeDefined();
|
||||||
|
expect(JSON.parse(afterMain!.rules)).toEqual(mainBranchRules);
|
||||||
|
|
||||||
|
// Act: index a version with different rules.
|
||||||
|
const versionId = insertVersion(db, { tag: 'v3.0.0', state: 'pending' });
|
||||||
|
const versionRules = ['v3 only: use the streaming API.'];
|
||||||
|
const versionPipeline = makePipeline({
|
||||||
|
files: [
|
||||||
|
{
|
||||||
|
path: 'trueref.json',
|
||||||
|
content: JSON.stringify({ rules: versionRules }),
|
||||||
|
sha: 'sha-v3-config',
|
||||||
|
language: 'json'
|
||||||
|
}
|
||||||
|
],
|
||||||
|
totalFiles: 1
|
||||||
|
});
|
||||||
|
const versionJob = makeJob('/test/repo', versionId);
|
||||||
|
await versionPipeline.run(versionJob as never);
|
||||||
|
|
||||||
|
// Assert: the repo-wide NULL entry must still contain the main-branch rules,
|
||||||
|
// not the version-specific ones.
|
||||||
|
const afterVersion = db
|
||||||
|
.prepare(
|
||||||
|
`SELECT rules FROM repository_configs WHERE repository_id = '/test/repo' AND version_id IS NULL`
|
||||||
|
)
|
||||||
|
.get() as { rules: string } | undefined;
|
||||||
|
expect(afterVersion).toBeDefined();
|
||||||
|
expect(JSON.parse(afterVersion!.rules)).toEqual(mainBranchRules);
|
||||||
|
|
||||||
|
// And the version-specific row must contain the version rules.
|
||||||
|
const versionRow = db
|
||||||
|
.prepare(
|
||||||
|
`SELECT rules FROM repository_configs WHERE repository_id = '/test/repo' AND version_id = ?`
|
||||||
|
)
|
||||||
|
.get(versionId) as { rules: string } | undefined;
|
||||||
|
expect(versionRow).toBeDefined();
|
||||||
|
expect(JSON.parse(versionRow!.rules)).toEqual(versionRules);
|
||||||
|
});
|
||||||
|
|
||||||
|
it('persists rules from CrawlResult.config even when trueref.json is absent from files (folders allowlist bug)', async () => {
|
||||||
|
// Regression test for MULTIVERSION-0001:
|
||||||
|
// When trueref.json specifies a `folders` allowlist (e.g. ["src/"]),
|
||||||
|
// shouldIndexFile() excludes trueref.json itself because it lives at the
|
||||||
|
// repo root. The LocalCrawler now carries the pre-parsed config in
|
||||||
|
// CrawlResult.config so the pipeline no longer needs to find the file in
|
||||||
|
// crawlResult.files[].
|
||||||
|
const pipeline = makePipeline({
|
||||||
|
// trueref.json is NOT in files — simulates it being excluded by folders allowlist.
|
||||||
|
files: [
|
||||||
|
{
|
||||||
|
path: 'src/index.ts',
|
||||||
|
content: 'export const x = 1;',
|
||||||
|
sha: 'sha-src',
|
||||||
|
language: 'typescript'
|
||||||
|
}
|
||||||
|
],
|
||||||
|
totalFiles: 1,
|
||||||
|
// The pre-parsed config is carried here instead (set by LocalCrawler).
|
||||||
|
config: { rules: ['Use strict TypeScript.', 'Avoid any.'] }
|
||||||
|
});
|
||||||
|
const job = makeJob();
|
||||||
|
|
||||||
|
await pipeline.run(job as never);
|
||||||
|
|
||||||
|
const row = db
|
||||||
|
.prepare(
|
||||||
|
`SELECT rules FROM repository_configs WHERE repository_id = '/test/repo' AND version_id IS NULL`
|
||||||
|
)
|
||||||
|
.get() as { rules: string } | undefined;
|
||||||
|
|
||||||
|
expect(row).toBeDefined();
|
||||||
|
const rules = JSON.parse(row!.rules);
|
||||||
|
expect(rules).toEqual(['Use strict TypeScript.', 'Avoid any.']);
|
||||||
|
});
|
||||||
|
|
||||||
|
it('persists version-specific rules from CrawlResult.config when trueref.json is excluded by folders allowlist', async () => {
|
||||||
|
const versionId = insertVersion(db, { tag: 'v3.0.0', state: 'pending' });
|
||||||
|
|
||||||
|
const pipeline = makePipeline({
|
||||||
|
files: [
|
||||||
|
{
|
||||||
|
path: 'src/index.ts',
|
||||||
|
content: 'export const x = 1;',
|
||||||
|
sha: 'sha-src',
|
||||||
|
language: 'typescript'
|
||||||
|
}
|
||||||
|
],
|
||||||
|
totalFiles: 1,
|
||||||
|
config: { rules: ['v3: use the streaming API.'] }
|
||||||
|
});
|
||||||
|
const job = makeJob('/test/repo', versionId);
|
||||||
|
|
||||||
|
await pipeline.run(job as never);
|
||||||
|
|
||||||
|
const versionRow = db
|
||||||
|
.prepare(
|
||||||
|
`SELECT rules FROM repository_configs WHERE repository_id = '/test/repo' AND version_id = ?`
|
||||||
|
)
|
||||||
|
.get(versionId) as { rules: string } | undefined;
|
||||||
|
|
||||||
|
expect(versionRow).toBeDefined();
|
||||||
|
const rules = JSON.parse(versionRow!.rules);
|
||||||
|
expect(rules).toEqual(['v3: use the streaming API.']);
|
||||||
|
});
|
||||||
});
|
});
|
||||||
|
|||||||
@@ -15,14 +15,14 @@
|
|||||||
|
|
||||||
import { createHash, randomUUID } from 'node:crypto';
|
import { createHash, randomUUID } from 'node:crypto';
|
||||||
import type Database from 'better-sqlite3';
|
import type Database from 'better-sqlite3';
|
||||||
import type { Document, NewDocument, NewSnippet } from '$lib/types';
|
import type { Document, NewDocument, NewSnippet, TrueRefConfig } from '$lib/types';
|
||||||
import type { crawl as GithubCrawlFn } from '$lib/server/crawler/github.crawler.js';
|
import type { crawl as GithubCrawlFn } from '$lib/server/crawler/github.crawler.js';
|
||||||
import type { LocalCrawler } from '$lib/server/crawler/local.crawler.js';
|
import type { LocalCrawler } from '$lib/server/crawler/local.crawler.js';
|
||||||
import type { EmbeddingService } from '$lib/server/embeddings/embedding.service.js';
|
import type { EmbeddingService } from '$lib/server/embeddings/embedding.service.js';
|
||||||
import { RepositoryMapper } from '$lib/server/mappers/repository.mapper.js';
|
import { RepositoryMapper } from '$lib/server/mappers/repository.mapper.js';
|
||||||
import { IndexingJob } from '$lib/server/models/indexing-job.js';
|
import { IndexingJob } from '$lib/server/models/indexing-job.js';
|
||||||
import { Repository, RepositoryEntity } from '$lib/server/models/repository.js';
|
import { Repository, RepositoryEntity } from '$lib/server/models/repository.js';
|
||||||
import { resolveConfig } from '$lib/server/config/config-parser.js';
|
import { resolveConfig, type ParsedConfig } from '$lib/server/config/config-parser.js';
|
||||||
import { parseFile } from '$lib/server/parser/index.js';
|
import { parseFile } from '$lib/server/parser/index.js';
|
||||||
import { computeTrustScore } from '$lib/server/search/trust-score.js';
|
import { computeTrustScore } from '$lib/server/search/trust-score.js';
|
||||||
import { computeDiff } from './diff.js';
|
import { computeDiff } from './diff.js';
|
||||||
@@ -101,13 +101,25 @@ export class IndexingPipeline {
|
|||||||
: undefined;
|
: undefined;
|
||||||
const crawlResult = await this.crawl(repo, versionTag);
|
const crawlResult = await this.crawl(repo, versionTag);
|
||||||
|
|
||||||
// Parse trueref.json / context7.json if present in the crawl results.
|
// Resolve trueref.json / context7.json configuration.
|
||||||
|
// Prefer the pre-parsed config carried in the CrawlResult (set by
|
||||||
|
// LocalCrawler so it is available even when a `folders` allowlist
|
||||||
|
// excludes the repo root and trueref.json never appears in files[]).
|
||||||
|
// Fall back to locating the file in crawlResult.files for GitHub crawls
|
||||||
|
// which do not yet populate CrawlResult.config.
|
||||||
|
let parsedConfig: ReturnType<typeof resolveConfig> | null = null;
|
||||||
|
if (crawlResult.config) {
|
||||||
|
// Config was pre-parsed by the crawler — wrap it in a ParsedConfig
|
||||||
|
// shell so the rest of the pipeline can use it uniformly.
|
||||||
|
parsedConfig = { config: crawlResult.config, source: 'trueref.json', warnings: [] } satisfies ParsedConfig;
|
||||||
|
} else {
|
||||||
const configFile = crawlResult.files.find(
|
const configFile = crawlResult.files.find(
|
||||||
(f) => f.path === 'trueref.json' || f.path === 'context7.json'
|
(f) => f.path === 'trueref.json' || f.path === 'context7.json'
|
||||||
);
|
);
|
||||||
const parsedConfig = configFile
|
parsedConfig = configFile
|
||||||
? resolveConfig([{ filename: configFile.path, content: configFile.content }])
|
? resolveConfig([{ filename: configFile.path, content: configFile.content }])
|
||||||
: null;
|
: null;
|
||||||
|
}
|
||||||
const excludeFiles: string[] = parsedConfig?.config.excludeFiles ?? [];
|
const excludeFiles: string[] = parsedConfig?.config.excludeFiles ?? [];
|
||||||
|
|
||||||
// Filter out excluded files before diff computation.
|
// Filter out excluded files before diff computation.
|
||||||
@@ -264,10 +276,13 @@ export class IndexingPipeline {
|
|||||||
|
|
||||||
// ---- Stage 6: Persist rules from config ----------------------------
|
// ---- Stage 6: Persist rules from config ----------------------------
|
||||||
if (parsedConfig?.config.rules?.length) {
|
if (parsedConfig?.config.rules?.length) {
|
||||||
// Repo-wide rules (versionId = null).
|
if (!normJob.versionId) {
|
||||||
|
// Main-branch job: write the repo-wide entry only.
|
||||||
this.upsertRepoConfig(repo.id, null, parsedConfig.config.rules);
|
this.upsertRepoConfig(repo.id, null, parsedConfig.config.rules);
|
||||||
// Version-specific rules stored separately when indexing a version.
|
} else {
|
||||||
if (normJob.versionId) {
|
// Version job: write only the version-specific entry.
|
||||||
|
// Writing to the NULL row here would overwrite repo-wide rules
|
||||||
|
// with whatever the last-indexed version happened to carry.
|
||||||
this.upsertRepoConfig(repo.id, normJob.versionId, parsedConfig.config.rules);
|
this.upsertRepoConfig(repo.id, normJob.versionId, parsedConfig.config.rules);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -304,6 +319,8 @@ export class IndexingPipeline {
|
|||||||
private async crawl(repo: Repository, ref?: string): Promise<{
|
private async crawl(repo: Repository, ref?: string): Promise<{
|
||||||
files: Array<{ path: string; content: string; sha: string; size: number; language: string }>;
|
files: Array<{ path: string; content: string; sha: string; size: number; language: string }>;
|
||||||
totalFiles: number;
|
totalFiles: number;
|
||||||
|
/** Pre-parsed trueref.json / context7.json, or undefined when absent. */
|
||||||
|
config?: TrueRefConfig;
|
||||||
}> {
|
}> {
|
||||||
if (repo.source === 'github') {
|
if (repo.source === 'github') {
|
||||||
// Parse owner/repo from the canonical ID: "/owner/repo"
|
// Parse owner/repo from the canonical ID: "/owner/repo"
|
||||||
@@ -330,7 +347,7 @@ export class IndexingPipeline {
|
|||||||
ref: ref ?? (repo.branch !== 'main' ? (repo.branch ?? undefined) : undefined)
|
ref: ref ?? (repo.branch !== 'main' ? (repo.branch ?? undefined) : undefined)
|
||||||
});
|
});
|
||||||
|
|
||||||
return { files: result.files, totalFiles: result.totalFiles };
|
return { files: result.files, totalFiles: result.totalFiles, config: result.config };
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -446,7 +446,11 @@ describe('API contract integration', () => {
|
|||||||
const repositoryId = seedRepo(db);
|
const repositoryId = seedRepo(db);
|
||||||
const versionId = seedVersion(db, repositoryId, 'v18.3.0');
|
const versionId = seedVersion(db, repositoryId, 'v18.3.0');
|
||||||
const documentId = seedDocument(db, repositoryId, versionId);
|
const documentId = seedDocument(db, repositoryId, versionId);
|
||||||
seedRules(db, repositoryId, ['Prefer hooks over classes']);
|
// Insert version-specific rules (versioned queries no longer inherit the NULL row).
|
||||||
|
db.prepare(
|
||||||
|
`INSERT INTO repository_configs (repository_id, version_id, rules, updated_at)
|
||||||
|
VALUES (?, ?, ?, ?)`
|
||||||
|
).run(repositoryId, versionId, JSON.stringify(['Prefer hooks over classes']), NOW_S);
|
||||||
seedSnippet(db, {
|
seedSnippet(db, {
|
||||||
documentId,
|
documentId,
|
||||||
repositoryId,
|
repositoryId,
|
||||||
@@ -497,12 +501,12 @@ describe('API contract integration', () => {
|
|||||||
});
|
});
|
||||||
});
|
});
|
||||||
|
|
||||||
it('GET /api/v1/context returns merged repo-wide and version-specific rules', async () => {
|
it('GET /api/v1/context returns only version-specific rules for versioned queries (no NULL row contamination)', async () => {
|
||||||
const repositoryId = seedRepo(db);
|
const repositoryId = seedRepo(db);
|
||||||
const versionId = seedVersion(db, repositoryId, 'v2.0.0');
|
const versionId = seedVersion(db, repositoryId, 'v2.0.0');
|
||||||
const documentId = seedDocument(db, repositoryId, versionId);
|
const documentId = seedDocument(db, repositoryId, versionId);
|
||||||
|
|
||||||
// Insert repo-wide rules (version_id IS NULL).
|
// Insert repo-wide rules (version_id IS NULL) — these must NOT appear in versioned queries.
|
||||||
db.prepare(
|
db.prepare(
|
||||||
`INSERT INTO repository_configs (repository_id, version_id, rules, updated_at)
|
`INSERT INTO repository_configs (repository_id, version_id, rules, updated_at)
|
||||||
VALUES (?, NULL, ?, ?)`
|
VALUES (?, NULL, ?, ?)`
|
||||||
@@ -529,8 +533,8 @@ describe('API contract integration', () => {
|
|||||||
|
|
||||||
expect(response.status).toBe(200);
|
expect(response.status).toBe(200);
|
||||||
const body = await response.json();
|
const body = await response.json();
|
||||||
// Both repo-wide and version-specific rules should appear (deduped).
|
// Only the version-specific rule should appear — NULL row must not contaminate.
|
||||||
expect(body.rules).toEqual(['Repo-wide rule', 'Version-specific rule']);
|
expect(body.rules).toEqual(['Version-specific rule']);
|
||||||
});
|
});
|
||||||
|
|
||||||
it('GET /api/v1/context returns only repo-wide rules when no version is requested', async () => {
|
it('GET /api/v1/context returns only repo-wide rules when no version is requested', async () => {
|
||||||
@@ -556,12 +560,13 @@ describe('API contract integration', () => {
|
|||||||
expect(body.rules).toEqual(['Repo-wide rule only']);
|
expect(body.rules).toEqual(['Repo-wide rule only']);
|
||||||
});
|
});
|
||||||
|
|
||||||
it('GET /api/v1/context deduplicates rules that appear in both repo-wide and version config', async () => {
|
it('GET /api/v1/context versioned query returns only the version-specific rules row', async () => {
|
||||||
const repositoryId = seedRepo(db);
|
const repositoryId = seedRepo(db);
|
||||||
const versionId = seedVersion(db, repositoryId, 'v3.0.0');
|
const versionId = seedVersion(db, repositoryId, 'v3.0.0');
|
||||||
const documentId = seedDocument(db, repositoryId, versionId);
|
const documentId = seedDocument(db, repositoryId, versionId);
|
||||||
|
|
||||||
const sharedRule = 'Use TypeScript strict mode';
|
const sharedRule = 'Use TypeScript strict mode';
|
||||||
|
// Insert repo-wide NULL row — must NOT bleed into versioned query results.
|
||||||
db.prepare(
|
db.prepare(
|
||||||
`INSERT INTO repository_configs (repository_id, version_id, rules, updated_at)
|
`INSERT INTO repository_configs (repository_id, version_id, rules, updated_at)
|
||||||
VALUES (?, NULL, ?, ?)`
|
VALUES (?, NULL, ?, ?)`
|
||||||
@@ -582,10 +587,35 @@ describe('API contract integration', () => {
|
|||||||
|
|
||||||
expect(response.status).toBe(200);
|
expect(response.status).toBe(200);
|
||||||
const body = await response.json();
|
const body = await response.json();
|
||||||
// sharedRule appears once, version-only rule appended.
|
// Returns only the version-specific row as stored — no NULL row merge.
|
||||||
expect(body.rules).toEqual([sharedRule, 'Version-only rule']);
|
expect(body.rules).toEqual([sharedRule, 'Version-only rule']);
|
||||||
});
|
});
|
||||||
|
|
||||||
|
it('GET /api/v1/context versioned query returns empty rules when only NULL row exists (no NULL contamination)', async () => {
|
||||||
|
const repositoryId = seedRepo(db);
|
||||||
|
const versionId = seedVersion(db, repositoryId, 'v1.0.0');
|
||||||
|
const documentId = seedDocument(db, repositoryId, versionId);
|
||||||
|
|
||||||
|
// Only a repo-wide NULL row exists — no version-specific config.
|
||||||
|
db.prepare(
|
||||||
|
`INSERT INTO repository_configs (repository_id, version_id, rules, updated_at)
|
||||||
|
VALUES (?, NULL, ?, ?)`
|
||||||
|
).run(repositoryId, JSON.stringify(['HEAD rules that must not contaminate v1']), NOW_S);
|
||||||
|
|
||||||
|
seedSnippet(db, { documentId, repositoryId, versionId, content: 'v1 content' });
|
||||||
|
|
||||||
|
const response = await getContext({
|
||||||
|
url: new URL(
|
||||||
|
`http://test/api/v1/context?libraryId=${encodeURIComponent(`${repositoryId}/v1.0.0`)}&query=${encodeURIComponent('v1 content')}`
|
||||||
|
)
|
||||||
|
} as never);
|
||||||
|
|
||||||
|
expect(response.status).toBe(200);
|
||||||
|
const body = await response.json();
|
||||||
|
// No version-specific config row → empty rules. NULL row must not bleed in.
|
||||||
|
expect(body.rules).toEqual([]);
|
||||||
|
});
|
||||||
|
|
||||||
it('GET /api/v1/context returns 404 with VERSION_NOT_FOUND when version does not exist', async () => {
|
it('GET /api/v1/context returns 404 with VERSION_NOT_FOUND when version does not exist', async () => {
|
||||||
const repositoryId = seedRepo(db);
|
const repositoryId = seedRepo(db);
|
||||||
|
|
||||||
|
|||||||
@@ -69,35 +69,25 @@ function getRules(
|
|||||||
repositoryId: string,
|
repositoryId: string,
|
||||||
versionId?: string
|
versionId?: string
|
||||||
): string[] {
|
): string[] {
|
||||||
// Repo-wide rules (version_id IS NULL).
|
if (!versionId) {
|
||||||
const repoRow = db
|
// Unversioned query: return repo-wide (HEAD) rules only.
|
||||||
|
const row = db
|
||||||
.prepare<
|
.prepare<
|
||||||
[string],
|
[string],
|
||||||
RawRepoConfig
|
RawRepoConfig
|
||||||
>(`SELECT rules FROM repository_configs WHERE repository_id = ? AND version_id IS NULL`)
|
>(`SELECT rules FROM repository_configs WHERE repository_id = ? AND version_id IS NULL`)
|
||||||
.get(repositoryId);
|
.get(repositoryId);
|
||||||
|
return parseRulesJson(row?.rules);
|
||||||
|
}
|
||||||
|
|
||||||
const repoRules = parseRulesJson(repoRow?.rules);
|
// Versioned query: return only version-specific rules (no NULL row merge).
|
||||||
|
const row = db
|
||||||
if (!versionId) return repoRules;
|
|
||||||
|
|
||||||
// Version-specific rules.
|
|
||||||
const versionRow = db
|
|
||||||
.prepare<
|
.prepare<
|
||||||
[string, string],
|
[string, string],
|
||||||
RawRepoConfig
|
RawRepoConfig
|
||||||
>(`SELECT rules FROM repository_configs WHERE repository_id = ? AND version_id = ?`)
|
>(`SELECT rules FROM repository_configs WHERE repository_id = ? AND version_id = ?`)
|
||||||
.get(repositoryId, versionId);
|
.get(repositoryId, versionId);
|
||||||
|
return parseRulesJson(row?.rules);
|
||||||
const versionRules = parseRulesJson(versionRow?.rules);
|
|
||||||
|
|
||||||
// Merge: repo-wide first, then version-specific (deduped by content).
|
|
||||||
const seen = new Set(repoRules);
|
|
||||||
const merged = [...repoRules];
|
|
||||||
for (const r of versionRules) {
|
|
||||||
if (!seen.has(r)) merged.push(r);
|
|
||||||
}
|
|
||||||
return merged;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
interface RawRepoState {
|
interface RawRepoState {
|
||||||
|
|||||||
@@ -524,6 +524,29 @@
|
|||||||
</button>
|
</button>
|
||||||
</div>
|
</div>
|
||||||
</div>
|
</div>
|
||||||
|
{#if version.totalSnippets > 0 || version.commitHash || version.indexedAt}
|
||||||
|
{@const metaParts = (
|
||||||
|
[
|
||||||
|
version.totalSnippets > 0
|
||||||
|
? { text: `${version.totalSnippets} snippets`, mono: false }
|
||||||
|
: null,
|
||||||
|
version.commitHash
|
||||||
|
? { text: version.commitHash.slice(0, 8), mono: true }
|
||||||
|
: null,
|
||||||
|
version.indexedAt
|
||||||
|
? { text: formatDate(version.indexedAt), mono: false }
|
||||||
|
: null
|
||||||
|
] as Array<{ text: string; mono: boolean } | null>
|
||||||
|
).filter((p): p is { text: string; mono: boolean } => p !== null)}
|
||||||
|
<div class="mt-1 flex items-center gap-1.5">
|
||||||
|
{#each metaParts as part, i (i)}
|
||||||
|
{#if i > 0}
|
||||||
|
<span class="text-xs text-gray-300">·</span>
|
||||||
|
{/if}
|
||||||
|
<span class="text-xs text-gray-400{part.mono ? ' font-mono' : ''}">{part.text}</span>
|
||||||
|
{/each}
|
||||||
|
</div>
|
||||||
|
{/if}
|
||||||
{#if !!activeVersionJobs[version.tag]}
|
{#if !!activeVersionJobs[version.tag]}
|
||||||
<IndexingProgress
|
<IndexingProgress
|
||||||
jobId={activeVersionJobs[version.tag]!}
|
jobId={activeVersionJobs[version.tag]!}
|
||||||
|
|||||||
Reference in New Issue
Block a user