fix(MULTIVERSION-0001): surface pre-parsed config in CrawlResult to fix rules persistence
When trueref.json specifies a `folders` allowlist (e.g. ["src/"]), shouldIndexFile() excludes trueref.json itself because it lives at the repo root. The indexing pipeline then searches crawlResult.files for the config file, finds nothing, and never writes rules to repository_configs.

Fix (Option B): add a `config` field to CrawlResult so LocalCrawler returns the pre-parsed config directly. The indexing pipeline now reads crawlResult.config first instead of scanning files[], which resolves the regression for all repos with a folders allowlist.

- Add `config?: RepoConfig` to CrawlResult in crawler/types.ts
- Return `config` from LocalCrawler.crawlDirectory()
- Update IndexingPipeline.crawl() to propagate CrawlResult.config
- Update IndexingPipeline.run() to prefer crawlResult.config over files
- Add regression tests covering the folders-allowlist exclusion scenario

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -268,6 +268,8 @@ describe('IndexingPipeline', () => {
|
||||
crawlResult: {
|
||||
files: Array<{ path: string; content: string; sha: string; language: string }>;
|
||||
totalFiles: number;
|
||||
/** Optional pre-parsed config — simulates LocalCrawler returning CrawlResult.config. */
|
||||
config?: Record<string, unknown>;
|
||||
} = { files: [], totalFiles: 0 },
|
||||
embeddingService: EmbeddingService | null = null
|
||||
) {
|
||||
@@ -885,4 +887,70 @@ describe('IndexingPipeline', () => {
|
||||
const rules = JSON.parse(versionRow!.rules);
|
||||
expect(rules).toEqual(['This is v2. Use the new Builder API.']);
|
||||
});
|
||||
|
||||
it('persists rules from CrawlResult.config even when trueref.json is absent from files (folders allowlist bug)', async () => {
  // Regression guard for MULTIVERSION-0001.
  //
  // A `folders` allowlist in trueref.json (e.g. ["src/"]) makes
  // shouldIndexFile() drop trueref.json itself, since the file sits at the
  // repo root. LocalCrawler therefore hands the already-parsed config to the
  // pipeline through CrawlResult.config, and the pipeline must persist rules
  // from there without needing the file to appear in crawlResult.files[].
  const expectedRules = ['Use strict TypeScript.', 'Avoid any.'];

  // The only crawled file; trueref.json is deliberately missing to mimic the
  // allowlist having filtered it out.
  const onlySourceFile = {
    path: 'src/index.ts',
    content: 'export const x = 1;',
    sha: 'sha-src',
    language: 'typescript'
  };

  const pipeline = makePipeline({
    files: [onlySourceFile],
    totalFiles: 1,
    // Pre-parsed config travels here instead (as LocalCrawler would set it).
    // A copy is passed so the expectation below stays independent of the input.
    config: { rules: [...expectedRules] }
  });

  await pipeline.run(makeJob() as never);

  // Repo-wide rules are stored on the row whose version_id is NULL.
  const selectRepoWideRules = db.prepare(
    `SELECT rules FROM repository_configs WHERE repository_id = '/test/repo' AND version_id IS NULL`
  );
  const persisted = selectRepoWideRules.get() as { rules: string } | undefined;

  expect(persisted).toBeDefined();
  expect(JSON.parse(persisted!.rules)).toEqual(expectedRules);
});
|
||||
|
||||
it('persists version-specific rules from CrawlResult.config when trueref.json is excluded by folders allowlist', async () => {
  // Version-scoped variant of the folders-allowlist scenario: the job targets
  // a specific version, and the rules arrive only via CrawlResult.config
  // because trueref.json is not among the crawled files.
  const expectedRules = ['v3: use the streaming API.'];
  const versionId = insertVersion(db, { tag: 'v3.0.0', state: 'pending' });

  const onlySourceFile = {
    path: 'src/index.ts',
    content: 'export const x = 1;',
    sha: 'sha-src',
    language: 'typescript'
  };

  const pipeline = makePipeline({
    files: [onlySourceFile],
    totalFiles: 1,
    // A copy is passed so the expectation below stays independent of the input.
    config: { rules: [...expectedRules] }
  });

  await pipeline.run(makeJob('/test/repo', versionId) as never);

  // Look up the config row recorded for this exact version.
  const selectVersionRules = db.prepare(
    `SELECT rules FROM repository_configs WHERE repository_id = '/test/repo' AND version_id = ?`
  );
  const persisted = selectVersionRules.get(versionId) as { rules: string } | undefined;

  expect(persisted).toBeDefined();
  expect(JSON.parse(persisted!.rules)).toEqual(expectedRules);
});
|
||||
});
|
||||
|
||||
Reference in New Issue
Block a user