fix(MULTIVERSION-0001): surface pre-parsed config in CrawlResult to fix rules persistence
When trueref.json specifies a `folders` allowlist (e.g. ["src/"]), shouldIndexFile() excludes trueref.json itself because it lives at the repo root. The indexing pipeline then searches crawlResult.files for the config file, finds nothing, and never writes rules to repository_configs.

Fix (Option B): add a `config` field to CrawlResult so LocalCrawler returns the pre-parsed config directly. The indexing pipeline now reads crawlResult.config first instead of scanning files[], which resolves the regression for all repos with a folders allowlist.

- Add `config?: RepoConfig` to CrawlResult in crawler/types.ts
- Return `config` from LocalCrawler.crawlDirectory()
- Update IndexingPipeline.crawl() to propagate CrawlResult.config
- Update IndexingPipeline.run() to prefer crawlResult.config over files
- Add regression tests covering the folders-allowlist exclusion scenario

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -413,6 +413,59 @@ describe('LocalCrawler.crawl() — config file detection', () => {
|
||||
const result = await crawlRoot();
|
||||
expect(result.files.some((f) => f.path === 'src/index.ts')).toBe(true);
|
||||
});
|
||||
|
||||
it('populates CrawlResult.config with the parsed trueref.json even when folders allowlist excludes the root', async () => {
	// MULTIVERSION-0001 regression: with folders: ["src/"], the root-level
	// trueref.json is filtered out of files[] by shouldIndexFile(). The parsed
	// config must nevertheless be surfaced on CrawlResult.config so the
	// indexing pipeline can persist rules.
	const repoConfig = {
		folders: ['src/'],
		rules: ['Always document public APIs.']
	};
	root = await makeTempRepo({
		'trueref.json': JSON.stringify(repoConfig),
		'src/index.ts': 'export {};',
		'docs/guide.md': '# Guide'
	});

	const result = await crawlRoot();
	const indexedPaths = result.files.map((f) => f.path);

	// The config file sits at the root, outside the allowlist: not indexed.
	expect(indexedPaths.includes('trueref.json')).toBe(false);
	// Anything outside src/ is excluded as well.
	expect(indexedPaths.includes('docs/guide.md')).toBe(false);
	// Files under src/ are indexed.
	expect(indexedPaths.includes('src/index.ts')).toBe(true);

	// The parsed config is still reported on the crawl result.
	expect(result.config).toBeDefined();
	expect(result.config?.rules).toEqual(['Always document public APIs.']);
});
|
||||
|
||||
it('populates CrawlResult.config with the parsed context7.json', async () => {
	// Only context7.json is present here (no trueref.json); its rules are
	// expected to show up on CrawlResult.config.
	const expectedRules = ['Rule from context7.'];
	root = await makeTempRepo({
		'context7.json': JSON.stringify({ rules: expectedRules }),
		'src/index.ts': 'export {};'
	});

	const { config } = await crawlRoot();

	expect(config).toBeDefined();
	expect(config?.rules).toEqual(expectedRules);
});
|
||||
|
||||
it('CrawlResult.config is undefined when no config file is present', async () => {
	// The repo contains a single source file and no config file at all.
	const repoFiles = { 'src/index.ts': 'export {};' };
	root = await makeTempRepo(repoFiles);

	const { config } = await crawlRoot();

	expect(config).toBeUndefined();
});
|
||||
|
||||
it('CrawlResult.config carries the caller-supplied config when one is provided (caller-provided takes precedence, no auto-detect)', async () => {
	// The repo has its own trueref.json, but the caller also passes a config.
	root = await makeTempRepo({
		'trueref.json': JSON.stringify({ rules: ['From file.'] }),
		'src/index.ts': 'export {};'
	});

	// Caller-supplied config prevents auto-detection; CrawlResult.config
	// should carry the caller config (not the file content).
	const result = await crawlRoot({ config: { rules: ['From caller.'] } });

	// The previous title claimed the config would be undefined, which
	// contradicted these assertions; the title now matches what is checked.
	expect(result.config).toBeDefined();
	expect(result.config?.rules).toEqual(['From caller.']);
});
|
||||
});
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
Reference in New Issue
Block a user