Files
trueref/src/lib/server/crawler/local.crawler.test.ts
Giancarmine Salucci cd4ea7112c fix(MULTIVERSION-0001): surface pre-parsed config in CrawlResult to fix rules persistence
When trueref.json specifies a `folders` allowlist (e.g. ["src/"]),
shouldIndexFile() excludes trueref.json itself because it lives at the
repo root. The indexing pipeline then searches crawlResult.files for the
config file, finds nothing, and never writes rules to repository_configs.

Fix (Option B): add a `config` field to CrawlResult so LocalCrawler
returns the pre-parsed config directly. The indexing pipeline now reads
crawlResult.config first instead of scanning files[], which resolves the
regression for all repos with a folders allowlist.

- Add `config?: RepoConfig` to CrawlResult in crawler/types.ts
- Return `config` from LocalCrawler.crawlDirectory()
- Update IndexingPipeline.crawl() to propagate CrawlResult.config
- Update IndexingPipeline.run() to prefer crawlResult.config over files
- Add regression tests covering the folders-allowlist exclusion scenario

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-03-28 17:27:53 +01:00

750 lines
26 KiB
TypeScript
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
/**
* Unit tests for the local filesystem crawler (TRUEREF-0004).
*
* Each test that needs a filesystem fixture creates a temporary directory via
* `fs.mkdtemp`, writes the required files, runs the crawler, then cleans up
* with `fs.rm` regardless of the test outcome.
*/
import { execFile } from 'node:child_process';
import { createHash } from 'node:crypto';
import { promises as fs } from 'node:fs';
import { join } from 'node:path';
import { tmpdir } from 'node:os';
import { promisify } from 'node:util';
import { afterEach, beforeEach, describe, expect, it } from 'vitest';
import { LocalCrawler } from './local.crawler.js';
import type { LocalCrawlOptions } from './local.crawler.js';
import { InvalidRefError, NotAGitRepositoryError } from './types.js';
const execFileAsync = promisify(execFile);
// ---------------------------------------------------------------------------
// Helpers
// ---------------------------------------------------------------------------
/** Hash a UTF-8 string with SHA-256 and return the lowercase hex digest. */
function sha256(content: string): string {
  const hasher = createHash('sha256');
  hasher.update(content, 'utf-8');
  return hasher.digest('hex');
}
/** Create a temp directory, write a map of relPath → content, return rootPath. */
/** Create a temp directory, write a map of relPath → content, return rootPath. */
async function makeTempRepo(files: Record<string, string>): Promise<string> {
  const rootPath = await fs.mkdtemp(join(tmpdir(), 'trueref-test-'));
  for (const [relPath, content] of Object.entries(files)) {
    const target = join(rootPath, relPath);
    // Ensure the parent directory chain exists before writing the file.
    await fs.mkdir(join(target, '..'), { recursive: true });
    await fs.writeFile(target, content, 'utf-8');
  }
  return rootPath;
}
/** Remove a temporary directory tree created by makeTempRepo. */
async function cleanupTempRepo(root: string): Promise<void> {
  // force: true makes cleanup of an already-removed path a silent no-op.
  await fs.rm(root, { force: true, recursive: true });
}
// ---------------------------------------------------------------------------
// Test state
// ---------------------------------------------------------------------------
// Shared fixture state: `root` is (re)assigned by each test's setup and
// removed in the matching afterEach; one crawler instance serves all suites.
let root: string = '';
const crawler = new LocalCrawler();

/**
 * Crawl the shared `root` fixture, layering any per-test option overrides on
 * top of the rootPath.
 */
async function crawlRoot(
  opts: Partial<LocalCrawlOptions> = {}
): Promise<ReturnType<LocalCrawler['crawl']>> {
  const merged = { rootPath: root, ...opts };
  return crawler.crawl(merged);
}
// ---------------------------------------------------------------------------
// Basic crawl behaviour
// ---------------------------------------------------------------------------
describe('LocalCrawler.crawl() — basic file enumeration', () => {
// Fixture: four indexable files (markdown, two TypeScript modules, JSON),
// rebuilt before every test and removed afterwards.
beforeEach(async () => {
root = await makeTempRepo({
'README.md': '# Hello',
'src/index.ts': 'export const x = 1;',
'src/utils.ts': 'export const y = 2;',
'package.json': '{"name":"test"}'
});
});
afterEach(async () => {
await cleanupTempRepo(root);
});
it('returns all indexable files', async () => {
const result = await crawlRoot();
const paths = result.files.map((f) => f.path).sort();
expect(paths).toEqual(['README.md', 'package.json', 'src/index.ts', 'src/utils.ts'].sort());
});
it('populates content as a UTF-8 string', async () => {
const result = await crawlRoot();
const readme = result.files.find((f) => f.path === 'README.md');
expect(readme?.content).toBe('# Hello');
});
it('sets size equal to Buffer.byteLength of content', async () => {
const result = await crawlRoot();
// byteLength (not string length) so multi-byte UTF-8 content is counted
// in bytes — see the UTF-8 edge-case test at the bottom of this file.
for (const file of result.files) {
expect(file.size).toBe(Buffer.byteLength(file.content, 'utf-8'));
}
});
it('computes correct SHA-256 per file', async () => {
const result = await crawlRoot();
const readme = result.files.find((f) => f.path === 'README.md');
expect(readme?.sha).toBe(sha256('# Hello'));
});
it('detects language from extension', async () => {
const result = await crawlRoot();
const ts = result.files.find((f) => f.path === 'src/index.ts');
expect(ts?.language).toBe('typescript');
const md = result.files.find((f) => f.path === 'README.md');
expect(md?.language).toBe('markdown');
const json = result.files.find((f) => f.path === 'package.json');
expect(json?.language).toBe('json');
});
it('sets branch to "local"', async () => {
const result = await crawlRoot();
expect(result.branch).toBe('local');
});
it('sets totalFiles to the count of filtered files', async () => {
const result = await crawlRoot();
expect(result.totalFiles).toBe(result.files.length);
});
// For a plain-directory crawl (no ref) commitSha is a synthetic 64-hex-char
// digest derived from file contents — it is deterministic for the same file
// set (next test) and changes when content changes (edge-case suite below).
it('sets commitSha to a non-empty hex string', async () => {
const result = await crawlRoot();
expect(result.commitSha).toMatch(/^[0-9a-f]{64}$/);
});
it('produces a deterministic commitSha for the same file set', async () => {
const r1 = await crawlRoot();
const r2 = await crawlRoot();
expect(r1.commitSha).toBe(r2.commitSha);
});
});
// ---------------------------------------------------------------------------
// Filtering — default excludes and extension allow-list
// ---------------------------------------------------------------------------
describe('LocalCrawler.crawl() — default filtering', () => {
// Fixture mixes files that must be kept (src/index.ts, README.md) with
// content the default filters must drop: build output (dist/), dependencies
// (node_modules/), git internals (.git/), and a non-indexable extension.
beforeEach(async () => {
root = await makeTempRepo({
'src/index.ts': 'export {};',
'dist/bundle.js': 'bundled',
'node_modules/lodash/index.js': 'lodash',
'.git/config': '[core]',
'image.png': '\x89PNG',
'README.md': '# Docs'
});
});
afterEach(async () => {
await cleanupTempRepo(root);
});
it('excludes files in dist/', async () => {
const result = await crawlRoot();
expect(result.files.every((f) => !f.path.startsWith('dist/'))).toBe(true);
});
it('excludes files in node_modules/', async () => {
const result = await crawlRoot();
expect(result.files.every((f) => !f.path.startsWith('node_modules/'))).toBe(true);
});
it('excludes files in .git/', async () => {
const result = await crawlRoot();
expect(result.files.every((f) => !f.path.startsWith('.git/'))).toBe(true);
});
it('excludes non-indexable extensions like .png', async () => {
const result = await crawlRoot();
expect(result.files.every((f) => !f.path.endsWith('.png'))).toBe(true);
});
it('reports skippedFiles = total enumerated filtered', async () => {
const result = await crawlRoot();
// dist/, node_modules/, .git/ are pruned at walk time — never counted.
// Only image.png reaches allRelPaths and is skipped (non-indexable extension).
// src/index.ts + README.md = 2 kept
expect(result.skippedFiles).toBe(1);
expect(result.totalFiles).toBe(2);
});
});
// ---------------------------------------------------------------------------
// .gitignore support
// ---------------------------------------------------------------------------
describe('LocalCrawler.crawl() — .gitignore support', () => {
// Fixtures are created per-test (not in beforeEach) because each test needs
// a different .gitignore; afterEach cleans whichever root was built.
afterEach(async () => {
await cleanupTempRepo(root);
});
it('excludes files matching a .gitignore pattern', async () => {
root = await makeTempRepo({
'.gitignore': '*.log\nsecrets.ts',
'src/index.ts': 'export {};',
'debug.log': 'log data',
'secrets.ts': 'const key = "abc";'
});
const result = await crawlRoot();
expect(result.files.some((f) => f.path === 'debug.log')).toBe(false);
expect(result.files.some((f) => f.path === 'secrets.ts')).toBe(false);
expect(result.files.some((f) => f.path === 'src/index.ts')).toBe(true);
});
it('excludes a directory listed in .gitignore', async () => {
root = await makeTempRepo({
'.gitignore': 'generated/',
'src/index.ts': 'export {};',
'generated/api.ts': 'auto-generated'
});
const result = await crawlRoot();
expect(result.files.some((f) => f.path.startsWith('generated/'))).toBe(false);
expect(result.files.some((f) => f.path === 'src/index.ts')).toBe(true);
});
it('respects negation patterns in .gitignore', async () => {
root = await makeTempRepo({
'.gitignore': '*.env\n!.env.example',
'src/index.ts': 'export {};',
'.env': 'SECRET=abc',
'.env.example': 'SECRET=changeme'
});
const result = await crawlRoot();
// .env files don't have an indexable extension so this tests the gitignore logic
// doesn't incorrectly block .env.example from passing through
// NOTE(review): only src/index.ts is asserted, so the negation pattern is
// not directly verified — consider asserting on .env.example if extension-
// less dotfiles ever become indexable.
expect(result.files.some((f) => f.path === 'src/index.ts')).toBe(true);
});
it('falls back to IGNORED_DIR_NAMES when no .gitignore is present', async () => {
root = await makeTempRepo({
'src/index.ts': 'export {};',
'node_modules/lodash/index.js': 'lodash',
'__pycache__/main.cpython-311.pyc': 'bytecode'
});
const result = await crawlRoot();
expect(result.files.every((f) => !f.path.startsWith('node_modules/'))).toBe(true);
expect(result.files.every((f) => !f.path.startsWith('__pycache__/'))).toBe(true);
expect(result.files.some((f) => f.path === 'src/index.ts')).toBe(true);
});
it('excludes nested node_modules via fallback (no .gitignore)', async () => {
root = await makeTempRepo({
'src/index.ts': 'export {};',
'packages/ui/node_modules/react/index.js': 'react'
});
const result = await crawlRoot();
expect(result.files.every((f) => !f.path.includes('node_modules'))).toBe(true);
expect(result.files.some((f) => f.path === 'src/index.ts')).toBe(true);
});
it('still prunes common dependency directories when .gitignore exists', async () => {
// Presence of a .gitignore must not disable the built-in dependency-dir
// pruning (top-level and nested node_modules are both dropped).
root = await makeTempRepo({
'.gitignore': 'logs/\n*.log',
'src/index.ts': 'export {};',
'node_modules/lodash/index.js': 'lodash',
'packages/ui/node_modules/react/index.js': 'react',
'logs/debug.log': 'debug'
});
const result = await crawlRoot();
expect(result.files.every((f) => !f.path.includes('node_modules'))).toBe(true);
expect(result.files.every((f) => !f.path.startsWith('logs/'))).toBe(true);
expect(result.files.some((f) => f.path === 'src/index.ts')).toBe(true);
});
});
// ---------------------------------------------------------------------------
// Lock file and minified file exclusions
// ---------------------------------------------------------------------------
// Lock files and minified/bundled artifacts are excluded by filename pattern
// even though their extensions (.json, .yaml, .js) are otherwise indexable.
describe('LocalCrawler.crawl() — lock file and minified file exclusions', () => {
afterEach(async () => {
await cleanupTempRepo(root);
});
it('excludes package-lock.json', async () => {
root = await makeTempRepo({
'src/index.ts': 'export {};',
'package-lock.json': '{"lockfileVersion":3}'
});
const result = await crawlRoot();
expect(result.files.some((f) => f.path === 'package-lock.json')).toBe(false);
expect(result.files.some((f) => f.path === 'src/index.ts')).toBe(true);
});
it('excludes pnpm-lock.yaml', async () => {
root = await makeTempRepo({
'src/index.ts': 'export {};',
'pnpm-lock.yaml': 'lockfileVersion: 9'
});
const result = await crawlRoot();
expect(result.files.some((f) => f.path === 'pnpm-lock.yaml')).toBe(false);
});
it('excludes minified .js files', async () => {
// Place the minified file under public/ rather than dist/: dist/ is pruned
// at walk time, so only a non-ignored directory exercises the *.min.js
// filename filter itself. (An earlier version of this test first built a
// dist/ fixture that was never crawled and then deleted it — that dead
// setup has been removed.)
root = await makeTempRepo({
'src/index.ts': 'export {};',
'public/vendor.min.js': '!function(){}'
});
const result = await crawlRoot();
expect(result.files.some((f) => f.path === 'public/vendor.min.js')).toBe(false);
expect(result.files.some((f) => f.path === 'src/index.ts')).toBe(true);
});
it('excludes .bundle.js files', async () => {
root = await makeTempRepo({
'src/index.ts': 'export {};',
'public/app.bundle.js': 'bundled code'
});
const result = await crawlRoot();
expect(result.files.some((f) => f.path === 'public/app.bundle.js')).toBe(false);
expect(result.files.some((f) => f.path === 'src/index.ts')).toBe(true);
});
});
// ---------------------------------------------------------------------------
// Size limit
// ---------------------------------------------------------------------------
// The crawler enforces a 500 KB per-file cap; these tests probe both sides
// of the boundary.
describe('LocalCrawler.crawl() — size limit', () => {
afterEach(async () => {
await cleanupTempRepo(root);
});
it('excludes files larger than MAX_FILE_SIZE_BYTES (500 KB)', async () => {
// One byte over the cap, alongside a tiny file that must survive.
root = await makeTempRepo({
'big.ts': 'x'.repeat(500_001),
'small.ts': 'export const x = 1;'
});
const { files } = await crawlRoot();
const paths = files.map((f) => f.path);
expect(paths).not.toContain('big.ts');
expect(paths).toContain('small.ts');
});
it('includes files exactly at MAX_FILE_SIZE_BYTES (500 KB)', async () => {
// Boundary case: exactly 500 000 bytes is still indexable (limit inclusive).
root = await makeTempRepo({ 'edge.ts': 'a'.repeat(500_000) });
const { files } = await crawlRoot();
expect(files.map((f) => f.path)).toContain('edge.ts');
});
});
// ---------------------------------------------------------------------------
// trueref.json / context7.json config detection
// ---------------------------------------------------------------------------
describe('LocalCrawler.crawl() — config file detection', () => {
// Covers auto-detection of trueref.json / context7.json at the repo root,
// caller-supplied config precedence, and the MULTIVERSION-0001 regression:
// CrawlResult.config must carry the parsed config even when the config file
// itself is filtered out of files[].
afterEach(async () => {
await cleanupTempRepo(root);
});
it('auto-detects trueref.json and applies excludeFiles', async () => {
root = await makeTempRepo({
'trueref.json': JSON.stringify({ excludeFiles: ['package.json'] }),
'src/index.ts': 'export {};',
'package.json': '{"name":"test"}'
});
const result = await crawlRoot();
expect(result.files.some((f) => f.path === 'package.json')).toBe(false);
expect(result.files.some((f) => f.path === 'src/index.ts')).toBe(true);
});
it('auto-detects context7.json and applies folders allowlist', async () => {
root = await makeTempRepo({
'context7.json': JSON.stringify({ folders: ['docs/'] }),
'src/index.ts': 'export {};',
'docs/guide.md': '# Guide'
});
const result = await crawlRoot();
// With a folders allowlist, anything outside the listed folders is dropped.
expect(result.files.some((f) => f.path === 'src/index.ts')).toBe(false);
expect(result.files.some((f) => f.path === 'docs/guide.md')).toBe(true);
});
it('caller-supplied config takes precedence over discovered config file', async () => {
root = await makeTempRepo({
'trueref.json': JSON.stringify({ excludeFiles: ['package.json'] }),
'src/index.ts': 'export {};',
'package.json': '{"name":"test"}'
});
// Caller provides a config with no exclusions — package.json should appear.
const result = await crawlRoot({ config: {} });
expect(result.files.some((f) => f.path === 'package.json')).toBe(true);
});
it('applies excludeFolders from config', async () => {
root = await makeTempRepo({
'trueref.json': JSON.stringify({ excludeFolders: ['internal/'] }),
'internal/secret.ts': 'secret',
'src/public.ts': 'public'
});
const result = await crawlRoot();
expect(result.files.some((f) => f.path.startsWith('internal/'))).toBe(false);
expect(result.files.some((f) => f.path === 'src/public.ts')).toBe(true);
});
it('gracefully handles a malformed config file', async () => {
root = await makeTempRepo({
'trueref.json': 'NOT VALID JSON {{{',
'src/index.ts': 'export {};'
});
// Should not throw; falls back to no config.
const result = await crawlRoot();
expect(result.files.some((f) => f.path === 'src/index.ts')).toBe(true);
});
it('populates CrawlResult.config with the parsed trueref.json even when folders allowlist excludes the root', async () => {
// Regression test for MULTIVERSION-0001:
// When folders: ["src/"] is set, trueref.json at the root is excluded from
// files[] by shouldIndexFile(). The config must still be returned in
// CrawlResult.config so the indexing pipeline can persist rules.
root = await makeTempRepo({
'trueref.json': JSON.stringify({
folders: ['src/'],
rules: ['Always document public APIs.']
}),
'src/index.ts': 'export {};',
'docs/guide.md': '# Guide'
});
const result = await crawlRoot();
// trueref.json must NOT appear in files (excluded by folders allowlist).
expect(result.files.some((f) => f.path === 'trueref.json')).toBe(false);
// docs/guide.md must NOT appear (outside src/).
expect(result.files.some((f) => f.path === 'docs/guide.md')).toBe(false);
// src/index.ts must appear (inside src/).
expect(result.files.some((f) => f.path === 'src/index.ts')).toBe(true);
// CrawlResult.config must carry the parsed config.
expect(result.config).toBeDefined();
expect(result.config?.rules).toEqual(['Always document public APIs.']);
});
it('populates CrawlResult.config with the parsed context7.json', async () => {
root = await makeTempRepo({
'context7.json': JSON.stringify({ rules: ['Rule from context7.'] }),
'src/index.ts': 'export {};'
});
const result = await crawlRoot();
expect(result.config).toBeDefined();
expect(result.config?.rules).toEqual(['Rule from context7.']);
});
it('CrawlResult.config is undefined when no config file is present', async () => {
root = await makeTempRepo({ 'src/index.ts': 'export {};' });
const result = await crawlRoot();
expect(result.config).toBeUndefined();
});
it('CrawlResult.config is undefined when caller supplies config (caller-provided takes precedence, no auto-detect)', async () => {
// NOTE(review): the title says "undefined", but the body (and the comment
// below) assert that CrawlResult.config echoes the caller config — the
// title looks stale relative to the asserted behavior.
root = await makeTempRepo({
'trueref.json': JSON.stringify({ rules: ['From file.'] }),
'src/index.ts': 'export {};'
});
// Caller-supplied config prevents auto-detection; CrawlResult.config
// should carry the caller config (not the file content).
const result = await crawlRoot({ config: { rules: ['From caller.'] } });
expect(result.config?.rules).toEqual(['From caller.']);
});
});
// ---------------------------------------------------------------------------
// Progress callback
// ---------------------------------------------------------------------------
// onProgress is invoked once per file that survives filtering, with a
// (processed, total) pair; these tests pin that contract.
describe('LocalCrawler.crawl() — progress reporting', () => {
beforeEach(async () => {
root = await makeTempRepo({
'src/a.ts': 'a',
'src/b.ts': 'b',
'src/c.ts': 'c'
});
});
afterEach(async () => {
await cleanupTempRepo(root);
});
it('calls onProgress once per filtered file', async () => {
const seen: Array<[number, number]> = [];
await crawlRoot({ onProgress: (processed, total) => seen.push([processed, total]) });
expect(seen).toHaveLength(3);
});
it('increments processed from 1 to totalFiles', async () => {
const seen: Array<[number, number]> = [];
await crawlRoot({ onProgress: (processed, total) => seen.push([processed, total]) });
expect(seen.map(([processed]) => processed)).toEqual([1, 2, 3]);
});
it('keeps total constant across all callback invocations', async () => {
const totals: number[] = [];
await crawlRoot({ onProgress: (_processed, total) => totals.push(total) });
expect(totals.every((total) => total === totals[0])).toBe(true);
});
it('does not call onProgress when no files pass the filter', async () => {
// Replace the beforeEach fixture with one containing only a binary file,
// so nothing survives filtering and the callback must never fire.
await fs.rm(root, { recursive: true, force: true });
root = await makeTempRepo({ 'image.png': '\x89PNG' });
let invocations = 0;
await crawlRoot({
onProgress: () => {
invocations += 1;
}
});
expect(invocations).toBe(0);
});
});
// ---------------------------------------------------------------------------
// Git ref checkout
// ---------------------------------------------------------------------------
/**
 * Create a temp directory that is a valid git repo with one commit per entry
 * in `history`. Each entry writes its relPath → content map, commits it, and
 * tags the commit when `tag` is provided. Returns the repo root path; callers
 * are responsible for cleanup.
 *
 * Layout of `history`:
 *   [{ tag?: string, files: Record<string, string> }, ...]
 */
async function makeGitRepo(
history: Array<{ tag?: string; files: Record<string, string> }>
): Promise<string> {
const repoRoot = await fs.mkdtemp(join(tmpdir(), 'trueref-git-test-'));
const git = async (...args: string[]): Promise<void> => {
await execFileAsync('git', ['-C', repoRoot, ...args]);
};
await git('init', '--initial-branch=main');
// Commits need an identity; configure one repo-locally so tests do not
// depend on the developer's global git config.
await git('config', 'user.email', 'test@trueref.local');
await git('config', 'user.name', 'TrueRef Test');
for (const entry of history) {
// Write this entry's files into the working tree.
for (const [relPath, content] of Object.entries(entry.files)) {
const target = join(repoRoot, relPath);
await fs.mkdir(join(target, '..'), { recursive: true });
await fs.writeFile(target, content, 'utf-8');
}
await git('add', '.');
// --allow-empty keeps history linear even when an entry changes nothing.
await git('commit', '--allow-empty', '-m', `commit for ${entry.tag ?? 'HEAD'}`);
if (entry.tag) {
await git('tag', entry.tag);
}
}
return repoRoot;
}
describe('LocalCrawler.crawl() — git ref checkout', () => {
// Intentionally shadows the module-level `root`/`crawler`: this suite builds
// real git repos (via makeGitRepo) instead of plain temp directories.
let root: string = '';
const crawler = new LocalCrawler();
afterEach(async () => {
if (root) await cleanupTempRepo(root);
});
it('crawls files at a specific tag, not the HEAD state', async () => {
root = await makeGitRepo([
{ tag: 'v1.0.0', files: { 'src/index.ts': 'export const version = 1;' } },
{ files: { 'src/index.ts': 'export const version = 2;' } }
]);
const result = await crawler.crawl({ rootPath: root, ref: 'v1.0.0' });
const indexFile = result.files.find((f) => f.path === 'src/index.ts');
expect(indexFile?.content).toBe('export const version = 1;');
});
it('crawls files at a specific commit SHA', async () => {
root = await makeGitRepo([
{ tag: 'v1.0.0', files: { 'api.ts': 'v1' } },
{ files: { 'api.ts': 'v2' } }
]);
// Resolve the SHA of v1.0.0
const { stdout } = await execFileAsync('git', ['-C', root, 'rev-parse', 'v1.0.0'], {
encoding: 'utf-8'
});
const sha = stdout.trim();
const result = await crawler.crawl({ rootPath: root, ref: sha });
const api = result.files.find((f) => f.path === 'api.ts');
expect(api?.content).toBe('v1');
});
it('sets branch to the ref string in the result', async () => {
root = await makeGitRepo([{ tag: 'v2.3.1', files: { 'README.md': '# v2' } }]);
const result = await crawler.crawl({ rootPath: root, ref: 'v2.3.1' });
expect(result.branch).toBe('v2.3.1');
});
it('sets commitSha to the git-resolved SHA (not file-content hash)', async () => {
// Unlike the plain-directory crawl (synthetic content hash), a ref crawl
// must report the actual commit SHA the ref resolves to.
root = await makeGitRepo([{ tag: 'v1.0.0', files: { 'a.ts': 'a' } }]);
const { stdout } = await execFileAsync('git', ['-C', root, 'rev-parse', 'v1.0.0'], {
encoding: 'utf-8'
});
const expectedSha = stdout.trim();
const result = await crawler.crawl({ rootPath: root, ref: 'v1.0.0' });
expect(result.commitSha).toBe(expectedSha);
});
it('does not modify the working tree', async () => {
root = await makeGitRepo([
{ tag: 'v1.0.0', files: { 'src/index.ts': 'v1' } },
{ files: { 'src/index.ts': 'v2' } }
]);
// Working tree is at HEAD (v2)
const before = await fs.readFile(join(root, 'src/index.ts'), 'utf-8');
await crawler.crawl({ rootPath: root, ref: 'v1.0.0' });
const after = await fs.readFile(join(root, 'src/index.ts'), 'utf-8');
expect(before).toBe('v2');
expect(after).toBe('v2');
});
it('removes the temporary worktree after crawling', async () => {
// The ref crawl appears to check out the ref into a temporary git worktree;
// afterwards only the main worktree should be registered.
root = await makeGitRepo([{ tag: 'v1.0.0', files: { 'f.ts': 'x' } }]);
await crawler.crawl({ rootPath: root, ref: 'v1.0.0' });
// List remaining worktrees — only the main one should remain.
const { stdout } = await execFileAsync('git', ['-C', root, 'worktree', 'list', '--porcelain'], {
encoding: 'utf-8'
});
const worktreeCount = stdout.split('\n').filter((l) => l.startsWith('worktree ')).length;
expect(worktreeCount).toBe(1);
});
it('throws NotAGitRepositoryError for a plain directory', async () => {
const plainDir = await fs.mkdtemp(join(tmpdir(), 'trueref-plain-'));
root = plainDir; // cleaned up in afterEach
await expect(crawler.crawl({ rootPath: plainDir, ref: 'v1.0.0' })).rejects.toThrow(
NotAGitRepositoryError
);
});
it('throws InvalidRefError for a ref that does not exist', async () => {
root = await makeGitRepo([{ tag: 'v1.0.0', files: { 'f.ts': 'x' } }]);
await expect(crawler.crawl({ rootPath: root, ref: 'v99.99.99' })).rejects.toThrow(
InvalidRefError
);
});
it('applies caller-supplied config at the checked-out ref', async () => {
root = await makeGitRepo([
{
tag: 'v1.0.0',
files: {
'src/index.ts': 'export {};',
'package.json': '{"name":"test"}'
}
}
]);
// Exclude package.json via caller config
const result = await crawler.crawl({
rootPath: root,
ref: 'v1.0.0',
config: { excludeFiles: ['package.json'] }
});
expect(result.files.some((f) => f.path === 'package.json')).toBe(false);
expect(result.files.some((f) => f.path === 'src/index.ts')).toBe(true);
});
it('reads trueref.json from the checked-out ref', async () => {
// Auto-detection must read the config as it existed at the ref, not the
// working tree's current copy.
root = await makeGitRepo([
{
tag: 'v1.0.0',
files: {
'trueref.json': JSON.stringify({ excludeFiles: ['package.json'] }),
'src/index.ts': 'export {};',
'package.json': '{"name":"test"}'
}
}
]);
const result = await crawler.crawl({ rootPath: root, ref: 'v1.0.0' });
expect(result.files.some((f) => f.path === 'package.json')).toBe(false);
expect(result.files.some((f) => f.path === 'src/index.ts')).toBe(true);
});
});
// ---------------------------------------------------------------------------
// Edge cases
// ---------------------------------------------------------------------------
describe('LocalCrawler.crawl() — edge cases', () => {
afterEach(async () => {
await cleanupTempRepo(root);
});
it('returns empty result for an empty directory', async () => {
root = await makeTempRepo({});
const result = await crawlRoot();
expect(result.files).toHaveLength(0);
expect(result.totalFiles).toBe(0);
expect(result.skippedFiles).toBe(0);
});
it('handles deeply nested directory structures', async () => {
root = await makeTempRepo({
'a/b/c/d/deep.ts': 'export const deep = true;'
});
const result = await crawlRoot();
expect(result.files.some((f) => f.path === 'a/b/c/d/deep.ts')).toBe(true);
});
it('handles files with UTF-8 content correctly', async () => {
// Multi-byte content: accents, em dash, CJK — content and sha must match.
const utf8Content = 'const greeting = "héllo wörld — 日本語";';
root = await makeTempRepo({ 'src/unicode.ts': utf8Content });
const result = await crawlRoot();
const file = result.files.find((f) => f.path === 'src/unicode.ts');
expect(file?.content).toBe(utf8Content);
expect(file?.sha).toBe(sha256(utf8Content));
});
it('commitSha differs when file content changes', async () => {
root = await makeTempRepo({ 'src/index.ts': 'version 1' });
const r1 = await crawlRoot();
await fs.writeFile(join(root, 'src/index.ts'), 'version 2', 'utf-8');
const r2 = await crawlRoot();
expect(r1.commitSha).not.toBe(r2.commitSha);
});
it('commitSha is empty-string hash when no files are crawled', async () => {
// Pins the synthetic-hash contract: with zero crawled files the digest is
// exactly sha256 of the empty string.
root = await makeTempRepo({ 'image.png': '\x89PNG' });
const result = await crawlRoot();
// SHA-256 of an empty string
expect(result.commitSha).toBe(sha256(''));
});
});