feat(crawler): ignore .gitignore files and folders, fallback to common ignored deps
This commit is contained in:
@@ -173,13 +173,154 @@ describe('LocalCrawler.crawl() — default filtering', () => {
|
||||
|
||||
it('reports skippedFiles = total enumerated – filtered', async () => {
|
||||
const result = await crawlRoot();
|
||||
// dist/, node_modules/, .git/, .png = 4 skipped
|
||||
// dist/, node_modules/, .git/ are pruned at walk time — never counted.
|
||||
// Only image.png reaches allRelPaths and is skipped (non-indexable extension).
|
||||
// src/index.ts + README.md = 2 kept
|
||||
expect(result.skippedFiles).toBe(4);
|
||||
expect(result.skippedFiles).toBe(1);
|
||||
expect(result.totalFiles).toBe(2);
|
||||
});
|
||||
});
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// .gitignore support
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
// Exercises how LocalCrawler.crawl() combines a repository's .gitignore with
// its fallback pruning of common dependency directories. Every test writes a
// throwaway repo into the shared `root` variable via makeTempRepo(); the
// afterEach hook removes it again.
describe('LocalCrawler.crawl() — .gitignore support', () => {
  afterEach(async () => {
    await cleanupTempRepo(root);
  });

  // Glob pattern (*.log) and literal file name (secrets.ts) are both honoured.
  it('excludes files matching a .gitignore pattern', async () => {
    root = await makeTempRepo({
      '.gitignore': '*.log\nsecrets.ts',
      'src/index.ts': 'export {};',
      'debug.log': 'log data',
      'secrets.ts': 'const key = "abc";'
    });
    const result = await crawlRoot();
    expect(result.files.some((f) => f.path === 'debug.log')).toBe(false);
    expect(result.files.some((f) => f.path === 'secrets.ts')).toBe(false);
    expect(result.files.some((f) => f.path === 'src/index.ts')).toBe(true);
  });

  // A trailing-slash directory pattern removes everything beneath that directory.
  it('excludes a directory listed in .gitignore', async () => {
    root = await makeTempRepo({
      '.gitignore': 'generated/',
      'src/index.ts': 'export {};',
      'generated/api.ts': 'auto-generated'
    });
    const result = await crawlRoot();
    expect(result.files.some((f) => f.path.startsWith('generated/'))).toBe(false);
    expect(result.files.some((f) => f.path === 'src/index.ts')).toBe(true);
  });

  it('respects negation patterns in .gitignore', async () => {
    root = await makeTempRepo({
      '.gitignore': '*.env\n!.env.example\n*.md\n!README.md',
      'src/index.ts': 'export {};',
      '.env': 'SECRET=abc',
      '.env.example': 'SECRET=changeme',
      'notes.md': '# scratch notes',
      'README.md': '# docs'
    });
    const result = await crawlRoot();
    // .env files don't have an indexable extension, so the .env/.env.example
    // pair alone cannot show whether negation works: both are dropped either
    // way. The *.md / !README.md pair uses an extension the crawler does index,
    // so README.md only survives if the "!" rule re-includes it.
    expect(result.files.some((f) => f.path === '.env')).toBe(false);
    expect(result.files.some((f) => f.path === 'notes.md')).toBe(false);
    expect(result.files.some((f) => f.path === 'README.md')).toBe(true);
    expect(result.files.some((f) => f.path === 'src/index.ts')).toBe(true);
  });

  // No .gitignore in the repo: the crawler is expected to prune well-known
  // dependency/cache directories on its own.
  it('falls back to IGNORED_DIR_NAMES when no .gitignore is present', async () => {
    root = await makeTempRepo({
      'src/index.ts': 'export {};',
      'node_modules/lodash/index.js': 'lodash',
      '__pycache__/main.cpython-311.pyc': 'bytecode'
    });
    const result = await crawlRoot();
    expect(result.files.every((f) => !f.path.startsWith('node_modules/'))).toBe(true);
    expect(result.files.every((f) => !f.path.startsWith('__pycache__/'))).toBe(true);
    expect(result.files.some((f) => f.path === 'src/index.ts')).toBe(true);
  });

  // The fallback must match the directory name at any depth, not just at the root.
  it('excludes nested node_modules via fallback (no .gitignore)', async () => {
    root = await makeTempRepo({
      'src/index.ts': 'export {};',
      'packages/ui/node_modules/react/index.js': 'react'
    });
    const result = await crawlRoot();
    expect(result.files.every((f) => !f.path.includes('node_modules'))).toBe(true);
    expect(result.files.some((f) => f.path === 'src/index.ts')).toBe(true);
  });

  // A .gitignore that does not mention node_modules must not switch the
  // fallback pruning off; both rule sets apply together.
  it('still prunes common dependency directories when .gitignore exists', async () => {
    root = await makeTempRepo({
      '.gitignore': 'logs/\n*.log',
      'src/index.ts': 'export {};',
      'node_modules/lodash/index.js': 'lodash',
      'packages/ui/node_modules/react/index.js': 'react',
      'logs/debug.log': 'debug'
    });
    const result = await crawlRoot();
    expect(result.files.every((f) => !f.path.includes('node_modules'))).toBe(true);
    expect(result.files.every((f) => !f.path.startsWith('logs/'))).toBe(true);
    expect(result.files.some((f) => f.path === 'src/index.ts')).toBe(true);
  });
});
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Lock file and minified file exclusions
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
// Checks that LocalCrawler.crawl() leaves package-manager lock files and
// minified/bundled JavaScript out of its results. Every test writes a
// throwaway repo into the shared `root` variable via makeTempRepo(); the
// afterEach hook removes it again. Each test also asserts that src/index.ts is
// still returned, so an empty result cannot make an exclusion check pass.
describe('LocalCrawler.crawl() — lock file and minified file exclusions', () => {
  afterEach(async () => {
    await cleanupTempRepo(root);
  });

  it('excludes package-lock.json', async () => {
    root = await makeTempRepo({
      'src/index.ts': 'export {};',
      'package-lock.json': '{"lockfileVersion":3}'
    });
    const result = await crawlRoot();
    expect(result.files.some((f) => f.path === 'package-lock.json')).toBe(false);
    expect(result.files.some((f) => f.path === 'src/index.ts')).toBe(true);
  });

  it('excludes pnpm-lock.yaml', async () => {
    root = await makeTempRepo({
      'src/index.ts': 'export {};',
      'pnpm-lock.yaml': 'lockfileVersion: 9'
    });
    const result = await crawlRoot();
    expect(result.files.some((f) => f.path === 'pnpm-lock.yaml')).toBe(false);
    expect(result.files.some((f) => f.path === 'src/index.ts')).toBe(true);
  });

  it('excludes minified .js files', async () => {
    // dist/ is pruned by default, so a fixture placed there would be dropped
    // before the minified-file rule is ever consulted. public/ is not one of
    // the pruned directories exercised elsewhere in this file, so the file is
    // excluded here only if the .min.js rule itself works.
    root = await makeTempRepo({
      'src/index.ts': 'export {};',
      'public/vendor.min.js': '!function(){}'
    });
    const result = await crawlRoot();
    expect(result.files.some((f) => f.path === 'public/vendor.min.js')).toBe(false);
    expect(result.files.some((f) => f.path === 'src/index.ts')).toBe(true);
  });

  it('excludes .bundle.js files', async () => {
    root = await makeTempRepo({
      'src/index.ts': 'export {};',
      'public/app.bundle.js': 'bundled code'
    });
    const result = await crawlRoot();
    expect(result.files.some((f) => f.path === 'public/app.bundle.js')).toBe(false);
    expect(result.files.some((f) => f.path === 'src/index.ts')).toBe(true);
  });
});
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Size limit
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
Reference in New Issue
Block a user