feat(TRUEREF-0003-0004): implement GitHub and local filesystem crawlers
- GitHub crawler with rate limiting, semaphore concurrency, retry logic
- File filtering by extension, size, and trueref.json rules
- Local filesystem crawler with SHA-256 checksums and progress callbacks
- Shared types and file filter logic between both crawlers

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
183
src/lib/server/crawler/file-filter.ts
Normal file
183
src/lib/server/crawler/file-filter.ts
Normal file
@@ -0,0 +1,183 @@
|
||||
/**
|
||||
* File filtering logic for the GitHub crawler (TRUEREF-0003).
|
||||
*
|
||||
* Determines whether a file in the repository tree should be downloaded
|
||||
* and indexed based on its extension, size, and the trueref.json config.
|
||||
*/
|
||||
|
||||
import { extname, basename } from 'node:path';
|
||||
import type { RepoConfig } from './types.js';
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Constants
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/** File extensions that the indexer can meaningfully process. */
|
||||
export const INDEXABLE_EXTENSIONS = new Set([
|
||||
// Documentation
|
||||
'.md',
|
||||
'.mdx',
|
||||
'.txt',
|
||||
'.rst',
|
||||
// Code
|
||||
'.ts',
|
||||
'.tsx',
|
||||
'.js',
|
||||
'.jsx',
|
||||
'.py',
|
||||
'.rb',
|
||||
'.go',
|
||||
'.rs',
|
||||
'.java',
|
||||
'.cs',
|
||||
'.cpp',
|
||||
'.c',
|
||||
'.h',
|
||||
'.swift',
|
||||
'.kt',
|
||||
'.php',
|
||||
'.scala',
|
||||
'.clj',
|
||||
'.ex',
|
||||
'.exs',
|
||||
'.sh',
|
||||
'.bash',
|
||||
'.zsh',
|
||||
'.fish',
|
||||
// Config / data
|
||||
'.json',
|
||||
'.yaml',
|
||||
'.yml',
|
||||
'.toml',
|
||||
// Web
|
||||
'.html',
|
||||
'.css',
|
||||
'.svelte',
|
||||
'.vue'
|
||||
]);
|
||||
|
||||
/** Maximum file size we are willing to download (500 KB). */
|
||||
export const MAX_FILE_SIZE_BYTES = 500_000;
|
||||
|
||||
/**
|
||||
* Default path prefixes that are always excluded regardless of config.
|
||||
* These directories contain generated or dependency files that should never
|
||||
* be indexed.
|
||||
*/
|
||||
const DEFAULT_EXCLUDES: string[] = [
|
||||
'node_modules/',
|
||||
'.git/',
|
||||
'dist/',
|
||||
'build/',
|
||||
'coverage/',
|
||||
'.next/',
|
||||
'__pycache__/',
|
||||
'vendor/',
|
||||
'target/',
|
||||
'.cache/'
|
||||
];
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Language detection
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
const EXTENSION_TO_LANGUAGE: Record<string, string> = {
|
||||
'.ts': 'typescript',
|
||||
'.tsx': 'typescript',
|
||||
'.js': 'javascript',
|
||||
'.jsx': 'javascript',
|
||||
'.py': 'python',
|
||||
'.rb': 'ruby',
|
||||
'.go': 'go',
|
||||
'.rs': 'rust',
|
||||
'.java': 'java',
|
||||
'.cs': 'csharp',
|
||||
'.cpp': 'cpp',
|
||||
'.c': 'c',
|
||||
'.h': 'c',
|
||||
'.swift': 'swift',
|
||||
'.kt': 'kotlin',
|
||||
'.php': 'php',
|
||||
'.scala': 'scala',
|
||||
'.clj': 'clojure',
|
||||
'.ex': 'elixir',
|
||||
'.exs': 'elixir',
|
||||
'.sh': 'shell',
|
||||
'.bash': 'shell',
|
||||
'.zsh': 'shell',
|
||||
'.fish': 'shell',
|
||||
'.json': 'json',
|
||||
'.yaml': 'yaml',
|
||||
'.yml': 'yaml',
|
||||
'.toml': 'toml',
|
||||
'.html': 'html',
|
||||
'.css': 'css',
|
||||
'.svelte': 'svelte',
|
||||
'.vue': 'vue',
|
||||
'.md': 'markdown',
|
||||
'.mdx': 'markdown',
|
||||
'.txt': 'text',
|
||||
'.rst': 'rst'
|
||||
};
|
||||
|
||||
/**
|
||||
* Detect a human-readable language name from a file extension.
|
||||
* Returns an empty string when the extension is unknown.
|
||||
*/
|
||||
export function detectLanguage(filePath: string): string {
|
||||
const ext = extname(filePath).toLowerCase();
|
||||
return EXTENSION_TO_LANGUAGE[ext] ?? '';
|
||||
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Filter predicate
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/**
|
||||
* Decide whether a file from the repository tree should be downloaded.
|
||||
*
|
||||
* Rules (applied in order):
|
||||
* 1. Must have an indexable extension.
|
||||
* 2. Must not exceed the size limit.
|
||||
* 3. Must not match config.excludeFiles (exact basename match).
|
||||
* 4. Must not be under a config.excludeFolders path / regex.
|
||||
* 5. Must be under a config.folders allowlist path / regex (if specified).
|
||||
* 6. Must not start with a default-excluded prefix.
|
||||
*/
|
||||
export function shouldIndexFile(
|
||||
filePath: string,
|
||||
fileSize: number,
|
||||
config?: RepoConfig
|
||||
): boolean {
|
||||
const ext = extname(filePath).toLowerCase();
|
||||
|
||||
// 1. Extension allow-list
|
||||
if (!INDEXABLE_EXTENSIONS.has(ext)) return false;
|
||||
|
||||
// 2. Size limit
|
||||
if (fileSize > MAX_FILE_SIZE_BYTES) return false;
|
||||
|
||||
// 3. Config excludeFiles — exact basename match
|
||||
if (config?.excludeFiles?.includes(basename(filePath))) return false;
|
||||
|
||||
// 4. Config excludeFolders — prefix or regex match
|
||||
if (
|
||||
config?.excludeFolders?.some(
|
||||
(folder) => filePath.startsWith(folder) || new RegExp(folder).test(filePath)
|
||||
)
|
||||
)
|
||||
return false;
|
||||
|
||||
// 5. Config folders allowlist — if provided, the file must match at least one
|
||||
if (config?.folders?.length) {
|
||||
const inAllowedFolder = config.folders.some(
|
||||
(folder) => filePath.startsWith(folder) || new RegExp(folder).test(filePath)
|
||||
);
|
||||
if (!inAllowedFolder) return false;
|
||||
}
|
||||
|
||||
// 6. Default excludes
|
||||
if (DEFAULT_EXCLUDES.some((ex) => filePath.startsWith(ex))) return false;
|
||||
|
||||
return true;
|
||||
}
|
||||
561
src/lib/server/crawler/github.crawler.test.ts
Normal file
561
src/lib/server/crawler/github.crawler.test.ts
Normal file
@@ -0,0 +1,561 @@
|
||||
/**
|
||||
* Unit tests for the GitHub repository crawler (TRUEREF-0003).
|
||||
*
|
||||
* All GitHub API calls are intercepted via vi.stubGlobal('fetch', ...) so
|
||||
* that no real network traffic is produced.
|
||||
*/
|
||||
|
||||
import { describe, it, expect, vi, beforeEach, afterEach } from 'vitest';
|
||||
|
||||
import { crawl } from './github.crawler.js';
|
||||
import { shouldIndexFile, detectLanguage, INDEXABLE_EXTENSIONS, MAX_FILE_SIZE_BYTES } from './file-filter.js';
|
||||
import { GitHubRateLimiter, Semaphore, withRetry } from './rate-limiter.js';
|
||||
import {
|
||||
AuthenticationError,
|
||||
PermissionError,
|
||||
RepositoryNotFoundError
|
||||
} from './types.js';
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Mock fetch helpers
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
type FetchHandler = (url: string, init?: RequestInit) => Response;
|
||||
|
||||
function stubFetch(handler: FetchHandler) {
|
||||
vi.stubGlobal(
|
||||
'fetch',
|
||||
vi.fn((url: string, init?: RequestInit) => Promise.resolve(handler(url, init)))
|
||||
);
|
||||
}
|
||||
|
||||
function jsonResponse(body: unknown, status = 200, headers: Record<string, string> = {}): Response {
|
||||
return new Response(JSON.stringify(body), {
|
||||
status,
|
||||
headers: {
|
||||
'Content-Type': 'application/json',
|
||||
'X-RateLimit-Remaining': '4999',
|
||||
'X-RateLimit-Reset': String(Math.floor(Date.now() / 1000) + 3600),
|
||||
...headers
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
function textResponse(body: string, status = 200, headers: Record<string, string> = {}): Response {
|
||||
return new Response(body, {
|
||||
status,
|
||||
headers: {
|
||||
'Content-Type': 'text/plain',
|
||||
'X-RateLimit-Remaining': '4999',
|
||||
'X-RateLimit-Reset': String(Math.floor(Date.now() / 1000) + 3600),
|
||||
...headers
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Fixtures
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/** Minimal repo-info payload; the crawler reads default_branch from it. */
const REPO_INFO = {
  default_branch: 'main',
  stargazers_count: 42
};

/**
 * Tree fixture: four indexable blobs plus entries that the filter must drop
 * (dist/, node_modules/, a .png) and one non-blob 'tree' entry.
 * Several tests assert counts against this exact composition.
 */
const TREE_RESPONSE = {
  tree: [
    { path: 'README.md', type: 'blob', size: 1024, sha: 'sha-readme', url: '' },
    { path: 'src/index.ts', type: 'blob', size: 512, sha: 'sha-index', url: '' },
    { path: 'src/utils.ts', type: 'blob', size: 256, sha: 'sha-utils', url: '' },
    { path: 'package.json', type: 'blob', size: 128, sha: 'sha-pkg', url: '' },
    { path: 'dist/bundle.js', type: 'blob', size: 9999, sha: 'sha-dist', url: '' }, // excluded by default
    { path: 'node_modules/lodash/index.js', type: 'blob', size: 100, sha: 'sha-nm', url: '' }, // excluded
    { path: 'image.png', type: 'blob', size: 4096, sha: 'sha-img', url: '' }, // non-indexable
    { path: 'src', type: 'tree', size: 0, sha: 'sha-src-tree', url: '' }
  ],
  truncated: false
};

/** Fake commit SHA returned by the mocked /commits/<ref> endpoint. */
const COMMIT_SHA = 'deadbeef1234567890abcdef';
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// shouldIndexFile unit tests
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
describe('shouldIndexFile()', () => {
|
||||
it('returns true for a .ts file within size limit', () => {
|
||||
expect(shouldIndexFile('src/index.ts', 1000)).toBe(true);
|
||||
});
|
||||
|
||||
it('returns false for a .png file (non-indexable extension)', () => {
|
||||
expect(shouldIndexFile('assets/logo.png', 100)).toBe(false);
|
||||
});
|
||||
|
||||
it('returns false when file exceeds MAX_FILE_SIZE_BYTES', () => {
|
||||
expect(shouldIndexFile('big.ts', MAX_FILE_SIZE_BYTES + 1)).toBe(false);
|
||||
});
|
||||
|
||||
it('returns false for a file in node_modules/', () => {
|
||||
expect(shouldIndexFile('node_modules/lodash/index.js', 100)).toBe(false);
|
||||
});
|
||||
|
||||
it('returns false for a file in dist/', () => {
|
||||
expect(shouldIndexFile('dist/bundle.js', 100)).toBe(false);
|
||||
});
|
||||
|
||||
it('respects config.excludeFiles (exact basename)', () => {
|
||||
expect(shouldIndexFile('src/secret.ts', 100, { excludeFiles: ['secret.ts'] })).toBe(false);
|
||||
});
|
||||
|
||||
it('does not exclude a file whose basename merely contains the excluded name', () => {
|
||||
expect(shouldIndexFile('src/not-secret.ts', 100, { excludeFiles: ['secret.ts'] })).toBe(true);
|
||||
});
|
||||
|
||||
it('respects config.excludeFolders prefix', () => {
|
||||
expect(shouldIndexFile('internal/config.ts', 100, { excludeFolders: ['internal/'] })).toBe(false);
|
||||
});
|
||||
|
||||
it('allows files outside of config.excludeFolders', () => {
|
||||
expect(shouldIndexFile('public/api.ts', 100, { excludeFolders: ['internal/'] })).toBe(true);
|
||||
});
|
||||
|
||||
it('restricts to config.folders allowlist when specified', () => {
|
||||
const config = { folders: ['docs/'] };
|
||||
expect(shouldIndexFile('src/index.ts', 100, config)).toBe(false);
|
||||
expect(shouldIndexFile('docs/guide.md', 100, config)).toBe(true);
|
||||
});
|
||||
|
||||
it('returns true when config.folders is an empty array (no restriction)', () => {
|
||||
expect(shouldIndexFile('src/index.ts', 100, { folders: [] })).toBe(true);
|
||||
});
|
||||
|
||||
it('handles all default-excluded directories', () => {
|
||||
const excluded = [
|
||||
'node_modules/pkg/index.js',
|
||||
'.git/config',
|
||||
'dist/out.js',
|
||||
'build/app.js',
|
||||
'coverage/lcov.info',
|
||||
'.next/server.js',
|
||||
'__pycache__/mod.py',
|
||||
'vendor/lib.go',
|
||||
'target/release.rs',
|
||||
'.cache/file.ts'
|
||||
];
|
||||
for (const path of excluded) {
|
||||
expect(shouldIndexFile(path, 100), `should exclude ${path}`).toBe(false);
|
||||
}
|
||||
});
|
||||
|
||||
it('INDEXABLE_EXTENSIONS covers all expected types', () => {
|
||||
const required = ['.md', '.ts', '.py', '.go', '.rs', '.json', '.svelte'];
|
||||
for (const ext of required) {
|
||||
expect(INDEXABLE_EXTENSIONS.has(ext), `missing extension ${ext}`).toBe(true);
|
||||
}
|
||||
});
|
||||
});
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// detectLanguage unit tests
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
describe('detectLanguage()', () => {
|
||||
it('detects typescript', () => expect(detectLanguage('foo.ts')).toBe('typescript'));
|
||||
it('detects tsx as typescript', () => expect(detectLanguage('foo.tsx')).toBe('typescript'));
|
||||
it('detects javascript', () => expect(detectLanguage('foo.js')).toBe('javascript'));
|
||||
it('detects python', () => expect(detectLanguage('foo.py')).toBe('python'));
|
||||
it('detects go', () => expect(detectLanguage('foo.go')).toBe('go'));
|
||||
it('detects rust', () => expect(detectLanguage('foo.rs')).toBe('rust'));
|
||||
it('detects markdown', () => expect(detectLanguage('README.md')).toBe('markdown'));
|
||||
it('detects svelte', () => expect(detectLanguage('App.svelte')).toBe('svelte'));
|
||||
it('detects yaml', () => expect(detectLanguage('config.yaml')).toBe('yaml'));
|
||||
it('returns empty string for unknown extension', () => expect(detectLanguage('file.xyz')).toBe(''));
|
||||
it('is case-insensitive for extensions', () => expect(detectLanguage('FILE.TS')).toBe('typescript'));
|
||||
});
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// GitHubRateLimiter unit tests
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
describe('GitHubRateLimiter', () => {
|
||||
it('defaults to 5000 remaining requests', () => {
|
||||
const limiter = new GitHubRateLimiter();
|
||||
expect(limiter.remainingRequests).toBe(5000);
|
||||
});
|
||||
|
||||
it('updates remaining and resetAt from headers', () => {
|
||||
const limiter = new GitHubRateLimiter();
|
||||
const resetEpoch = Math.floor(Date.now() / 1000) + 3600;
|
||||
const headers = new Headers({
|
||||
'X-RateLimit-Remaining': '42',
|
||||
'X-RateLimit-Reset': String(resetEpoch)
|
||||
});
|
||||
limiter.updateFromHeaders(headers);
|
||||
expect(limiter.remainingRequests).toBe(42);
|
||||
expect(limiter.resetTimestamp).toBe(resetEpoch * 1000);
|
||||
});
|
||||
|
||||
it('does not mutate state when headers are absent', () => {
|
||||
const limiter = new GitHubRateLimiter();
|
||||
limiter.updateFromHeaders(new Headers());
|
||||
expect(limiter.remainingRequests).toBe(5000);
|
||||
});
|
||||
|
||||
it('waitIfNeeded resolves immediately when remaining > 10', async () => {
|
||||
const limiter = new GitHubRateLimiter();
|
||||
const start = Date.now();
|
||||
await limiter.waitIfNeeded();
|
||||
expect(Date.now() - start).toBeLessThan(100);
|
||||
});
|
||||
});
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Semaphore unit tests
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
describe('Semaphore', () => {
|
||||
it('allows up to concurrency tasks to run simultaneously', async () => {
|
||||
const sem = new Semaphore(2);
|
||||
let active = 0;
|
||||
let maxActive = 0;
|
||||
|
||||
const task = () =>
|
||||
sem.run(async () => {
|
||||
active++;
|
||||
maxActive = Math.max(maxActive, active);
|
||||
await new Promise((r) => setTimeout(r, 10));
|
||||
active--;
|
||||
});
|
||||
|
||||
await Promise.all([task(), task(), task(), task()]);
|
||||
expect(maxActive).toBeLessThanOrEqual(2);
|
||||
});
|
||||
|
||||
it('resolves all tasks even when queued', async () => {
|
||||
const sem = new Semaphore(1);
|
||||
const results: number[] = [];
|
||||
await Promise.all(
|
||||
[1, 2, 3].map((n) =>
|
||||
sem.run(async () => {
|
||||
results.push(n);
|
||||
})
|
||||
)
|
||||
);
|
||||
expect(results).toHaveLength(3);
|
||||
});
|
||||
});
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// withRetry unit tests
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
describe('withRetry()', () => {
|
||||
it('returns the result on first success', async () => {
|
||||
const result = await withRetry(() => Promise.resolve(42));
|
||||
expect(result).toBe(42);
|
||||
});
|
||||
|
||||
it('retries on failure and returns eventual success', async () => {
|
||||
let calls = 0;
|
||||
const result = await withRetry(async () => {
|
||||
calls++;
|
||||
if (calls < 3) throw new Error('transient');
|
||||
return 'ok';
|
||||
}, 3);
|
||||
expect(result).toBe('ok');
|
||||
expect(calls).toBe(3);
|
||||
});
|
||||
|
||||
it('throws after exhausting all attempts', async () => {
|
||||
await expect(
|
||||
withRetry(() => Promise.reject(new Error('always fails')), 3)
|
||||
).rejects.toThrow('always fails');
|
||||
});
|
||||
});
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// crawl() integration tests (fetch mocked)
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
// Integration tests for crawl(): every GitHub endpoint is served by a mocked
// fetch, so handler URL matching and registration order below are load-bearing.
describe('crawl()', () => {
  beforeEach(() => {
    // NOTE(review): fake timers are active while crawl() awaits real
    // promises — confirm no retry/backoff timer fires in these paths, or
    // the tests would hang waiting for an un-advanced clock.
    vi.useFakeTimers();
  });

  afterEach(() => {
    vi.restoreAllMocks();
    vi.useRealTimers();
  });

  // Serves the standard fixtures for repo info, commit SHA, tree, and raw
  // content; individual URLs can be overridden per test.
  function setupDefaultMocks(overrides: Partial<Record<string, Response>> = {}) {
    stubFetch((url) => {
      // Repo info
      if (url === 'https://api.github.com/repos/owner/repo') {
        return overrides[url] ?? jsonResponse(REPO_INFO);
      }
      // Commit SHA
      if (url === 'https://api.github.com/repos/owner/repo/commits/main') {
        return overrides[url] ?? textResponse(COMMIT_SHA);
      }
      // File tree
      if (url.startsWith('https://api.github.com/repos/owner/repo/git/trees/main')) {
        return overrides[url] ?? jsonResponse(TREE_RESPONSE);
      }
      // Raw content (raw.githubusercontent.com)
      if (url.startsWith('https://raw.githubusercontent.com/')) {
        // slice(6) drops scheme/host/owner/repo/ref, leaving the file path.
        const filePath = url.split('/').slice(6).join('/');
        return overrides[url] ?? textResponse(`// content of ${filePath}`);
      }
      return new Response('not found', { status: 404 });
    });
  }

  it('returns files that pass the filter', async () => {
    setupDefaultMocks();

    const result = await crawl({ owner: 'owner', repo: 'repo' });

    // dist/ and node_modules/ should be excluded; .png should be excluded.
    // Expected: README.md, src/index.ts, src/utils.ts, package.json
    expect(result.files.length).toBeGreaterThanOrEqual(4);
    expect(result.files.every((f) => !f.path.startsWith('dist/'))).toBe(true);
    expect(result.files.every((f) => !f.path.startsWith('node_modules/'))).toBe(true);
    expect(result.files.every((f) => !f.path.endsWith('.png'))).toBe(true);
  });

  it('sets branch to the default_branch when no ref is given', async () => {
    setupDefaultMocks();
    const result = await crawl({ owner: 'owner', repo: 'repo' });
    expect(result.branch).toBe('main');
  });

  it('uses the provided ref when specified', async () => {
    // Custom mocks: the tree/commit endpoints must answer for the tag ref.
    stubFetch((url) => {
      if (url === 'https://api.github.com/repos/owner/repo') {
        return jsonResponse(REPO_INFO);
      }
      if (url.includes('/git/trees/v2.0.0')) {
        return jsonResponse({ tree: [], truncated: false });
      }
      if (url.includes('/commits/v2.0.0')) {
        return textResponse('tagsha');
      }
      return textResponse('content');
    });

    const result = await crawl({ owner: 'owner', repo: 'repo', ref: 'v2.0.0' });
    expect(result.branch).toBe('v2.0.0');
  });

  it('populates commitSha from the commits endpoint', async () => {
    setupDefaultMocks();
    const result = await crawl({ owner: 'owner', repo: 'repo' });
    expect(result.commitSha).toBe(COMMIT_SHA);
  });

  it('sets correct sha on each CrawledFile from the tree', async () => {
    setupDefaultMocks();
    const result = await crawl({ owner: 'owner', repo: 'repo' });
    const readme = result.files.find((f) => f.path === 'README.md');
    expect(readme).toBeDefined();
    expect(readme!.sha).toBe('sha-readme');
  });

  it('attaches language to each CrawledFile', async () => {
    setupDefaultMocks();
    const result = await crawl({ owner: 'owner', repo: 'repo' });
    const indexTs = result.files.find((f) => f.path === 'src/index.ts');
    expect(indexTs?.language).toBe('typescript');
    const readme = result.files.find((f) => f.path === 'README.md');
    expect(readme?.language).toBe('markdown');
  });

  it('reports progress via onProgress callback', async () => {
    setupDefaultMocks();
    const calls: Array<[number, number]> = [];
    await crawl({
      owner: 'owner',
      repo: 'repo',
      onProgress: (p, t) => calls.push([p, t])
    });
    expect(calls.length).toBeGreaterThan(0);
    // Total must remain constant across all calls.
    const totals = calls.map(([, t]) => t);
    expect(totals.every((t) => t === totals[0])).toBe(true);
  });

  it('skips files that fail to download without throwing', async () => {
    stubFetch((url) => {
      if (url === 'https://api.github.com/repos/owner/repo') {
        return jsonResponse(REPO_INFO);
      }
      if (url.includes('/git/trees/main')) {
        return jsonResponse({
          tree: [{ path: 'src/index.ts', type: 'blob', size: 100, sha: 'sha1', url: '' }],
          truncated: false
        });
      }
      if (url.includes('/commits/main')) {
        return textResponse(COMMIT_SHA);
      }
      // All content downloads fail.
      return new Response('error', { status: 500 });
    });

    // Should not throw; just return zero files.
    const result = await crawl({ owner: 'owner', repo: 'repo' });
    expect(result.files).toHaveLength(0);
    expect(result.totalFiles).toBe(1);
  });

  it('throws RepositoryNotFoundError on 404', async () => {
    stubFetch((url) => {
      if (url === 'https://api.github.com/repos/owner/missing') {
        return jsonResponse({ message: 'Not Found' }, 404, {
          'X-RateLimit-Remaining': '4999',
          'X-RateLimit-Reset': String(Math.floor(Date.now() / 1000) + 3600)
        });
      }
      return new Response('not found', { status: 404 });
    });

    await expect(crawl({ owner: 'owner', repo: 'missing' })).rejects.toThrow(
      RepositoryNotFoundError
    );
  });

  it('throws AuthenticationError on 401', async () => {
    stubFetch(() =>
      new Response('Unauthorized', {
        status: 401,
        headers: {
          'X-RateLimit-Remaining': '0',
          'X-RateLimit-Reset': String(Math.floor(Date.now() / 1000) + 3600)
        }
      })
    );

    await expect(crawl({ owner: 'owner', repo: 'repo', token: 'bad-token' })).rejects.toThrow(
      AuthenticationError
    );
  });

  it('throws PermissionError on 403 without rate-limit exhaustion', async () => {
    // Remaining > 0, so the crawler must not treat this as a rate limit.
    stubFetch(() =>
      new Response('Forbidden', {
        status: 403,
        headers: {
          'X-RateLimit-Remaining': '100',
          'X-RateLimit-Reset': String(Math.floor(Date.now() / 1000) + 3600)
        }
      })
    );

    await expect(crawl({ owner: 'owner', repo: 'repo' })).rejects.toThrow(PermissionError);
  });

  it('respects config.folders allowlist when provided', async () => {
    setupDefaultMocks();
    const result = await crawl({
      owner: 'owner',
      repo: 'repo',
      config: { folders: ['src/'] }
    });
    // Only src/ files should be present.
    expect(result.files.every((f) => f.path.startsWith('src/'))).toBe(true);
  });

  it('applies config.excludeFiles filter', async () => {
    setupDefaultMocks();
    const result = await crawl({
      owner: 'owner',
      repo: 'repo',
      config: { excludeFiles: ['package.json'] }
    });
    expect(result.files.some((f) => f.path === 'package.json')).toBe(false);
  });

  it('returns correct skippedFiles count', async () => {
    setupDefaultMocks();
    const result = await crawl({ owner: 'owner', repo: 'repo' });
    // dist/, node_modules/, and .png are the excluded items = 3
    expect(result.skippedFiles).toBe(3);
  });

  it('uses auth token in requests to GitHub API', async () => {
    const capturedHeaders: Record<string, string>[] = [];

    stubFetch((url, init) => {
      const headers = Object.fromEntries(
        Object.entries((init?.headers as Record<string, string>) ?? {})
      );
      capturedHeaders.push(headers);

      if (url === 'https://api.github.com/repos/owner/repo') {
        return jsonResponse(REPO_INFO);
      }
      if (url.includes('/git/trees/main')) {
        return jsonResponse({ tree: [], truncated: false });
      }
      if (url.includes('/commits/main')) {
        return textResponse(COMMIT_SHA);
      }
      return textResponse('content');
    });

    await crawl({ owner: 'owner', repo: 'repo', token: 'ghp_mysecrettoken' });

    const apiCalls = capturedHeaders.filter((h) => h.Authorization);
    expect(apiCalls.length).toBeGreaterThan(0);
    expect(apiCalls[0].Authorization).toBe('Bearer ghp_mysecrettoken');
  });

  it('handles a tree with zero indexable files gracefully', async () => {
    stubFetch((url) => {
      if (url === 'https://api.github.com/repos/owner/repo') return jsonResponse(REPO_INFO);
      if (url.includes('/git/trees/main'))
        return jsonResponse({
          tree: [
            { path: 'image.png', type: 'blob', size: 100, sha: 'sha1', url: '' },
            { path: 'video.mp4', type: 'blob', size: 1000, sha: 'sha2', url: '' }
          ],
          truncated: false
        });
      if (url.includes('/commits/main')) return textResponse(COMMIT_SHA);
      return textResponse('content');
    });

    const result = await crawl({ owner: 'owner', repo: 'repo' });
    expect(result.files).toHaveLength(0);
    expect(result.totalFiles).toBe(0);
    expect(result.skippedFiles).toBe(2);
  });

  it('reads and applies config from trueref.json found in the tree', async () => {
    const truerefConfig = { excludeFiles: ['package.json'] };

    stubFetch((url) => {
      if (url === 'https://api.github.com/repos/owner/repo') return jsonResponse(REPO_INFO);
      if (url.includes('/git/trees/main')) {
        return jsonResponse({
          tree: [
            { path: 'trueref.json', type: 'blob', size: 50, sha: 'sha-cfg', url: '' },
            { path: 'src/index.ts', type: 'blob', size: 200, sha: 'sha-idx', url: '' },
            { path: 'package.json', type: 'blob', size: 100, sha: 'sha-pkg', url: '' }
          ],
          truncated: false
        });
      }
      if (url.includes('/commits/main')) return textResponse(COMMIT_SHA);
      if (url.includes('trueref.json')) return textResponse(JSON.stringify(truerefConfig));
      if (url.includes('src/index.ts')) return textResponse('export const x = 1;');
      if (url.includes('package.json')) return textResponse('{"name":"test"}');
      return textResponse('content');
    });

    // No caller-supplied config — crawler should auto-detect trueref.json.
    const result = await crawl({ owner: 'owner', repo: 'repo' });
    expect(result.files.some((f) => f.path === 'package.json')).toBe(false);
    expect(result.files.some((f) => f.path === 'src/index.ts')).toBe(true);
  });
});
|
||||
477
src/lib/server/crawler/github.crawler.ts
Normal file
477
src/lib/server/crawler/github.crawler.ts
Normal file
@@ -0,0 +1,477 @@
|
||||
/**
|
||||
* GitHub Repository Crawler (TRUEREF-0003).
|
||||
*
|
||||
* Fetches repository file trees via the GitHub Trees API and downloads file
|
||||
* contents in parallel while respecting rate limits and applying
|
||||
* include/exclude filtering rules from trueref.json.
|
||||
*
|
||||
* Download strategy:
|
||||
* - Uses raw.githubusercontent.com for file content — faster and counts less
|
||||
* against the REST API rate limit.
|
||||
* - Falls back to the GitHub Contents API if raw download fails.
|
||||
*
|
||||
* Error handling:
|
||||
* - 404 → RepositoryNotFoundError
|
||||
* - 401 → AuthenticationError
|
||||
* - 403 → waits for rate-limit reset if X-RateLimit-Remaining is 0; else PermissionError
|
||||
* - 422 → tree too large; switches to directory-by-directory traversal (depth pagination)
|
||||
* - Network errors → retried up to 3 times with exponential backoff
|
||||
* - Bad base64 content → file skipped with a console warning
|
||||
*/
|
||||
|
||||
import { shouldIndexFile, detectLanguage } from './file-filter.js';
|
||||
import { GitHubRateLimiter, Semaphore, withRetry } from './rate-limiter.js';
|
||||
import {
|
||||
AuthenticationError,
|
||||
PermissionError,
|
||||
RateLimitError,
|
||||
RepositoryNotFoundError
|
||||
} from './types.js';
|
||||
|
||||
// Domain errors should not be retried — they are permanent HTTP status codes.
|
||||
function isDomainError(err: unknown): boolean {
|
||||
return (
|
||||
err instanceof RepositoryNotFoundError ||
|
||||
err instanceof AuthenticationError ||
|
||||
err instanceof PermissionError ||
|
||||
err instanceof RateLimitError
|
||||
);
|
||||
}
|
||||
|
||||
function isRetryable(err: unknown): boolean {
|
||||
return !isDomainError(err);
|
||||
}
|
||||
import type {
|
||||
CrawlOptions,
|
||||
CrawlResult,
|
||||
CrawledFile,
|
||||
GitHubContentResponse,
|
||||
GitHubRepoResponse,
|
||||
GitHubTreeItem,
|
||||
GitHubTreeResponse
|
||||
} from './types.js';
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Internal constants
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/** Base URL of the GitHub REST API. */
const GITHUB_API = 'https://api.github.com';
/** Base URL for raw file content downloads (cheaper than the REST API). */
const RAW_CONTENT = 'https://raw.githubusercontent.com';

/** Maximum parallel file downloads. */
const DOWNLOAD_CONCURRENCY = 10;

/** Config file names that should be fetched first so their filtering rules
 * apply to all subsequent downloads. */
const CONFIG_FILE_NAMES = new Set(['trueref.json', 'context7.json']);
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Internal helpers
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/**
|
||||
* Build standard GitHub API request headers.
|
||||
*/
|
||||
function buildHeaders(token?: string): Record<string, string> {
|
||||
const headers: Record<string, string> = {
|
||||
Accept: 'application/vnd.github+json',
|
||||
'X-GitHub-Api-Version': '2022-11-28'
|
||||
};
|
||||
if (token) {
|
||||
headers.Authorization = `Bearer ${token}`;
|
||||
}
|
||||
return headers;
|
||||
}
|
||||
|
||||
/**
|
||||
* Throw a domain error for non-2xx GitHub API responses.
|
||||
* Mutates the rate limiter with header data before throwing.
|
||||
*/
|
||||
async function throwForStatus(response: Response, rateLimiter: GitHubRateLimiter): Promise<void> {
|
||||
if (response.ok) return;
|
||||
|
||||
rateLimiter.updateFromHeaders(response.headers);
|
||||
|
||||
switch (response.status) {
|
||||
case 401:
|
||||
throw new AuthenticationError('GitHub authentication failed — check your PAT.');
|
||||
case 403: {
|
||||
const remaining = response.headers.get('X-RateLimit-Remaining');
|
||||
if (remaining === '0') {
|
||||
const reset = parseInt(response.headers.get('X-RateLimit-Reset') ?? '0', 10) * 1000;
|
||||
throw new RateLimitError('GitHub rate limit exceeded.', reset);
|
||||
}
|
||||
throw new PermissionError(
|
||||
'GitHub returned 403 Forbidden — insufficient permissions for this resource.'
|
||||
);
|
||||
}
|
||||
case 404:
|
||||
throw new RepositoryNotFoundError(
|
||||
`Repository not found or not accessible: ${response.url}`
|
||||
);
|
||||
default: {
|
||||
const body = await response.text().catch(() => '');
|
||||
throw new Error(`GitHub API error ${response.status}: ${body}`);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// GitHub API calls
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/**
|
||||
* Fetch repository metadata (default branch, stars, etc.).
|
||||
*/
|
||||
async function fetchRepoInfo(
|
||||
owner: string,
|
||||
repo: string,
|
||||
token: string | undefined,
|
||||
rateLimiter: GitHubRateLimiter
|
||||
): Promise<GitHubRepoResponse> {
|
||||
return withRetry(async () => {
|
||||
await rateLimiter.waitIfNeeded();
|
||||
|
||||
const response = await fetch(`${GITHUB_API}/repos/${owner}/${repo}`, {
|
||||
headers: buildHeaders(token)
|
||||
});
|
||||
|
||||
rateLimiter.updateFromHeaders(response.headers);
|
||||
await throwForStatus(response, rateLimiter);
|
||||
|
||||
return (await response.json()) as GitHubRepoResponse;
|
||||
}, 3, isRetryable);
|
||||
}
|
||||
|
||||
/**
|
||||
* Fetch the recursive file tree for a given ref.
|
||||
* Returns null when the tree is truncated (>100k items), signalling that we
|
||||
* should fall back to directory-level traversal.
|
||||
*/
|
||||
async function fetchTree(
|
||||
owner: string,
|
||||
repo: string,
|
||||
ref: string,
|
||||
token: string | undefined,
|
||||
rateLimiter: GitHubRateLimiter
|
||||
): Promise<GitHubTreeResponse | null> {
|
||||
return withRetry(async () => {
|
||||
await rateLimiter.waitIfNeeded();
|
||||
|
||||
const url = `${GITHUB_API}/repos/${owner}/${repo}/git/trees/${ref}?recursive=1`;
|
||||
const response = await fetch(url, { headers: buildHeaders(token) });
|
||||
|
||||
rateLimiter.updateFromHeaders(response.headers);
|
||||
|
||||
// 422 means the tree is too large for a single recursive call.
|
||||
if (response.status === 422) return null;
|
||||
|
||||
await throwForStatus(response, rateLimiter);
|
||||
|
||||
return (await response.json()) as GitHubTreeResponse;
|
||||
}, 3, isRetryable);
|
||||
}
|
||||
|
||||
/**
|
||||
* Fetch a subtree (non-recursive) for a single directory path.
|
||||
* Used when the full recursive tree is truncated.
|
||||
*/
|
||||
async function fetchSubTree(
|
||||
owner: string,
|
||||
repo: string,
|
||||
ref: string,
|
||||
treeSha: string,
|
||||
token: string | undefined,
|
||||
rateLimiter: GitHubRateLimiter
|
||||
): Promise<GitHubTreeResponse> {
|
||||
return withRetry(async () => {
|
||||
await rateLimiter.waitIfNeeded();
|
||||
|
||||
const url = `${GITHUB_API}/repos/${owner}/${repo}/git/trees/${treeSha}`;
|
||||
const response = await fetch(url, { headers: buildHeaders(token) });
|
||||
|
||||
rateLimiter.updateFromHeaders(response.headers);
|
||||
await throwForStatus(response, rateLimiter);
|
||||
|
||||
return (await response.json()) as GitHubTreeResponse;
|
||||
}, 3, isRetryable);
|
||||
}
|
||||
|
||||
/**
|
||||
* Resolve the HEAD commit SHA from a branch/tag ref by fetching the
|
||||
* commit object at the ref tip.
|
||||
*/
|
||||
async function fetchCommitSha(
|
||||
owner: string,
|
||||
repo: string,
|
||||
ref: string,
|
||||
token: string | undefined,
|
||||
rateLimiter: GitHubRateLimiter
|
||||
): Promise<string> {
|
||||
return withRetry(async () => {
|
||||
await rateLimiter.waitIfNeeded();
|
||||
|
||||
const url = `${GITHUB_API}/repos/${owner}/${repo}/commits/${ref}`;
|
||||
const response = await fetch(url, {
|
||||
headers: { ...buildHeaders(token), Accept: 'application/vnd.github.sha' }
|
||||
});
|
||||
|
||||
rateLimiter.updateFromHeaders(response.headers);
|
||||
await throwForStatus(response, rateLimiter);
|
||||
|
||||
// When Accept is 'application/vnd.github.sha', the response body is the
|
||||
// bare SHA string.
|
||||
return (await response.text()).trim();
|
||||
}, 3, isRetryable);
|
||||
}
|
||||
|
||||
/**
|
||||
* Download raw file content via raw.githubusercontent.com.
|
||||
* Returns null on any failure (the caller will skip or fall back).
|
||||
*/
|
||||
async function downloadRawFile(
|
||||
owner: string,
|
||||
repo: string,
|
||||
ref: string,
|
||||
filePath: string,
|
||||
token: string | undefined
|
||||
): Promise<string | null> {
|
||||
try {
|
||||
const url = `${RAW_CONTENT}/${owner}/${repo}/${ref}/${filePath}`;
|
||||
const headers: Record<string, string> = {};
|
||||
if (token) headers.Authorization = `Bearer ${token}`;
|
||||
|
||||
const response = await fetch(url, { headers });
|
||||
if (!response.ok) return null;
|
||||
|
||||
return await response.text();
|
||||
} catch {
|
||||
return null;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Download file content via the GitHub Contents API (fallback).
|
||||
*/
|
||||
async function downloadViaContentsApi(
|
||||
owner: string,
|
||||
repo: string,
|
||||
ref: string,
|
||||
filePath: string,
|
||||
token: string | undefined,
|
||||
rateLimiter: GitHubRateLimiter
|
||||
): Promise<string | null> {
|
||||
try {
|
||||
return await withRetry(async () => {
|
||||
await rateLimiter.waitIfNeeded();
|
||||
|
||||
const url = `${GITHUB_API}/repos/${owner}/${repo}/contents/${filePath}?ref=${ref}`;
|
||||
const response = await fetch(url, { headers: buildHeaders(token) });
|
||||
|
||||
rateLimiter.updateFromHeaders(response.headers);
|
||||
if (!response.ok) return null;
|
||||
|
||||
const data = (await response.json()) as GitHubContentResponse;
|
||||
if (data.encoding !== 'base64') return null;
|
||||
|
||||
// Node.js Buffer handles both padded and unpadded base64.
|
||||
return Buffer.from(data.content.replace(/\n/g, ''), 'base64').toString('utf-8');
|
||||
});
|
||||
} catch {
|
||||
return null;
|
||||
}
|
||||
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Directory-level traversal (fallback for truncated trees)
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/**
|
||||
* Recursively collect all blob items from sub-trees when the top-level
|
||||
* recursive tree is truncated (>100k items).
|
||||
*/
|
||||
async function collectBlobsFromSubTrees(
|
||||
owner: string,
|
||||
repo: string,
|
||||
ref: string,
|
||||
token: string | undefined,
|
||||
rateLimiter: GitHubRateLimiter
|
||||
): Promise<GitHubTreeItem[]> {
|
||||
const allBlobs: GitHubTreeItem[] = [];
|
||||
const queue: Array<{ sha: string; prefix: string }> = [{ sha: ref, prefix: '' }];
|
||||
|
||||
while (queue.length > 0) {
|
||||
const batch = queue.splice(0, DOWNLOAD_CONCURRENCY);
|
||||
|
||||
await Promise.all(
|
||||
batch.map(async ({ sha, prefix }) => {
|
||||
const subTree = await fetchSubTree(owner, repo, ref, sha, token, rateLimiter).catch(
|
||||
() => null
|
||||
);
|
||||
if (!subTree) return;
|
||||
|
||||
for (const item of subTree.tree) {
|
||||
const fullPath = prefix ? `${prefix}/${item.path}` : item.path;
|
||||
if (item.type === 'blob') {
|
||||
allBlobs.push({ ...item, path: fullPath });
|
||||
} else if (item.type === 'tree') {
|
||||
queue.push({ sha: item.sha, prefix: fullPath });
|
||||
}
|
||||
}
|
||||
})
|
||||
);
|
||||
}
|
||||
|
||||
return allBlobs;
|
||||
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Config file detection
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/**
|
||||
* Try to download and parse a trueref.json / context7.json config from the
|
||||
* repository root. Returns undefined if not found or unparseable.
|
||||
*/
|
||||
async function fetchRepoConfig(
|
||||
owner: string,
|
||||
repo: string,
|
||||
ref: string,
|
||||
token: string | undefined,
|
||||
blobs: GitHubTreeItem[],
|
||||
rateLimiter: GitHubRateLimiter
|
||||
): Promise<CrawlOptions['config'] | undefined> {
|
||||
// Look for config files only at the repo root (no directory prefix).
|
||||
const configItem = blobs.find((b) => CONFIG_FILE_NAMES.has(b.path));
|
||||
if (!configItem) return undefined;
|
||||
|
||||
const content =
|
||||
(await downloadRawFile(owner, repo, ref, configItem.path, token)) ??
|
||||
(await downloadViaContentsApi(
|
||||
owner,
|
||||
repo,
|
||||
ref,
|
||||
configItem.path,
|
||||
token,
|
||||
rateLimiter
|
||||
));
|
||||
|
||||
if (!content) return undefined;
|
||||
|
||||
try {
|
||||
return JSON.parse(content) as CrawlOptions['config'];
|
||||
} catch {
|
||||
console.warn(`[GitHubCrawler] Failed to parse config file: ${configItem.path}`);
|
||||
return undefined;
|
||||
}
|
||||
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Public crawl() function
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/**
|
||||
* Crawl a GitHub repository and return structured file objects.
|
||||
*
|
||||
* @param options - Repository coordinates, auth token, filter config, and
|
||||
* optional progress callback.
|
||||
* @returns CrawlResult with all downloaded files and summary statistics.
|
||||
*/
|
||||
export async function crawl(options: CrawlOptions): Promise<CrawlResult> {
|
||||
const { owner, repo, token, onProgress } = options;
|
||||
const rateLimiter = new GitHubRateLimiter();
|
||||
const semaphore = new Semaphore(DOWNLOAD_CONCURRENCY);
|
||||
|
||||
// ---- Step 1: Resolve the ref (default branch if not provided) ----------
|
||||
let ref = options.ref;
|
||||
let commitSha = '';
|
||||
|
||||
const repoInfo = await fetchRepoInfo(owner, repo, token, rateLimiter);
|
||||
if (!ref) {
|
||||
ref = repoInfo.default_branch;
|
||||
}
|
||||
|
||||
// ---- Step 2: Fetch the file tree ---------------------------------------
|
||||
let blobs: GitHubTreeItem[];
|
||||
|
||||
const treeResponse = await fetchTree(owner, repo, ref, token, rateLimiter);
|
||||
|
||||
if (treeResponse === null) {
|
||||
// Tree truncated — fall back to directory-by-directory traversal.
|
||||
console.warn(
|
||||
`[GitHubCrawler] Tree for ${owner}/${repo}@${ref} is truncated; using sub-tree traversal.`
|
||||
);
|
||||
blobs = await collectBlobsFromSubTrees(owner, repo, ref, token, rateLimiter);
|
||||
} else {
|
||||
blobs = treeResponse.tree.filter((item) => item.type === 'blob');
|
||||
}
|
||||
|
||||
// Resolve HEAD commit SHA (best-effort; empty string on failure).
|
||||
commitSha = await fetchCommitSha(owner, repo, ref, token, rateLimiter).catch(() => '');
|
||||
|
||||
// ---- Step 3: Detect and download config file first ---------------------
|
||||
// Merge caller-supplied config with any discovered repo config.
|
||||
let effectiveConfig = options.config;
|
||||
if (!effectiveConfig) {
|
||||
effectiveConfig = await fetchRepoConfig(owner, repo, ref, token, blobs, rateLimiter);
|
||||
}
|
||||
|
||||
// ---- Step 4: Filter blobs according to config --------------------------
|
||||
const filteredBlobs = blobs.filter((item) =>
|
||||
shouldIndexFile(item.path, item.size ?? 0, effectiveConfig)
|
||||
);
|
||||
|
||||
const totalFiles = filteredBlobs.length;
|
||||
const skippedFiles = blobs.length - totalFiles;
|
||||
|
||||
// ---- Step 5: Download file contents in parallel -------------------------
|
||||
const files: CrawledFile[] = [];
|
||||
let processed = 0;
|
||||
|
||||
await Promise.all(
|
||||
filteredBlobs.map((item) =>
|
||||
semaphore.run(async () => {
|
||||
try {
|
||||
// Prefer raw download (cheaper on rate limit); fall back to API.
|
||||
const content =
|
||||
(await downloadRawFile(owner, repo, ref!, item.path, token)) ??
|
||||
(await downloadViaContentsApi(
|
||||
owner,
|
||||
repo,
|
||||
ref!,
|
||||
item.path,
|
||||
token,
|
||||
rateLimiter
|
||||
));
|
||||
|
||||
if (content === null) {
|
||||
console.warn(`[GitHubCrawler] Could not download: ${item.path} — skipping.`);
|
||||
} else {
|
||||
files.push({
|
||||
path: item.path,
|
||||
content,
|
||||
size: item.size ?? Buffer.byteLength(content, 'utf-8'),
|
||||
sha: item.sha,
|
||||
language: detectLanguage(item.path)
|
||||
});
|
||||
}
|
||||
} catch (err) {
|
||||
console.warn(
|
||||
`[GitHubCrawler] Error downloading ${item.path}: ${err instanceof Error ? err.message : String(err)}`
|
||||
);
|
||||
} finally {
|
||||
processed++;
|
||||
onProgress?.(processed, totalFiles);
|
||||
}
|
||||
})
|
||||
)
|
||||
);
|
||||
|
||||
return {
|
||||
files,
|
||||
totalFiles,
|
||||
skippedFiles,
|
||||
branch: ref,
|
||||
commitSha
|
||||
};
|
||||
}
|
||||
554
src/lib/server/crawler/local.crawler.test.ts
Normal file
554
src/lib/server/crawler/local.crawler.test.ts
Normal file
@@ -0,0 +1,554 @@
|
||||
/**
|
||||
* Unit tests for the local filesystem crawler (TRUEREF-0004).
|
||||
*
|
||||
* Each test that needs a filesystem fixture creates a temporary directory via
|
||||
* `fs.mkdtemp`, writes the required files, runs the crawler, then cleans up
|
||||
* with `fs.rm` regardless of the test outcome.
|
||||
*/
|
||||
|
||||
import { execFile } from 'node:child_process';
|
||||
import { createHash } from 'node:crypto';
|
||||
import { promises as fs } from 'node:fs';
|
||||
import { join } from 'node:path';
|
||||
import { tmpdir } from 'node:os';
|
||||
import { promisify } from 'node:util';
|
||||
|
||||
import { afterEach, beforeEach, describe, expect, it } from 'vitest';
|
||||
|
||||
import { LocalCrawler } from './local.crawler.js';
|
||||
import type { LocalCrawlOptions } from './local.crawler.js';
|
||||
import { InvalidRefError, NotAGitRepositoryError } from './types.js';
|
||||
|
||||
const execFileAsync = promisify(execFile);
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Helpers
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
function sha256(content: string): string {
|
||||
return createHash('sha256').update(content, 'utf-8').digest('hex');
|
||||
}
|
||||
|
||||
/** Create a temp directory, write a map of relPath → content, return rootPath. */
|
||||
async function makeTempRepo(files: Record<string, string>): Promise<string> {
|
||||
const root = await fs.mkdtemp(join(tmpdir(), 'trueref-test-'));
|
||||
for (const [relPath, content] of Object.entries(files)) {
|
||||
const absPath = join(root, relPath);
|
||||
await fs.mkdir(join(absPath, '..'), { recursive: true });
|
||||
await fs.writeFile(absPath, content, 'utf-8');
|
||||
}
|
||||
return root;
|
||||
}
|
||||
|
||||
/** Remove a temporary directory tree created by makeTempRepo. */
|
||||
async function cleanupTempRepo(root: string): Promise<void> {
|
||||
await fs.rm(root, { recursive: true, force: true });
|
||||
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Test state
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
// Shared fixture root; each suite assigns it in beforeEach and removes it in afterEach.
let root: string = '';
// Single crawler instance reused across suites.
const crawler = new LocalCrawler();
|
||||
|
||||
async function crawlRoot(opts: Partial<LocalCrawlOptions> = {}): Promise<ReturnType<LocalCrawler['crawl']>> {
|
||||
return crawler.crawl({ rootPath: root, ...opts });
|
||||
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Basic crawl behaviour
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
// Happy-path enumeration over a small mixed fixture (docs, code, config).
// Uses the module-level `root` fixture, rebuilt before every test.
describe('LocalCrawler.crawl() — basic file enumeration', () => {
	beforeEach(async () => {
		root = await makeTempRepo({
			'README.md': '# Hello',
			'src/index.ts': 'export const x = 1;',
			'src/utils.ts': 'export const y = 2;',
			'package.json': '{"name":"test"}'
		});
	});

	afterEach(async () => {
		await cleanupTempRepo(root);
	});

	it('returns all indexable files', async () => {
		const result = await crawlRoot();
		const paths = result.files.map((f) => f.path).sort();
		expect(paths).toEqual(['README.md', 'package.json', 'src/index.ts', 'src/utils.ts'].sort());
	});

	it('populates content as a UTF-8 string', async () => {
		const result = await crawlRoot();
		const readme = result.files.find((f) => f.path === 'README.md');
		expect(readme?.content).toBe('# Hello');
	});

	it('sets size equal to Buffer.byteLength of content', async () => {
		const result = await crawlRoot();
		for (const file of result.files) {
			expect(file.size).toBe(Buffer.byteLength(file.content, 'utf-8'));
		}
	});

	it('computes correct SHA-256 per file', async () => {
		// Compares against the local sha256 helper defined above.
		const result = await crawlRoot();
		const readme = result.files.find((f) => f.path === 'README.md');
		expect(readme?.sha).toBe(sha256('# Hello'));
	});

	it('detects language from extension', async () => {
		const result = await crawlRoot();
		const ts = result.files.find((f) => f.path === 'src/index.ts');
		expect(ts?.language).toBe('typescript');
		const md = result.files.find((f) => f.path === 'README.md');
		expect(md?.language).toBe('markdown');
		const json = result.files.find((f) => f.path === 'package.json');
		expect(json?.language).toBe('json');
	});

	it('sets branch to "local"', async () => {
		const result = await crawlRoot();
		expect(result.branch).toBe('local');
	});

	it('sets totalFiles to the count of filtered files', async () => {
		const result = await crawlRoot();
		expect(result.totalFiles).toBe(result.files.length);
	});

	it('sets commitSha to a non-empty hex string', async () => {
		// 64 hex chars — consistent with a SHA-256 digest.
		const result = await crawlRoot();
		expect(result.commitSha).toMatch(/^[0-9a-f]{64}$/);
	});

	it('produces a deterministic commitSha for the same file set', async () => {
		const r1 = await crawlRoot();
		const r2 = await crawlRoot();
		expect(r1.commitSha).toBe(r2.commitSha);
	});
});
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Filtering — default excludes and extension allow-list
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
// Default exclusion rules: well-known build/VCS/dependency directories and
// non-indexable extensions are filtered out without any config file.
describe('LocalCrawler.crawl() — default filtering', () => {
	beforeEach(async () => {
		root = await makeTempRepo({
			'src/index.ts': 'export {};',
			'dist/bundle.js': 'bundled',
			'node_modules/lodash/index.js': 'lodash',
			'.git/config': '[core]',
			'image.png': '\x89PNG',
			'README.md': '# Docs'
		});
	});

	afterEach(async () => {
		await cleanupTempRepo(root);
	});

	it('excludes files in dist/', async () => {
		const result = await crawlRoot();
		expect(result.files.every((f) => !f.path.startsWith('dist/'))).toBe(true);
	});

	it('excludes files in node_modules/', async () => {
		const result = await crawlRoot();
		expect(result.files.every((f) => !f.path.startsWith('node_modules/'))).toBe(true);
	});

	it('excludes files in .git/', async () => {
		const result = await crawlRoot();
		expect(result.files.every((f) => !f.path.startsWith('.git/'))).toBe(true);
	});

	it('excludes non-indexable extensions like .png', async () => {
		const result = await crawlRoot();
		expect(result.files.every((f) => !f.path.endsWith('.png'))).toBe(true);
	});

	it('reports skippedFiles = total enumerated – filtered', async () => {
		const result = await crawlRoot();
		// dist/, node_modules/, .git/, .png = 4 skipped
		// src/index.ts + README.md = 2 kept
		expect(result.skippedFiles).toBe(4);
		expect(result.totalFiles).toBe(2);
	});
});
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Size limit
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
describe('LocalCrawler.crawl() — size limit', () => {
|
||||
afterEach(async () => {
|
||||
await cleanupTempRepo(root);
|
||||
});
|
||||
|
||||
it('excludes files larger than MAX_FILE_SIZE_BYTES (500 KB)', async () => {
|
||||
// 500_001 bytes of 'x'
|
||||
const bigContent = 'x'.repeat(500_001);
|
||||
root = await makeTempRepo({
|
||||
'big.ts': bigContent,
|
||||
'small.ts': 'export const x = 1;'
|
||||
});
|
||||
const result = await crawlRoot();
|
||||
expect(result.files.some((f) => f.path === 'big.ts')).toBe(false);
|
||||
expect(result.files.some((f) => f.path === 'small.ts')).toBe(true);
|
||||
});
|
||||
|
||||
it('includes files exactly at MAX_FILE_SIZE_BYTES (500 KB)', async () => {
|
||||
const edgeContent = 'a'.repeat(500_000);
|
||||
root = await makeTempRepo({ 'edge.ts': edgeContent });
|
||||
const result = await crawlRoot();
|
||||
expect(result.files.some((f) => f.path === 'edge.ts')).toBe(true);
|
||||
});
|
||||
});
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// trueref.json / context7.json config detection
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
// Discovery and precedence of trueref.json / context7.json: auto-detected
// from the fixture root, overridden by caller-supplied config, and tolerant
// of malformed JSON.
describe('LocalCrawler.crawl() — config file detection', () => {
	afterEach(async () => {
		await cleanupTempRepo(root);
	});

	it('auto-detects trueref.json and applies excludeFiles', async () => {
		root = await makeTempRepo({
			'trueref.json': JSON.stringify({ excludeFiles: ['package.json'] }),
			'src/index.ts': 'export {};',
			'package.json': '{"name":"test"}'
		});
		const result = await crawlRoot();
		expect(result.files.some((f) => f.path === 'package.json')).toBe(false);
		expect(result.files.some((f) => f.path === 'src/index.ts')).toBe(true);
	});

	it('auto-detects context7.json and applies folders allowlist', async () => {
		root = await makeTempRepo({
			'context7.json': JSON.stringify({ folders: ['docs/'] }),
			'src/index.ts': 'export {};',
			'docs/guide.md': '# Guide'
		});
		const result = await crawlRoot();
		expect(result.files.some((f) => f.path === 'src/index.ts')).toBe(false);
		expect(result.files.some((f) => f.path === 'docs/guide.md')).toBe(true);
	});

	it('caller-supplied config takes precedence over discovered config file', async () => {
		root = await makeTempRepo({
			'trueref.json': JSON.stringify({ excludeFiles: ['package.json'] }),
			'src/index.ts': 'export {};',
			'package.json': '{"name":"test"}'
		});
		// Caller provides a config with no exclusions — package.json should appear.
		const result = await crawlRoot({ config: {} });
		expect(result.files.some((f) => f.path === 'package.json')).toBe(true);
	});

	it('applies excludeFolders from config', async () => {
		root = await makeTempRepo({
			'trueref.json': JSON.stringify({ excludeFolders: ['internal/'] }),
			'internal/secret.ts': 'secret',
			'src/public.ts': 'public'
		});
		const result = await crawlRoot();
		expect(result.files.some((f) => f.path.startsWith('internal/'))).toBe(false);
		expect(result.files.some((f) => f.path === 'src/public.ts')).toBe(true);
	});

	it('gracefully handles a malformed config file', async () => {
		root = await makeTempRepo({
			'trueref.json': 'NOT VALID JSON {{{',
			'src/index.ts': 'export {};'
		});
		// Should not throw; falls back to no config.
		const result = await crawlRoot();
		expect(result.files.some((f) => f.path === 'src/index.ts')).toBe(true);
	});
});
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Progress callback
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
// onProgress contract: called once per filtered file with a monotonically
// increasing processed count and a constant total.
describe('LocalCrawler.crawl() — progress reporting', () => {
	beforeEach(async () => {
		root = await makeTempRepo({
			'src/a.ts': 'a',
			'src/b.ts': 'b',
			'src/c.ts': 'c'
		});
	});

	afterEach(async () => {
		await cleanupTempRepo(root);
	});

	it('calls onProgress once per filtered file', async () => {
		const calls: Array<[number, number]> = [];
		await crawlRoot({ onProgress: (p, t) => calls.push([p, t]) });
		expect(calls).toHaveLength(3);
	});

	it('increments processed from 1 to totalFiles', async () => {
		const calls: Array<[number, number]> = [];
		await crawlRoot({ onProgress: (p, t) => calls.push([p, t]) });
		const processed = calls.map(([p]) => p);
		expect(processed).toEqual([1, 2, 3]);
	});

	it('keeps total constant across all callback invocations', async () => {
		const totals: number[] = [];
		await crawlRoot({ onProgress: (_, t) => totals.push(t) });
		expect(totals.every((t) => t === totals[0])).toBe(true);
	});

	it('does not call onProgress when no files pass the filter', async () => {
		// Overwrite root with only non-indexable files.
		await fs.rm(root, { recursive: true, force: true });
		root = await makeTempRepo({ 'image.png': '\x89PNG' });
		const calls: number[] = [];
		await crawlRoot({ onProgress: () => calls.push(1) });
		expect(calls).toHaveLength(0);
	});
});
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Git ref checkout
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/**
|
||||
* Create a temp directory that is a valid git repo with one commit per entry
|
||||
* in `history`. Each entry is a map of relPath → content committed under the
|
||||
* given tag (if provided). Returns the repo root path.
|
||||
*
|
||||
* Layout of `history`:
|
||||
* [{ tag?: string, files: Record<string, string> }, ...]
|
||||
*/
|
||||
async function makeGitRepo(
|
||||
history: Array<{ tag?: string; files: Record<string, string> }>
|
||||
): Promise<string> {
|
||||
const root = await fs.mkdtemp(join(tmpdir(), 'trueref-git-test-'));
|
||||
|
||||
async function git(...args: string[]) {
|
||||
await execFileAsync('git', ['-C', root, ...args]);
|
||||
}
|
||||
|
||||
await git('init', '--initial-branch=main');
|
||||
await git('config', 'user.email', 'test@trueref.local');
|
||||
await git('config', 'user.name', 'TrueRef Test');
|
||||
|
||||
for (const { tag, files } of history) {
|
||||
// Write files
|
||||
for (const [relPath, content] of Object.entries(files)) {
|
||||
const absPath = join(root, relPath);
|
||||
await fs.mkdir(join(absPath, '..'), { recursive: true });
|
||||
await fs.writeFile(absPath, content, 'utf-8');
|
||||
}
|
||||
await git('add', '.');
|
||||
await git('commit', '--allow-empty', '-m', `commit for ${tag ?? 'HEAD'}`);
|
||||
if (tag) {
|
||||
await git('tag', tag);
|
||||
}
|
||||
}
|
||||
|
||||
return root;
|
||||
}
|
||||
|
||||
// Checkout behaviour against real git repos built by makeGitRepo. This suite
// deliberately shadows the module-level `root` and `crawler` so its git
// fixtures never leak into the filesystem-only suites above.
describe('LocalCrawler.crawl() — git ref checkout', () => {
	let root: string = '';
	const crawler = new LocalCrawler();

	afterEach(async () => {
		if (root) await cleanupTempRepo(root);
	});

	it('crawls files at a specific tag, not the HEAD state', async () => {
		root = await makeGitRepo([
			{ tag: 'v1.0.0', files: { 'src/index.ts': 'export const version = 1;' } },
			{ files: { 'src/index.ts': 'export const version = 2;' } }
		]);

		const result = await crawler.crawl({ rootPath: root, ref: 'v1.0.0' });
		const indexFile = result.files.find((f) => f.path === 'src/index.ts');
		expect(indexFile?.content).toBe('export const version = 1;');
	});

	it('crawls files at a specific commit SHA', async () => {
		root = await makeGitRepo([
			{ tag: 'v1.0.0', files: { 'api.ts': 'v1' } },
			{ files: { 'api.ts': 'v2' } }
		]);

		// Resolve the SHA of v1.0.0
		const { stdout } = await execFileAsync('git', ['-C', root, 'rev-parse', 'v1.0.0'], {
			encoding: 'utf-8'
		});
		const sha = stdout.trim();

		const result = await crawler.crawl({ rootPath: root, ref: sha });
		const api = result.files.find((f) => f.path === 'api.ts');
		expect(api?.content).toBe('v1');
	});

	it('sets branch to the ref string in the result', async () => {
		root = await makeGitRepo([{ tag: 'v2.3.1', files: { 'README.md': '# v2' } }]);

		const result = await crawler.crawl({ rootPath: root, ref: 'v2.3.1' });
		expect(result.branch).toBe('v2.3.1');
	});

	it('sets commitSha to the git-resolved SHA (not file-content hash)', async () => {
		root = await makeGitRepo([{ tag: 'v1.0.0', files: { 'a.ts': 'a' } }]);

		const { stdout } = await execFileAsync('git', ['-C', root, 'rev-parse', 'v1.0.0'], {
			encoding: 'utf-8'
		});
		const expectedSha = stdout.trim();

		const result = await crawler.crawl({ rootPath: root, ref: 'v1.0.0' });
		expect(result.commitSha).toBe(expectedSha);
	});

	it('does not modify the working tree', async () => {
		root = await makeGitRepo([
			{ tag: 'v1.0.0', files: { 'src/index.ts': 'v1' } },
			{ files: { 'src/index.ts': 'v2' } }
		]);

		// Working tree is at HEAD (v2)
		const before = await fs.readFile(join(root, 'src/index.ts'), 'utf-8');
		await crawler.crawl({ rootPath: root, ref: 'v1.0.0' });
		const after = await fs.readFile(join(root, 'src/index.ts'), 'utf-8');

		expect(before).toBe('v2');
		expect(after).toBe('v2');
	});

	it('removes the temporary worktree after crawling', async () => {
		root = await makeGitRepo([{ tag: 'v1.0.0', files: { 'f.ts': 'x' } }]);

		await crawler.crawl({ rootPath: root, ref: 'v1.0.0' });

		// List remaining worktrees — only the main one should remain.
		const { stdout } = await execFileAsync('git', ['-C', root, 'worktree', 'list', '--porcelain'], {
			encoding: 'utf-8'
		});
		const worktreeCount = stdout.split('\n').filter((l) => l.startsWith('worktree ')).length;
		expect(worktreeCount).toBe(1);
	});

	it('throws NotAGitRepositoryError for a plain directory', async () => {
		const plainDir = await fs.mkdtemp(join(tmpdir(), 'trueref-plain-'));
		root = plainDir; // cleaned up in afterEach

		await expect(crawler.crawl({ rootPath: plainDir, ref: 'v1.0.0' })).rejects.toThrow(
			NotAGitRepositoryError
		);
	});

	it('throws InvalidRefError for a ref that does not exist', async () => {
		root = await makeGitRepo([{ tag: 'v1.0.0', files: { 'f.ts': 'x' } }]);

		await expect(crawler.crawl({ rootPath: root, ref: 'v99.99.99' })).rejects.toThrow(
			InvalidRefError
		);
	});

	it('applies caller-supplied config at the checked-out ref', async () => {
		root = await makeGitRepo([
			{
				tag: 'v1.0.0',
				files: {
					'src/index.ts': 'export {};',
					'package.json': '{"name":"test"}'
				}
			}
		]);

		// Exclude package.json via caller config
		const result = await crawler.crawl({
			rootPath: root,
			ref: 'v1.0.0',
			config: { excludeFiles: ['package.json'] }
		});

		expect(result.files.some((f) => f.path === 'package.json')).toBe(false);
		expect(result.files.some((f) => f.path === 'src/index.ts')).toBe(true);
	});

	it('reads trueref.json from the checked-out ref', async () => {
		root = await makeGitRepo([
			{
				tag: 'v1.0.0',
				files: {
					'trueref.json': JSON.stringify({ excludeFiles: ['package.json'] }),
					'src/index.ts': 'export {};',
					'package.json': '{"name":"test"}'
				}
			}
		]);

		const result = await crawler.crawl({ rootPath: root, ref: 'v1.0.0' });
		expect(result.files.some((f) => f.path === 'package.json')).toBe(false);
		expect(result.files.some((f) => f.path === 'src/index.ts')).toBe(true);
	});
});
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Edge cases
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
describe('LocalCrawler.crawl() — edge cases', () => {
|
||||
afterEach(async () => {
|
||||
await cleanupTempRepo(root);
|
||||
});
|
||||
|
||||
it('returns empty result for an empty directory', async () => {
|
||||
root = await makeTempRepo({});
|
||||
const result = await crawlRoot();
|
||||
expect(result.files).toHaveLength(0);
|
||||
expect(result.totalFiles).toBe(0);
|
||||
expect(result.skippedFiles).toBe(0);
|
||||
});
|
||||
|
||||
it('handles deeply nested directory structures', async () => {
|
||||
root = await makeTempRepo({
|
||||
'a/b/c/d/deep.ts': 'export const deep = true;'
|
||||
});
|
||||
const result = await crawlRoot();
|
||||
expect(result.files.some((f) => f.path === 'a/b/c/d/deep.ts')).toBe(true);
|
||||
});
|
||||
|
||||
it('handles files with UTF-8 content correctly', async () => {
|
||||
const utf8Content = 'const greeting = "héllo wörld — 日本語";';
|
||||
root = await makeTempRepo({ 'src/unicode.ts': utf8Content });
|
||||
const result = await crawlRoot();
|
||||
const file = result.files.find((f) => f.path === 'src/unicode.ts');
|
||||
expect(file?.content).toBe(utf8Content);
|
||||
expect(file?.sha).toBe(sha256(utf8Content));
|
||||
});
|
||||
|
||||
it('commitSha differs when file content changes', async () => {
|
||||
root = await makeTempRepo({ 'src/index.ts': 'version 1' });
|
||||
const r1 = await crawlRoot();
|
||||
|
||||
await fs.writeFile(join(root, 'src/index.ts'), 'version 2', 'utf-8');
|
||||
const r2 = await crawlRoot();
|
||||
|
||||
expect(r1.commitSha).not.toBe(r2.commitSha);
|
||||
});
|
||||
|
||||
it('commitSha is empty-string hash when no files are crawled', async () => {
|
||||
root = await makeTempRepo({ 'image.png': '\x89PNG' });
|
||||
const result = await crawlRoot();
|
||||
// SHA-256 of an empty string
|
||||
expect(result.commitSha).toBe(sha256(''));
|
||||
});
|
||||
});
|
||||
275
src/lib/server/crawler/local.crawler.ts
Normal file
275
src/lib/server/crawler/local.crawler.ts
Normal file
@@ -0,0 +1,275 @@
|
||||
/**
|
||||
* Local Filesystem Crawler (TRUEREF-0004).
|
||||
*
|
||||
* Walks a directory tree and enumerates all files, applying the same
|
||||
* extension and size filters as the GitHub crawler (TRUEREF-0003).
|
||||
* Reads file contents as UTF-8 strings and computes SHA-256 checksums
|
||||
* for change detection.
|
||||
*
|
||||
* Design decisions:
|
||||
* - Uses Node.js `fs/promises` and `crypto` — no extra dependencies.
|
||||
* - Symlinks and special files (devices, sockets, FIFOs) are skipped.
|
||||
* - `trueref.json` / `context7.json` at the repo root are detected and
|
||||
* parsed before any other file filtering runs, matching the GitHub crawler.
|
||||
* - File size for filtering is taken from `stat().size` so the size limit
|
||||
* is applied before reading file content (saves I/O on large excluded files).
|
||||
* - `commitSha` is derived from a SHA-256 hash of all per-file checksums,
|
||||
* giving a deterministic fingerprint of the crawled file set.
|
||||
*/
|
||||
|
||||
import { execFile } from 'node:child_process';
|
||||
import { createHash } from 'node:crypto';
|
||||
import { promises as fs } from 'node:fs';
|
||||
import { tmpdir } from 'node:os';
|
||||
import { join } from 'node:path';
|
||||
import { promisify } from 'node:util';
|
||||
|
||||
import { detectLanguage, shouldIndexFile } from './file-filter.js';
|
||||
import { InvalidRefError, NotAGitRepositoryError } from './types.js';
|
||||
import type { CrawledFile, CrawlResult, RepoConfig } from './types.js';
|
||||
|
||||
const execFileAsync = promisify(execFile);
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Public options type
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
export interface LocalCrawlOptions {
  /** Absolute path to the repository root directory. */
  rootPath: string;
  /**
   * Git ref to check out before crawling — a tag name (e.g. "v2.1.0"),
   * a branch name, or a commit SHA. When provided the crawler creates an
   * isolated git worktree at that ref, crawls it, then removes the worktree.
   * The original working tree is never modified.
   * Requires `rootPath` to be inside a git repository.
   */
  ref?: string;
  /**
   * Pre-parsed trueref.json / context7.json configuration, if already loaded.
   * When supplied it takes precedence over any config file found at the root.
   */
  config?: RepoConfig;
  /**
   * Progress callback invoked after each file is processed — including files
   * whose read failed (those are logged and skipped, but still counted).
   */
  onProgress?: (processed: number, total: number) => void;
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Internal helpers
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/**
 * Names of config files that control include/exclude rules.
 * Only a root-level copy is honoured — matching is on the bare relative
 * path, so a nested "sub/trueref.json" never matches.
 */
const CONFIG_FILE_NAMES = new Set(['trueref.json', 'context7.json']);
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Git helpers
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/**
|
||||
* Run a git command inside `cwd` and return trimmed stdout.
|
||||
* Throws the child-process error on non-zero exit.
|
||||
*/
|
||||
async function runGit(cwd: string, args: string[]): Promise<string> {
|
||||
const { stdout } = await execFileAsync('git', ['-C', cwd, ...args], { encoding: 'utf-8' });
|
||||
return stdout.trim();
|
||||
}
|
||||
|
||||
/**
|
||||
* Compute a SHA-256 hex digest of a UTF-8 string.
|
||||
*/
|
||||
function computeSHA256(content: string): string {
|
||||
return createHash('sha256').update(content, 'utf-8').digest('hex');
|
||||
}
|
||||
|
||||
/**
|
||||
* Attempt to read and JSON-parse a config file.
|
||||
* Returns undefined if the file cannot be read or parsed.
|
||||
*/
|
||||
async function parseConfigFile(absPath: string): Promise<RepoConfig | undefined> {
|
||||
try {
|
||||
const raw = await fs.readFile(absPath, 'utf-8');
|
||||
return JSON.parse(raw) as RepoConfig;
|
||||
} catch {
|
||||
console.warn(`[LocalCrawler] Failed to parse config file: ${absPath}`);
|
||||
return undefined;
|
||||
}
|
||||
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// LocalCrawler
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
export class LocalCrawler {
|
||||
/**
|
||||
* Crawl a local directory tree and return structured file objects.
|
||||
*
|
||||
* When `options.ref` is supplied the crawler creates an isolated git
|
||||
* worktree checked out at that ref, crawls it, then removes the worktree.
|
||||
* The caller's working tree is never modified.
|
||||
*
|
||||
* @param options - Root path, optional git ref, optional config, and progress callback.
|
||||
* @returns CrawlResult with all read files and summary statistics.
|
||||
*/
|
||||
async crawl(options: LocalCrawlOptions): Promise<CrawlResult> {
|
||||
const { rootPath, ref } = options;
|
||||
|
||||
if (!ref) {
|
||||
// Fast path: crawl the working tree as-is.
|
||||
return this.crawlDirectory(rootPath, options.config, options.onProgress, 'local');
|
||||
}
|
||||
|
||||
// Git-aware path: verify repo, resolve ref, create worktree, crawl, clean up.
|
||||
let worktreePath: string | undefined;
|
||||
|
||||
try {
|
||||
// Verify rootPath is inside a git repository.
|
||||
await runGit(rootPath, ['rev-parse', '--git-dir']).catch(() => {
|
||||
throw new NotAGitRepositoryError(`Not a git repository: ${rootPath}`);
|
||||
});
|
||||
|
||||
// Resolve the ref to a concrete commit SHA (validates it exists).
|
||||
const commitSha = await runGit(rootPath, ['rev-parse', '--verify', ref]).catch(() => {
|
||||
throw new InvalidRefError(`Invalid git ref "${ref}" in repository: ${rootPath}`);
|
||||
});
|
||||
|
||||
// Create a temporary isolated worktree at the resolved ref.
|
||||
const tmpDir = await fs.mkdtemp(join(tmpdir(), 'trueref-wt-'));
|
||||
worktreePath = tmpDir;
|
||||
|
||||
await runGit(rootPath, ['worktree', 'add', '--detach', tmpDir, ref]).catch((err) => {
|
||||
throw new InvalidRefError(
|
||||
`Cannot create worktree for ref "${ref}": ${err instanceof Error ? err.message : String(err)}`
|
||||
);
|
||||
});
|
||||
|
||||
// Crawl the worktree and stamp the result with the git-resolved metadata.
|
||||
const result = await this.crawlDirectory(worktreePath, options.config, options.onProgress, ref);
|
||||
|
||||
return { ...result, commitSha };
|
||||
} finally {
|
||||
if (worktreePath) {
|
||||
// Remove the worktree (git also deletes the directory).
|
||||
await runGit(rootPath, ['worktree', 'remove', '--force', worktreePath]).catch(() => {
|
||||
// Best-effort; leave the temp directory for the OS to clean up.
|
||||
fs.rm(worktreePath!, { recursive: true, force: true }).catch(() => {});
|
||||
});
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Private — directory crawl
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/**
|
||||
* Walk `rootPath`, apply filters, read files, and build a CrawlResult.
|
||||
* `branch` is embedded verbatim into the returned result.
|
||||
*/
|
||||
private async crawlDirectory(
|
||||
rootPath: string,
|
||||
callerConfig: RepoConfig | undefined,
|
||||
onProgress: LocalCrawlOptions['onProgress'],
|
||||
branch: string
|
||||
): Promise<CrawlResult> {
|
||||
// Step 1: Walk the directory tree and collect (relPath, size) pairs.
|
||||
const statCache = new Map<string, number>();
|
||||
const allRelPaths = await this.walkDirectory(rootPath, '', statCache);
|
||||
|
||||
// Step 2: Detect trueref.json / context7.json at the repo root first.
|
||||
// Only root-level config files are honoured (no directory prefix).
|
||||
const configRelPath = allRelPaths.find((p) => CONFIG_FILE_NAMES.has(p));
|
||||
let config = callerConfig;
|
||||
if (configRelPath && !config) {
|
||||
config = await parseConfigFile(join(rootPath, configRelPath));
|
||||
}
|
||||
|
||||
// Step 3: Filter files according to extension, size, and config rules.
|
||||
const filteredPaths = allRelPaths.filter((relPath) => {
|
||||
const size = statCache.get(relPath) ?? 0;
|
||||
return shouldIndexFile(relPath, size, config);
|
||||
});
|
||||
|
||||
// Step 4: Read file contents and build CrawledFile records.
|
||||
const crawledFiles: CrawledFile[] = [];
|
||||
|
||||
for (const [i, relPath] of filteredPaths.entries()) {
|
||||
const absPath = join(rootPath, relPath);
|
||||
try {
|
||||
const content = await fs.readFile(absPath, 'utf-8');
|
||||
const sha = computeSHA256(content);
|
||||
crawledFiles.push({
|
||||
path: relPath,
|
||||
content,
|
||||
size: Buffer.byteLength(content, 'utf-8'),
|
||||
sha,
|
||||
language: detectLanguage(relPath)
|
||||
});
|
||||
} catch (err) {
|
||||
console.warn(
|
||||
`[LocalCrawler] Could not read file: ${relPath} — ${err instanceof Error ? err.message : String(err)}`
|
||||
);
|
||||
}
|
||||
onProgress?.(i + 1, filteredPaths.length);
|
||||
}
|
||||
|
||||
// Step 5: Build a deterministic repo-level fingerprint from file SHAs.
|
||||
const commitSha = computeSHA256(crawledFiles.map((f) => f.sha).join(''));
|
||||
|
||||
return {
|
||||
files: crawledFiles,
|
||||
totalFiles: filteredPaths.length,
|
||||
skippedFiles: allRelPaths.length - filteredPaths.length,
|
||||
branch,
|
||||
commitSha
|
||||
};
|
||||
}
|
||||
|
||||
/**
|
||||
* Recursively walk a directory and collect relative paths of all regular files.
|
||||
* Symlinks and special files (devices, sockets, FIFOs) are silently skipped.
|
||||
* Populates `statCache` with file sizes so the caller can filter without a
|
||||
* second `stat()` call.
|
||||
*
|
||||
* @param dir - Absolute path of the directory to read.
|
||||
* @param rel - Relative path prefix accumulated during recursion.
|
||||
* @param statCache - Mutable map from relative path → byte size.
|
||||
*/
|
||||
private async walkDirectory(
|
||||
dir: string,
|
||||
rel: string,
|
||||
statCache: Map<string, number>
|
||||
): Promise<string[]> {
|
||||
let entries;
|
||||
try {
|
||||
entries = await fs.readdir(dir, { withFileTypes: true });
|
||||
} catch {
|
||||
// Directory is unreadable (permissions, etc.) — skip silently.
|
||||
return [];
|
||||
}
|
||||
|
||||
const files: string[] = [];
|
||||
|
||||
for (const entry of entries) {
|
||||
// Only descend into plain directories and collect plain files.
|
||||
// entry.isFile() / entry.isDirectory() return false for symlinks,
|
||||
// devices, sockets, and FIFOs, so those are all implicitly skipped.
|
||||
if (!entry.isFile() && !entry.isDirectory()) continue;
|
||||
|
||||
const relPath = rel ? `${rel}/${entry.name}` : entry.name;
|
||||
|
||||
if (entry.isDirectory()) {
|
||||
const children = await this.walkDirectory(join(dir, entry.name), relPath, statCache);
|
||||
files.push(...children);
|
||||
} else {
|
||||
// Capture file size from stat so shouldIndexFile can enforce the limit
|
||||
// without reading the file.
|
||||
try {
|
||||
const stat = await fs.stat(join(dir, entry.name));
|
||||
statCache.set(relPath, stat.size);
|
||||
} catch {
|
||||
statCache.set(relPath, 0);
|
||||
}
|
||||
files.push(relPath);
|
||||
}
|
||||
}
|
||||
|
||||
return files;
|
||||
}
|
||||
}
|
||||
123
src/lib/server/crawler/rate-limiter.ts
Normal file
123
src/lib/server/crawler/rate-limiter.ts
Normal file
@@ -0,0 +1,123 @@
|
||||
/**
|
||||
* GitHub API rate-limit tracker and backoff helper (TRUEREF-0003).
|
||||
*
|
||||
* Reads X-RateLimit-* headers from every API response and pauses outgoing
|
||||
* requests when the remaining allowance drops to ≤ 10.
|
||||
*/
|
||||
|
||||
function sleep(ms: number): Promise<void> {
|
||||
return new Promise((resolve) => setTimeout(resolve, ms));
|
||||
}
|
||||
|
||||
export class GitHubRateLimiter {
|
||||
private remaining = 5000;
|
||||
private resetAt = Date.now();
|
||||
|
||||
/**
|
||||
* Update internal counters from the headers of a GitHub API response.
|
||||
*/
|
||||
updateFromHeaders(headers: Headers): void {
|
||||
const remaining = headers.get('X-RateLimit-Remaining');
|
||||
const reset = headers.get('X-RateLimit-Reset');
|
||||
|
||||
if (remaining !== null) {
|
||||
this.remaining = parseInt(remaining, 10);
|
||||
}
|
||||
if (reset !== null) {
|
||||
// GitHub returns a Unix epoch in seconds.
|
||||
this.resetAt = parseInt(reset, 10) * 1000;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* If the remaining allowance is critically low (≤ 10), sleep until the
|
||||
* rate-limit window resets (plus a 1 s buffer).
|
||||
*/
|
||||
async waitIfNeeded(): Promise<void> {
|
||||
if (this.remaining <= 10) {
|
||||
const waitMs = Math.max(0, this.resetAt - Date.now()) + 1000;
|
||||
await sleep(waitMs);
|
||||
}
|
||||
}
|
||||
|
||||
/** Remaining requests in the current window (for testing). */
|
||||
get remainingRequests(): number {
|
||||
return this.remaining;
|
||||
}
|
||||
|
||||
/** Reset timestamp as a Unix epoch in ms (for testing). */
|
||||
get resetTimestamp(): number {
|
||||
return this.resetAt;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Exponential-backoff retry wrapper for network-level errors.
|
||||
*
|
||||
* Retries up to `maxAttempts` times (default 3) with 1 s, 2 s, 4 s delays.
|
||||
*
|
||||
* @param fn - Async function to attempt.
|
||||
* @param maxAttempts - Maximum number of attempts (default 3).
|
||||
* @param isRetryable - Optional predicate; when it returns false for a given
|
||||
* error the error is re-thrown immediately without further
|
||||
* retries. Defaults to retrying all errors.
|
||||
*/
|
||||
export async function withRetry<T>(
|
||||
fn: () => Promise<T>,
|
||||
maxAttempts = 3,
|
||||
isRetryable: (err: unknown) => boolean = () => true
|
||||
): Promise<T> {
|
||||
let lastError: unknown;
|
||||
for (let attempt = 0; attempt < maxAttempts; attempt++) {
|
||||
try {
|
||||
return await fn();
|
||||
} catch (err) {
|
||||
if (!isRetryable(err)) throw err;
|
||||
lastError = err;
|
||||
if (attempt < maxAttempts - 1) {
|
||||
await sleep(1000 * Math.pow(2, attempt));
|
||||
}
|
||||
}
|
||||
}
|
||||
throw lastError;
|
||||
}
|
||||
|
||||
/**
|
||||
* Async semaphore — limits the number of concurrently executing promises.
|
||||
*/
|
||||
export class Semaphore {
|
||||
private count: number;
|
||||
private readonly queue: Array<() => void> = [];
|
||||
|
||||
constructor(concurrency: number) {
|
||||
this.count = concurrency;
|
||||
}
|
||||
|
||||
async acquire(): Promise<void> {
|
||||
if (this.count > 0) {
|
||||
this.count--;
|
||||
return;
|
||||
}
|
||||
return new Promise((resolve) => {
|
||||
this.queue.push(resolve);
|
||||
});
|
||||
}
|
||||
|
||||
release(): void {
|
||||
const next = this.queue.shift();
|
||||
if (next) {
|
||||
next();
|
||||
} else {
|
||||
this.count++;
|
||||
}
|
||||
}
|
||||
|
||||
async run<T>(fn: () => Promise<T>): Promise<T> {
|
||||
await this.acquire();
|
||||
try {
|
||||
return await fn();
|
||||
} finally {
|
||||
this.release();
|
||||
}
|
||||
}
|
||||
}
|
||||
135
src/lib/server/crawler/types.ts
Normal file
135
src/lib/server/crawler/types.ts
Normal file
@@ -0,0 +1,135 @@
|
||||
/**
|
||||
* Types for the GitHub repository crawler (TRUEREF-0003).
|
||||
*/
|
||||
|
||||
import type { TrueRefConfig } from '$lib/types';
|
||||
|
||||
// Re-export RepoConfig alias so crawler modules can reference it consistently.
// TrueRefConfig is the shape parsed from trueref.json / context7.json
// (e.g. excludeFiles) — see the app-wide $lib/types for the full definition.
export type RepoConfig = TrueRefConfig;
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Core crawler data types
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
export interface CrawledFile {
  /** Relative path within the repo, e.g. "src/index.ts" */
  path: string;
  /** UTF-8 file content */
  content: string;
  /** File size in bytes */
  size: number;
  /**
   * Content checksum: the GitHub blob SHA when produced by the GitHub
   * crawler, or a SHA-256 hex digest of the content when produced by
   * the local crawler.
   */
  sha: string;
  /** Programming language detected from extension */
  language: string;
}
|
||||
|
||||
export interface CrawlResult {
  /** Successfully downloaded/read files */
  files: CrawledFile[];
  /**
   * Total files that matched filters. May exceed files.length when some
   * reads failed (failed files are logged and skipped).
   */
  totalFiles: number;
  /** Files that were filtered out or too large */
  skippedFiles: number;
  /**
   * Branch, tag, or ref that was crawled; the local crawler uses the
   * literal label "local" for plain (non-git) directory crawls.
   */
  branch: string;
  /**
   * HEAD commit SHA for git-based crawls. For plain local crawls this is
   * instead a SHA-256 fingerprint derived from the per-file checksums.
   */
  commitSha: string;
}
|
||||
|
||||
export interface CrawlOptions {
  /** Repository owner (GitHub user or organization). */
  owner: string;
  /** Repository name. */
  repo: string;
  /** Branch, tag, or commit SHA; defaults to repo default branch */
  ref?: string;
  /** GitHub PAT for private repos */
  token?: string;
  /** Parsed trueref.json / context7.json configuration */
  config?: RepoConfig;
  /** Progress callback invoked after each file is processed */
  onProgress?: (processed: number, total: number) => void;
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// GitHub API response shapes (minimal — only fields we use)
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/** Subset of the GET /repos/{owner}/{repo} response that the crawler uses. */
export interface GitHubRepoResponse {
  default_branch: string;
  stargazers_count: number;
}

/** One entry of a git tree listing: a file ("blob") or a directory ("tree"). */
export interface GitHubTreeItem {
  path: string;
  type: 'blob' | 'tree';
  /** Byte size; GitHub omits this for tree entries. */
  size?: number;
  sha: string;
  url: string;
}

/** Subset of the GET /repos/{owner}/{repo}/git/trees/{sha} response. */
export interface GitHubTreeResponse {
  tree: GitHubTreeItem[];
  /** True when the repo tree exceeded GitHub's limit and the listing is incomplete. */
  truncated: boolean;
}

/** Subset of the GET /repos/{owner}/{repo}/contents/{path} response (file variant). */
export interface GitHubContentResponse {
  /** Encoded file content; interpret per `encoding` (typically "base64"). */
  content: string;
  encoding: string;
  size: number;
  sha: string;
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Domain errors
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/** Thrown when the requested repository cannot be found. */
export class RepositoryNotFoundError extends Error {
  readonly code = 'REPOSITORY_NOT_FOUND';
  constructor(message: string) {
    super(message);
    this.name = 'RepositoryNotFoundError';
  }
}

/** Thrown when authentication with the GitHub API fails. */
export class AuthenticationError extends Error {
  readonly code = 'AUTHENTICATION_ERROR';
  constructor(message: string) {
    super(message);
    this.name = 'AuthenticationError';
  }
}

/** Thrown when the authenticated identity lacks access to the repository. */
export class PermissionError extends Error {
  readonly code = 'PERMISSION_ERROR';
  constructor(message: string) {
    super(message);
    this.name = 'PermissionError';
  }
}

/** Thrown when the GitHub API rate limit is exhausted. */
export class RateLimitError extends Error {
  readonly code = 'RATE_LIMIT_ERROR';
  constructor(
    message: string,
    // Epoch timestamp when the window resets.
    // NOTE(review): confirm units (seconds vs ms) against the throwing site —
    // GitHubRateLimiter.resetTimestamp uses milliseconds.
    public readonly resetAt: number
  ) {
    super(message);
    this.name = 'RateLimitError';
  }
}

/** Thrown by LocalCrawler when a `ref` is given but rootPath is not inside a git repository. */
export class NotAGitRepositoryError extends Error {
  readonly code = 'NOT_A_GIT_REPOSITORY';
  constructor(message: string) {
    super(message);
    this.name = 'NotAGitRepositoryError';
  }
}

/** Thrown when a git ref cannot be resolved or checked out as a worktree. */
export class InvalidRefError extends Error {
  readonly code = 'INVALID_REF';
  constructor(message: string) {
    super(message);
    this.name = 'InvalidRefError';
  }
}
|
||||
Reference in New Issue
Block a user