- GitHub crawler with rate limiting, semaphore concurrency, retry logic
- File filtering by extension, size, and trueref.json rules
- Local filesystem crawler with SHA-256 checksums and progress callbacks
- Shared types and file filter logic between both crawlers

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
124 lines
2.9 KiB
TypeScript
124 lines
2.9 KiB
TypeScript
/**
 * GitHub API rate-limit tracker and backoff helper (TRUEREF-0003).
 *
 * Reads X-RateLimit-* headers from every API response and pauses outgoing
 * requests when the remaining allowance drops to ≤ 10.
 */
|
|
|
|
/**
 * Resolve after roughly `ms` milliseconds.
 *
 * @param ms - Delay in milliseconds, handed straight to `setTimeout`.
 * @returns A promise that settles (with no value) once the timer fires.
 */
function sleep(ms: number): Promise<void> {
  return new Promise<void>((resolve) => {
    setTimeout(resolve, ms);
  });
}
|
|
|
|
/**
 * Tracks the rate-limit state reported by API responses and pauses callers
 * when the remaining allowance is nearly exhausted.
 *
 * Typical use: call `waitIfNeeded()` before a request and
 * `updateFromHeaders()` with the headers of every response.
 */
export class GitHubRateLimiter {
  /** At or below this many remaining requests, `waitIfNeeded` pauses. */
  private static readonly LOW_WATERMARK = 10;

  /** Safety margin (ms) added on top of the time left until the reset. */
  private static readonly RESET_BUFFER_MS = 1000;

  /**
   * Requests left in the current window. Starts at an optimistic value that
   * is replaced as soon as a response carries `X-RateLimit-Remaining`.
   */
  private remaining = 5000;

  /** When the current window resets, as a Unix epoch in milliseconds. */
  private resetAt = Date.now();

  /**
   * Parse a header value as a base-10 integer.
   *
   * @returns The parsed number, or `null` when the header is absent or does
   *          not parse to a finite number (so callers keep their old state).
   */
  private static parseHeaderInt(raw: string | null): number | null {
    if (raw === null) return null;
    const parsed = parseInt(raw, 10);
    return Number.isFinite(parsed) ? parsed : null;
  }

  /**
   * Update internal counters from the headers of a GitHub API response.
   *
   * Missing or malformed headers leave the corresponding counter untouched;
   * storing `NaN` would make the `<=` check in `waitIfNeeded` always false
   * and silently disable throttling.
   *
   * @param headers - Response headers; `X-RateLimit-Remaining` and
   *                  `X-RateLimit-Reset` are read.
   */
  updateFromHeaders(headers: Headers): void {
    const remaining = GitHubRateLimiter.parseHeaderInt(headers.get('X-RateLimit-Remaining'));
    if (remaining !== null) {
      this.remaining = remaining;
    }

    const resetSeconds = GitHubRateLimiter.parseHeaderInt(headers.get('X-RateLimit-Reset'));
    if (resetSeconds !== null) {
      // GitHub returns a Unix epoch in seconds; store milliseconds.
      this.resetAt = resetSeconds * 1000;
    }
  }

  /**
   * If the remaining allowance is critically low (≤ 10), sleep until the
   * rate-limit window resets (plus a 1 s buffer).
   *
   * Waiting does not change the stored counters; they are only refreshed by
   * the next `updateFromHeaders` call.
   */
  async waitIfNeeded(): Promise<void> {
    if (this.remaining > GitHubRateLimiter.LOW_WATERMARK) {
      return;
    }
    // A reset time already in the past still incurs the buffer delay.
    const untilReset = Math.max(0, this.resetAt - Date.now());
    await sleep(untilReset + GitHubRateLimiter.RESET_BUFFER_MS);
  }

  /** Remaining requests in the current window (for testing). */
  get remainingRequests(): number {
    return this.remaining;
  }

  /** Reset timestamp as a Unix epoch in ms (for testing). */
  get resetTimestamp(): number {
    return this.resetAt;
  }
}
|
|
|
|
/**
 * Exponential-backoff retry wrapper for network-level errors.
 *
 * Calls `fn` up to `maxAttempts` times. After the n-th failed attempt
 * (1-based) it waits `1000 * 2^(n-1)` ms before trying again, i.e. 1 s, 2 s,
 * 4 s, …; with the default of 3 attempts the waits are 1 s and 2 s. No delay
 * follows the final attempt.
 *
 * @param fn          - Async function to attempt.
 * @param maxAttempts - Maximum number of attempts (default 3). Must be ≥ 1.
 * @param isRetryable - Optional predicate; when it returns false for a given
 *                      error the error is re-thrown immediately without
 *                      further retries. Defaults to retrying all errors.
 * @returns The value `fn` resolves with on its first successful attempt.
 * @throws RangeError when `maxAttempts` is less than 1 or `NaN` (previously
 *         this rejected with `undefined` without ever calling `fn`).
 * @throws The error from the last attempt once attempts are exhausted, or
 *         the first error that `isRetryable` rejects.
 */
export async function withRetry<T>(
  fn: () => Promise<T>,
  maxAttempts = 3,
  isRetryable: (err: unknown) => boolean = () => true
): Promise<T> {
  // Written as a negated `>=` so that NaN is rejected as well.
  if (!(maxAttempts >= 1)) {
    throw new RangeError(`maxAttempts must be at least 1, received ${maxAttempts}`);
  }

  for (let attempt = 1; ; attempt++) {
    try {
      return await fn();
    } catch (err) {
      // The predicate is consulted for every failure, including the last one.
      if (!isRetryable(err) || attempt >= maxAttempts) {
        throw err;
      }
      await sleep(1000 * 2 ** (attempt - 1));
    }
  }
}
|
|
|
|
/**
 * Async semaphore — limits the number of concurrently executing promises.
 *
 * Callers either wrap work in `run()` or pair `acquire()` with `release()`
 * manually. Waiters are served in the order they called `acquire()`.
 */
export class Semaphore {
  /** Number of slots currently free. */
  private count: number;

  /** Resolvers of pending `acquire()` calls, oldest first. */
  private readonly queue: Array<() => void> = [];

  /**
   * @param concurrency - Maximum number of holders at once; a positive integer.
   * @throws RangeError when `concurrency` is not a positive integer. Zero,
   *         negative or `NaN` would make every `run()` wait forever, and a
   *         fractional value would admit one holder more than intended.
   */
  constructor(concurrency: number) {
    if (!Number.isInteger(concurrency) || concurrency < 1) {
      throw new RangeError(
        `Semaphore concurrency must be a positive integer, received ${concurrency}`
      );
    }
    this.count = concurrency;
  }

  /**
   * Take a slot, waiting if none is free.
   *
   * @returns A promise that resolves once the caller holds a slot.
   */
  async acquire(): Promise<void> {
    if (this.count > 0) {
      this.count--;
      return;
    }
    // No free slot: park until release() hands one over.
    return new Promise<void>((resolve) => {
      this.queue.push(resolve);
    });
  }

  /**
   * Give a slot back. If someone is waiting, the slot is handed directly to
   * the oldest waiter (the free count stays unchanged); otherwise the free
   * count is incremented.
   */
  release(): void {
    const next = this.queue.shift();
    if (next) {
      next();
      return;
    }
    this.count++;
  }

  /**
   * Run `fn` while holding a slot, releasing it whether `fn` resolves or
   * rejects.
   *
   * @param fn - Async work to execute under the concurrency limit.
   * @returns Whatever `fn` resolves with; rejections from `fn` propagate.
   */
  async run<T>(fn: () => Promise<T>): Promise<T> {
    await this.acquire();
    try {
      return await fn();
    } finally {
      this.release();
    }
  }
}
|