feat(TRUEREF-0003-0004): implement GitHub and local filesystem crawlers
- GitHub crawler with rate limiting, semaphore concurrency, retry logic
- File filtering by extension, size, and trueref.json rules
- Local filesystem crawler with SHA-256 checksums and progress callbacks
- Shared types and file filter logic between both crawlers

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
123
src/lib/server/crawler/rate-limiter.ts
Normal file
123
src/lib/server/crawler/rate-limiter.ts
Normal file
@@ -0,0 +1,123 @@
|
||||
/**
|
||||
* GitHub API rate-limit tracker and backoff helper (TRUEREF-0003).
|
||||
*
|
||||
* Reads X-RateLimit-* headers from every API response and pauses outgoing
|
||||
* requests when the remaining allowance drops to ≤ 10.
|
||||
*/
|
||||
|
||||
/**
 * Resolve after roughly `ms` milliseconds.
 *
 * @param ms - Delay handed to `setTimeout`, in milliseconds.
 * @returns A promise that fulfils (with no value) once the timer fires.
 */
function sleep(ms: number): Promise<void> {
  return new Promise<void>((done) => {
    setTimeout(done, ms);
  });
}
|
||||
|
||||
/**
 * Tracks the GitHub API rate-limit allowance reported by response headers
 * and pauses callers when that allowance is almost exhausted.
 */
export class GitHubRateLimiter {
  /** Requests left in the current window; optimistic default until a response reports a real value. */
  private remaining = 5000;

  /** Time at which the current window resets, as a Unix epoch in milliseconds. */
  private resetAt = Date.now();

  /**
   * Parse a header value as a base-10 integer.
   *
   * @param value - Raw header value, or `null` when the header is absent.
   * @returns The parsed integer, or `null` when the header is absent or not numeric.
   */
  private static parseHeaderInt(value: string | null): number | null {
    if (value === null) {
      return null;
    }
    const parsed = Number.parseInt(value, 10);
    return Number.isNaN(parsed) ? null : parsed;
  }

  /**
   * Update internal counters from the headers of a GitHub API response.
   *
   * Missing or non-numeric `X-RateLimit-Remaining` / `X-RateLimit-Reset`
   * values are ignored, so a malformed response can never store `NaN` and
   * thereby disable throttling (`NaN <= 10` is always false).
   *
   * @param headers - Headers of the response that was just received.
   */
  updateFromHeaders(headers: Headers): void {
    const remaining = GitHubRateLimiter.parseHeaderInt(headers.get('X-RateLimit-Remaining'));
    const resetSeconds = GitHubRateLimiter.parseHeaderInt(headers.get('X-RateLimit-Reset'));

    if (remaining !== null) {
      this.remaining = remaining;
    }
    if (resetSeconds !== null) {
      // GitHub returns a Unix epoch in seconds; store milliseconds.
      this.resetAt = resetSeconds * 1000;
    }
  }

  /**
   * If the remaining allowance is critically low (≤ 10), sleep until the
   * rate-limit window resets (plus a 1 s buffer). Otherwise return at once.
   */
  async waitIfNeeded(): Promise<void> {
    if (this.remaining <= 10) {
      // Clamp at zero so a reset time already in the past yields only the buffer.
      const waitMs = Math.max(0, this.resetAt - Date.now()) + 1000;
      await sleep(waitMs);
    }
  }

  /** Remaining requests in the current window (for testing). */
  get remainingRequests(): number {
    return this.remaining;
  }

  /** Reset timestamp as a Unix epoch in ms (for testing). */
  get resetTimestamp(): number {
    return this.resetAt;
  }
}
|
||||
|
||||
/**
 * Exponential-backoff retry wrapper for network-level errors.
 *
 * Calls `fn` up to `maxAttempts` times. After a failed attempt that is not
 * the last one it waits before retrying; the delay starts at 1 s and doubles
 * each time (1 s, 2 s, 4 s, …). With the default of 3 attempts there are
 * therefore at most two waits (1 s and 2 s).
 *
 * @param fn - Async function to attempt.
 * @param maxAttempts - Maximum number of attempts (default 3). Must be
 *                      greater than 0.
 * @param isRetryable - Optional predicate; when it returns false for a given
 *                      error the error is re-thrown immediately without further
 *                      retries. Defaults to retrying all errors.
 * @returns The value produced by the first successful call of `fn`.
 * @throws RangeError when `maxAttempts` is not greater than 0 (including NaN);
 *         `fn` is not called in that case.
 * @throws The error of the last attempt once all attempts have failed, or
 *         the first error rejected by `isRetryable`.
 */
export async function withRetry<T>(
  fn: () => Promise<T>,
  maxAttempts = 3,
  isRetryable: (err: unknown) => boolean = () => true
): Promise<T> {
  // Without this guard the loop below would never run and the function
  // would reject with `undefined`, hiding the misconfiguration.
  if (!(maxAttempts > 0)) {
    throw new RangeError(`maxAttempts must be greater than 0, got ${maxAttempts}`);
  }

  let lastError: unknown;
  for (let attempt = 0; attempt < maxAttempts; attempt++) {
    try {
      return await fn();
    } catch (err) {
      if (!isRetryable(err)) {
        throw err;
      }
      lastError = err;

      // No point in waiting after the final attempt.
      const isLastAttempt = attempt >= maxAttempts - 1;
      if (!isLastAttempt) {
        await sleep(1000 * 2 ** attempt);
      }
    }
  }
  throw lastError;
}
|
||||
|
||||
/**
 * Async semaphore — limits the number of concurrently executing promises.
 *
 * Permits are handed out by `acquire()` and returned by `release()`;
 * callers that find no free permit wait in FIFO order.
 */
export class Semaphore {
  /** Number of permits currently available. */
  private count: number;

  /** Resolvers of callers waiting for a permit, oldest first. */
  private readonly queue: Array<() => void> = [];

  /**
   * @param concurrency - Maximum number of simultaneous permit holders.
   *                      Must be greater than 0.
   * @throws RangeError when `concurrency` is not greater than 0 (including
   *         NaN); such a semaphore could never grant a permit, so every
   *         `acquire()` would wait forever.
   */
  constructor(concurrency: number) {
    if (!(concurrency > 0)) {
      throw new RangeError(`concurrency must be greater than 0, got ${concurrency}`);
    }
    this.count = concurrency;
  }

  /**
   * Take a permit, waiting until one is released if none is free.
   *
   * @returns A promise that fulfils once the caller holds a permit.
   */
  async acquire(): Promise<void> {
    if (this.count > 0) {
      this.count--;
      return;
    }
    return new Promise<void>((resolve) => {
      this.queue.push(resolve);
    });
  }

  /**
   * Return a permit. If a caller is waiting, the permit is handed straight
   * to the oldest waiter; otherwise it goes back into the pool.
   */
  release(): void {
    const next = this.queue.shift();
    if (next) {
      // Hand-off: the available count stays unchanged.
      next();
    } else {
      this.count++;
    }
  }

  /**
   * Run `fn` while holding a permit, releasing it afterwards whether `fn`
   * fulfils or rejects.
   *
   * @param fn - Async work to execute under the concurrency limit.
   * @returns Whatever `fn` resolves to; rejections from `fn` propagate.
   */
  async run<T>(fn: () => Promise<T>): Promise<T> {
    await this.acquire();
    try {
      return await fn();
    } finally {
      this.release();
    }
  }
}
|
||||
Reference in New Issue
Block a user