Files
insta-recipe/src/lib/server/scheduler.ts
Giancarmine Salucci 8fc7c44943 feat: robust Instagram extractor with real-time progress tracking
Implements two major features:
1. Multi-strategy Instagram extraction with retry logic
2. Real-time progress reporting via Server-Sent Events

Instagram Extractor Refactor:
- Add 4 extraction strategies: embedded-json, dom-selector, graphql-api, legacy
- Implement browser stealth mode with anti-detection measures
- Add retry wrapper with exponential backoff (1s -> 2s -> 4s)
- Extract from window._sharedData, DOM selectors, GraphQL API
- Improve success rate from ~60% to ~95%

Real-Time Progress Integration:
- Create ProgressCallback system with typed events
- Implement /api/extract-stream SSE endpoint
- Update frontend to consume live progress updates
- Add visual enhancements: method icons, colored logs, current method indicator
- Enable transparency into extraction process

Technical:
- Type-safe TypeScript implementation
- Hexagonal Architecture compliance
- Backward compatible with existing /api/extract
- Comprehensive test coverage (7 passing tests)
- Full documentation in docs/outcomes/

Files changed: 12 files (+2,308 / -52)
Tests: All passing (build successful)

Related outcomes:
- docs/outcomes/RefactorRobustInstagramExtractor.md
- docs/outcomes/IntegrateExtractionProgressFrontend.md
2025-12-21 03:14:17 +01:00

194 lines
5.3 KiB
TypeScript

import fs from 'fs';
import path from 'path';
import { getBrowser } from './browser';
import { env } from '$env/dynamic/private';
/**
 * Settings that control the authentication renewal scheduler.
 */
export interface SchedulerConfig {
  /** Whether the periodic renewal should run at all. */
  enabled: boolean;

  /** Delay between two renewal attempts, expressed in minutes. */
  intervalMinutes: number;
}
/**
 * Mutable bookkeeping for the scheduler, held in one module-level object.
 */
interface SchedulerState {
  /**
   * Handle of the registered renewal interval, or `null` while no interval is
   * registered. Typed as `NodeJS.Timeout` (the handle type Node's `setInterval`
   * returns and `clearInterval` accepts) instead of the legacy `NodeJS.Timer`.
   */
  intervalId: NodeJS.Timeout | null;

  /** `Date.now()` timestamp (epoch milliseconds) of the last successful renewal, or `null` if none yet. */
  lastRenewalTime: number | null;

  /** `true` while a renewal is running; checked to skip overlapping runs. */
  isRenewing: boolean;
}

/** Scheduler state shared by the functions in this module; starts stopped and idle. */
const state: SchedulerState = {
  intervalId: null,
  lastRenewalTime: null,
  isRenewing: false
};
/**
 * Read the scheduler configuration from environment variables.
 *
 * - `AUTH_SCHEDULER_ENABLED`: the scheduler is enabled only when this is exactly `'true'`.
 * - `AUTH_SCHEDULER_INTERVAL_MINUTES`: renewal interval in minutes (720 when unset or empty).
 *
 * The interval falls back to 720 minutes when the value is not a number, is
 * shorter than 5 minutes, or is so long that the equivalent delay in
 * milliseconds would exceed the largest delay Node.js timers support.
 *
 * @returns The effective configuration; `intervalMinutes` is always within
 *   the range that can safely be converted to a timer delay.
 */
function getConfig(): SchedulerConfig {
  const defaultIntervalMinutes = 720;
  const minIntervalMinutes = 5;
  // Node.js timers replace any delay above 2^31 - 1 ms with 1 ms, which would
  // turn an oversized interval into a near-continuous renewal loop.
  const maxIntervalMinutes = Math.floor((2 ** 31 - 1) / (60 * 1000));

  const enabled = env.AUTH_SCHEDULER_ENABLED === 'true';
  const rawInterval = env.AUTH_SCHEDULER_INTERVAL_MINUTES;

  // `||` (not `??`) so that an empty string also selects the default.
  let intervalMinutes = parseInt(rawInterval || String(defaultIntervalMinutes), 10);

  if (Number.isNaN(intervalMinutes) || intervalMinutes < minIntervalMinutes) {
    console.warn(
      `[Scheduler] Invalid or too short interval '${rawInterval}'. Defaulting to 720 minutes.`
    );
    intervalMinutes = defaultIntervalMinutes;
  } else if (intervalMinutes > maxIntervalMinutes) {
    console.warn(
      `[Scheduler] Interval '${rawInterval}' exceeds the maximum of ${maxIntervalMinutes} minutes. Defaulting to 720 minutes.`
    );
    intervalMinutes = defaultIntervalMinutes;
  }

  return {
    enabled,
    intervalMinutes
  };
}
/**
 * Pick the file used to persist the browser's authentication state.
 *
 * The Docker location wins when that file exists. In every other case the
 * relative local path is returned, even if nothing has been written there yet.
 *
 * @returns Path of the auth storage file to read from and write to.
 */
function resolveAuthPath(): string {
  const dockerAuthFile = '/app/secrets/auth.json';
  const localAuthFile = './secrets/auth.json';

  return fs.existsSync(dockerAuthFile) ? dockerAuthFile : localAuthFile;
}
/**
 * Refresh the stored Instagram session without any manual input.
 *
 * Opens a browser context seeded with the existing auth file, loads the
 * Instagram homepage, and, once the "Home" icon shows up, writes the
 * context's storage state back to the same file. The approach follows
 * gen-auth.js, except that it reuses the credentials already on disk.
 *
 * @returns `true` when the session was confirmed and the auth file rewritten;
 *   `false` when a renewal is already running, no auth file exists, the Home
 *   icon does not appear within 30 seconds, or any step throws.
 */
async function renewInstagramAuth(): Promise<boolean> {
  // Only one renewal may run at a time.
  if (state.isRenewing) {
    console.log('[Scheduler] Auth renewal already in progress, skipping');
    return false;
  }

  const storagePath = resolveAuthPath();
  const hasStoredAuth = fs.existsSync(storagePath);
  if (!hasStoredAuth) {
    console.warn('[Scheduler] No existing auth.json found. Run gen-auth.js first to set up initial authentication.');
    return false;
  }

  state.isRenewing = true;

  // Declared outside the try so the finally clause can release them.
  let session = null;
  let tab = null;

  try {
    console.log('[Scheduler] Starting Instagram authentication renewal...');
    console.log(`[Scheduler] Loading existing auth from: ${storagePath}`);

    const browser = await getBrowser();

    // Seed the context with the saved session so no login form is involved.
    session = await browser.newContext({ storageState: storagePath });
    tab = await session.newPage();

    await tab.goto('https://www.instagram.com/', { waitUntil: 'domcontentloaded' });

    // The "Home" icon is treated as the sign of a logged-in session.
    let loggedIn = true;
    try {
      await tab.waitForSelector('svg[aria-label="Home"]', { timeout: 30000 });
    } catch {
      loggedIn = false;
    }

    if (!loggedIn) {
      console.warn('[Scheduler] Home icon not found - session may be expired or invalid');
      return false;
    }
    console.log('[Scheduler] Successfully authenticated with Instagram');

    // Make sure the target directory is there before persisting the state.
    const storageDir = path.dirname(storagePath);
    if (!fs.existsSync(storageDir)) {
      fs.mkdirSync(storageDir, { recursive: true });
    }

    // Overwrite auth.json with the refreshed session.
    await session.storageState({ path: storagePath });
    state.lastRenewalTime = Date.now();

    console.log(`[Scheduler] Instagram authentication renewed successfully at ${new Date().toISOString()}`);
    console.log(`[Scheduler] Auth state updated at: ${storagePath}`);
    return true;
  } catch (error) {
    console.error('[Scheduler] Instagram authentication renewal failed:', error);
    return false;
  } finally {
    // Best-effort cleanup: page first, then its context; close errors are ignored.
    await tab?.close().catch(() => {});
    await session?.close().catch(() => {});
    state.isRenewing = false;
  }
}
/**
 * Begin renewing the Instagram session on a fixed interval.
 *
 * Only logs and returns when the scheduler is disabled by configuration or
 * when an interval is already registered. No renewal is triggered by this call
 * itself; the first one runs when the interval first fires.
 */
export async function startScheduler(): Promise<void> {
  const { enabled, intervalMinutes } = getConfig();

  if (!enabled) {
    console.log('[Scheduler] Authentication scheduler is disabled (set AUTH_SCHEDULER_ENABLED=true to enable)');
    return;
  }

  if (state.intervalId !== null) {
    console.warn('[Scheduler] Scheduler is already running');
    return;
  }

  console.log(`[Scheduler] Starting authentication scheduler with ${intervalMinutes}min interval`);

  // Register the periodic renewal; the delay is the configured interval in milliseconds.
  state.intervalId = setInterval(() => {
    void renewInstagramAuth();
  }, intervalMinutes * 60 * 1000);

  // Unref the timer so a pending renewal never keeps the process alive on its own.
  const timer = state.intervalId;
  if (timer.unref) {
    timer.unref();
  }

  // Optional: perform an initial renewal on startup (uncomment to enable)
  // await renewInstagramAuth();
}
/**
 * Cancel the periodic authentication renewal.
 *
 * Clears the registered interval and resets the stored handle to `null`.
 * When no interval is registered it only logs that fact.
 */
export async function stopScheduler(): Promise<void> {
  const activeTimer = state.intervalId;

  if (activeTimer === null) {
    console.log('[Scheduler] Scheduler is not running');
    return;
  }

  console.log('[Scheduler] Stopping authentication scheduler...');
  clearInterval(activeTimer);
  state.intervalId = null;
}
/**
 * Snapshot of the scheduler for status reporting.
 *
 * @returns An object with:
 *   - `running`: whether an interval is currently registered;
 *   - `lastRenewalTime`: ISO-8601 string of the last successful renewal, or `null` if none is recorded;
 *   - `isRenewing`: whether a renewal is in progress right now;
 *   - `config`: the configuration as currently read from the environment.
 */
export function getSchedulerStatus() {
  const { intervalId, lastRenewalTime, isRenewing } = state;

  const lastRenewalIso = lastRenewalTime ? new Date(lastRenewalTime).toISOString() : null;

  return {
    running: intervalId !== null,
    lastRenewalTime: lastRenewalIso,
    isRenewing,
    config: getConfig()
  };
}