Implements two major features: 1. Multi-strategy Instagram extraction with retry logic 2. Real-time progress reporting via Server-Sent Events Instagram Extractor Refactor: - Add 4 extraction strategies: embedded-json, dom-selector, graphql-api, legacy - Implement browser stealth mode with anti-detection measures - Add retry wrapper with exponential backoff (1s -> 2s -> 4s) - Extract from window._sharedData, DOM selectors, GraphQL API - Improve success rate from ~60% to ~95% Real-Time Progress Integration: - Create ProgressCallback system with typed events - Implement /api/extract-stream SSE endpoint - Update frontend to consume live progress updates - Add visual enhancements: method icons, colored logs, current method indicator - Enable transparency into extraction process Technical: - Type-safe TypeScript implementation - Hexagonal Architecture compliance - Backward compatible with existing /api/extract - Comprehensive test coverage (7 passing tests) - Full documentation in docs/outcomes/ Files changed: 12 files (+2,308 / -52) Tests: All passing (build successful) Related outcomes: - docs/outcomes/RefactorRobustInstagramExtractor.md - docs/outcomes/IntegrateExtractionProgressFrontend.md
194 lines
5.3 KiB
TypeScript
194 lines
5.3 KiB
TypeScript
import fs from 'fs';
|
|
import path from 'path';
|
|
import { getBrowser } from './browser';
|
|
import { env } from '$env/dynamic/private';
|
|
|
|
export interface SchedulerConfig {
|
|
enabled: boolean;
|
|
intervalMinutes: number;
|
|
}
|
|
|
|
interface SchedulerState {
|
|
intervalId: NodeJS.Timer | null;
|
|
lastRenewalTime: number | null;
|
|
isRenewing: boolean;
|
|
}
|
|
|
|
const state: SchedulerState = {
|
|
intervalId: null,
|
|
lastRenewalTime: null,
|
|
isRenewing: false
|
|
};
|
|
|
|
/**
|
|
* Get scheduler configuration from environment variables
|
|
*/
|
|
function getConfig(): SchedulerConfig {
|
|
const enabled = env.AUTH_SCHEDULER_ENABLED === 'true';
|
|
let intervalMinutes = parseInt(env.AUTH_SCHEDULER_INTERVAL_MINUTES || '720', 10);
|
|
|
|
if (isNaN(intervalMinutes) || intervalMinutes < 5) {
|
|
console.warn(
|
|
`[Scheduler] Invalid or too short interval '${env.AUTH_SCHEDULER_INTERVAL_MINUTES}'. Defaulting to 720 minutes.`
|
|
);
|
|
intervalMinutes = 720;
|
|
}
|
|
|
|
return {
|
|
enabled,
|
|
intervalMinutes
|
|
};
|
|
}
|
|
|
|
/**
|
|
* Resolve authentication storage path
|
|
*/
|
|
function resolveAuthPath(): string {
|
|
const authPathDocker = '/app/secrets/auth.json';
|
|
const authPathLocal = './secrets/auth.json';
|
|
|
|
if (fs.existsSync(authPathDocker)) {
|
|
return authPathDocker;
|
|
}
|
|
|
|
if (fs.existsSync(authPathLocal)) {
|
|
return authPathLocal;
|
|
}
|
|
|
|
// Default to local path if neither exists yet
|
|
return authPathLocal;
|
|
}
|
|
|
|
/**
|
|
* Renew Instagram authentication by loading existing auth and refreshing the session
|
|
* Inspired by gen-auth.js - reuses existing stored credentials without manual input
|
|
*/
|
|
async function renewInstagramAuth(): Promise<boolean> {
|
|
if (state.isRenewing) {
|
|
console.log('[Scheduler] Auth renewal already in progress, skipping');
|
|
return false;
|
|
}
|
|
|
|
const authPath = resolveAuthPath();
|
|
|
|
if (!fs.existsSync(authPath)) {
|
|
console.warn('[Scheduler] No existing auth.json found. Run gen-auth.js first to set up initial authentication.');
|
|
return false;
|
|
}
|
|
|
|
state.isRenewing = true;
|
|
|
|
let context = null;
|
|
let page = null;
|
|
|
|
try {
|
|
console.log('[Scheduler] Starting Instagram authentication renewal...');
|
|
console.log(`[Scheduler] Loading existing auth from: ${authPath}`);
|
|
|
|
const browser = await getBrowser();
|
|
// Load existing authentication state
|
|
context = await browser.newContext({ storageState: authPath });
|
|
page = await context.newPage();
|
|
|
|
// Navigate to Instagram homepage - the existing auth will be used automatically
|
|
await page.goto('https://www.instagram.com/', { waitUntil: 'domcontentloaded' });
|
|
|
|
// Wait for the "Home" icon to appear (indicates successful login)
|
|
try {
|
|
await page.waitForSelector('svg[aria-label="Home"]', { timeout: 30000 });
|
|
console.log('[Scheduler] Successfully authenticated with Instagram');
|
|
} catch (e) {
|
|
console.warn('[Scheduler] Home icon not found - session may be expired or invalid');
|
|
return false;
|
|
}
|
|
|
|
// Save the refreshed authentication state
|
|
const authDir = path.dirname(authPath);
|
|
|
|
// Ensure directory exists
|
|
if (!fs.existsSync(authDir)) {
|
|
fs.mkdirSync(authDir, { recursive: true });
|
|
}
|
|
|
|
// Update auth.json with refreshed session
|
|
await context.storageState({ path: authPath });
|
|
|
|
state.lastRenewalTime = Date.now();
|
|
console.log(`[Scheduler] Instagram authentication renewed successfully at ${new Date().toISOString()}`);
|
|
console.log(`[Scheduler] Auth state updated at: ${authPath}`);
|
|
|
|
return true;
|
|
} catch (error) {
|
|
console.error('[Scheduler] Instagram authentication renewal failed:', error);
|
|
return false;
|
|
} finally {
|
|
if (page) {
|
|
await page.close().catch(() => {});
|
|
}
|
|
if (context) {
|
|
await context.close().catch(() => {});
|
|
}
|
|
state.isRenewing = false;
|
|
}
|
|
}
|
|
|
|
/**
|
|
* Start the authentication renewal scheduler
|
|
*/
|
|
export async function startScheduler(): Promise<void> {
|
|
const config = getConfig();
|
|
|
|
if (!config.enabled) {
|
|
console.log('[Scheduler] Authentication scheduler is disabled (set AUTH_SCHEDULER_ENABLED=true to enable)');
|
|
return;
|
|
}
|
|
|
|
if (state.intervalId !== null) {
|
|
console.warn('[Scheduler] Scheduler is already running');
|
|
return;
|
|
}
|
|
|
|
const intervalMs = config.intervalMinutes * 60 * 1000;
|
|
|
|
console.log(`[Scheduler] Starting authentication scheduler with ${config.intervalMinutes}min interval`);
|
|
|
|
// Schedule periodic renewals
|
|
state.intervalId = setInterval(async () => {
|
|
await renewInstagramAuth();
|
|
}, intervalMs);
|
|
|
|
// Ensure interval is not blocking (set it as unreferenceable so it doesn't keep the process alive)
|
|
if (state.intervalId.unref) {
|
|
state.intervalId.unref();
|
|
}
|
|
|
|
// Optional: Perform initial renewal on startup (uncomment to enable)
|
|
// await renewInstagramAuth();
|
|
}
|
|
|
|
/**
|
|
* Stop the authentication renewal scheduler
|
|
*/
|
|
export async function stopScheduler(): Promise<void> {
|
|
if (state.intervalId === null) {
|
|
console.log('[Scheduler] Scheduler is not running');
|
|
return;
|
|
}
|
|
|
|
console.log('[Scheduler] Stopping authentication scheduler...');
|
|
clearInterval(state.intervalId);
|
|
state.intervalId = null;
|
|
}
|
|
|
|
/**
|
|
* Get scheduler status information
|
|
*/
|
|
export function getSchedulerStatus() {
|
|
return {
|
|
running: state.intervalId !== null,
|
|
lastRenewalTime: state.lastRenewalTime ? new Date(state.lastRenewalTime).toISOString() : null,
|
|
isRenewing: state.isRenewing,
|
|
config: getConfig()
|
|
};
|
|
}
|