diff --git a/src/lib/server/queue/QueueProcessor.ts b/src/lib/server/queue/QueueProcessor.ts index 80cf957..f7d8d2b 100644 --- a/src/lib/server/queue/QueueProcessor.ts +++ b/src/lib/server/queue/QueueProcessor.ts @@ -23,52 +23,17 @@ import { env } from '$env/dynamic/private'; import type { ProgressEvent, ExtractedContent, ProgressCallback } from '$lib/server/extraction'; import type { QueueItem } from './types'; -// Returns true when yt-dlp returns a caption that Instagram truncated server-side. -// Truncated captions end with the Unicode ellipsis character (…), optionally -// followed by a period: "…." or just "…". -function isCaptionTruncated(text: string): boolean { - const t = text.trimEnd(); - return t.endsWith('…') || t.endsWith('….') || t.endsWith('...'); -} - // Feature flag: pick which Instagram extractor backend to invoke. -// Default to yt-dlp (fast, no browser); set EXTRACTOR_BACKEND=playwright to -// always use the stealth browser scraper. -// When yt-dlp is the primary backend, a truncated caption (ending with "…") -// automatically triggers a Playwright fallback to get the full text. -const extractTextAndThumbnail = async ( +// Default to yt-dlp; set EXTRACTOR_BACKEND=playwright to fall back to the +// legacy stealth scraper while we verify the new path. +const extractTextAndThumbnail = ( url: string, cb?: ProgressCallback ): Promise<ExtractedContent> => { const backend = (env.EXTRACTOR_BACKEND ?? 
'ytdlp').toLowerCase(); - if (backend === 'playwright') { - return extractWithPlaywright(url, cb); - } - - // yt-dlp primary path - const result = await extractWithYtDlp(url, cb); - - if (isCaptionTruncated(result.bodyText)) { - cb?.({ - type: 'status', - message: 'Caption truncated by Instagram — retrying with browser to get full text…', - timestamp: new Date().toISOString() - }); - try { - const full = await extractWithPlaywright(url, cb); - if (full.bodyText.length > result.bodyText.length) { - return full; - } - } catch (e) { - cb?.({ - type: 'status', - message: 'Browser fallback failed — continuing with truncated caption', - timestamp: new Date().toISOString() - }); - } - } - - return result; + return backend === 'playwright' + ? extractWithPlaywright(url, cb) + : extractWithYtDlp(url, cb); }; /**