diff --git a/src/lib/server/extraction.ts b/src/lib/server/extraction.ts index 4060f6c..fb3cc54 100644 --- a/src/lib/server/extraction.ts +++ b/src/lib/server/extraction.ts @@ -1386,32 +1386,29 @@ export async function extractTextAndThumbnail( }); await page.waitForTimeout(1000); - // Use intercepted GraphQL caption only if it is NOT truncated. - // Instagram truncates captions with "…." (U+2026 + "."). If that - // marker is present, fall through to HTML Section which will click - // "… more" in the DOM and get the complete text. - const TRUNCATED = '\u2026.'; + // Always use DOM extraction (HTML Section) — it clicks "… more" in + // the browser and gets the fully expanded caption. The GraphQL + // interception is unreliable: Instagram often truncates captions + // in API responses without any "…." marker, so we cannot trust + // the intercepted text to be complete. const capturedCaption = interceptedCaption as string | null; - if (capturedCaption && !capturedCaption.trimEnd().endsWith(TRUNCATED)) { - console.log('[Extractor] Using intercepted caption from network traffic (not truncated)'); - const thumbnail = await extractThumbnailStealth(page, onProgress); - onProgress?.({ - type: 'complete', - message: 'Extraction completed via GraphQL interception', - method: 'graphql-intercept', - timestamp: new Date().toISOString() - }); - return { bodyText: cleanText(capturedCaption), thumbnail }; - } if (capturedCaption) { console.log( - `[Extractor] GraphQL caption truncated (${capturedCaption.length} chars, ends with "….") — falling through to DOM extraction` + `[Extractor] Intercepted GraphQL caption (${capturedCaption.length} chars) — always using DOM extraction for full text` ); } const result = await extractWithStrategies(url, page, context, onProgress); if (!result.success || !result.data) { + // DOM extraction failed — fall back to intercepted caption if available + if (capturedCaption) { + console.log( + '[Extractor] DOM extraction failed — using intercepted GraphQL caption as fallback' + ); + const thumbnail = await extractThumbnailStealth(page, onProgress); + return { bodyText: cleanText(capturedCaption), thumbnail }; + } throw new Error(result.error || 'Extraction failed'); }