diff --git a/src/lib/server/extraction.ts b/src/lib/server/extraction.ts index 38ce12e..4060f6c 100644 --- a/src/lib/server/extraction.ts +++ b/src/lib/server/extraction.ts @@ -1386,9 +1386,14 @@ export async function extractTextAndThumbnail( }); await page.waitForTimeout(1000); - // If we intercepted a full caption, use it immediately - if (interceptedCaption) { - console.log('[Extractor] Using intercepted caption from network traffic'); + // Use intercepted GraphQL caption only if it is NOT truncated. + // Instagram truncates captions with "…." (U+2026 + "."). If that + // marker is present, fall through to HTML Section which will click + // "… more" in the DOM and get the complete text. + const TRUNCATED = '\u2026.'; + const capturedCaption = interceptedCaption as string | null; + if (capturedCaption && !capturedCaption.trimEnd().endsWith(TRUNCATED)) { + console.log('[Extractor] Using intercepted caption from network traffic (not truncated)'); const thumbnail = await extractThumbnailStealth(page, onProgress); onProgress?.({ type: 'complete', @@ -1396,7 +1401,12 @@ export async function extractTextAndThumbnail( method: 'graphql-intercept', timestamp: new Date().toISOString() }); - return { bodyText: cleanText(interceptedCaption), thumbnail }; + return { bodyText: cleanText(capturedCaption), thumbnail }; + } + if (capturedCaption) { + console.log( + `[Extractor] GraphQL caption truncated (${capturedCaption.length} chars, ends with "….") — falling through to DOM extraction` + ); } const result = await extractWithStrategies(url, page, context, onProgress);