From 73e10730dc41a07b724a162807eb657078c00935 Mon Sep 17 00:00:00 2001 From: Giancarmine Salucci Date: Wed, 13 May 2026 01:52:02 +0200 Subject: [PATCH] =?UTF-8?q?fix(extraction):=20don't=20use=20truncated=20Gr?= =?UTF-8?q?aphQL=20caption=20=E2=80=94=20fall=20through=20to=20DOM?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit If the GraphQL-intercepted caption ends with '….' (Instagram's truncation marker), skip it and fall through to HTML Section extraction which clicks the '… more' button in the DOM to get the complete, untruncated caption. Previously the 327-char truncated caption for DWWxiymssxE was returned immediately, causing the LLM to say 'no recipe' even though the full description had all ingredients and steps. Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- src/lib/server/extraction.ts | 18 ++++++++++++++---- 1 file changed, 14 insertions(+), 4 deletions(-) diff --git a/src/lib/server/extraction.ts b/src/lib/server/extraction.ts index 38ce12e..4060f6c 100644 --- a/src/lib/server/extraction.ts +++ b/src/lib/server/extraction.ts @@ -1386,9 +1386,14 @@ export async function extractTextAndThumbnail( }); await page.waitForTimeout(1000); - // If we intercepted a full caption, use it immediately - if (interceptedCaption) { - console.log('[Extractor] Using intercepted caption from network traffic'); + // Use intercepted GraphQL caption only if it is NOT truncated. + // Instagram truncates captions with "…." (U+2026 + "."). If that + // marker is present, fall through to HTML Section which will click + // "… more" in the DOM and get the complete text. + const TRUNCATED = '\u2026.'; + const capturedCaption = interceptedCaption as string | null; + if (capturedCaption && !capturedCaption.trimEnd().endsWith(TRUNCATED)) { + console.log('[Extractor] Using intercepted caption from network traffic (not truncated)'); const thumbnail = await extractThumbnailStealth(page, onProgress); onProgress?.({ type: 'complete', @@ -1396,7 +1401,12 @@ export async function extractTextAndThumbnail( method: 'graphql-intercept', timestamp: new Date().toISOString() }); - return { bodyText: cleanText(interceptedCaption), thumbnail }; + return { bodyText: cleanText(capturedCaption), thumbnail }; + } + if (capturedCaption) { + console.log( + `[Extractor] GraphQL caption truncated (${capturedCaption.length} chars, ends with "….") — falling through to DOM extraction` + ); } const result = await extractWithStrategies(url, page, context, onProgress);