Revert "feat: auto Playwright fallback when yt-dlp caption is truncated"
All checks were successful
Build & Push Docker Image / test-and-build (push) Successful in 1m3s

This reverts commit 8c25bce400.
This commit is contained in:
Giancarmine Salucci
2026-05-12 23:49:34 +02:00
parent 8c25bce400
commit 10c4f78ace

View File

@@ -23,52 +23,17 @@ import { env } from '$env/dynamic/private';
import type { ProgressEvent, ExtractedContent, ProgressCallback } from '$lib/server/extraction'; import type { ProgressEvent, ExtractedContent, ProgressCallback } from '$lib/server/extraction';
import type { QueueItem } from './types'; import type { QueueItem } from './types';
/**
 * Reports whether a caption returned by yt-dlp looks like it was cut short
 * (Instagram truncates long captions server-side).
 *
 * Trailing whitespace is ignored. The caption is treated as truncated when it
 * ends with any of these markers:
 *   - the Unicode ellipsis character ("…"),
 *   - the Unicode ellipsis followed by a period ("…."),
 *   - three ASCII dots ("...").
 *
 * @param text - Caption text to inspect.
 * @returns `true` when the trimmed caption ends with a truncation marker.
 */
function isCaptionTruncated(text: string): boolean {
  const truncationMarkers = ['…', '….', '...'];
  const caption = text.trimEnd();
  return truncationMarkers.some((marker) => caption.endsWith(marker));
}
// Feature flag: pick which Instagram extractor backend to invoke. // Feature flag: pick which Instagram extractor backend to invoke.
// Default to yt-dlp (fast, no browser); set EXTRACTOR_BACKEND=playwright to // Default to yt-dlp; set EXTRACTOR_BACKEND=playwright to fall back to the
// always use the stealth browser scraper. // legacy stealth scraper while we verify the new path.
// When yt-dlp is the primary backend, a truncated caption (ending with "…") const extractTextAndThumbnail = (
// automatically triggers a Playwright fallback to get the full text.
const extractTextAndThumbnail = async (
url: string, url: string,
cb?: ProgressCallback cb?: ProgressCallback
): Promise<ExtractedContent> => { ): Promise<ExtractedContent> => {
const backend = (env.EXTRACTOR_BACKEND ?? 'ytdlp').toLowerCase(); const backend = (env.EXTRACTOR_BACKEND ?? 'ytdlp').toLowerCase();
if (backend === 'playwright') { return backend === 'playwright'
return extractWithPlaywright(url, cb); ? extractWithPlaywright(url, cb)
} : extractWithYtDlp(url, cb);
// yt-dlp primary path
const result = await extractWithYtDlp(url, cb);
if (isCaptionTruncated(result.bodyText)) {
cb?.({
type: 'status',
message: 'Caption truncated by Instagram — retrying with browser to get full text…',
timestamp: new Date().toISOString()
});
try {
const full = await extractWithPlaywright(url, cb);
if (full.bodyText.length > result.bodyText.length) {
return full;
}
} catch (e) {
cb?.({
type: 'status',
message: 'Browser fallback failed — continuing with truncated caption',
timestamp: new Date().toISOString()
});
}
}
return result;
}; };
/** /**