Revert "feat: auto Playwright fallback when yt-dlp caption is truncated"
All checks were successful
Build & Push Docker Image / test-and-build (push) Successful in 1m3s

This reverts commit 8c25bce400.
This commit is contained in:
Giancarmine Salucci
2026-05-12 23:49:34 +02:00
parent 8c25bce400
commit 10c4f78ace

View File

@@ -23,52 +23,17 @@ import { env } from '$env/dynamic/private';
import type { ProgressEvent, ExtractedContent, ProgressCallback } from '$lib/server/extraction';
import type { QueueItem } from './types';
/**
 * Reports whether a caption appears to have been truncated (per the original
 * note: cut short by Instagram server-side when fetched through yt-dlp).
 *
 * Trailing whitespace is ignored. The caption counts as truncated when what
 * remains ends with the Unicode ellipsis ("…"), the ellipsis followed by a
 * period ("…."), or three ASCII dots ("...").
 *
 * @param text - Caption text to inspect.
 * @returns `true` if the trimmed text ends with one of the truncation markers.
 */
function isCaptionTruncated(text: string): boolean {
  const truncationSuffixes = ['…', '….', '...'];
  const trimmed = text.trimEnd();
  return truncationSuffixes.some((suffix) => trimmed.endsWith(suffix));
}
// NOTE(review): this span comes from a revert diff rendered without +/- markers,
// so lines from two versions are interleaved: `extractTextAndThumbnail` is
// declared twice and the body has two return paths. It is not valid TypeScript
// as shown. Presumably the `async` variant (yt-dlp first, Playwright retry on a
// truncated caption) is the code being removed and the ternary variant is the
// code being restored — confirm against the raw diff before editing.
// Feature flag: pick which Instagram extractor backend to invoke.
// Default to yt-dlp (fast, no browser); set EXTRACTOR_BACKEND=playwright to
// always use the stealth browser scraper.
// When yt-dlp is the primary backend, a truncated caption (ending with "…")
// automatically triggers a Playwright fallback to get the full text.
const extractTextAndThumbnail = async (
// Default to yt-dlp; set EXTRACTOR_BACKEND=playwright to fall back to the
// legacy stealth scraper while we verify the new path.
const extractTextAndThumbnail = (
url: string,
cb?: ProgressCallback
): Promise<ExtractedContent> => {
// Backend name is read from EXTRACTOR_BACKEND, defaults to 'ytdlp' when unset,
// and is lower-cased so the comparison below is case-insensitive.
const backend = (env.EXTRACTOR_BACKEND ?? 'ytdlp').toLowerCase();
// Explicit opt-in: go straight to the Playwright extractor.
if (backend === 'playwright') {
return extractWithPlaywright(url, cb);
}
// yt-dlp primary path
const result = await extractWithYtDlp(url, cb);
// A caption flagged by isCaptionTruncated triggers one Playwright retry.
if (isCaptionTruncated(result.bodyText)) {
// Report the retry through the optional progress callback.
cb?.({
type: 'status',
message: 'Caption truncated by Instagram — retrying with browser to get full text…',
timestamp: new Date().toISOString()
});
try {
const full = await extractWithPlaywright(url, cb);
// Only prefer the browser result when its body text is strictly longer.
if (full.bodyText.length > result.bodyText.length) {
return full;
}
} catch (e) {
// Best-effort fallback: the caught error `e` is not logged or rethrown; only a
// status event is emitted, and execution falls through to the yt-dlp result.
cb?.({
type: 'status',
message: 'Browser fallback failed — continuing with truncated caption',
timestamp: new Date().toISOString()
});
}
}
return result;
// Ternary variant: choose the backend once and return its promise directly,
// with no truncation check or retry.
return backend === 'playwright'
? extractWithPlaywright(url, cb)
: extractWithYtDlp(url, cb);
};
/**