Revert "feat: auto Playwright fallback when yt-dlp caption is truncated"
All checks were successful
Build & Push Docker Image / test-and-build (push) Successful in 1m3s
This reverts commit 8c25bce400.
This commit is contained in:
@@ -23,52 +23,17 @@ import { env } from '$env/dynamic/private';
|
|||||||
import type { ProgressEvent, ExtractedContent, ProgressCallback } from '$lib/server/extraction';
|
import type { ProgressEvent, ExtractedContent, ProgressCallback } from '$lib/server/extraction';
|
||||||
import type { QueueItem } from './types';
|
import type { QueueItem } from './types';
|
||||||
|
|
||||||
/**
 * Returns true when yt-dlp returns a caption that Instagram truncated
 * server-side, judged purely by how the text ends.
 *
 * Trailing whitespace is ignored. The caption counts as truncated when the
 * remaining text ends with any of:
 * - the Unicode ellipsis character ("…"),
 * - the Unicode ellipsis followed by a period ("…."), or
 * - three ASCII dots ("...").
 *
 * @param text - Caption text to inspect.
 * @returns `true` if the trimmed text ends with one of the markers above.
 */
function isCaptionTruncated(text: string): boolean {
	// Trim first so a trailing newline or space does not hide the marker.
	const t = text.trimEnd();
	return t.endsWith('…') || t.endsWith('….') || t.endsWith('...');
}
|
|
||||||
|
|
||||||
// Feature flag: pick which Instagram extractor backend to invoke.
|
// Feature flag: pick which Instagram extractor backend to invoke.
|
||||||
// Default to yt-dlp (fast, no browser); set EXTRACTOR_BACKEND=playwright to
|
// Default to yt-dlp; set EXTRACTOR_BACKEND=playwright to fall back to the
|
||||||
// always use the stealth browser scraper.
|
// legacy stealth scraper while we verify the new path.
|
||||||
// When yt-dlp is the primary backend, a truncated caption (ending with "…")
|
const extractTextAndThumbnail = (
|
||||||
// automatically triggers a Playwright fallback to get the full text.
|
|
||||||
const extractTextAndThumbnail = async (
|
|
||||||
url: string,
|
url: string,
|
||||||
cb?: ProgressCallback
|
cb?: ProgressCallback
|
||||||
): Promise<ExtractedContent> => {
|
): Promise<ExtractedContent> => {
|
||||||
const backend = (env.EXTRACTOR_BACKEND ?? 'ytdlp').toLowerCase();
|
const backend = (env.EXTRACTOR_BACKEND ?? 'ytdlp').toLowerCase();
|
||||||
if (backend === 'playwright') {
|
return backend === 'playwright'
|
||||||
return extractWithPlaywright(url, cb);
|
? extractWithPlaywright(url, cb)
|
||||||
}
|
: extractWithYtDlp(url, cb);
|
||||||
|
|
||||||
// yt-dlp primary path
|
|
||||||
const result = await extractWithYtDlp(url, cb);
|
|
||||||
|
|
||||||
if (isCaptionTruncated(result.bodyText)) {
|
|
||||||
cb?.({
|
|
||||||
type: 'status',
|
|
||||||
message: 'Caption truncated by Instagram — retrying with browser to get full text…',
|
|
||||||
timestamp: new Date().toISOString()
|
|
||||||
});
|
|
||||||
try {
|
|
||||||
const full = await extractWithPlaywright(url, cb);
|
|
||||||
if (full.bodyText.length > result.bodyText.length) {
|
|
||||||
return full;
|
|
||||||
}
|
|
||||||
} catch (e) {
|
|
||||||
cb?.({
|
|
||||||
type: 'status',
|
|
||||||
message: 'Browser fallback failed — continuing with truncated caption',
|
|
||||||
timestamp: new Date().toISOString()
|
|
||||||
});
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
return result;
|
|
||||||
};
|
};
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|||||||
Reference in New Issue
Block a user