fix(extraction): resolve progressCallback undefined errors
- Add progressCallback parameter to extractFromEmbeddedJSON and extractFromDOM
- Pass onProgress callback from extractWithStrategies to all strategies
- Fix legacy strategy to use the correct callback variable name
- Verify extractViaGraphQL correctly returns null thumbnail

This fixes a ReferenceError that was preventing all extraction methods from working. All extraction strategies now properly emit thumbnail progress events via SSE.

Closes: FixProgressCallbackUndefinedErrors
This commit is contained in:
@@ -204,7 +204,10 @@ function cleanText(text: string): string {
|
||||
/**
|
||||
* Strategy 1: Extract from embedded JSON data in script tags
|
||||
*/
|
||||
async function extractFromEmbeddedJSON(page: Page): Promise<ExtractedContent | null> {
|
||||
async function extractFromEmbeddedJSON(
|
||||
page: Page,
|
||||
progressCallback?: ProgressCallback
|
||||
): Promise<ExtractedContent | null> {
|
||||
try {
|
||||
// Extract all script tag contents
|
||||
const scriptContents = await page.evaluate(() => {
|
||||
@@ -313,7 +316,10 @@ function extractFromAlternativeStructure(items: any): Omit<ExtractedContent, 'th
|
||||
/**
|
||||
* Strategy 2: Extract from DOM using specific selectors
|
||||
*/
|
||||
async function extractFromDOM(page: Page): Promise<ExtractedContent | null> {
|
||||
async function extractFromDOM(
|
||||
page: Page,
|
||||
progressCallback?: ProgressCallback
|
||||
): Promise<ExtractedContent | null> {
|
||||
try {
|
||||
// Strategy: Direct caption selector
|
||||
const captionText = await page.evaluate(() => {
|
||||
@@ -442,11 +448,11 @@ async function extractWithStrategies(
|
||||
}> = [
|
||||
{
|
||||
name: 'embedded-json',
|
||||
fn: () => extractFromEmbeddedJSON(page)
|
||||
fn: () => extractFromEmbeddedJSON(page, onProgress)
|
||||
},
|
||||
{
|
||||
name: 'dom-selector',
|
||||
fn: () => extractFromDOM(page)
|
||||
fn: () => extractFromDOM(page, onProgress)
|
||||
},
|
||||
{
|
||||
name: 'graphql-api',
|
||||
@@ -456,7 +462,7 @@ async function extractWithStrategies(
|
||||
name: 'legacy',
|
||||
fn: async () => {
|
||||
const text = await extractCleanTextLegacy(page);
|
||||
const thumbnail = await extractThumbnailStealth(page, progressCallback);
|
||||
const thumbnail = await extractThumbnailStealth(page, onProgress);
|
||||
return { bodyText: text, thumbnail };
|
||||
}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user