feat: replace Playwright extractor with yt-dlp subprocess
- Add instagram-extractor.ts: a yt-dlp subprocess backend for Instagram caption extraction. It keeps no in-process browser state, is maintained against Instagram frontend churn, and supports cookies.txt for auth-walled reels.
- Add feature flag EXTRACTOR_BACKEND (ytdlp|playwright) in QueueProcessor so the old Playwright path remains available as a fallback.
- Add 9 unit tests and 2 live-network integration tests for the new extractor.
- Dockerfile: install yt-dlp via pip3 alongside the existing Chromium deps.
- docker-compose: expose the EXTRACTOR_BACKEND env var (default: ytdlp).

Also in this commit:
- LLM: configurable per-request timeout via LLM_REQUEST_TIMEOUT_MS (default 120s); set maxRetries=0 to surface errors immediately; add a llama-swap /running health probe.
- QueueProcessor: thread the progress callback through the parser phase.
- LlmHealthIndicator: surface the llama-swap loaded-model name.
- Logging: improve error serialization in queue-processor tests.
- .env.example: document the llama-swap endpoint and model options.

Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
This commit is contained in:
40
.env.example
40
.env.example
@@ -7,15 +7,23 @@
|
|||||||
# ==============================================================================
|
# ==============================================================================
|
||||||
# LLM Configuration (REQUIRED)
|
# LLM Configuration (REQUIRED)
|
||||||
# ==============================================================================
|
# ==============================================================================
|
||||||
# OpenAI-compatible API endpoint (OpenAI, LM Studio, Ollama, LiteLLM, etc.)
|
# OpenAI-compatible API endpoint. Production: llama-swap on ideapad.
|
||||||
OPENAI_BASE_URL=http://localhost:1234/v1
|
# llama-swap loads models on demand and unloads them after globalTTL (10 min).
|
||||||
|
OPENAI_BASE_URL=http://192.168.1.50:8080/v1
|
||||||
|
|
||||||
# API key for authentication
|
# API key for authentication (llama-swap accepts any non-empty value).
|
||||||
OPENAI_API_KEY=your-api-key-here
|
OPENAI_API_KEY=sk-llama-local
|
||||||
|
|
||||||
# Model to use for recipe extraction
|
# Model to use for recipe extraction. Available on the ideapad llama-swap stack:
|
||||||
# Examples: gpt-4o, gpt-4o-mini, llama-3.1, mistral, etc.
|
# gemma4-e4b-q6k (recommended — 4B, 65k ctx, 31 TPS)
|
||||||
LLM_MODEL=google/gemma-3-4b
|
# gemma4-e2b-q8_0 (faster — 2B, 65k ctx, 55 TPS)
|
||||||
|
# qwen3.5-4b-q8_0 (fallback — 22 TPS)
|
||||||
|
# phi4-mini-q8_0, granite-3.3-8b-q6k, plus larger MoE variants
|
||||||
|
LLM_MODEL=gemma4-e4b-q6k
|
||||||
|
|
||||||
|
# Per-request LLM timeout in ms. Must cover llama-swap cold-load (~5–30s for
|
||||||
|
# small models) plus generation time. Default 120000.
|
||||||
|
LLM_REQUEST_TIMEOUT_MS=120000
|
||||||
|
|
||||||
# ==============================================================================
|
# ==============================================================================
|
||||||
# Queue Configuration (OPTIONAL)
|
# Queue Configuration (OPTIONAL)
|
||||||
@@ -55,9 +63,23 @@ VAPID_PUBLIC_KEY=BNextdcB_fQ0BVvyGioM5L8Tf9vKQjs-WnF-rUbnU8MdWIZQYfggIHxBnW21I-l
|
|||||||
VAPID_PRIVATE_KEY=JwxI_KcsBcehYcTOufMcbVWJjCq1QbH5FJmSyQuG680
|
VAPID_PRIVATE_KEY=JwxI_KcsBcehYcTOufMcbVWJjCq1QbH5FJmSyQuG680
|
||||||
|
|
||||||
# ==============================================================================
|
# ==============================================================================
|
||||||
# Authentication Scheduler (OPTIONAL)
|
# Instagram Extraction Backend
|
||||||
# ==============================================================================
|
# ==============================================================================
|
||||||
# Enable automatic Instagram authentication renewal
|
# Which extractor to use:
|
||||||
|
# ytdlp (default) — yt-dlp subprocess, stateless, Sablier-safe
|
||||||
|
# playwright — legacy Playwright stealth scraper, requires
|
||||||
|
# secrets/auth.json + AUTH_SCHEDULER_* below
|
||||||
|
EXTRACTOR_BACKEND=ytdlp
|
||||||
|
|
||||||
|
# Optional Netscape-format cookies file for login-walled reels.
|
||||||
|
# yt-dlp picks it up automatically if it exists at /app/secrets/cookies.txt
|
||||||
|
# (Docker) or ./secrets/cookies.txt (local). No automation; export from a
|
||||||
|
# browser when an extraction starts hitting login walls.
|
||||||
|
|
||||||
|
# ==============================================================================
|
||||||
|
# Authentication Scheduler (LEGACY — only relevant when EXTRACTOR_BACKEND=playwright)
|
||||||
|
# ==============================================================================
|
||||||
|
# Enable automatic Instagram authentication renewal (Playwright backend only)
|
||||||
AUTH_SCHEDULER_ENABLED=true
|
AUTH_SCHEDULER_ENABLED=true
|
||||||
|
|
||||||
# Renewal interval in minutes (default: 720 = 12 hours)
|
# Renewal interval in minutes (default: 720 = 12 hours)
|
||||||
|
|||||||
@@ -1,12 +1,15 @@
|
|||||||
FROM node:24-alpine
|
FROM node:24-alpine
|
||||||
WORKDIR /app
|
WORKDIR /app
|
||||||
|
|
||||||
# Install Playwright system dependencies
|
# Install yt-dlp (primary Instagram extractor) and Playwright system dependencies (fallback)
|
||||||
RUN apk add --no-cache \
|
RUN apk add --no-cache \
|
||||||
|
python3 \
|
||||||
|
py3-pip \
|
||||||
chromium \
|
chromium \
|
||||||
font-liberation \
|
font-liberation \
|
||||||
font-noto \
|
font-noto \
|
||||||
font-noto-cjk
|
font-noto-cjk && \
|
||||||
|
pip3 install --break-system-packages yt-dlp
|
||||||
|
|
||||||
COPY package*.json ./
|
COPY package*.json ./
|
||||||
RUN npm ci
|
RUN npm ci
|
||||||
|
|||||||
@@ -32,6 +32,9 @@ services:
|
|||||||
# Playwright Configuration
|
# Playwright Configuration
|
||||||
- DISPLAY=:99
|
- DISPLAY=:99
|
||||||
|
|
||||||
|
# Extractor backend: 'ytdlp' (default) or 'playwright' (legacy fallback)
|
||||||
|
- EXTRACTOR_BACKEND=${EXTRACTOR_BACKEND:-ytdlp}
|
||||||
|
|
||||||
# Node.js Environment
|
# Node.js Environment
|
||||||
- NODE_ENV=production
|
- NODE_ENV=production
|
||||||
security_opt:
|
security_opt:
|
||||||
|
|||||||
@@ -26,7 +26,14 @@ type CaptionCandidate = {
|
|||||||
brCount: number;
|
brCount: number;
|
||||||
};
|
};
|
||||||
|
|
||||||
export type ProgressEventType = 'status' | 'method' | 'retry' | 'error' | 'thumbnail' | 'complete';
|
export type ProgressEventType =
|
||||||
|
| 'status'
|
||||||
|
| 'method'
|
||||||
|
| 'retry'
|
||||||
|
| 'error'
|
||||||
|
| 'thumbnail'
|
||||||
|
| 'complete'
|
||||||
|
| 'model_loading';
|
||||||
|
|
||||||
export interface ProgressEvent {
|
export interface ProgressEvent {
|
||||||
type: ProgressEventType;
|
type: ProgressEventType;
|
||||||
|
|||||||
193
src/lib/server/instagram-extractor.ts
Normal file
193
src/lib/server/instagram-extractor.ts
Normal file
@@ -0,0 +1,193 @@
|
|||||||
|
/**
|
||||||
|
* Instagram extractor — yt-dlp subprocess implementation.
|
||||||
|
*
|
||||||
|
* Replaces the Playwright-based scraper. yt-dlp is maintained against
|
||||||
|
* Instagram's frontend churn, has no in-process state, and works on public
|
||||||
|
* reels without authentication. Login-walled reels can be supported by
|
||||||
|
* dropping a Netscape-format cookies file at the path under SECRETS_DIR.
|
||||||
|
*/
|
||||||
|
|
||||||
|
import { execFile } from 'node:child_process';
|
||||||
|
import { promisify } from 'node:util';
|
||||||
|
import { existsSync } from 'node:fs';
|
||||||
|
import { logError } from './utils/logger';
|
||||||
|
import type { ExtractedContent, ProgressCallback } from './extraction';
|
||||||
|
|
||||||
|
const execFileAsync = promisify(execFile);
|
||||||
|
|
||||||
|
const YTDLP_TIMEOUT_MS = 60_000;
|
||||||
|
const IMAGE_FETCH_TIMEOUT_MS = 10_000;
|
||||||
|
const USER_AGENT =
|
||||||
|
'Mozilla/5.0 (iPhone; CPU iPhone OS 17_0 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/17.0 Mobile/15E148 Safari/604.1';
|
||||||
|
|
||||||
|
const COOKIE_PATHS = ['/app/secrets/cookies.txt', './secrets/cookies.txt'];
|
||||||
|
|
||||||
|
/**
 * Locate the optional Netscape-format cookies file.
 *
 * The candidates in COOKIE_PATHS are probed in order and the first one that
 * exists on disk wins.
 *
 * @returns The first existing candidate path, or null when none exists.
 */
function resolveCookiePath(): string | null {
	const found = COOKIE_PATHS.find((candidate) => existsSync(candidate));
	return found ?? null;
}
|
||||||
|
|
||||||
|
/** Subset of the yt-dlp `--dump-single-json` output that this module reads. */
interface YtDlpJson {
	/** Caption text; preferred source for the extracted body. */
	description?: string | null;
	/** Used as the body text when no description is present. */
	title?: string | null;
	/** Single thumbnail URL; preferred over the `thumbnails` list. */
	thumbnail?: string | null;
	/** Thumbnail list; entries may lack a `url`. */
	thumbnails?: Array<{ url?: string }>;
}

/**
 * Choose the thumbnail URL to download.
 *
 * @param data - Parsed yt-dlp JSON.
 * @returns `data.thumbnail` when it is set, otherwise the `url` of the first
 *   `thumbnails` entry that has one, otherwise null.
 */
function pickThumbnailUrl(data: YtDlpJson): string | null {
	if (data.thumbnail) {
		return data.thumbnail;
	}

	for (const entry of data.thumbnails ?? []) {
		if (entry?.url) {
			return entry.url;
		}
	}

	return null;
}
|
||||||
|
|
||||||
|
/**
 * Download an image and inline it as a base64 `data:` URL.
 *
 * Best-effort: any failure (network error, timeout after
 * IMAGE_FETCH_TIMEOUT_MS, non-200 status, non-image content type) yields null
 * instead of throwing, so the caller can continue without a thumbnail.
 *
 * @param imageUrl - Absolute URL of the image to fetch.
 * @returns A `data:<content-type>;base64,...` string, or null on any failure.
 */
async function fetchImageAsBase64(imageUrl: string): Promise<string | null> {
	try {
		const timeoutSignal = AbortSignal.timeout(IMAGE_FETCH_TIMEOUT_MS);
		const res = await fetch(imageUrl, { signal: timeoutSignal });
		if (res.status !== 200) {
			return null;
		}

		const mime = res.headers.get('content-type') ?? '';
		if (!mime.startsWith('image/')) {
			return null;
		}

		const bytes = await res.arrayBuffer();
		const encoded = Buffer.from(bytes).toString('base64');
		return `data:${mime};base64,${encoded}`;
	} catch (err) {
		// Thumbnails are optional; log and let extraction proceed without one.
		logError('[ytdlp] Thumbnail fetch failed', err);
		return null;
	}
}
|
||||||
|
|
||||||
|
/**
 * Map yt-dlp stderr output to a retry decision and a user-facing reason.
 *
 * Matching is case-insensitive substring search. Login/private/rate-limit
 * markers, unsupported URLs and 404s are reported as non-recoverable; anything
 * else is recoverable and the reason is the last two non-empty stderr lines.
 *
 * @param stderr - Raw stderr text (or error message) from the failed run.
 * @returns `recoverable` flag plus a human-readable `reason`.
 */
function classifyYtDlpError(stderr: string): { recoverable: boolean; reason: string } {
	const haystack = stderr.toLowerCase();
	const mentions = (...needles: string[]): boolean =>
		needles.some((needle) => haystack.includes(needle));

	if (mentions('login required', 'login_required', 'private', 'rate-limit', 'rate limit')) {
		return {
			recoverable: false,
			reason:
				'Instagram requires authentication for this reel. Drop a Netscape cookies.txt at secrets/cookies.txt and retry.'
		};
	}

	if (mentions('unsupported url')) {
		return { recoverable: false, reason: 'URL not recognised by yt-dlp.' };
	}

	if (mentions('http error 404', 'does not exist')) {
		return { recoverable: false, reason: 'Reel not found (404).' };
	}

	// Unknown failure: keep only the tail of stderr, where yt-dlp prints the error.
	const tail = stderr
		.split('\n')
		.filter((line) => line.length > 0)
		.slice(-2);
	return { recoverable: true, reason: tail.join(' ') };
}
|
||||||
|
|
||||||
|
/**
 * Extract caption text and a thumbnail data URL from an Instagram reel.
 *
 * Runs `yt-dlp --dump-single-json --skip-download` as a subprocess, takes the
 * caption from the JSON `description` (falling back to `title` when the
 * description is missing or blank), then fetches the thumbnail and inlines it
 * as a data URL. A cookies file found by resolveCookiePath() is passed to
 * yt-dlp via `--cookies`.
 *
 * Takes the same `(url, progressCallback)` arguments as the legacy Playwright
 * extractor. Progress events emitted here use the types `status`, `error`,
 * `thumbnail` and `complete`.
 *
 * @param url - Reel URL, handed to yt-dlp as the final positional argument.
 * @param progressCallback - Optional sink for progress events.
 * @returns The trimmed caption as `bodyText` and the thumbnail as a data URL;
 *   `thumbnail` is null when no thumbnail URL exists or the fetch fails.
 * @throws Error when yt-dlp is not installed (ENOENT), when it fails or times
 *   out, when its output is not a JSON object, or when neither description
 *   nor title contains text. Failures classified as non-recoverable carry
 *   `nonRecoverable = true`; subprocess failures keep the original error as
 *   `cause`.
 */
export async function extractTextAndThumbnail(
	url: string,
	progressCallback?: ProgressCallback
): Promise<ExtractedContent> {
	// Single place that stamps events, so every event gets a fresh timestamp.
	const emit = (type: 'status' | 'error' | 'thumbnail' | 'complete', message: string): void => {
		progressCallback?.({ type, message, timestamp: new Date().toISOString() });
	};

	emit('status', 'Invoking yt-dlp...');

	const cookies = resolveCookiePath();
	if (cookies) {
		emit('status', `Using cookies from ${cookies}`);
	}

	const args = [
		'--dump-single-json',
		'--skip-download',
		'--no-warnings',
		'--no-call-home',
		'--socket-timeout',
		'20',
		'--user-agent',
		USER_AGENT,
		...(cookies ? ['--cookies', cookies] : []),
		url
	];

	let stdout: string;
	try {
		const result = await execFileAsync('yt-dlp', args, {
			timeout: YTDLP_TIMEOUT_MS,
			maxBuffer: 10 * 1024 * 1024
		});
		stdout = result.stdout;
	} catch (e: any) {
		if (e?.code === 'ENOENT') {
			throw new Error('yt-dlp is not installed in this container. Add it to the Dockerfile.');
		}

		// A child killed by execFile's `timeout` is reported with `killed: true`
		// and no exit code; a maxBuffer overflow also kills the child but sets
		// an error code, so it is excluded here.
		const timedOut = e?.killed === true && e?.code == null;

		// `||` rather than `??`: a killed or silent child leaves stderr as '',
		// which must still fall back to the error message.
		const stderr = String(e?.stderr || e?.message || '');

		const { recoverable, reason } = timedOut
			? { recoverable: true, reason: `timed out after ${YTDLP_TIMEOUT_MS / 1000}s` }
			: classifyYtDlpError(stderr);

		emit('error', `yt-dlp failed: ${reason}`);

		const err = new Error(`yt-dlp extraction failed: ${reason}`, { cause: e });
		// NOTE(review): this flags the error object; whether
		// QueueProcessor.isRecoverableError() reads the flag or matches on the
		// message is not visible from this file — confirm.
		if (!recoverable) (err as any).nonRecoverable = true;
		throw err;
	}

	let data: YtDlpJson;
	try {
		const parsed: unknown = JSON.parse(stdout);
		// `null`, numbers and strings are valid JSON but not a usable result.
		if (parsed === null || typeof parsed !== 'object') {
			throw new TypeError('top-level JSON value is not an object');
		}
		data = parsed as YtDlpJson;
	} catch (e) {
		logError('[ytdlp] Failed to parse yt-dlp JSON output', e);
		throw new Error('yt-dlp returned invalid JSON');
	}

	// Trim before falling back so an empty or whitespace-only description
	// still lets the title be used.
	const bodyText = data.description?.trim() || data.title?.trim() || '';
	if (!bodyText) {
		throw new Error('yt-dlp returned no description for this reel');
	}

	emit('status', `Caption extracted (${bodyText.length} chars)`);

	let thumbnail: string | null = null;
	const thumbUrl = pickThumbnailUrl(data);
	if (thumbUrl) {
		emit('thumbnail', 'Fetching thumbnail...');
		thumbnail = await fetchImageAsBase64(thumbUrl);
		emit(
			'status',
			thumbnail ? 'Thumbnail fetched' : 'Thumbnail fetch failed (continuing without)'
		);
	}

	emit('complete', 'Extraction complete');

	return { bodyText, thumbnail };
}
|
||||||
@@ -2,15 +2,24 @@ import OpenAI from 'openai';
|
|||||||
import { env } from '$env/dynamic/private';
|
import { env } from '$env/dynamic/private';
|
||||||
import { logError } from './utils/logger';
|
import { logError } from './utils/logger';
|
||||||
|
|
||||||
|
/** Per-request LLM timeout (ms) used when no valid override is supplied. */
const DEFAULT_REQUEST_TIMEOUT_MS = 120_000;

/**
 * Parse a timeout override given as a string.
 *
 * @param raw - Raw string value (e.g. from an environment variable).
 * @returns The parsed value when it is a finite number greater than zero,
 *   otherwise DEFAULT_REQUEST_TIMEOUT_MS.
 */
const parseTimeoutMs = (raw: string | undefined): number => {
	if (!raw) {
		return DEFAULT_REQUEST_TIMEOUT_MS;
	}

	const parsed = Number(raw);
	if (!Number.isFinite(parsed) || parsed <= 0) {
		return DEFAULT_REQUEST_TIMEOUT_MS;
	}

	return parsed;
};
|
||||||
|
|
||||||
export const createLLM = () => {
|
export const createLLM = () => {
|
||||||
// Detect if we are using Ollama or OpenAI based on URL
|
|
||||||
const baseURL = env.OPENAI_BASE_URL;
|
const baseURL = env.OPENAI_BASE_URL;
|
||||||
const apiKey = env.OPENAI_API_KEY;
|
const apiKey = env.OPENAI_API_KEY;
|
||||||
const model = env.LLM_MODEL || 'gpt-4o';
|
const model = env.LLM_MODEL || 'gpt-4o';
|
||||||
|
const timeout = parseTimeoutMs(env.LLM_REQUEST_TIMEOUT_MS);
|
||||||
|
|
||||||
console.log('[LLM] Initializing client...');
|
console.log('[LLM] Initializing client...');
|
||||||
console.log('[LLM] Base URL:', baseURL);
|
console.log('[LLM] Base URL:', baseURL);
|
||||||
console.log('[LLM] Model:', model);
|
console.log('[LLM] Model:', model);
|
||||||
|
console.log('[LLM] Request timeout (ms):', timeout);
|
||||||
|
|
||||||
if (!baseURL) {
|
if (!baseURL) {
|
||||||
throw new Error('OPENAI_BASE_URL environment variable is not set');
|
throw new Error('OPENAI_BASE_URL environment variable is not set');
|
||||||
@@ -22,7 +31,9 @@ export const createLLM = () => {
|
|||||||
|
|
||||||
const client = new OpenAI({
|
const client = new OpenAI({
|
||||||
apiKey,
|
apiKey,
|
||||||
baseURL
|
baseURL,
|
||||||
|
timeout,
|
||||||
|
maxRetries: 0
|
||||||
});
|
});
|
||||||
|
|
||||||
return { client, model };
|
return { client, model };
|
||||||
@@ -43,6 +54,47 @@ export async function checkLLMHealth(): Promise<boolean> {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Derive the llama-swap root URL from an OpenAI-style base URL.
 *
 * Removes a trailing `/v1` or `/v1/`, then one remaining trailing slash, so
 * that root-level paths such as `/running` can be appended directly.
 *
 * @param baseURL - Base URL, typically ending in `/v1`.
 * @returns The URL without the `/v1` suffix and without a trailing slash.
 */
function llamaSwapRoot(baseURL: string): string {
	const withoutApiSuffix = baseURL.replace(/\/v1\/?$/, '');
	return withoutApiSuffix.endsWith('/') ? withoutApiSuffix.slice(0, -1) : withoutApiSuffix;
}
|
||||||
|
|
||||||
|
/** One entry of the `running` array read from llama-swap's /running response. */
interface RunningModelEntry {
	model: string;
	/** Treated as 'ready' when absent. */
	state?: string;
}

/**
 * Ask llama-swap's /running endpoint whether `model` is currently loaded and
 * ready to serve.
 *
 * Kept separate from checkModelAvailability(): /v1/models lists every model
 * llama-swap is configured to swap to, whereas /running reports only what is
 * loaded right now. Both signals are useful.
 *
 * @param model - Model ID to look for in the running list.
 * @returns true when a matching entry is in state 'ready' (or has no state);
 *   false when OPENAI_BASE_URL is unset, the request fails or times out
 *   (5 s), the response is not OK, or no entry matches. Never throws.
 */
export async function isModelLoaded(model: string): Promise<boolean> {
	const baseURL = env.OPENAI_BASE_URL;
	if (!baseURL) {
		return false;
	}

	try {
		const endpoint = `${llamaSwapRoot(baseURL)}/running`;
		const res = await fetch(endpoint, { signal: AbortSignal.timeout(5_000) });
		if (!res.ok) {
			return false;
		}

		const payload = (await res.json()) as { running?: RunningModelEntry[] };
		for (const entry of payload.running ?? []) {
			const state = entry.state ?? 'ready';
			if (entry.model === model && state === 'ready') {
				return true;
			}
		}
		return false;
	} catch (err) {
		// Any failure is reported as "not loaded" (treat as cold).
		logError('[LLM] isModelLoaded check failed', err);
		return false;
	}
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Check if a specific model is available in the OpenAI-compatible API
|
* Check if a specific model is available in the OpenAI-compatible API
|
||||||
* @param model - The model ID to check for availability
|
* @param model - The model ID to check for availability
|
||||||
|
|||||||
@@ -1,8 +1,9 @@
|
|||||||
import { createLLM, checkModelAvailability } from './llm';
|
import { createLLM, checkModelAvailability, isModelLoaded } from './llm';
|
||||||
import { zodResponseFormat } from 'openai/helpers/zod';
|
import { zodResponseFormat } from 'openai/helpers/zod';
|
||||||
import { z } from 'zod';
|
import { z } from 'zod';
|
||||||
import { RECIPE_DETECTION_PROMPT, RECIPE_EXTRACTION_PROMPT } from './prompts/recipe-extraction';
|
import { RECIPE_DETECTION_PROMPT, RECIPE_EXTRACTION_PROMPT } from './prompts/recipe-extraction';
|
||||||
import { logError } from './utils/logger';
|
import { logError } from './utils/logger';
|
||||||
|
import type { ProgressCallback } from './extraction';
|
||||||
|
|
||||||
const RecipeSchema = z.object({
|
const RecipeSchema = z.object({
|
||||||
name: z.string(),
|
name: z.string(),
|
||||||
@@ -144,11 +145,33 @@ export async function parseRecipe(text: string): Promise<Recipe> {
|
|||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Complete workflow: detect recipe and parse if found
|
* Complete workflow: detect recipe and parse if found.
|
||||||
|
*
|
||||||
|
* Emits a `model_loading` progress event (if a callback is supplied) when the
|
||||||
|
* configured llama-swap model is not yet warm — the first request after idle
|
||||||
|
* blocks for several seconds while llama-swap loads the model into VRAM.
|
||||||
|
*
|
||||||
* @param text - The text to analyze
|
* @param text - The text to analyze
|
||||||
|
* @param progressCallback - Optional callback for surfacing cold-load state
|
||||||
* @returns Parsed recipe object if detected, null otherwise
|
* @returns Parsed recipe object if detected, null otherwise
|
||||||
*/
|
*/
|
||||||
export async function extractRecipe(text: string): Promise<Recipe | null> {
|
export async function extractRecipe(
|
||||||
|
text: string,
|
||||||
|
progressCallback?: ProgressCallback
|
||||||
|
): Promise<Recipe | null> {
|
||||||
|
if (progressCallback) {
|
||||||
|
const { model } = createLLM();
|
||||||
|
const warm = await isModelLoaded(model);
|
||||||
|
if (!warm) {
|
||||||
|
progressCallback({
|
||||||
|
type: 'model_loading',
|
||||||
|
message: `Inference server cold — loading ${model} into VRAM (5–30s)...`,
|
||||||
|
data: { model },
|
||||||
|
timestamp: new Date().toISOString()
|
||||||
|
});
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
const isRecipe = await detectRecipe(text);
|
const isRecipe = await detectRecipe(text);
|
||||||
|
|
||||||
if (!isRecipe) {
|
if (!isRecipe) {
|
||||||
|
|||||||
@@ -12,15 +12,30 @@
|
|||||||
*/
|
*/
|
||||||
|
|
||||||
import { queueManager } from './QueueManager';
|
import { queueManager } from './QueueManager';
|
||||||
import { extractTextAndThumbnail } from '$lib/server/extraction';
|
import { extractTextAndThumbnail as extractWithPlaywright } from '$lib/server/extraction';
|
||||||
|
import { extractTextAndThumbnail as extractWithYtDlp } from '$lib/server/instagram-extractor';
|
||||||
import { extractRecipe } from '$lib/server/parser';
|
import { extractRecipe } from '$lib/server/parser';
|
||||||
import { uploadRecipeWithIngredientsDTO, uploadRecipeImage } from '$lib/server/tandoor';
|
import { uploadRecipeWithIngredientsDTO, uploadRecipeImage } from '$lib/server/tandoor';
|
||||||
import { pushNotificationService } from '$lib/server/notifications/PushNotificationService';
|
import { pushNotificationService } from '$lib/server/notifications/PushNotificationService';
|
||||||
import { queueConfig } from './config';
|
import { queueConfig } from './config';
|
||||||
import { logError } from '../utils/logger';
|
import { logError } from '../utils/logger';
|
||||||
import type { ProgressEvent } from '$lib/server/extraction';
|
import { env } from '$env/dynamic/private';
|
||||||
|
import type { ProgressEvent, ExtractedContent, ProgressCallback } from '$lib/server/extraction';
|
||||||
import type { QueueItem } from './types';
|
import type { QueueItem } from './types';
|
||||||
|
|
||||||
|
/**
 * Feature-flagged Instagram extractor entry point.
 *
 * EXTRACTOR_BACKEND is read on every call and compared case-insensitively:
 * 'playwright' selects the legacy stealth scraper, while any other value
 * (including unset) selects the yt-dlp backend.
 *
 * @param url - Reel URL to extract from.
 * @param cb - Optional progress callback forwarded to the chosen backend.
 * @returns The chosen backend's extraction result.
 */
const extractTextAndThumbnail = (
	url: string,
	cb?: ProgressCallback
): Promise<ExtractedContent> => {
	const backend = (env.EXTRACTOR_BACKEND ?? 'ytdlp').toLowerCase();

	if (backend === 'playwright') {
		return extractWithPlaywright(url, cb);
	}

	return extractWithYtDlp(url, cb);
};
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Queue processor with configurable concurrency
|
* Queue processor with configurable concurrency
|
||||||
*
|
*
|
||||||
@@ -250,7 +265,9 @@ export class QueueProcessor {
|
|||||||
});
|
});
|
||||||
|
|
||||||
console.log(`[QueueProcessor] Parsing recipe: ${item.id}`);
|
console.log(`[QueueProcessor] Parsing recipe: ${item.id}`);
|
||||||
const recipe = await extractRecipe(item.extractedText);
|
const recipe = await extractRecipe(item.extractedText, (event) => {
|
||||||
|
queueManager.addProgressEvent(item.id, event);
|
||||||
|
});
|
||||||
|
|
||||||
if (!recipe) {
|
if (!recipe) {
|
||||||
throw new Error('Failed to parse recipe from extracted text');
|
throw new Error('Failed to parse recipe from extracted text');
|
||||||
|
|||||||
@@ -1,34 +1,48 @@
|
|||||||
import { json } from '@sveltejs/kit';
|
import { json } from '@sveltejs/kit';
|
||||||
import { checkLLMHealth } from '$lib/server/llm';
|
import { env } from '$env/dynamic/private';
|
||||||
|
import { checkLLMHealth, isModelLoaded } from '$lib/server/llm';
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Health check endpoint for LLM service
|
* Health check endpoint for the LLM service (llama-swap on ideapad).
|
||||||
* Tests connectivity to LM Studio or OpenAI-compatible endpoint
|
*
|
||||||
|
* Three states:
|
||||||
|
* - ok → endpoint reachable AND configured model is loaded in VRAM
|
||||||
|
* - warming → endpoint reachable but configured model not yet loaded
|
||||||
|
* (next request will trigger a cold load)
|
||||||
|
* - error → endpoint unreachable
|
||||||
*/
|
*/
|
||||||
export async function GET() {
|
export async function GET() {
|
||||||
try {
|
try {
|
||||||
const isHealthy = await checkLLMHealth();
|
const reachable = await checkLLMHealth();
|
||||||
|
const configuredModel = env.LLM_MODEL || 'gpt-4o';
|
||||||
|
|
||||||
if (isHealthy) {
|
if (!reachable) {
|
||||||
return json({
|
|
||||||
status: 'healthy',
|
|
||||||
message: 'LLM service is accessible'
|
|
||||||
});
|
|
||||||
} else {
|
|
||||||
return json(
|
return json(
|
||||||
{
|
{
|
||||||
status: 'unhealthy',
|
status: 'error',
|
||||||
message: 'LLM service is not accessible'
|
message: 'LLM service is not accessible',
|
||||||
|
configuredModel
|
||||||
},
|
},
|
||||||
{ status: 503 }
|
{ status: 503 }
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
const warm = await isModelLoaded(configuredModel);
|
||||||
|
return json({
|
||||||
|
status: warm ? 'ok' : 'warming',
|
||||||
|
message: warm
|
||||||
|
? `Model ${configuredModel} loaded and ready`
|
||||||
|
: `Model ${configuredModel} configured; next request will trigger a cold load`,
|
||||||
|
configuredModel,
|
||||||
|
loaded: warm
|
||||||
|
});
|
||||||
} catch (error) {
|
} catch (error) {
|
||||||
const errorMessage = error instanceof Error ? error.message : 'Unknown error';
|
const errorMessage = error instanceof Error ? error.message : 'Unknown error';
|
||||||
return json(
|
return json(
|
||||||
{
|
{
|
||||||
status: 'error',
|
status: 'error',
|
||||||
message: errorMessage
|
message: errorMessage,
|
||||||
|
configuredModel: env.LLM_MODEL || 'gpt-4o'
|
||||||
},
|
},
|
||||||
{ status: 500 }
|
{ status: 500 }
|
||||||
);
|
);
|
||||||
|
|||||||
@@ -1,9 +1,12 @@
|
|||||||
<script lang="ts">
|
<script lang="ts">
|
||||||
import { onMount } from 'svelte';
|
import { onMount } from 'svelte';
|
||||||
|
|
||||||
|
type HealthStatus = 'checking' | 'ok' | 'warming' | 'error';
|
||||||
|
|
||||||
interface HealthState {
|
interface HealthState {
|
||||||
status: 'checking' | 'healthy' | 'unhealthy' | 'error';
|
status: HealthStatus;
|
||||||
message: string;
|
message: string;
|
||||||
|
configuredModel: string;
|
||||||
lastChecked: Date | null;
|
lastChecked: Date | null;
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -14,6 +17,7 @@
|
|||||||
let health = $state<HealthState>({
|
let health = $state<HealthState>({
|
||||||
status: 'checking',
|
status: 'checking',
|
||||||
message: '',
|
message: '',
|
||||||
|
configuredModel: '',
|
||||||
lastChecked: null
|
lastChecked: null
|
||||||
});
|
});
|
||||||
|
|
||||||
@@ -21,24 +25,26 @@
|
|||||||
try {
|
try {
|
||||||
const res = await fetch('/api/llm-health');
|
const res = await fetch('/api/llm-health');
|
||||||
const data = await res.json();
|
const data = await res.json();
|
||||||
|
const status: HealthStatus =
|
||||||
|
data.status === 'ok' ? 'ok' : data.status === 'warming' ? 'warming' : 'error';
|
||||||
health = {
|
health = {
|
||||||
status: data.status === 'healthy' ? 'healthy' : 'unhealthy',
|
status,
|
||||||
message: data.message,
|
message: data.message ?? '',
|
||||||
|
configuredModel: data.configuredModel ?? '',
|
||||||
lastChecked: new Date()
|
lastChecked: new Date()
|
||||||
};
|
};
|
||||||
} catch (e) {
|
} catch (e) {
|
||||||
health = {
|
health = {
|
||||||
status: 'error',
|
status: 'error',
|
||||||
message: e instanceof Error ? e.message : 'Network error',
|
message: e instanceof Error ? e.message : 'Network error',
|
||||||
|
configuredModel: '',
|
||||||
lastChecked: new Date()
|
lastChecked: new Date()
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// Use onMount instead of $effect for timer-based side effects
|
|
||||||
// onMount only runs in browser, no SSR guard needed
|
|
||||||
onMount(() => {
|
onMount(() => {
|
||||||
checkHealth(); // Initial check
|
checkHealth();
|
||||||
const interval = setInterval(checkHealth, pollInterval);
|
const interval = setInterval(checkHealth, pollInterval);
|
||||||
return () => clearInterval(interval);
|
return () => clearInterval(interval);
|
||||||
});
|
});
|
||||||
@@ -48,12 +54,12 @@
|
|||||||
<div class="flex items-center gap-1">
|
<div class="flex items-center gap-1">
|
||||||
{#if health.status === 'checking'}
|
{#if health.status === 'checking'}
|
||||||
🟡 <span>Checking LLM...</span>
|
🟡 <span>Checking LLM...</span>
|
||||||
{:else if health.status === 'healthy'}
|
{:else if health.status === 'ok'}
|
||||||
🟢 <span class="text-green-600">LLM Ready</span>
|
🟢 <span class="text-green-600">LLM Ready</span>
|
||||||
{:else if health.status === 'unhealthy'}
|
{:else if health.status === 'warming'}
|
||||||
🔴 <span class="text-red-600">LLM Unavailable</span>
|
🟡 <span class="text-yellow-600">LLM Cold ({health.configuredModel})</span>
|
||||||
{:else}
|
{:else}
|
||||||
🔴 <span class="text-red-600">LLM Error</span>
|
🔴 <span class="text-red-600">LLM Unavailable</span>
|
||||||
{/if}
|
{/if}
|
||||||
</div>
|
</div>
|
||||||
<div class="text-xs text-gray-500" title={health.message}>
|
<div class="text-xs text-gray-500" title={health.message}>
|
||||||
|
|||||||
49
src/tests/instagram-extractor.integration.spec.ts
Normal file
49
src/tests/instagram-extractor.integration.spec.ts
Normal file
@@ -0,0 +1,49 @@
|
|||||||
|
/**
|
||||||
|
* E2E integration test for the yt-dlp Instagram extractor.
|
||||||
|
*
|
||||||
|
* Makes real network calls (yt-dlp + Instagram CDN). Requires:
|
||||||
|
* - yt-dlp installed on PATH
|
||||||
|
* - Network access to instagram.com
|
||||||
|
* - EXTRACTOR_E2E=1 env var (safety guard to avoid running in normal test runs)
|
||||||
|
*
|
||||||
|
* Run with:
|
||||||
|
* EXTRACTOR_E2E=1 npm test -- src/tests/instagram-extractor.integration.spec.ts
|
||||||
|
*/
|
||||||
|
|
||||||
|
import { describe, it, expect } from 'vitest';
|
||||||
|
import { extractTextAndThumbnail } from '$lib/server/instagram-extractor';
|
||||||
|
|
||||||
|
// Opt-in guard: the suite is skipped unless EXTRACTOR_E2E is set to a non-empty value.
const E2E = Boolean(process.env.EXTRACTOR_E2E);

describe.skipIf(!E2E)('instagram-extractor E2E (requires yt-dlp + network)', () => {
	// Public reels that have previously been in the app queue, each paired
	// with a keyword expected to appear in its caption.
	const TEST_REELS = [
		{
			url: 'https://www.instagram.com/reel/DX4XEDZt3qT/',
			expectKeyword: 'pizza'
		},
		{
			url: 'https://www.instagram.com/reel/DUtHm2EiD26/',
			expectKeyword: 'noodles'
		}
	];

	TEST_REELS.forEach(({ url, expectKeyword }) => {
		it(`extracts caption from ${url}`, async () => {
			const events: { type: string; message: string }[] = [];
			const recordEvent = (e: unknown): void => {
				events.push(e as { type: string; message: string });
			};

			const result = await extractTextAndThumbnail(url, recordEvent);

			// The caption must be non-trivial and mention the expected dish.
			expect(result.bodyText.length).toBeGreaterThan(20);
			expect(result.bodyText.toLowerCase()).toContain(expectKeyword);

			// A thumbnail is optional, but when present it must be an image data URL.
			if (result.thumbnail !== null) {
				expect(result.thumbnail).toMatch(/^data:image\//);
			}

			const sawComplete = events.some((e) => e.type === 'complete');
			const sawYtDlpStatus = events.some(
				(e) => e.type === 'status' && e.message.includes('yt-dlp')
			);
			expect(sawComplete).toBe(true);
			expect(sawYtDlpStatus).toBe(true);
		}, 90_000);
	});
});
|
||||||
171
src/tests/instagram-extractor.spec.ts
Normal file
171
src/tests/instagram-extractor.spec.ts
Normal file
@@ -0,0 +1,171 @@
|
|||||||
|
import { describe, it, expect, vi, beforeEach, afterEach } from 'vitest';
|
||||||
|
|
||||||
|
// Mock node:child_process before importing the SUT. The SUT uses
|
||||||
|
// promisify(execFile); without the Node-internal special handling, promisify
|
||||||
|
// would only forward the first callback arg. We sidestep that by returning a
|
||||||
|
// pre-promisified function tagged with util.promisify.custom that resolves
|
||||||
|
// to {stdout, stderr}.
|
||||||
|
import * as util from 'node:util';
|
||||||
|
// Spy that stands in for the promisified execFile; tests program its
// resolved/rejected value and inspect the (cmd, args, opts) it received.
const execFileMock = vi.fn();

vi.mock('node:child_process', () => {
	// The promise-returning form simply delegates to the spy.
	const promisified = (cmd: string, args: string[], opts: any) => execFileMock(cmd, args, opts);

	// The callback form must never be reached; only the util.promisify.custom
	// hook is expected to be used by the code under test.
	const execFile: any = Object.assign(
		() => {
			throw new Error('callback form not used in tests');
		},
		{ [util.promisify.custom]: promisified }
	);

	return { execFile };
});
|
||||||
|
|
||||||
|
// Spy controlling what the mocked node:fs existsSync reports for a given path.
const existsSyncMock = vi.fn();

vi.mock('node:fs', () => {
	// Forward only the path argument to the spy.
	const existsSync = (p: string) => existsSyncMock(p);
	return { existsSync };
});
|
||||||
|
|
||||||
|
import { extractTextAndThumbnail } from '../lib/server/instagram-extractor';
|
||||||
|
|
||||||
|
// Unit tests for the yt-dlp backed extractor. The subprocess (execFileMock),
// the filesystem probe (existsSyncMock) and global fetch are all stubbed, so
// no real process is spawned and no network request is made.
describe('instagram-extractor (yt-dlp backend)', () => {
	const originalFetch = globalThis.fetch;

	/** Makes the mocked yt-dlp call succeed, printing `info` as JSON on stdout. */
	function stubYtDlpOutput(info: Record<string, unknown>) {
		execFileMock.mockResolvedValue({ stdout: JSON.stringify(info), stderr: '' });
	}

	/** Makes the mocked yt-dlp call reject with an Error carrying extra fields. */
	function stubYtDlpFailure(message: string, extra: Record<string, unknown>) {
		const failure: any = Object.assign(new Error(message), extra);
		execFileMock.mockRejectedValue(failure);
	}

	/** Replaces global fetch with a stub resolving to a minimal response-like object. */
	function stubFetch(status: number, contentType: string, body: ArrayBuffer) {
		globalThis.fetch = vi.fn().mockResolvedValue({
			status,
			headers: { get: () => contentType },
			arrayBuffer: () => Promise.resolve(body)
		}) as unknown as typeof fetch;
	}

	/** Argument list passed to the first mocked yt-dlp invocation. */
	function firstCallArgs(): string[] {
		const [, args] = execFileMock.mock.calls[0];
		return args as string[];
	}

	beforeEach(() => {
		execFileMock.mockReset();
		existsSyncMock.mockReset();
		// Default: no cookie file (or any other probed path) exists.
		existsSyncMock.mockReturnValue(false);
	});

	afterEach(() => {
		globalThis.fetch = originalFetch;
	});

	it('parses yt-dlp JSON and returns bodyText + thumbnail data URI', async () => {
		stubYtDlpOutput({
			description: 'Pasta carbonara: 200g spaghetti, 100g pancetta, 2 eggs.',
			thumbnail: 'https://example.com/thumb.jpg'
		});
		stubFetch(200, 'image/jpeg', new Uint8Array([1, 2, 3]).buffer);

		const { bodyText, thumbnail } = await extractTextAndThumbnail(
			'https://www.instagram.com/reel/abc123/'
		);

		expect(bodyText).toContain('carbonara');
		expect(thumbnail).toMatch(/^data:image\/jpeg;base64,/);
	});

	it('falls back to first thumbnails entry when top-level thumbnail is absent', async () => {
		stubYtDlpOutput({
			description: 'Recipe text',
			thumbnails: [{ url: 'https://example.com/alt-thumb.jpg' }]
		});
		stubFetch(200, 'image/png', new Uint8Array([4, 5, 6]).buffer);

		const { thumbnail } = await extractTextAndThumbnail('https://www.instagram.com/reel/abc/');

		expect(thumbnail).toMatch(/^data:image\/png;base64,/);
	});

	it('returns null thumbnail when fetch fails', async () => {
		stubYtDlpOutput({
			description: 'Recipe text',
			thumbnail: 'https://example.com/missing.jpg'
		});
		stubFetch(404, 'text/html', new ArrayBuffer(0));

		const { bodyText, thumbnail } = await extractTextAndThumbnail(
			'https://www.instagram.com/reel/abc/'
		);

		// The caption still comes through; only the image is dropped.
		expect(bodyText).toBe('Recipe text');
		expect(thumbnail).toBeNull();
	});

	it('passes --cookies flag when secrets/cookies.txt exists', async () => {
		existsSyncMock.mockImplementation((p: string) => p.endsWith('cookies.txt'));
		stubYtDlpOutput({ description: 'x', thumbnail: null });

		await extractTextAndThumbnail('https://www.instagram.com/reel/abc/');

		const args = firstCallArgs();
		expect(args).toContain('--cookies');
		// The value following the flag must be the cookie file path.
		const flagAt = args.indexOf('--cookies');
		expect(args[flagAt + 1]).toMatch(/cookies\.txt$/);
	});

	it('omits --cookies flag when no cookie file is present', async () => {
		existsSyncMock.mockReturnValue(false);
		stubYtDlpOutput({ description: 'x', thumbnail: null });

		await extractTextAndThumbnail('https://www.instagram.com/reel/abc/');

		expect(firstCallArgs()).not.toContain('--cookies');
	});

	it('throws non-recoverable error on "Login required" stderr', async () => {
		stubYtDlpFailure('yt-dlp failed', {
			stderr: 'ERROR: [Instagram] xyz: Login required to access this post.'
		});

		const attempt = extractTextAndThumbnail('https://www.instagram.com/reel/private/');

		await expect(attempt).rejects.toThrow(/authentication/);
	});

	it('throws clear error when yt-dlp binary is missing (ENOENT)', async () => {
		stubYtDlpFailure('not found', { code: 'ENOENT' });

		const attempt = extractTextAndThumbnail('https://www.instagram.com/reel/abc/');

		await expect(attempt).rejects.toThrow(/yt-dlp is not installed/);
	});

	it('throws when description is empty', async () => {
		stubYtDlpOutput({ description: '', thumbnail: null });

		const attempt = extractTextAndThumbnail('https://www.instagram.com/reel/empty/');

		await expect(attempt).rejects.toThrow(/no description/);
	});

	it('emits progress events through the callback', async () => {
		stubYtDlpOutput({ description: 'x', thumbnail: null });

		const received: any[] = [];
		await extractTextAndThumbnail('https://www.instagram.com/reel/abc/', (e) => received.push(e));

		const sawYtDlpStatus = received.some(
			(e) => e.type === 'status' && e.message.includes('yt-dlp')
		);
		expect(sawYtDlpStatus).toBe(true);
		const sawComplete = received.some((e) => e.type === 'complete');
		expect(sawComplete).toBe(true);
	});
});
|
||||||
@@ -18,7 +18,7 @@ vi.mock('$lib/server/tandoor', () => ({
|
|||||||
}));
|
}));
|
||||||
|
|
||||||
import { queueManager } from '$lib/server/queue/QueueManager';
|
import { queueManager } from '$lib/server/queue/QueueManager';
|
||||||
import * as extraction from '$lib/server/extraction';
|
import * as instagramExtractor from '$lib/server/instagram-extractor';
|
||||||
import { queueProcessor } from '$lib/server/queue/QueueProcessor';
|
import { queueProcessor } from '$lib/server/queue/QueueProcessor';
|
||||||
|
|
||||||
describe('QueueProcessor logging', () => {
|
describe('QueueProcessor logging', () => {
|
||||||
@@ -50,8 +50,8 @@ describe('QueueProcessor logging', () => {
|
|||||||
(complexError as any).code = 'ERR_TEST';
|
(complexError as any).code = 'ERR_TEST';
|
||||||
(complexError as any).details = { phase: 'extraction', retries: 3 };
|
(complexError as any).details = { phase: 'extraction', retries: 3 };
|
||||||
|
|
||||||
// Mock extraction to fail BEFORE starting processor
|
// Mock extraction to fail BEFORE starting processor (default backend = ytdlp)
|
||||||
const extractSpy = vi.spyOn(extraction, 'extractTextAndThumbnail');
|
const extractSpy = vi.spyOn(instagramExtractor, 'extractTextAndThumbnail');
|
||||||
extractSpy.mockRejectedValueOnce(complexError);
|
extractSpy.mockRejectedValueOnce(complexError);
|
||||||
|
|
||||||
const item = queueManager.enqueue('https://instagram.com/p/TEST');
|
const item = queueManager.enqueue('https://instagram.com/p/TEST');
|
||||||
|
|||||||
@@ -35,13 +35,21 @@ vi.mock('$lib/server/queue/config', () => ({
|
|||||||
}
|
}
|
||||||
}));
|
}));
|
||||||
|
|
||||||
// Mock external dependencies BEFORE importing QueueProcessor
|
// Mock external dependencies BEFORE importing QueueProcessor.
|
||||||
|
// QueueProcessor.extractionPhase picks between two extractor modules based on
|
||||||
|
// EXTRACTOR_BACKEND; mock both so behavior is identical regardless of default.
|
||||||
vi.mock('$lib/server/extraction', () => ({
|
vi.mock('$lib/server/extraction', () => ({
|
||||||
extractTextAndThumbnail: vi.fn().mockResolvedValue({
|
extractTextAndThumbnail: vi.fn().mockResolvedValue({
|
||||||
bodyText: 'Default recipe text',
|
bodyText: 'Default recipe text',
|
||||||
thumbnail: null
|
thumbnail: null
|
||||||
})
|
})
|
||||||
}));
|
}));
|
||||||
|
vi.mock('$lib/server/instagram-extractor', () => ({
|
||||||
|
extractTextAndThumbnail: vi.fn().mockResolvedValue({
|
||||||
|
bodyText: 'Default recipe text',
|
||||||
|
thumbnail: null
|
||||||
|
})
|
||||||
|
}));
|
||||||
|
|
||||||
vi.mock('$lib/server/parser', () => ({
|
vi.mock('$lib/server/parser', () => ({
|
||||||
extractRecipe: vi.fn().mockResolvedValue({
|
extractRecipe: vi.fn().mockResolvedValue({
|
||||||
@@ -62,11 +70,16 @@ vi.mock('$lib/server/tandoor', () => ({
|
|||||||
})
|
})
|
||||||
}));
|
}));
|
||||||
|
|
||||||
import { extractTextAndThumbnail } from '$lib/server/extraction';
|
import { extractTextAndThumbnail as extractFromExtraction } from '$lib/server/extraction';
|
||||||
|
import { extractTextAndThumbnail as extractFromYtDlp } from '$lib/server/instagram-extractor';
|
||||||
import { extractRecipe } from '$lib/server/parser';
|
import { extractRecipe } from '$lib/server/parser';
|
||||||
import { uploadRecipeWithIngredientsDTO, uploadRecipeImage } from '$lib/server/tandoor';
|
import { uploadRecipeWithIngredientsDTO, uploadRecipeImage } from '$lib/server/tandoor';
|
||||||
import * as configModule from '$lib/server/queue/config';
|
import * as configModule from '$lib/server/queue/config';
|
||||||
|
|
||||||
|
// Alias used by existing assertions; default backend is ytdlp so the new
|
||||||
|
// instagram-extractor mock is what the processor actually invokes.
|
||||||
|
const extractTextAndThumbnail = extractFromYtDlp;
|
||||||
|
|
||||||
// Import processor AFTER mocks - it will auto-start (imported for side effects)
|
// Import processor AFTER mocks - it will auto-start (imported for side effects)
|
||||||
import '$lib/server/queue/QueueProcessor';
|
import '$lib/server/queue/QueueProcessor';
|
||||||
|
|
||||||
@@ -78,8 +91,13 @@ describe('QueueProcessor Integration Tests', () => {
|
|||||||
// Reset mocks and their implementations
|
// Reset mocks and their implementations
|
||||||
vi.resetAllMocks();
|
vi.resetAllMocks();
|
||||||
|
|
||||||
// Set default mock implementations
|
// Set default mock implementations on BOTH backend modules so the test
|
||||||
vi.mocked(extractTextAndThumbnail).mockResolvedValue({
|
// behavior is invariant to EXTRACTOR_BACKEND.
|
||||||
|
vi.mocked(extractFromExtraction).mockResolvedValue({
|
||||||
|
bodyText: 'Default recipe text',
|
||||||
|
thumbnail: null
|
||||||
|
});
|
||||||
|
vi.mocked(extractFromYtDlp).mockResolvedValue({
|
||||||
bodyText: 'Default recipe text',
|
bodyText: 'Default recipe text',
|
||||||
thumbnail: null
|
thumbnail: null
|
||||||
});
|
});
|
||||||
|
|||||||
Reference in New Issue
Block a user