feat: replace Playwright extractor with yt-dlp subprocess

- Add instagram-extractor.ts: yt-dlp subprocess backend for Instagram
  caption extraction. It keeps no in-process browser state, yt-dlp is
  maintained against Instagram frontend churn, and it supports
  cookies.txt for auth-walled reels.
- Add feature flag EXTRACTOR_BACKEND (ytdlp|playwright) in QueueProcessor
  so the old Playwright path remains available as fallback.
- Add 9 unit tests and 2 live-network integration tests for the new extractor.
- Dockerfile: install yt-dlp via pip3 alongside existing Chromium deps.
- docker-compose: expose EXTRACTOR_BACKEND env var (default: ytdlp).

Also in this commit:
- LLM: make the per-request timeout configurable via LLM_REQUEST_TIMEOUT_MS
  (default 120s); set maxRetries=0 to surface errors immediately; add a
  llama-swap /running health probe.
- QueueProcessor: thread progress callback through parser phase.
- LlmHealthIndicator: surface llama-swap loaded-model name.
- Logging: improve error serialization in queue-processor tests.
- .env.example: document llama-swap endpoint and model options.

Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
This commit is contained in:
Giancarmine Salucci
2026-05-12 20:46:31 +02:00
parent 6849a1fb26
commit 5b5bb947ef
14 changed files with 628 additions and 50 deletions

View File

@@ -1,8 +1,9 @@
import { createLLM, checkModelAvailability } from './llm';
import { createLLM, checkModelAvailability, isModelLoaded } from './llm';
import { zodResponseFormat } from 'openai/helpers/zod';
import { z } from 'zod';
import { RECIPE_DETECTION_PROMPT, RECIPE_EXTRACTION_PROMPT } from './prompts/recipe-extraction';
import { logError } from './utils/logger';
import type { ProgressCallback } from './extraction';
const RecipeSchema = z.object({
name: z.string(),
@@ -144,11 +145,33 @@ export async function parseRecipe(text: string): Promise<Recipe> {
}
/**
* Complete workflow: detect recipe and parse if found
* Complete workflow: detect recipe and parse if found.
*
* Emits a `model_loading` progress event (if a callback is supplied) when the
* configured llama-swap model is not yet warm — the first request after idle
* blocks for several seconds while llama-swap loads the model into VRAM.
*
* @param text - The text to analyze
* @param progressCallback - Optional callback for surfacing cold-load state
* @returns Parsed recipe object if detected, null otherwise
*/
export async function extractRecipe(text: string): Promise<Recipe | null> {
export async function extractRecipe(
text: string,
progressCallback?: ProgressCallback
): Promise<Recipe | null> {
if (progressCallback) {
const { model } = createLLM();
const warm = await isModelLoaded(model);
if (!warm) {
progressCallback({
type: 'model_loading',
message: `Inference server cold — loading ${model} into VRAM (530s)...`,
data: { model },
timestamp: new Date().toISOString()
});
}
}
const isRecipe = await detectRecipe(text);
if (!isRecipe) {