feat: replace Playwright extractor with yt-dlp subprocess

- Add instagram-extractor.ts: yt-dlp subprocess backend for Instagram caption extraction. No in-process browser state, maintained against Instagram frontend churn, supports cookies.txt for auth-walled reels.
- Add feature flag EXTRACTOR_BACKEND (ytdlp|playwright) in QueueProcessor so the old Playwright path remains available as fallback.
- Add 9 unit tests and 2 live-network integration tests for the new extractor.
- Dockerfile: install yt-dlp via pip3 alongside existing Chromium deps.
- docker-compose: expose EXTRACTOR_BACKEND env var (default: ytdlp).

Also in this commit:
- LLM: configurable per-request timeout via LLM_REQUEST_TIMEOUT_MS (default 120s); set maxRetries=0 to surface errors immediately; llama-swap /running health probe.
- QueueProcessor: thread progress callback through parser phase.
- LlmHealthIndicator: surface llama-swap loaded-model name.
- Logging: improve error serialization in queue-processor tests.
- .env.example: document llama-swap endpoint and model options.

Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
2026-05-12 20:46:31 +02:00
parent 6849a1fb26
commit 5b5bb947ef
14 changed files with 628 additions and 50 deletions
--- a/src/lib/server/parser.ts
+++ b/src/lib/server/parser.ts
@@ -1,8 +1,9 @@
-import { createLLM, checkModelAvailability } from './llm';
+import { createLLM, checkModelAvailability, isModelLoaded } from './llm';
 import { zodResponseFormat } from 'openai/helpers/zod';
 import { z } from 'zod';
 import { RECIPE_DETECTION_PROMPT, RECIPE_EXTRACTION_PROMPT } from './prompts/recipe-extraction';
 import { logError } from './utils/logger';
+import type { ProgressCallback } from './extraction';

 const RecipeSchema = z.object({
 	name: z.string(),
@@ -144,11 +145,33 @@ export async function parseRecipe(text: string): Promise<Recipe> {
 }

 /**
- * Complete workflow: detect recipe and parse if found
+ * Complete workflow: detect recipe and parse if found.
+ *
+ * Emits a `model_loading` progress event (if a callback is supplied) when the
+ * configured llama-swap model is not yet warm — the first request after idle
+ * blocks for several seconds while llama-swap loads the model into VRAM.
+ *
 * @param text - The text to analyze
+ * @param progressCallback - Optional callback for surfacing cold-load state
 * @returns Parsed recipe object if detected, null otherwise
 */
-export async function extractRecipe(text: string): Promise<Recipe | null> {
+export async function extractRecipe(
+	text: string,
+	progressCallback?: ProgressCallback
+): Promise<Recipe | null> {
+	if (progressCallback) {
+		const { model } = createLLM();
+		const warm = await isModelLoaded(model);
+		if (!warm) {
+			progressCallback({
+				type: 'model_loading',
+				message: `Inference server cold — loading ${model} into VRAM (5–30s)...`,
+				data: { model },
+				timestamp: new Date().toISOString()
+			});
+		}
+	}
+
 	const isRecipe = await detectRecipe(text);

 	if (!isRecipe) {