feat: replace Playwright extractor with yt-dlp subprocess

- Add instagram-extractor.ts: yt-dlp subprocess backend for Instagram caption extraction. No in-process browser state, maintained against Instagram frontend churn, supports cookies.txt for auth-walled reels.
- Add feature flag EXTRACTOR_BACKEND (ytdlp|playwright) in QueueProcessor so the old Playwright path remains available as fallback.
- Add 9 unit tests and 2 live-network integration tests for the new extractor.
- Dockerfile: install yt-dlp via pip3 alongside existing Chromium deps.
- docker-compose: expose EXTRACTOR_BACKEND env var (default: ytdlp).

Also in this commit:

- LLM: configurable per-request timeout via LLM_REQUEST_TIMEOUT_MS (default 120s); set maxRetries=0 to surface errors immediately; llama-swap /running health probe.
- QueueProcessor: thread progress callback through parser phase.
- LlmHealthIndicator: surface llama-swap loaded-model name.
- Logging: improve error serialization in queue-processor tests.
- .env.example: document llama-swap endpoint and model options.

Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
2026-05-12 20:46:31 +02:00
parent 6849a1fb26
commit 5b5bb947ef
14 changed files with 628 additions and 50 deletions
--- a/.env.example
+++ b/.env.example
@@ -7,15 +7,23 @@
 # ==============================================================================
 # LLM Configuration (REQUIRED)
 # ==============================================================================
-# OpenAI-compatible API endpoint (OpenAI, LM Studio, Ollama, LiteLLM, etc.)
-OPENAI_BASE_URL=http://localhost:1234/v1
+# OpenAI-compatible API endpoint. Production: llama-swap on ideapad.
+# llama-swap loads models on demand and unloads them after globalTTL (10 min).
+OPENAI_BASE_URL=http://192.168.1.50:8080/v1

-# API key for authentication
-OPENAI_API_KEY=your-api-key-here
+# API key for authentication (llama-swap accepts any non-empty value).
+OPENAI_API_KEY=sk-llama-local

-# Model to use for recipe extraction
-# Examples: gpt-4o, gpt-4o-mini, llama-3.1, mistral, etc.
-LLM_MODEL=google/gemma-3-4b
+# Model to use for recipe extraction. Available on the ideapad llama-swap stack:
+#   gemma4-e4b-q6k       (recommended — 4B, 65k ctx, 31 TPS)
+#   gemma4-e2b-q8_0      (faster — 2B, 65k ctx, 55 TPS)
+#   qwen3.5-4b-q8_0      (fallback — 22 TPS)
+#   phi4-mini-q8_0, granite-3.3-8b-q6k, plus larger MoE variants
+LLM_MODEL=gemma4-e4b-q6k
+
+# Per-request LLM timeout in ms. Must cover llama-swap cold-load (~5–30s for
+# small models) plus generation time. Default: 120000 (120 s).
+LLM_REQUEST_TIMEOUT_MS=120000

 # ==============================================================================
 # Queue Configuration (OPTIONAL)
@@ -55,9 +63,23 @@ VAPID_PUBLIC_KEY=BNextdcB_fQ0BVvyGioM5L8Tf9vKQjs-WnF-rUbnU8MdWIZQYfggIHxBnW21I-l
 VAPID_PRIVATE_KEY=JwxI_KcsBcehYcTOufMcbVWJjCq1QbH5FJmSyQuG680

 # ==============================================================================
-# Authentication Scheduler (OPTIONAL)
+# Instagram Extraction Backend
 # ==============================================================================
-# Enable automatic Instagram authentication renewal
+# Which extractor to use:
+#   ytdlp      (default) — yt-dlp subprocess, stateless, Sablier-safe
+#   playwright           — legacy Playwright stealth scraper, requires
+#                          secrets/auth.json + AUTH_SCHEDULER_* below
+EXTRACTOR_BACKEND=ytdlp
+
+# Optional Netscape-format cookies file for login-walled reels.
+# yt-dlp picks it up automatically if it exists at /app/secrets/cookies.txt
+# (Docker) or ./secrets/cookies.txt (local). Creating and refreshing the file
+# is manual: export it from a browser when extractions start hitting login walls.
+
+# ==============================================================================
+# Authentication Scheduler (LEGACY — only relevant when EXTRACTOR_BACKEND=playwright)
+# ==============================================================================
+# Enable automatic Instagram authentication renewal (Playwright backend only)
 AUTH_SCHEDULER_ENABLED=true

 # Renewal interval in minutes (default: 720 = 12 hours)