- Add instagram-extractor.ts: yt-dlp subprocess backend for Instagram caption extraction. No in-process browser state, maintained against Instagram frontend churn, supports cookies.txt for auth-walled reels. - Add feature flag EXTRACTOR_BACKEND (ytdlp|playwright) in QueueProcessor so the old Playwright path remains available as fallback. - Add 9 unit tests and 2 live-network integration tests for the new extractor. - Dockerfile: install yt-dlp via pip3 alongside existing Chromium deps. - docker-compose: expose EXTRACTOR_BACKEND env var (default: ytdlp). Also in this commit: - LLM: configurable per-request timeout via LLM_REQUEST_TIMEOUT_MS (default 120s); set maxRetries=0 to surface errors immediately; llama-swap /running health probe. - QueueProcessor: thread progress callback through parser phase. - LlmHealthIndicator: surface llama-swap loaded-model name. - Logging: improve error serialization in queue-processor tests. - .env.example: document llama-swap endpoint and model options. Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
96 lines
4.1 KiB
Plaintext
96 lines
4.1 KiB
Plaintext
# ==============================================================================
|
||
# InstaRecipe - Environment Configuration
|
||
# ==============================================================================
|
||
# Copy this file to .env and update with your values
|
||
# Some variables have sensible defaults and are optional
|
||
|
||
# ==============================================================================
|
||
# LLM Configuration (REQUIRED)
|
||
# ==============================================================================
|
||
# OpenAI-compatible API endpoint. Production: llama-swap on ideapad.
|
||
# llama-swap loads models on demand and unloads them after globalTTL (10 min).
|
||
OPENAI_BASE_URL=http://192.168.1.50:8080/v1
|
||
|
||
# API key for authentication (llama-swap accepts any non-empty value).
|
||
OPENAI_API_KEY=sk-llama-local
|
||
|
||
# Model to use for recipe extraction. Available on the ideapad llama-swap stack:
|
||
# gemma4-e4b-q6k (recommended — 4B, 65k ctx, 31 TPS)
|
||
# gemma4-e2b-q8_0 (faster — 2B, 65k ctx, 55 TPS)
|
||
# qwen3.5-4b-q8_0 (fallback — 22 TPS)
|
||
# phi4-mini-q8_0, granite-3.3-8b-q6k, plus larger MoE variants
|
||
LLM_MODEL=gemma4-e4b-q6k
|
||
|
||
# Per-request LLM timeout in ms. Must cover llama-swap cold-load (~5–30s for
|
||
# small models) plus generation time. Default 120000.
|
||
LLM_REQUEST_TIMEOUT_MS=120000
|
||
|
||
# ==============================================================================
|
||
# Queue Configuration (OPTIONAL)
|
||
# ==============================================================================
|
||
# Number of recipes to process simultaneously (default: 2)
|
||
QUEUE_CONCURRENCY=2
|
||
|
||
# Maximum retry attempts for failed extractions (default: 3)
|
||
QUEUE_MAX_RETRIES=3
|
||
|
||
# ==============================================================================
|
||
# Tandoor Integration (OPTIONAL)
|
||
# ==============================================================================
|
||
# Enable automatic upload to Tandoor Recipe Manager
|
||
TANDOOR_ENABLED=true
|
||
|
||
# Tandoor server URL (no trailing slash)
|
||
TANDOOR_SERVER_URL=https://cook.gsalucci.cloud
|
||
|
||
# Tandoor space ID (default: 1)
|
||
TANDOOR_SPACE=1
|
||
|
||
# Tandoor API token (generate in Tandoor settings)
|
||
TANDOOR_TOKEN=your-tandoor-api-token
|
||
|
||
# ==============================================================================
|
||
# Push Notifications (OPTIONAL)
|
||
# ==============================================================================
|
||
# Web Push VAPID keys for browser notifications
|
||
# Generate with: npx web-push generate-vapid-keys
|
||
# Default keys are provided for testing but should be changed in production
|
||
|
||
# VAPID Public Key
|
||
VAPID_PUBLIC_KEY=BNextdcB_fQ0BVvyGioM5L8Tf9vKQjs-WnF-rUbnU8MdWIZQYfggIHxBnW21I-lq_0HykLCdMpYj8d5joavWdxQ
|
||
|
||
# VAPID Private Key
|
||
VAPID_PRIVATE_KEY=JwxI_KcsBcehYcTOufMcbVWJjCq1QbH5FJmSyQuG680
|
||
|
||
# ==============================================================================
|
||
# Instagram Extraction Backend
|
||
# ==============================================================================
|
||
# Which extractor to use:
|
||
# ytdlp (default) — yt-dlp subprocess, stateless, Sablier-safe
|
||
# playwright — legacy Playwright stealth scraper, requires
|
||
# secrets/auth.json + AUTH_SCHEDULER_* below
|
||
EXTRACTOR_BACKEND=ytdlp
|
||
|
||
# Optional Netscape-format cookies file for login-walled reels.
|
||
# yt-dlp picks it up automatically if it exists at /app/secrets/cookies.txt
|
||
# (Docker) or ./secrets/cookies.txt (local). No automation; export from a
|
||
# browser when an extraction starts hitting login walls.
|
||
|
||
# ==============================================================================
|
||
# Authentication Scheduler (LEGACY — only relevant when EXTRACTOR_BACKEND=playwright)
|
||
# ==============================================================================
|
||
# Enable automatic Instagram authentication renewal (Playwright backend only)
|
||
AUTH_SCHEDULER_ENABLED=true
|
||
|
||
# Renewal interval in minutes (default: 720 = 12 hours; this example overrides it to 15)
|
||
AUTH_SCHEDULER_INTERVAL_MINUTES=15
|
||
|
||
# ==============================================================================
|
||
# Development Settings
|
||
# ==============================================================================
|
||
# Node.js environment (production or development)
|
||
NODE_ENV=production
|
||
|
||
# Port for the application (default: 3000)
|
||
PORT=3000
|