feat: replace Playwright extractor with yt-dlp subprocess

- Add instagram-extractor.ts: yt-dlp subprocess backend for Instagram caption extraction. No in-process browser state, maintained against Instagram frontend churn, supports cookies.txt for auth-walled reels.
- Add feature flag EXTRACTOR_BACKEND (ytdlp|playwright) in QueueProcessor so the old Playwright path remains available as fallback.
- Add 9 unit tests and 2 live-network integration tests for the new extractor.
- Dockerfile: install yt-dlp via pip3 alongside existing Chromium deps.
- docker-compose: expose EXTRACTOR_BACKEND env var (default: ytdlp).

Also in this commit:

- LLM: configurable per-request timeout via LLM_REQUEST_TIMEOUT_MS (default 120s); set maxRetries=0 to surface errors immediately; llama-swap /running health probe.
- QueueProcessor: thread progress callback through parser phase.
- LlmHealthIndicator: surface llama-swap loaded-model name.
- Logging: improve error serialization in queue-processor tests.
- .env.example: document llama-swap endpoint and model options.

Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
2026-05-12 20:46:31 +02:00
parent 6849a1fb26
commit 5b5bb947ef
14 changed files with 628 additions and 50 deletions
--- a/src/routes/api/llm-health/+server.ts
+++ b/src/routes/api/llm-health/+server.ts
@@ -1,34 +1,48 @@
 import { json } from '@sveltejs/kit';
-import { checkLLMHealth } from '$lib/server/llm';
+import { env } from '$env/dynamic/private';
+import { checkLLMHealth, isModelLoaded } from '$lib/server/llm';

 /**
- * Health check endpoint for LLM service
- * Tests connectivity to LM Studio or OpenAI-compatible endpoint
+ * Health check endpoint for the LLM service (llama-swap on ideapad).
+ *
+ * Three states:
+ *  - ok      → endpoint reachable AND configured model is loaded in VRAM
+ *  - warming → endpoint reachable but configured model not yet loaded
+ *              (next request will trigger a cold load)
+ *  - error   → endpoint unreachable
 */
 export async function GET() {
 	try {
-		const isHealthy = await checkLLMHealth();
+		const reachable = await checkLLMHealth();
+		const configuredModel = env.LLM_MODEL || 'gpt-4o';

-		if (isHealthy) {
-			return json({
-				status: 'healthy',
-				message: 'LLM service is accessible'
-			});
-		} else {
+		if (!reachable) {
 			return json(
 				{
-					status: 'unhealthy',
-					message: 'LLM service is not accessible'
+					status: 'error',
+					message: 'LLM service is not accessible',
+					configuredModel
 				},
 				{ status: 503 }
 			);
 		}
+
+		const warm = await isModelLoaded(configuredModel);
+		return json({
+			status: warm ? 'ok' : 'warming',
+			message: warm
+				? `Model ${configuredModel} loaded and ready`
+				: `Model ${configuredModel} configured; next request will trigger a cold load`,
+			configuredModel,
+			loaded: warm
+		});
 	} catch (error) {
 		const errorMessage = error instanceof Error ? error.message : 'Unknown error';
 		return json(
 			{
 				status: 'error',
-				message: errorMessage
+				message: errorMessage,
+				configuredModel: env.LLM_MODEL || 'gpt-4o'
 			},
 			{ status: 500 }
 		);