feat: replace Playwright extractor with yt-dlp subprocess
- Add instagram-extractor.ts: a yt-dlp subprocess backend for Instagram caption extraction. It keeps no in-process browser state, is maintained against Instagram frontend churn, and supports cookies.txt for auth-walled reels.
- Add feature flag EXTRACTOR_BACKEND (ytdlp|playwright) in QueueProcessor so the old Playwright path remains available as a fallback.
- Add 9 unit tests and 2 live-network integration tests for the new extractor.
- Dockerfile: install yt-dlp via pip3 alongside the existing Chromium deps.
- docker-compose: expose the EXTRACTOR_BACKEND env var (default: ytdlp).

Also in this commit:
- LLM: configurable per-request timeout via LLM_REQUEST_TIMEOUT_MS (default 120s); set maxRetries=0 to surface errors immediately; add a llama-swap /running health probe.
- QueueProcessor: thread the progress callback through the parser phase.
- LlmHealthIndicator: surface the llama-swap loaded-model name.
- Logging: improve error serialization in the queue-processor tests.
- .env.example: document the llama-swap endpoint and model options.

Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
This commit is contained in:
@@ -35,13 +35,21 @@ vi.mock('$lib/server/queue/config', () => ({
|
||||
}
|
||||
}));
|
||||
|
||||
// Mock external dependencies BEFORE importing QueueProcessor
|
||||
// Mock external dependencies BEFORE importing QueueProcessor.
|
||||
// QueueProcessor.extractionPhase picks between two extractor modules based on
|
||||
// EXTRACTOR_BACKEND; mock both so behavior is identical regardless of default.
|
||||
vi.mock('$lib/server/extraction', () => ({
|
||||
extractTextAndThumbnail: vi.fn().mockResolvedValue({
|
||||
bodyText: 'Default recipe text',
|
||||
thumbnail: null
|
||||
})
|
||||
}));
|
||||
vi.mock('$lib/server/instagram-extractor', () => ({
|
||||
extractTextAndThumbnail: vi.fn().mockResolvedValue({
|
||||
bodyText: 'Default recipe text',
|
||||
thumbnail: null
|
||||
})
|
||||
}));
|
||||
|
||||
vi.mock('$lib/server/parser', () => ({
|
||||
extractRecipe: vi.fn().mockResolvedValue({
|
||||
@@ -62,11 +70,16 @@ vi.mock('$lib/server/tandoor', () => ({
|
||||
})
|
||||
}));
|
||||
|
||||
import { extractTextAndThumbnail } from '$lib/server/extraction';
|
||||
import { extractTextAndThumbnail as extractFromExtraction } from '$lib/server/extraction';
|
||||
import { extractTextAndThumbnail as extractFromYtDlp } from '$lib/server/instagram-extractor';
|
||||
import { extractRecipe } from '$lib/server/parser';
|
||||
import { uploadRecipeWithIngredientsDTO, uploadRecipeImage } from '$lib/server/tandoor';
|
||||
import * as configModule from '$lib/server/queue/config';
|
||||
|
||||
// Alias used by existing assertions; default backend is ytdlp so the new
|
||||
// instagram-extractor mock is what the processor actually invokes.
|
||||
const extractTextAndThumbnail = extractFromYtDlp;
|
||||
|
||||
// Import processor AFTER mocks - it will auto-start (imported for side effects)
|
||||
import '$lib/server/queue/QueueProcessor';
|
||||
|
||||
@@ -78,8 +91,13 @@ describe('QueueProcessor Integration Tests', () => {
|
||||
// Reset mocks and their implementations
|
||||
vi.resetAllMocks();
|
||||
|
||||
// Set default mock implementations
|
||||
vi.mocked(extractTextAndThumbnail).mockResolvedValue({
|
||||
// Set default mock implementations on BOTH backend modules so the test
|
||||
// behavior is invariant to EXTRACTOR_BACKEND.
|
||||
vi.mocked(extractFromExtraction).mockResolvedValue({
|
||||
bodyText: 'Default recipe text',
|
||||
thumbnail: null
|
||||
});
|
||||
vi.mocked(extractFromYtDlp).mockResolvedValue({
|
||||
bodyText: 'Default recipe text',
|
||||
thumbnail: null
|
||||
});
|
||||
|
||||
Reference in New Issue
Block a user