fix(RECIPE-0006): complete iteration 0 — fix Instagram recipe extraction

This commit is contained in:
Giancarmine Salucci
2026-02-17 10:14:52 +01:00
parent b0b5c3579b
commit b304f5266a
2 changed files with 73 additions and 13 deletions

View File

@@ -0,0 +1,47 @@
import { describe, it, expect } from 'vitest';
import { extractTextAndThumbnail } from '$lib/server/extraction';
/**
 * End-to-end suite for Instagram caption extraction.
 *
 * Runs `extractTextAndThumbnail` against one fixed reel URL and asserts on the
 * returned `bodyText`: no leading likes/comments/author metadata, no hashtags,
 * the expected recipe keywords present, and no ellipsis.
 *
 * NOTE(review): presumably this hits the live Instagram page (hence the 30 s
 * timeout passed to `it`) — confirm before running it in offline CI.
 */
describe('Instagram Caption Extraction E2E', () => {
  it('should extract complete recipe without metadata prefix', async () => {
    const testUrl = 'https://www.instagram.com/reel/DP6oN7JCEo8/?utm_source=ig_web_button_share_sheet';
    const result = await extractTextAndThumbnail(testUrl);

    // Verify extraction succeeded
    expect(result).toBeDefined();
    expect(result.bodyText).toBeDefined();

    // Read the text once, only after the checks above have proven it exists.
    const { bodyText } = result;
    expect(bodyText.length).toBeGreaterThan(100);

    console.log('[Test] Extracted text length:', bodyText.length);
    console.log('[Test] First 200 chars:', bodyText.substring(0, 200));

    // Should NOT contain metadata prefix patterns.
    // A single like-count pattern replaces the two previous ones (the plain
    // `\d+ likes,` form was already covered by the optional `K`), and it also
    // rejects counts written as "1,234", "12.5K" or "1.2M".
    expect(bodyText).not.toMatch(/^\d[\d.,]*[KM]?\s+likes,/i);
    expect(bodyText).not.toMatch(/\d+\s+comments/);
    // "<word> on <word> <digits>" — assumed to be the "<author> on <date>" part
    // of the metadata prefix; TODO confirm against a real page.
    expect(bodyText).not.toMatch(/\w+\s+on\s+\w+\s+\d+/);

    // Should start with recipe title
    expect(bodyText).toMatch(/^La cacio e pepe/i);

    // Should NOT contain hashtags at the end
    expect(bodyText).not.toMatch(/#\w+\s*$/);
    expect(bodyText).not.toContain('#cacioepepe');
    expect(bodyText).not.toContain('#ricettefacili');

    // Should contain ingredients section
    expect(bodyText).toContain('pecorino');
    expect(bodyText).toContain('pepe');

    // Should contain procedure section
    expect(bodyText).toContain('pasta');
    expect(bodyText).toContain('acqua');

    // Should NOT be truncated (an ASCII "..." anywhere counts as truncation)
    expect(bodyText).not.toContain('...');
  }, 30000);

  it.skip('should handle invalid Instagram URL gracefully', async () => {
    // Placeholder for future test
  });
});