fix(RECIPE-0006): complete iteration 0 — fix Instagram recipe extraction
This commit is contained in:
47
src/tests/instagram-caption-extraction.e2e.spec.ts
Normal file
47
src/tests/instagram-caption-extraction.e2e.spec.ts
Normal file
@@ -0,0 +1,47 @@
|
||||
import { describe, it, expect } from 'vitest';
|
||||
import { extractTextAndThumbnail } from '$lib/server/extraction';
|
||||
|
||||
/**
 * End-to-end suite for Instagram caption extraction.
 *
 * Calls `extractTextAndThumbnail` with a fixed Instagram reel URL and checks
 * that the returned `bodyText` is the cleaned recipe caption: no
 * "N likes, M comments - user on <date>" metadata prefix, no hashtags,
 * and no truncation marker.
 *
 * NOTE(review): this presumably performs a live fetch against Instagram
 * (hence the 30 s timeout), so it depends on network access and on the
 * referenced reel staying online with its current caption — confirm before
 * running it in CI.
 */
describe('Instagram Caption Extraction E2E', () => {
  it('should extract complete recipe without metadata prefix', async () => {
    const testUrl = 'https://www.instagram.com/reel/DP6oN7JCEo8/?utm_source=ig_web_button_share_sheet';

    const result = await extractTextAndThumbnail(testUrl);

    // Verify extraction succeeded
    expect(result).toBeDefined();
    expect(result.bodyText).toBeDefined();
    expect(result.bodyText.length).toBeGreaterThan(100);

    // Debug output to make failures against the live page easier to diagnose.
    console.log('[Test] Extracted text length:', result.bodyText.length);
    console.log('[Test] First 200 chars:', result.bodyText.substring(0, 200));

    // Should NOT contain metadata prefix patterns.
    // A single pattern covers plain ("123 likes,"), abbreviated ("12K likes,",
    // "1.2M likes,") and thousands-separated ("1,234 likes,") like counts.
    expect(result.bodyText).not.toMatch(/^[\d.,]+[KM]?\s+likes,/i);
    expect(result.bodyText).not.toMatch(/\d+\s+comments/);
    // "<user> on <Month> <day>" fragment of the metadata prefix. Unanchored, so
    // it also rejects any "<word> on <word> <number>" sequence inside the caption.
    expect(result.bodyText).not.toMatch(/\w+\s+on\s+\w+\s+\d+/);

    // Should start with recipe title
    expect(result.bodyText).toMatch(/^La cacio e pepe/i);

    // Should NOT contain hashtags at the end.
    // Unicode-aware character class so accented tags (e.g. "#caffè") are
    // detected too; ASCII-only \w would stop at the accented letter.
    expect(result.bodyText).not.toMatch(/#[\p{L}\p{N}_]+\s*$/u);
    expect(result.bodyText).not.toContain('#cacioepepe');
    expect(result.bodyText).not.toContain('#ricettefacili');

    // Should contain ingredients section
    expect(result.bodyText).toContain('pecorino');
    expect(result.bodyText).toContain('pepe');

    // Should contain procedure section
    expect(result.bodyText).toContain('pasta');
    expect(result.bodyText).toContain('acqua');

    // Should NOT be truncated (checks for an ASCII ellipsis anywhere in the text)
    expect(result.bodyText).not.toContain('...');
  }, 30000);

  // Not implemented yet; `todo` reports it as pending work instead of a skipped test.
  it.todo('should handle invalid Instagram URL gracefully');
});
|
||||
Reference in New Issue
Block a user