refactor(transcript): drop Tonemark rewrite

Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
2026-05-12 00:10:32 +02:00
parent df50e74939
commit 929c482497
10 changed files with 161 additions and 540 deletions
--- a/src/tests/downloader.test.ts
+++ b/src/tests/downloader.test.ts
@@ -0,0 +1,80 @@
+import { afterEach, beforeEach, describe, expect, it, vi } from 'vitest';
+import { rm } from 'fs/promises';
+import type { TranscriptResponse } from 'youtube-transcript';
+
+// Create the mock functions inside vi.hoisted so they already exist when the
+// vi.mock factories below (which vitest moves ahead of the imports) reference them.
+const { mockExecFile, mockFetchTranscript } = vi.hoisted(() => ({
+	mockExecFile: vi.fn(),
+	mockFetchTranscript: vi.fn()
+}));
+
+// Scratch directory whose name is made unique per run via Date.now(); it is published
+// through the DATA_DIR environment variable and removed again in afterEach.
+// NOTE(review): vi.stubEnv is not hoisted, so the static import of the downloader
+// module may be evaluated before this line runs. If that module reads DATA_DIR at
+// import time it would not see the stub — confirm against the downloader source.
+const TEST_DATA_DIR = `/tmp/tonemark-downloader-test-${Date.now()}`;
+vi.stubEnv('DATA_DIR', TEST_DATA_DIR);
+
+// Swap child_process.execFile for the mock so no real process is spawned by the tests.
+vi.mock('child_process', () => ({
+	execFile: mockExecFile
+}));
+
+// Swap youtube-transcript's fetchTranscript for the mock so no network request is made.
+vi.mock('youtube-transcript', () => ({
+	fetchTranscript: mockFetchTranscript
+}));
+
+import { downloadYouTube, transcriptEntriesToSegments } from '$lib/server/downloader.js';
+
+/**
+ * Per-test setup: wipes recorded mock calls, then installs a default execFile stub
+ * that synchronously invokes its trailing callback with no error, a JSON payload
+ * containing a title, and an empty third argument.
+ */
+beforeEach(() => {
+	vi.clearAllMocks();
+
+	const metadataJson = JSON.stringify({ title: 'Fetched Title' });
+	mockExecFile.mockImplementation((...execArgs: unknown[]) => {
+		// The callback is whatever was passed last, regardless of the preceding arguments.
+		const done = execArgs[execArgs.length - 1] as (...callbackArgs: unknown[]) => void;
+		done(null, metadataJson, '');
+	});
+});
+
+/** Per-test teardown: best-effort recursive removal of the scratch data directory. */
+afterEach(async () => {
+	try {
+		await rm(TEST_DATA_DIR, { recursive: true, force: true });
+	} catch {
+		// Deliberately ignored: a failed cleanup must not fail the test run.
+	}
+});
+
+// Tests for converting raw transcript entries into indexed, second-based segments.
+describe('transcriptEntriesToSegments', () => {
+	it('converts millisecond transcript offsets into second-based segments', () => {
+		const millisecondCues: TranscriptResponse[] = [
+			{ text: 'Hello everyone.', offset: 15240, duration: 4240, lang: 'en' },
+			{ text: 'Um, welcome to this talk.', offset: 16600, duration: 5080, lang: 'en' }
+		];
+
+		const segments = transcriptEntriesToSegments(millisecondCues);
+
+		// Expected: offsets/durations given as 15240/4240 and 16600/5080 come back
+		// as 15.24–19.48 and 16.6–21.68, each with an empty words list.
+		const expectedSegments = [
+			{ index: 0, start: 15.24, end: 19.48, text: 'Hello everyone.', words: [] },
+			{ index: 1, start: 16.6, end: 21.68, text: 'Um, welcome to this talk.', words: [] }
+		];
+		expect(segments).toEqual(expectedSegments);
+	});
+
+	it('preserves second-based transcript offsets and drops empty text', () => {
+		// First cue is whitespace only; second cue already carries fractional values.
+		const secondBasedCues: TranscriptResponse[] = [
+			{ text: '  ', offset: 0, duration: 1.5, lang: 'en' },
+			{ text: 'Clean caption cue', offset: 91.08, duration: 3.72, lang: 'en' }
+		];
+
+		const segments = transcriptEntriesToSegments(secondBasedCues);
+
+		// Only the non-blank cue is expected to survive, and it is expected at index 0.
+		expect(segments).toEqual([
+			{ index: 0, start: 91.08, end: 94.8, text: 'Clean caption cue', words: [] }
+		]);
+	});
+});
+
+// Test for the caption path of downloadYouTube, driven entirely by the mocked transcript fetcher.
+describe('downloadYouTube', () => {
+	it('uses fetched transcript entries directly for caption jobs', async () => {
+		const videoUrl = 'https://youtube.com/watch?v=qdh_x-uRs9g';
+		const fetchedCues = [
+			{ text: 'Hello everyone.', offset: 15240, duration: 4240, lang: 'en' },
+			{ text: 'Um, welcome to this talk.', offset: 16600, duration: 5080, lang: 'en' }
+		] satisfies TranscriptResponse[];
+		mockFetchTranscript.mockResolvedValue(fetchedCues);
+
+		const result = await downloadYouTube(videoUrl, 'job-1');
+
+		// The transcript fetcher must be asked for English captions of the same URL.
+		expect(mockFetchTranscript).toHaveBeenCalledWith(videoUrl, { lang: 'en' });
+
+		// The result must be a captions job whose segments mirror the fetched cues.
+		const expectedShape = {
+			type: 'captions',
+			segments: [
+				{ index: 0, start: 15.24, end: 19.48, text: 'Hello everyone.', words: [] },
+				{ index: 1, start: 16.6, end: 21.68, text: 'Um, welcome to this talk.', words: [] }
+			]
+		};
+		expect(result).toMatchObject(expectedShape);
+	});
+});