refactor(transcript): drop Tonemark rewrite
All checks were successful
Build & Push Docker Image / test (push) Successful in 10s
Build & Push Docker Image / build-and-push (push) Successful in 50s

Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
This commit is contained in:
2026-05-12 00:10:32 +02:00
parent df50e74939
commit 929c482497
10 changed files with 161 additions and 540 deletions

View File

@@ -0,0 +1,80 @@
import { afterEach, beforeEach, describe, expect, it, vi } from 'vitest';
import { rm } from 'fs/promises';
import type { TranscriptResponse } from 'youtube-transcript';
const { mockExecFile, mockFetchTranscript } = vi.hoisted(() => ({
mockExecFile: vi.fn(),
mockFetchTranscript: vi.fn()
}));
const TEST_DATA_DIR = `/tmp/tonemark-downloader-test-${Date.now()}`;
vi.stubEnv('DATA_DIR', TEST_DATA_DIR);
vi.mock('child_process', () => ({
execFile: mockExecFile
}));
vi.mock('youtube-transcript', () => ({
fetchTranscript: mockFetchTranscript
}));
import { downloadYouTube, transcriptEntriesToSegments } from '$lib/server/downloader.js';
beforeEach(() => {
vi.clearAllMocks();
mockExecFile.mockImplementation((...args: unknown[]) => {
const cb = args.at(-1) as (...callbackArgs: unknown[]) => void;
cb(null, JSON.stringify({ title: 'Fetched Title' }), '');
});
});
afterEach(async () => {
await rm(TEST_DATA_DIR, { recursive: true, force: true }).catch(() => {});
});
describe('transcriptEntriesToSegments', () => {
it('converts millisecond transcript offsets into second-based segments', () => {
const entries: TranscriptResponse[] = [
{ text: 'Hello everyone.', offset: 15240, duration: 4240, lang: 'en' },
{ text: 'Um, welcome to this talk.', offset: 16600, duration: 5080, lang: 'en' }
];
expect(transcriptEntriesToSegments(entries)).toEqual([
{ index: 0, start: 15.24, end: 19.48, text: 'Hello everyone.', words: [] },
{ index: 1, start: 16.6, end: 21.68, text: 'Um, welcome to this talk.', words: [] }
]);
});
it('preserves second-based transcript offsets and drops empty text', () => {
const entries: TranscriptResponse[] = [
{ text: ' ', offset: 0, duration: 1.5, lang: 'en' },
{ text: 'Clean caption cue', offset: 91.08, duration: 3.72, lang: 'en' }
];
expect(transcriptEntriesToSegments(entries)).toEqual([
{ index: 0, start: 91.08, end: 94.8, text: 'Clean caption cue', words: [] }
]);
});
});
describe('downloadYouTube', () => {
it('uses fetched transcript entries directly for caption jobs', async () => {
mockFetchTranscript.mockResolvedValue([
{ text: 'Hello everyone.', offset: 15240, duration: 4240, lang: 'en' },
{ text: 'Um, welcome to this talk.', offset: 16600, duration: 5080, lang: 'en' }
] satisfies TranscriptResponse[]);
const result = await downloadYouTube('https://youtube.com/watch?v=qdh_x-uRs9g', 'job-1');
expect(mockFetchTranscript).toHaveBeenCalledWith('https://youtube.com/watch?v=qdh_x-uRs9g', {
lang: 'en'
});
expect(result).toMatchObject({
type: 'captions',
segments: [
{ index: 0, start: 15.24, end: 19.48, text: 'Hello everyone.', words: [] },
{ index: 1, start: 16.6, end: 21.68, text: 'Um, welcome to this talk.', words: [] }
]
});
});
});