81 lines
2.7 KiB
TypeScript
81 lines
2.7 KiB
TypeScript
import { afterEach, beforeEach, describe, expect, it, vi } from 'vitest';
|
|
import { rm } from 'fs/promises';
|
|
import type { TranscriptResponse } from 'youtube-transcript';
|
|
|
|
/**
 * Shared mock functions and the per-run scratch directory.
 *
 * Everything is created inside `vi.hoisted` so it exists before any module is
 * imported. Static imports are evaluated before ordinary top-level statements,
 * so a plain top-level `vi.stubEnv` call would only run after the downloader
 * module had already been loaded. Stubbing `DATA_DIR` here makes the value
 * visible regardless of whether that module reads it at load time or lazily.
 */
const { mockExecFile, mockFetchTranscript, TEST_DATA_DIR } = vi.hoisted(() => {
  // The timestamp suffix keeps separate runs from sharing one directory.
  const dataDir = `/tmp/tonemark-downloader-test-${Date.now()}`;
  vi.stubEnv('DATA_DIR', dataDir);

  return {
    mockExecFile: vi.fn(),
    mockFetchTranscript: vi.fn(),
    TEST_DATA_DIR: dataDir
  };
});
|
|
|
|
// Swap `child_process` for a stub whose only export is the hoisted `execFile` mock.
vi.mock('child_process', () => {
  return { execFile: mockExecFile };
});
|
|
|
|
// Swap `youtube-transcript` for a stub whose only export is the hoisted
// `fetchTranscript` mock.
vi.mock('youtube-transcript', () => {
  return { fetchTranscript: mockFetchTranscript };
});
|
|
|
|
import { downloadYouTube, transcriptEntriesToSegments } from '$lib/server/downloader.js';
|
|
|
|
// Before every test: wipe recorded mock calls, then make `execFile` report
// success by invoking its trailing callback with a fixed JSON payload.
beforeEach(() => {
  vi.clearAllMocks();

  const stdout = JSON.stringify({ title: 'Fetched Title' });

  mockExecFile.mockImplementation((...params: unknown[]) => {
    // The callback is taken to be whatever argument was passed last.
    const done = params[params.length - 1] as (...callbackArgs: unknown[]) => void;
    done(null, stdout, '');
  });
});
|
|
|
|
// After every test: remove the scratch directory. Cleanup is best-effort, so
// any failure from `rm` is deliberately ignored.
afterEach(async () => {
  try {
    await rm(TEST_DATA_DIR, { recursive: true, force: true });
  } catch {
    // Intentionally ignored: a failed cleanup must not fail the test.
  }
});
|
|
|
|
// Unit tests for the transcript-entry to segment conversion.
describe('transcriptEntriesToSegments', () => {
  // Entries with offsets/durations such as 15240/4240 are expected to come back
  // as 15.24 -> 19.48, i.e. divided down to seconds.
  it('converts millisecond transcript offsets into second-based segments', () => {
    const millisecondCues: TranscriptResponse[] = [
      { text: 'Hello everyone.', offset: 15240, duration: 4240, lang: 'en' },
      { text: 'Um, welcome to this talk.', offset: 16600, duration: 5080, lang: 'en' }
    ];

    const segments = transcriptEntriesToSegments(millisecondCues);

    expect(segments).toEqual([
      { index: 0, start: 15.24, end: 19.48, text: 'Hello everyone.', words: [] },
      { index: 1, start: 16.6, end: 21.68, text: 'Um, welcome to this talk.', words: [] }
    ]);
  });

  // A whitespace-only entry is expected to be omitted, and the remaining entry
  // is expected to keep its offset unchanged and be indexed from 0.
  it('preserves second-based transcript offsets and drops empty text', () => {
    const secondCues: TranscriptResponse[] = [
      { text: ' ', offset: 0, duration: 1.5, lang: 'en' },
      { text: 'Clean caption cue', offset: 91.08, duration: 3.72, lang: 'en' }
    ];

    const segments = transcriptEntriesToSegments(secondCues);

    expect(segments).toEqual([
      { index: 0, start: 91.08, end: 94.8, text: 'Clean caption cue', words: [] }
    ]);
  });
});
|
|
|
|
// Tests for `downloadYouTube` with the transcript fetcher and `execFile` mocked.
describe('downloadYouTube', () => {
  // When the mocked transcript fetch resolves with entries, the result is
  // expected to be a `captions` job whose segments are derived from them.
  it('uses fetched transcript entries directly for caption jobs', async () => {
    const videoUrl = 'https://youtube.com/watch?v=qdh_x-uRs9g';
    const fetchedEntries = [
      { text: 'Hello everyone.', offset: 15240, duration: 4240, lang: 'en' },
      { text: 'Um, welcome to this talk.', offset: 16600, duration: 5080, lang: 'en' }
    ] satisfies TranscriptResponse[];
    mockFetchTranscript.mockResolvedValue(fetchedEntries);

    const result = await downloadYouTube(videoUrl, 'job-1');

    expect(mockFetchTranscript).toHaveBeenCalledWith(videoUrl, { lang: 'en' });

    const expectedSegments = [
      { index: 0, start: 15.24, end: 19.48, text: 'Hello everyone.', words: [] },
      { index: 1, start: 16.6, end: 21.68, text: 'Um, welcome to this talk.', words: [] }
    ];
    expect(result).toMatchObject({ type: 'captions', segments: expectedSegments });
  });
});
|