refactor(transcript): drop Tonemark rewrite
All checks were successful
Build & Push Docker Image / test (push) Successful in 10s
Build & Push Docker Image / build-and-push (push) Successful in 50s

Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
This commit is contained in:
2026-05-12 00:10:32 +02:00
parent df50e74939
commit 929c482497
10 changed files with 161 additions and 540 deletions

View File

@@ -0,0 +1,80 @@
import { afterEach, beforeEach, describe, expect, it, vi } from 'vitest';
import { rm } from 'fs/promises';
import type { TranscriptResponse } from 'youtube-transcript';
/**
 * Hoisted test doubles and environment setup.
 *
 * Vitest lifts `vi.hoisted` callbacks above the file's static imports, whereas
 * a plain top-level `vi.stubEnv(...)` statement only executes after every
 * imported module has already been evaluated. Stubbing `DATA_DIR` in here
 * guarantees the value is in place before `$lib/server/downloader.js` (and
 * anything it pulls in) is loaded.
 *
 * NOTE(review): whether the downloader reads DATA_DIR at import time or lazily
 * is not visible from this file; stubbing early is safe in both cases.
 */
const { mockExecFile, mockFetchTranscript, TEST_DATA_DIR } = vi.hoisted(() => {
  // Timestamp suffix keeps separate runs from sharing one directory.
  const dataDir = `/tmp/tonemark-downloader-test-${Date.now()}`;
  vi.stubEnv('DATA_DIR', dataDir);

  return {
    mockExecFile: vi.fn(),
    mockFetchTranscript: vi.fn(),
    TEST_DATA_DIR: dataDir
  };
});
// Swap `child_process` for a stub that exposes only `execFile`, backed by the
// hoisted `mockExecFile` spy.
vi.mock('child_process', () => {
  return { execFile: mockExecFile };
});

// Swap `youtube-transcript` for a stub whose `fetchTranscript` is the hoisted
// `mockFetchTranscript` spy.
vi.mock('youtube-transcript', () => {
  return { fetchTranscript: mockFetchTranscript };
});
import { downloadYouTube, transcriptEntriesToSegments } from '$lib/server/downloader.js';
beforeEach(() => {
  // Drop call history recorded by the previous test.
  vi.clearAllMocks();

  // Every `execFile` call succeeds: the trailing callback argument is invoked
  // with a null error, a JSON payload carrying a title, and an empty string.
  mockExecFile.mockImplementation((...callArgs: unknown[]) => {
    const done = callArgs[callArgs.length - 1] as (...callbackArgs: unknown[]) => void;
    const payload = JSON.stringify({ title: 'Fetched Title' });
    done(null, payload, '');
  });
});
afterEach(async () => {
  // Best-effort removal of the per-run data directory after each test.
  try {
    await rm(TEST_DATA_DIR, { recursive: true, force: true });
  } catch {
    // Cleanup failures are deliberately ignored so they never fail a test.
  }
});
// Unit tests for the pure transcript-entry → segment conversion.
describe('transcriptEntriesToSegments', () => {
  it('converts millisecond transcript offsets into second-based segments', () => {
    // Offsets and durations given as large integers (milliseconds).
    const millisecondCues: TranscriptResponse[] = [
      { text: 'Hello everyone.', offset: 15240, duration: 4240, lang: 'en' },
      { text: 'Um, welcome to this talk.', offset: 16600, duration: 5080, lang: 'en' }
    ];

    const converted = transcriptEntriesToSegments(millisecondCues);

    expect(converted).toEqual([
      { index: 0, start: 15.24, end: 19.48, text: 'Hello everyone.', words: [] },
      { index: 1, start: 16.6, end: 21.68, text: 'Um, welcome to this talk.', words: [] }
    ]);
  });

  it('preserves second-based transcript offsets and drops empty text', () => {
    // First cue is whitespace-only and must not appear in the output; the
    // remaining cue is expected to be re-indexed from 0.
    const secondCues: TranscriptResponse[] = [
      { text: ' ', offset: 0, duration: 1.5, lang: 'en' },
      { text: 'Clean caption cue', offset: 91.08, duration: 3.72, lang: 'en' }
    ];

    const converted = transcriptEntriesToSegments(secondCues);

    expect(converted).toEqual([
      { index: 0, start: 91.08, end: 94.8, text: 'Clean caption cue', words: [] }
    ]);
  });
});
// End-to-end check of the caption path with `fetchTranscript` and `execFile` mocked.
describe('downloadYouTube', () => {
  it('uses fetched transcript entries directly for caption jobs', async () => {
    const videoUrl = 'https://youtube.com/watch?v=qdh_x-uRs9g';
    const fetchedCues = [
      { text: 'Hello everyone.', offset: 15240, duration: 4240, lang: 'en' },
      { text: 'Um, welcome to this talk.', offset: 16600, duration: 5080, lang: 'en' }
    ] satisfies TranscriptResponse[];
    mockFetchTranscript.mockResolvedValue(fetchedCues);

    const result = await downloadYouTube(videoUrl, 'job-1');

    // The transcript must be requested for the same URL, in English.
    expect(mockFetchTranscript).toHaveBeenCalledWith(videoUrl, { lang: 'en' });

    // The result carries the fetched cues converted to segments.
    const expectedSegments = [
      { index: 0, start: 15.24, end: 19.48, text: 'Hello everyone.', words: [] },
      { index: 1, start: 16.6, end: 21.68, text: 'Um, welcome to this talk.', words: [] }
    ];
    expect(result).toMatchObject({ type: 'captions', segments: expectedSegments });
  });
});

View File

@@ -1,204 +0,0 @@
import { describe, it, expect } from 'vitest';
import {
deduplicateSegments
} from '$lib/server/postprocess.js';
import type { Segment } from '$lib/types.js';
// ── helpers ──────────────────────────────────────────────────────────────────
/**
 * Builds a `Segment` fixture for the tests in this file.
 *
 * @param index - Value stored in the segment's `index` field.
 * @param start - Value stored in the segment's `start` field.
 * @param end - Value stored in the segment's `end` field.
 * @param text - Segment text, stored exactly as given (no trimming).
 * @returns A segment with the given fields and an empty `words` array.
 */
function seg(index: number, start: number, end: number, text: string): Segment {
  const fixture: Segment = { index, start, end, text, words: [] };
  return fixture;
}
// ── collapseRepeats (tested indirectly via deduplicateSegments) ───────────────
// In-segment phrase collapsing, exercised through the public `deduplicateSegments`.
describe('deduplicateSegments — collapseRepeats', () => {
  it('leaves text without repetition unchanged', () => {
    const segments = [seg(0, 0, 5, ' Hello world, this is a sentence.')];

    const first = deduplicateSegments(segments)[0];

    // Only the leading space is expected to disappear.
    expect(first.text).toBe('Hello world, this is a sentence.');
  });

  it('collapses a consecutive repeated phrase inside a segment', () => {
    const segments = [seg(0, 0, 5, ' the quick brown fox the quick brown fox')];

    const first = deduplicateSegments(segments)[0];

    expect(first.text).not.toMatch(/the quick brown fox.*the quick brown fox/i);
  });

  it('handles multiple repetitions recursively', () => {
    // "welcome everyone" is 16 characters long, which is above the 10-character
    // minimum the collapse regex is described as requiring.
    const segments = [seg(0, 0, 5, ' welcome everyone welcome everyone welcome everyone')];

    const collapsed = deduplicateSegments(segments);
    const text = collapsed[0]?.text ?? '';
    const occurrences = text.match(/welcome everyone/gi) ?? [];

    expect(occurrences.length).toBeLessThan(3);
  });
});
// ── mergeConsecutive ──────────────────────────────────────────────────────────
// Merging of adjacent segments whose text matches.
describe('deduplicateSegments — mergeConsecutive', () => {
  it('merges adjacent segments with identical text', () => {
    const first = seg(0, 0, 2, ' Hello world.');
    const repeat = seg(1, 2, 4, ' Hello world.');

    const merged = deduplicateSegments([first, repeat]);

    expect(merged).toHaveLength(1);
    // The surviving segment extends to the end of the second one.
    expect(merged[0].end).toBe(4);
  });

  it('keeps adjacent segments with different text', () => {
    const first = seg(0, 0, 2, ' First sentence.');
    const second = seg(1, 2, 4, ' Second sentence.');

    const merged = deduplicateSegments([first, second]);

    expect(merged).toHaveLength(2);
  });

  it('normalises punctuation and case for merge comparison', () => {
    const punctuated = seg(0, 0, 2, ' Hello, World!');
    const plain = seg(1, 2, 4, ' hello world');

    const merged = deduplicateSegments([punctuated, plain]);

    expect(merged).toHaveLength(1);
  });
});
// ── rolling prefix/suffix chain collapse ───────────────────────────────────────
// Collapsing of rolling prefix/suffix chains, where each segment repeats part
// of its neighbour's text.
describe('deduplicateSegments — rolling backend hypotheses', () => {
  it('collapses prefix-growth chains from stored backend segments', () => {
    const chain = [
      seg(0, 15.24, 16.6, 'Hello everyone.'),
      seg(1, 16.6, 19.47, 'Hello everyone. Um, welcome to this talk.'),
      seg(2, 19.47, 19.48, 'Um, welcome to this talk.'),
      seg(3, 19.48, 21.67, "Um, welcome to this talk. I'll be speaking about small model"),
      seg(4, 21.67, 21.68, "I'll be speaking about small model"),
      seg(5, 21.68, 24.59, "I'll be speaking about small model inference and a gap that we've")
    ];

    const collapsed = deduplicateSegments(chain);

    expect(collapsed).toHaveLength(2);
    const [opening, continuation] = collapsed;
    expect(opening).toMatchObject({
      index: 0,
      start: 15.24,
      end: 19.48,
      text: 'Hello everyone. Um, welcome to this talk.'
    });
    expect(continuation).toMatchObject({
      index: 1,
      start: 19.48,
      end: 24.59,
      text: "I'll be speaking about small model inference and a gap that we've"
    });
  });

  it('does not collapse similar phrases when there is a real timing gap', () => {
    // The first segment ends at 1 and the second starts at 2.
    const spaced = [
      seg(0, 0, 1, 'Hello everyone.'),
      seg(1, 2, 4, 'Hello everyone. Welcome back.')
    ];

    const kept = deduplicateSegments(spaced);

    expect(kept).toHaveLength(2);
    expect(kept[0].text).toBe('Hello everyone.');
    expect(kept[1].text).toBe('Hello everyone. Welcome back.');
  });

  it('collapses tiny one-word carry-over segments from caption-style output', () => {
    const captionStyle = [
      seg(0, 94.8, 96.4, 'world.'),
      seg(1, 96.4, 98.96, 'world. And that aspect that I overlooked was'),
      seg(2, 98.96, 100.72, 'inference.'),
      seg(3, 100.72, 103.92, 'inference. So, as someone who kind of wants to'),
      seg(4, 107.19, 107.2, 'and'),
      seg(5, 107.2, 109.56, 'and work to understand the problems and the')
    ];

    const collapsed = deduplicateSegments(captionStyle);

    expect(collapsed).toHaveLength(3);
    expect(collapsed[0].text).toBe('world. And that aspect that I overlooked was');
    expect(collapsed[1].text).toBe('inference. So, as someone who kind of wants to');
    expect(collapsed[2].text).toBe('and work to understand the problems and the');
  });

  it('trims single-word suffix-prefix overlap between adjacent segments', () => {
    const overlapping = [
      seg(0, 94.8, 96.4, 'world.'),
      seg(1, 96.4, 98.96, 'world. And that aspect that I overlooked was'),
      seg(2, 120.12, 123.71, 'to find more about inference.'),
      seg(3, 123.72, 126.92, "inference. So, I've done a lot of work with VLAM,")
    ];

    const trimmed = deduplicateSegments(overlapping);

    expect(trimmed).toHaveLength(3);
    expect(trimmed[0].text).toBe('world. And that aspect that I overlooked was');
    // The leading "inference." repeated from the previous segment is removed.
    expect(trimmed[2].text).toBe("So, I've done a lot of work with VLAM,");
  });
});
// ── ngramDedup ────────────────────────────────────────────────────────────────
// Similarity-based removal of segments that repeat recent context.
describe('deduplicateSegments — ngramDedup', () => {
  it('passes through completely unique segments', () => {
    const distinct = [
      seg(0, 0, 5, ' The cat sat on the mat quite happily today.'),
      seg(1, 5, 10, ' Later the dog ran across the yard chasing a ball.')
    ];

    const kept = deduplicateSegments(distinct);

    expect(kept).toHaveLength(2);
  });

  it('removes a segment that is highly similar to recent context', () => {
    // The same long sentence appears twice, verbatim.
    const longText =
      ' This is a very specific and unique sentence about transcription quality matters greatly.';
    const repeated = [seg(0, 0, 5, longText), seg(1, 5, 10, longText)];

    const kept = deduplicateSegments(repeated);

    // Per the original author's note, the consecutive-merge step already folds
    // the second copy into the first, so a single segment remains.
    expect(kept).toHaveLength(1);
  });
});
// ── deduplicateSegments — full pipeline ──────────────────────────────────────
// Behaviour of `deduplicateSegments` as a whole: empties, re-indexing, and a
// combined scenario.
describe('deduplicateSegments — full pipeline', () => {
  it('returns empty array for empty input', () => {
    const output = deduplicateSegments([]);

    expect(output).toEqual([]);
  });

  it('removes segments whose text is empty after trimming', () => {
    const blank = seg(0, 0, 1, ' ');
    const spoken = seg(1, 1, 2, ' Hello.');

    const output = deduplicateSegments([blank, spoken]);

    expect(output).toHaveLength(1);
    expect(output[0].text).toBe('Hello.');
  });

  it('re-indexes output segments starting from 0', () => {
    // Input indices are deliberately non-contiguous.
    const sparse = [
      seg(5, 0, 2, ' First unique sentence here.'),
      seg(8, 2, 4, ' Second different sentence there.')
    ];

    const output = deduplicateSegments(sparse);

    for (const [position, segment] of output.entries()) {
      expect(segment.index).toBe(position);
    }
  });

  it('runs the full pipeline: trim → remove empty → merge → ngram → merge → reindex', () => {
    const greeting = seg(0, 0, 2, ' Good morning everyone.');
    const blank = seg(1, 2, 3, ' '); // empty — removed
    const greetingAgain = seg(2, 3, 5, ' Good morning everyone.'); // duplicate — merged
    const welcome = seg(3, 5, 7, ' Welcome to our presentation today.');

    const output = deduplicateSegments([greeting, blank, greetingAgain, welcome]);

    expect(output).toHaveLength(2);
    expect(output[0].text).toBe('Good morning everyone.');
    expect(output[1].text).toBe('Welcome to our presentation today.');
    expect(output[0].index).toBe(0);
    expect(output[1].index).toBe(1);
  });
});

View File

@@ -7,7 +7,6 @@ const {
mockGetJob,
mockUpdateJob,
mockSetJobStatus,
mockDeduplicateSegments,
mockWriteOutputs,
mockSendNotification,
mockCleanupJobTmp,
@@ -16,7 +15,6 @@ const {
mockGetJob: vi.fn(),
mockUpdateJob: vi.fn(),
mockSetJobStatus: vi.fn(),
mockDeduplicateSegments: vi.fn((segs: Segment[]) => segs),
mockWriteOutputs: vi.fn(),
mockSendNotification: vi.fn(),
mockCleanupJobTmp: vi.fn(),
@@ -29,10 +27,6 @@ vi.mock('$lib/server/db.js', () => ({
setJobStatus: mockSetJobStatus
}));
vi.mock('$lib/server/postprocess.js', () => ({
deduplicateSegments: mockDeduplicateSegments
}));
vi.mock('$lib/server/formatter.js', () => ({
writeOutputs: mockWriteOutputs
}));
@@ -91,7 +85,6 @@ function makeSeg(index: number, text: string): Segment {
beforeEach(() => {
vi.clearAllMocks();
mockDeduplicateSegments.mockImplementation((segs: Segment[]) => segs);
mockWriteOutputs.mockResolvedValue({
srt: '/out/dir/title.srt',
txt: '/out/dir/title.txt',
@@ -218,25 +211,21 @@ describe('POST /api/webhook/[jobId] — whisper failure', () => {
describe('POST /api/webhook/[jobId] — success with segments', () => {
const segments = [makeSeg(0, 'Hello world.'), makeSeg(1, 'This is a test.')];
it('runs deduplication on received segments', async () => {
it('passes received segments through unchanged', async () => {
mockGetJob.mockReturnValue(makeJob('job-3'));
await POST(makeEvent('job-3', makeWhisperJob({ segments })) as any);
expect(mockDeduplicateSegments).toHaveBeenCalledWith(segments);
expect(mockWriteOutputs).toHaveBeenCalledWith(segments, 'Test Video', 'job-3');
});
it('calls writeOutputs with the deduplicated segments and job title', async () => {
it('calls writeOutputs with the received segments and job title', async () => {
mockGetJob.mockReturnValue(makeJob('job-4', 'My Lecture'));
const deduped = [makeSeg(0, 'Hello world.')];
mockDeduplicateSegments.mockReturnValue(deduped);
await POST(makeEvent('job-4', makeWhisperJob({ segments })) as any);
expect(mockWriteOutputs).toHaveBeenCalledWith(deduped, 'My Lecture', 'job-4');
expect(mockWriteOutputs).toHaveBeenCalledWith(segments, 'My Lecture', 'job-4');
});
it('stores serialised segments_json in the database', async () => {
mockGetJob.mockReturnValue(makeJob('job-5'));
const deduped = [makeSeg(0, 'Result text.')];
mockDeduplicateSegments.mockReturnValue(deduped);
await POST(makeEvent('job-5', makeWhisperJob({ segments })) as any);
@@ -244,7 +233,7 @@ describe('POST /api/webhook/[jobId] — success with segments', () => {
expect.objectContaining({
id: 'job-5',
status: 'done',
segmentsJson: JSON.stringify(deduped)
segmentsJson: JSON.stringify(segments)
})
);
});