fix(transcript): collapse rolling segment echoes
Normalize incremental backend hypothesis chains before persistence, and ignore stale or replayed webhook callbacks, so that duplicate transcript text does not survive ingest.

Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
This commit is contained in:
@@ -7,17 +7,18 @@ import { describe, it, expect, vi, afterEach } from 'vitest';
|
||||
|
||||
const execFileMock = vi.hoisted(() => {
|
||||
const fn = vi.fn();
|
||||
type ExecFilePromisifyArgs = [string, string[]];
|
||||
type ExecFileCallback = (err: Error | null, stdout: string, stderr: string) => void;
|
||||
type ExecFileMock = (...args: [...ExecFilePromisifyArgs, ExecFileCallback]) => void;
|
||||
const invoke = fn as unknown as ExecFileMock;
|
||||
Object.defineProperty(fn, Symbol.for('nodejs.util.promisify.custom'), {
|
||||
configurable: true,
|
||||
value: (...args: unknown[]) =>
|
||||
value: (...args: ExecFilePromisifyArgs) =>
|
||||
new Promise<{ stdout: string; stderr: string }>((resolve, reject) => {
|
||||
(fn as ReturnType<typeof vi.fn>)(
|
||||
...args,
|
||||
(err: Error | null, stdout: string, stderr: string) => {
|
||||
if (err) reject(err);
|
||||
else resolve({ stdout, stderr });
|
||||
}
|
||||
);
|
||||
invoke(...args, (err: Error | null, stdout: string, stderr: string) => {
|
||||
if (err) reject(err);
|
||||
else resolve({ stdout, stderr });
|
||||
});
|
||||
})
|
||||
});
|
||||
return fn;
|
||||
|
||||
@@ -66,6 +66,50 @@ describe('deduplicateSegments — mergeConsecutive', () => {
|
||||
});
|
||||
});
|
||||
|
||||
// ── rolling prefix/suffix chain collapse ───────────────────────────────────────
|
||||
|
||||
/**
 * Covers how `deduplicateSegments` treats "rolling" hypothesis chains, where
 * a segment's text repeats or extends the text of the segment before it.
 */
describe('deduplicateSegments — rolling backend hypotheses', () => {
  it('collapses prefix-growth chains from stored backend segments', () => {
    // Every segment either grows the previous text or restarts from its tail.
    // Presumably the `seg` arguments are (index, start, end, text); under that
    // reading each segment starts exactly where the previous one ends.
    const rollingChain = [
      seg(0, 15.24, 16.6, 'Hello everyone.'),
      seg(1, 16.6, 19.47, 'Hello everyone. Um, welcome to this talk.'),
      seg(2, 19.47, 19.48, 'Um, welcome to this talk.'),
      seg(3, 19.48, 21.67, "Um, welcome to this talk. I'll be speaking about small model"),
      seg(4, 21.67, 21.68, "I'll be speaking about small model"),
      seg(5, 21.68, 24.59, "I'll be speaking about small model inference and a gap that we've")
    ];

    // The six inputs are expected to fold into two re-indexed segments.
    const expectedOpening = {
      index: 0,
      start: 15.24,
      end: 19.48,
      text: 'Hello everyone. Um, welcome to this talk.'
    };
    const expectedContinuation = {
      index: 1,
      start: 19.48,
      end: 24.59,
      text: "I'll be speaking about small model inference and a gap that we've"
    };

    const collapsed = deduplicateSegments(rollingChain);

    expect(collapsed).toHaveLength(2);
    expect(collapsed[0]).toMatchObject(expectedOpening);
    expect(collapsed[1]).toMatchObject(expectedContinuation);
  });

  it('does not collapse similar phrases when there is a real timing gap', () => {
    // The second text extends the first, but the two segments are not
    // contiguous in time, so both must be kept as they are.
    const openingText = 'Hello everyone.';
    const followUpText = 'Hello everyone. Welcome back.';
    const gappedSegments = [seg(0, 0, 1, openingText), seg(1, 2, 4, followUpText)];

    const kept = deduplicateSegments(gappedSegments);

    expect(kept).toHaveLength(2);
    expect(kept[0].text).toBe(openingText);
    expect(kept[1].text).toBe(followUpText);
  });
});
|
||||
|
||||
// ── ngramDedup ────────────────────────────────────────────────────────────────
|
||||
|
||||
describe('deduplicateSegments — ngramDedup', () => {
|
||||
|
||||
@@ -132,6 +132,43 @@ describe('POST /api/webhook/[jobId] — locally cancelled job', () => {
|
||||
});
|
||||
});
|
||||
|
||||
// ── Duplicate / stale callback guards ──────────────────────────────────────────
|
||||
|
||||
/**
 * Covers webhook callbacks that should be acknowledged but otherwise ignored:
 * a replay for a job that is already done, and a callback carrying a whisper
 * job id that differs from the one stored on the job.
 */
describe('POST /api/webhook/[jobId] — duplicate and stale callbacks', () => {
  /** Asserts that none of the status, update, or output-writing mocks ran. */
  const expectNoSideEffects = (): void => {
    expect(mockSetJobStatus).not.toHaveBeenCalled();
    expect(mockUpdateJob).not.toHaveBeenCalled();
    expect(mockWriteOutputs).not.toHaveBeenCalled();
  };

  it('ignores replayed success callbacks after the transcript is already done', async () => {
    // Stored job is already finished and has persisted segments.
    const finishedJob = {
      ...makeJob('job-done'),
      status: 'done',
      segmentsJson: JSON.stringify([makeSeg(0, 'Already saved.')]),
      whisperJobId: 'whisper-id'
    };
    mockGetJob.mockReturnValue(finishedJob);

    const replayedEvent = makeEvent('job-done', makeWhisperJob());
    const res = await POST(replayedEvent as any);

    expect(res.status).toBe(200);
    const body = await res.json();
    expect(body).toEqual({ ok: true, ignored: 'duplicate_webhook' });
    expectNoSideEffects();
  });

  it('ignores stale callbacks from an older whisper job after retry', async () => {
    // Stored job points at a different whisper job id than the callback carries.
    const retriedJob = {
      ...makeJob('job-stale'),
      status: 'transcribing',
      whisperJobId: 'current-whisper-job'
    };
    mockGetJob.mockReturnValue(retriedJob);

    const stalePayload = makeWhisperJob({ id: 'old-whisper-job', segments: [makeSeg(0, 'stale')] });
    const staleEvent = makeEvent('job-stale', stalePayload);
    const res = await POST(staleEvent as any);

    expect(res.status).toBe(200);
    const body = await res.json();
    expect(body).toEqual({ ok: true, ignored: 'stale_whisper_job' });
    expectNoSideEffects();
  });
});
|
||||
|
||||
// ── Whisper job failed / cancelled ───────────────────────────────────────────
|
||||
|
||||
describe('POST /api/webhook/[jobId] — whisper failure', () => {
|
||||
|
||||
Reference in New Issue
Block a user