diff --git a/src/lib/server/postprocess.ts b/src/lib/server/postprocess.ts index 1a01641..0dad3ac 100644 --- a/src/lib/server/postprocess.ts +++ b/src/lib/server/postprocess.ts @@ -78,6 +78,10 @@ function isMeaningfulPhrase(words: string[]): boolean { return words.length >= MIN_MEANINGFUL_WORDS && words.join(' ').length >= MIN_MEANINGFUL_CHARS; } +function isShortCarryover(seg: Segment, words: string[]): boolean { + return seg.end - seg.start <= 0.2 || words.length <= 2 || words.join(' ').length <= 16; +} + function trimLeadingWords(text: string, count: number): string { return splitWords(text).slice(count).join(' ').trim(); } @@ -115,7 +119,7 @@ function collapseIncrementalSegments(segments: Segment[]): Segment[] { if ( currentWords.length > lastWords.length && startsWithWords(currentWords, lastWords) && - isMeaningfulPhrase(lastWords) + (isMeaningfulPhrase(lastWords) || isShortCarryover(last, lastWords)) ) { last.text = current.text; last.end = current.end; @@ -123,7 +127,10 @@ function collapseIncrementalSegments(segments: Segment[]): Segment[] { continue; } - if (endsWithWords(lastWords, currentWords) && isMeaningfulPhrase(currentWords)) { + if ( + endsWithWords(lastWords, currentWords) && + (isMeaningfulPhrase(currentWords) || isShortCarryover(current, currentWords)) + ) { last.end = Math.max(last.end, current.end); continue; } diff --git a/src/tests/postprocess.test.ts b/src/tests/postprocess.test.ts index 81b3dc5..cf47ace 100644 --- a/src/tests/postprocess.test.ts +++ b/src/tests/postprocess.test.ts @@ -108,6 +108,24 @@ describe('deduplicateSegments — rolling backend hypotheses', () => { expect(result[0].text).toBe('Hello everyone.'); expect(result[1].text).toBe('Hello everyone. Welcome back.'); }); + + it('collapses tiny one-word carry-over segments from caption-style output', () => { + const input = [ + seg(0, 94.8, 96.4, 'world.'), + seg(1, 96.4, 98.96, 'world. And that aspect that I overlooked was'), + seg(2, 98.96, 100.72, 'inference.'), + seg(3, 100.72, 103.92, 'inference. So, as someone who kind of wants to'), + seg(4, 107.19, 107.2, 'and'), + seg(5, 107.2, 109.56, 'and work to understand the problems and the') + ]; + + const result = deduplicateSegments(input); + + expect(result).toHaveLength(3); + expect(result[0].text).toBe('world. And that aspect that I overlooked was'); + expect(result[1].text).toBe('inference. So, as someone who kind of wants to'); + expect(result[2].text).toBe('and work to understand the problems and the'); + }); }); // ── ngramDedup ────────────────────────────────────────────────────────────────