From ef9c04b070049432e9fde1f776c0bd3ff50986a3 Mon Sep 17 00:00:00 2001
From: mozempk <moze@sal.giize.com>
Date: Wed, 6 May 2026 02:13:20 +0200
Subject: [PATCH] fix: trim trailing silence from each chunk before whisper
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Whisper hallucinates filler tokens (Bye., Thank you., etc.) into
end-of-chunk silence. This is especially visible on the final chunk
of long audio where the outro silence triggers a 10× repetition loop.

Fix: after slicing each PCM chunk, scan backwards to find the last
sample whose absolute amplitude exceeds −35 dB, keep 0.5 s of padding
after it, and truncate the rest. This is applied to every chunk, not
just the last: any chunk ending in a long silence gets the same protection.

Constants match the silencedetect filter already used for chunking:
  THRESHOLD = 0.0178  (−35 dB)
  PADDING   = 8000 samples (0.5 s at 16 kHz)

Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
---
 src/worker.rs | 26 +++++++++++++++++++++++++-
 1 file changed, 25 insertions(+), 1 deletion(-)
diff --git a/src/worker.rs b/src/worker.rs
index 3649859..6942cb7 100644
--- a/src/worker.rs
+++ b/src/worker.rs
@@ -326,7 +326,8 @@ async fn process_job(
     for (ci, (chunk_start, chunk_end)) in chunks.iter().enumerate() {
         let s0 = (*chunk_start * 16_000.0) as usize;
         let s1 = ((*chunk_end   * 16_000.0) as usize).min(pcm.len());
-        let chunk_pcm = pcm[s0..s1].to_vec();
+        let mut chunk_pcm = pcm[s0..s1].to_vec();
+        trim_trailing_silence(&mut chunk_pcm);
 
         // Base percent this chunk starts at.
         let base  = (ci * 100 / n) as u8;
@@ -403,6 +404,29 @@ async fn process_job(
     Ok((all_segments, language, total_secs))
 }
 
+/// Trim trailing silence from a 16 kHz mono PCM buffer, in place.
+///
+/// Finds the last sample with |s| > THRESHOLD (−35 dB re. amplitude 1.0),
+/// keeps up to PADDING samples (0.5 s) after it and truncates the rest, so
+/// whisper does not hallucinate filler tokens into end-of-chunk silence.
+fn trim_trailing_silence(pcm: &mut Vec<f32>) {
+    const THRESHOLD: f32 = 0.017_8; // −35 dB  (10^(−35/20))
+    const PADDING:   usize = 8_000;  // 0.5 s at 16 kHz
+
+    if let Some(last_loud) = pcm.iter().rposition(|&s| s.abs() > THRESHOLD) {
+        let new_len = (last_loud + 1 + PADDING).min(pcm.len()); // clamp: never grows the buffer
+        if new_len < pcm.len() {
+            tracing::trace!(
+                original_samples = pcm.len(),
+                trimmed_samples  = pcm.len() - new_len,
+                "trimmed trailing silence"
+            );
+            pcm.truncate(new_len);
+        }
+    }
+    // No sample above THRESHOLD (all-silent or empty): left as-is; presumably whisper yields no segments — verify.
+}
+
 /// Decode any audio file to 16 kHz mono PCM f32 using ffmpeg.
 async fn decode_audio(path: &std::path::Path) -> crate::Result<Vec<f32>> {
     use tokio::process::Command;