feat: progress reporting with chunk context + live job persistence
All checks were successful
Build & Push Docker Image / build-and-push (push) Successful in 6m38s

- ProgressEvent::Progress now carries chunk index and total count
- SsePayload::Progress gains chunk / chunks_total fields
  → SSE clients can show 'chunk N/51' instead of bare percent
- process_job persists job.progress to storage at each chunk boundary
  → GET /jobs/:id now shows live progress (not stuck at 0)
- Emits Progress event at chunk START (boundary event), not just on
  whisper's internal callback
- entropy_thold raised to 3.5 (catches medium-phrase loops; triggers
  whisper's own temperature-retry instead of silent repetition)
- no_speech_thold removed (the whisper.cpp source marks it
  `// TODO: not implemented`, so setting it was a no-op)

Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
This commit is contained in:
mozempk
2026-05-06 02:00:46 +02:00
parent fb8556441c
commit 35e7ea8d28
4 changed files with 74 additions and 36 deletions

View File

@@ -22,7 +22,8 @@ pub type ProgressTx = broadcast::Sender<ProgressEvent>;
#[derive(Debug, Clone)]
pub enum ProgressEvent {
Progress(u8),
/// `percent` — overall 0–100; `chunk` — 1-based; `total` — total chunks.
Progress { percent: u8, chunk: usize, total: usize },
Done(Box<Job>),
Error(String),
}
@@ -93,7 +94,7 @@ fn transcriber_thread(
}
}
pub(crate) async fn run(
async fn run(
mut job_rx: mpsc::UnboundedReceiver<JobId>,
storage: Arc<Storage>,
queue_depth: Arc<AtomicUsize>,
@@ -134,7 +135,7 @@ pub(crate) async fn run(
let audio_path = audio_path_for(&job_id);
let result = process_job(&job, &audio_path, &progress_tx, &tx_req).await;
let result = process_job(&job, &audio_path, &progress_tx, &tx_req, &storage).await;
let _ = tokio::fs::remove_file(&audio_path).await;
@@ -176,8 +177,9 @@ pub(crate) async fn run(
// ── Silence-based chunking ────────────────────────────────────────────────────
/// Target chunk length. Smaller = safer (less hallucination budget per chunk).
const TARGET_CHUNK_SECS: f32 = 180.0;
/// Target chunk length. 60s ≈ 2× whisper's native 30s window — short enough
/// that a hallucinated phrase can't compound beyond a single window.
const TARGET_CHUNK_SECS: f32 = 60.0;
/// How far from the target we'll snap to a silence midpoint.
const SNAP_WINDOW_SECS: f32 = 30.0;
/// Silence below this level (dB) counts as a split candidate.
@@ -296,12 +298,13 @@ async fn process_job(
audio_path: &std::path::Path,
progress_tx: &ProgressTx,
tx_req: &std::sync::mpsc::Sender<TranscribeRequest>,
storage: &Arc<Storage>,
) -> crate::Result<(Vec<Segment>, String, f32)> {
// 1. Decode full audio to 16 kHz mono PCM.
let pcm = decode_audio(audio_path).await?;
let total_secs = pcm.len() as f32 / 16_000.0;
// 2. Detect silence from the original file (fast amplitude scan).
// 2. Detect silence midpoints from original file.
let silence_mids = detect_silence_midpoints(audio_path).await;
// 3. Build silence-snapped chunk boundaries.
@@ -325,13 +328,32 @@ async fn process_job(
let s1 = ((*chunk_end * 16_000.0) as usize).min(pcm.len());
let chunk_pcm = pcm[s0..s1].to_vec();
// Scale chunk's 0-100 progress into the job's 0-100 range.
// Base percent this chunk starts at.
let base = (ci * 100 / n) as u8;
let span = (100usize / n).max(1) as u8;
// Emit a progress event and persist it at the start of every chunk.
let _ = progress_tx.send(ProgressEvent::Progress {
percent: base,
chunk: ci + 1,
total: n,
});
let mut snapshot = job.clone();
snapshot.progress = base;
if let Err(e) = storage.save(&snapshot).await {
tracing::warn!(error = %e, "failed to persist mid-job progress");
}
// Scale whisper's per-chunk 0–100 into the job's overall range.
let tx = progress_tx.clone();
let chunk_num = ci + 1;
let on_progress = Box::new(move |p: u8| {
let overall = base.saturating_add(p.saturating_mul(span) / 100);
let _ = tx.send(ProgressEvent::Progress(overall));
let _ = tx.send(ProgressEvent::Progress {
percent: overall,
chunk: chunk_num,
total: n,
});
});
let (reply_tx, reply_rx) = oneshot::channel();
@@ -377,7 +399,7 @@ async fn process_job(
seg.index = i as i32;
}
let _ = progress_tx.send(ProgressEvent::Progress(100));
let _ = progress_tx.send(ProgressEvent::Progress { percent: 100, chunk: n, total: n });
Ok((all_segments, language, total_secs))
}