feat: progress reporting with chunk context + live job persistence
All checks were successful
Build & Push Docker Image / build-and-push (push) Successful in 6m38s
All checks were successful
Build & Push Docker Image / build-and-push (push) Successful in 6m38s
- ProgressEvent::Progress now carries the chunk index and total chunk count.
- SsePayload::Progress gains chunk / chunks_total fields → SSE clients can show 'chunk N/51' instead of a bare percent.
- process_job persists job.progress to storage at each chunk boundary → GET /jobs/:id now shows live progress (not stuck at 0).
- Emits a Progress event at chunk START (boundary event), not just on whisper's internal callback.
- entropy_thold raised to 3.5 (catches medium-phrase loops; triggers whisper's own temperature-retry instead of silent repetition).
- no_speech_thold removed (confirmed `// TODO: not implemented` in whisper.cpp source; was a no-op).

Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
This commit is contained in:
@@ -22,7 +22,8 @@ pub type ProgressTx = broadcast::Sender<ProgressEvent>;
|
||||
|
||||
#[derive(Debug, Clone)]
|
||||
pub enum ProgressEvent {
|
||||
Progress(u8),
|
||||
/// `percent` — overall job progress, 0–100; `chunk` — 1-based index of the current chunk; `total` — total number of chunks.
|
||||
Progress { percent: u8, chunk: usize, total: usize },
|
||||
Done(Box<Job>),
|
||||
Error(String),
|
||||
}
|
||||
@@ -93,7 +94,7 @@ fn transcriber_thread(
|
||||
}
|
||||
}
|
||||
|
||||
pub(crate) async fn run(
|
||||
async fn run(
|
||||
mut job_rx: mpsc::UnboundedReceiver<JobId>,
|
||||
storage: Arc<Storage>,
|
||||
queue_depth: Arc<AtomicUsize>,
|
||||
@@ -134,7 +135,7 @@ pub(crate) async fn run(
|
||||
|
||||
let audio_path = audio_path_for(&job_id);
|
||||
|
||||
let result = process_job(&job, &audio_path, &progress_tx, &tx_req).await;
|
||||
let result = process_job(&job, &audio_path, &progress_tx, &tx_req, &storage).await;
|
||||
|
||||
let _ = tokio::fs::remove_file(&audio_path).await;
|
||||
|
||||
@@ -176,8 +177,9 @@ pub(crate) async fn run(
|
||||
|
||||
// ── Silence-based chunking ────────────────────────────────────────────────────
|
||||
|
||||
/// Target chunk length. Smaller = safer (less hallucination budget per chunk).
|
||||
const TARGET_CHUNK_SECS: f32 = 180.0;
|
||||
/// Target chunk length. 60s ≈ 2× whisper's native 30s window — short enough
|
||||
/// that a hallucinated phrase can't compound beyond a single window.
|
||||
const TARGET_CHUNK_SECS: f32 = 60.0;
|
||||
/// How far from the target we'll snap to a silence midpoint.
|
||||
const SNAP_WINDOW_SECS: f32 = 30.0;
|
||||
/// Silence below this level (dB) counts as a split candidate.
|
||||
@@ -296,12 +298,13 @@ async fn process_job(
|
||||
audio_path: &std::path::Path,
|
||||
progress_tx: &ProgressTx,
|
||||
tx_req: &std::sync::mpsc::Sender<TranscribeRequest>,
|
||||
storage: &Arc<Storage>,
|
||||
) -> crate::Result<(Vec<Segment>, String, f32)> {
|
||||
// 1. Decode full audio to 16 kHz mono PCM.
|
||||
let pcm = decode_audio(audio_path).await?;
|
||||
let total_secs = pcm.len() as f32 / 16_000.0;
|
||||
|
||||
// 2. Detect silence from the original file (fast amplitude scan).
|
||||
// 2. Detect silence midpoints from original file.
|
||||
let silence_mids = detect_silence_midpoints(audio_path).await;
|
||||
|
||||
// 3. Build silence-snapped chunk boundaries.
|
||||
@@ -325,13 +328,32 @@ async fn process_job(
|
||||
let s1 = ((*chunk_end * 16_000.0) as usize).min(pcm.len());
|
||||
let chunk_pcm = pcm[s0..s1].to_vec();
|
||||
|
||||
// Scale chunk's 0-100 progress into the job's 0-100 range.
|
||||
// Base percent this chunk starts at.
|
||||
let base = (ci * 100 / n) as u8;
|
||||
let span = (100usize / n).max(1) as u8;
|
||||
|
||||
// Emit a progress event and persist it at the start of every chunk.
|
||||
let _ = progress_tx.send(ProgressEvent::Progress {
|
||||
percent: base,
|
||||
chunk: ci + 1,
|
||||
total: n,
|
||||
});
|
||||
let mut snapshot = job.clone();
|
||||
snapshot.progress = base;
|
||||
if let Err(e) = storage.save(&snapshot).await {
|
||||
tracing::warn!(error = %e, "failed to persist mid-job progress");
|
||||
}
|
||||
|
||||
// Scale whisper's per-chunk 0–100 into the job's overall range.
|
||||
let tx = progress_tx.clone();
|
||||
let chunk_num = ci + 1;
|
||||
let on_progress = Box::new(move |p: u8| {
|
||||
let overall = base.saturating_add(p.saturating_mul(span) / 100);
|
||||
let _ = tx.send(ProgressEvent::Progress(overall));
|
||||
let _ = tx.send(ProgressEvent::Progress {
|
||||
percent: overall,
|
||||
chunk: chunk_num,
|
||||
total: n,
|
||||
});
|
||||
});
|
||||
|
||||
let (reply_tx, reply_rx) = oneshot::channel();
|
||||
@@ -377,7 +399,7 @@ async fn process_job(
|
||||
seg.index = i as i32;
|
||||
}
|
||||
|
||||
let _ = progress_tx.send(ProgressEvent::Progress(100));
|
||||
let _ = progress_tx.send(ProgressEvent::Progress { percent: 100, chunk: n, total: n });
|
||||
Ok((all_segments, language, total_secs))
|
||||
}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user