fix(worker): collapse incremental segments

Normalize rolling partial-hypothesis chains before final job persistence so downstream clients receive stable transcript segments instead of echoed continuations.

Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
2026-05-11 22:46:38 +02:00
parent d3a67f11b3
commit cb0b07b2ff
10 changed files with 712 additions and 331 deletions
--- a/src/main.rs
+++ b/src/main.rs
@@ -97,10 +97,10 @@ async fn main() -> anyhow::Result<()> {
        .with(tracing_subscriber::fmt::layer().json())
        .init();

-    let data_dir   = std::env::var("DATA_DIR").unwrap_or_else(|_| "/data".into());
-    let model_path = std::env::var("WHISPER_MODEL_PATH")
-        .unwrap_or_else(|_| "/models/ggml-large-v3.bin".into());
-    let port       = std::env::var("PORT").unwrap_or_else(|_| "8080".into());
+    let data_dir = std::env::var("DATA_DIR").unwrap_or_else(|_| "/data".into());
+    let model_path =
+        std::env::var("WHISPER_MODEL_PATH").unwrap_or_else(|_| "/models/ggml-large-v3.bin".into());
+    let port = std::env::var("PORT").unwrap_or_else(|_| "8080".into());
    let model_name = std::env::var("WHISPER_MODEL").unwrap_or_else(|_| "large-v3".into());
    let gpu_device: u32 = std::env::var("CUDA_DEVICE")
        .ok()
@@ -132,7 +132,9 @@ async fn main() -> anyhow::Result<()> {
    // Model starts unloaded — lazy load on first job or POST /model/load.
    let model_state = Arc::new(RwLock::new(models::ModelState::Unloaded));
    let (model_event_tx, _) = broadcast::channel::<models::ModelEvent>(32);
-    let webhook_registry = Arc::new(std::sync::Mutex::new(std::collections::HashSet::<String>::new()));
+    let webhook_registry = Arc::new(std::sync::Mutex::new(
+        std::collections::HashSet::<String>::new(),
+    ));

    // Spawn single GPU worker; get back the SSE broadcast registry and cmd channel.
    let (progress, cmd_tx) = worker::start(
@@ -153,13 +155,13 @@ async fn main() -> anyhow::Result<()> {
        cmd_tx,
        storage: Arc::clone(&storage),
        progress,
-        model_name:        model_name.as_str().into(),
-        queue_depth:       Arc::clone(&queue_depth),
+        model_name: model_name.as_str().into(),
+        queue_depth: Arc::clone(&queue_depth),
        gpu_device,
        model_state,
        model_event_tx,
        webhook_registry,
-        idle_timeout:      std::time::Duration::from_secs(idle_timeout_secs),
+        idle_timeout: std::time::Duration::from_secs(idle_timeout_secs),
        gpu_poll_interval: std::time::Duration::from_secs(gpu_poll_interval_secs),
    };