fix(worker): collapse incremental segments

Normalize rolling partial-hypothesis chains before final job persistence so downstream clients receive stable transcript segments instead of echoed continuations.

Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
2026-05-11 22:46:38 +02:00
parent d3a67f11b3
commit cb0b07b2ff
10 changed files with 712 additions and 331 deletions
--- a/src/routes/health.rs
+++ b/src/routes/health.rs
@@ -19,12 +19,12 @@ pub async fn health(State(state): State<AppState>) -> Result<Json<HealthResponse
    let model_state_tag = state.model_state.read().await.tag().to_string();

    Ok(Json(HealthResponse {
-        status:        "ok".into(),
+        status: "ok".into(),
        gpu_name,
        vram_total_mb,
-        model:         state.model_name.to_string(),
-        queue_depth:   state.queue_depth.load(Ordering::Relaxed),
-        model_state:   model_state_tag,
+        model: state.model_name.to_string(),
+        queue_depth: state.queue_depth.load(Ordering::Relaxed),
+        model_state: model_state_tag,
    }))
 }

@@ -50,9 +50,7 @@ fn gpu_info(device: u32) -> (Option<String>, Option<u64>) {
    let mut parts = line.splitn(2, ',');

    let name = parts.next().map(|s| s.trim().to_owned());
-    let vram = parts
-        .next()
-        .and_then(|s| s.trim().parse::<u64>().ok());
+    let vram = parts.next().and_then(|s| s.trim().parse::<u64>().ok());

    (name, vram)
 }