fix(worker): collapse incremental segments
All checks were successful
Build & Push Docker Image / test (push) Successful in 6m20s
Build & Push Docker Image / build-and-push (push) Successful in 6m29s

Normalize rolling partial-hypothesis chains before final job persistence so downstream clients receive stable transcript segments instead of echoed continuations.

Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
This commit is contained in:
2026-05-11 22:46:38 +02:00
parent d3a67f11b3
commit cb0b07b2ff
10 changed files with 712 additions and 331 deletions

View File

@@ -19,12 +19,12 @@ pub async fn health(State(state): State<AppState>) -> Result<Json<HealthResponse
let model_state_tag = state.model_state.read().await.tag().to_string();
Ok(Json(HealthResponse {
status: "ok".into(),
status: "ok".into(),
gpu_name,
vram_total_mb,
model: state.model_name.to_string(),
queue_depth: state.queue_depth.load(Ordering::Relaxed),
model_state: model_state_tag,
model: state.model_name.to_string(),
queue_depth: state.queue_depth.load(Ordering::Relaxed),
model_state: model_state_tag,
}))
}
@@ -50,9 +50,7 @@ fn gpu_info(device: u32) -> (Option<String>, Option<u64>) {
let mut parts = line.splitn(2, ',');
let name = parts.next().map(|s| s.trim().to_owned());
let vram = parts
.next()
.and_then(|s| s.trim().parse::<u64>().ok());
let vram = parts.next().and_then(|s| s.trim().parse::<u64>().ok());
(name, vram)
}