fix(worker): collapse incremental segments
Normalize rolling partial-hypothesis chains before final job persistence so downstream clients receive stable transcript segments instead of echoed continuations.

Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
This commit is contained in:
@@ -19,12 +19,12 @@ pub async fn health(State(state): State<AppState>) -> Result<Json<HealthResponse
|
||||
let model_state_tag = state.model_state.read().await.tag().to_string();
|
||||
|
||||
Ok(Json(HealthResponse {
|
||||
status: "ok".into(),
|
||||
status: "ok".into(),
|
||||
gpu_name,
|
||||
vram_total_mb,
|
||||
model: state.model_name.to_string(),
|
||||
queue_depth: state.queue_depth.load(Ordering::Relaxed),
|
||||
model_state: model_state_tag,
|
||||
model: state.model_name.to_string(),
|
||||
queue_depth: state.queue_depth.load(Ordering::Relaxed),
|
||||
model_state: model_state_tag,
|
||||
}))
|
||||
}
|
||||
|
||||
@@ -50,9 +50,7 @@ fn gpu_info(device: u32) -> (Option<String>, Option<u64>) {
|
||||
let mut parts = line.splitn(2, ',');
|
||||
|
||||
let name = parts.next().map(|s| s.trim().to_owned());
|
||||
let vram = parts
|
||||
.next()
|
||||
.and_then(|s| s.trim().parse::<u64>().ok());
|
||||
let vram = parts.next().and_then(|s| s.trim().parse::<u64>().ok());
|
||||
|
||||
(name, vram)
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user