fix(worker): collapse incremental segments
All checks were successful
Build & Push Docker Image / test (push) Successful in 6m20s
Build & Push Docker Image / build-and-push (push) Successful in 6m29s

Normalize rolling partial-hypothesis chains before final job persistence so downstream clients receive stable transcript segments instead of echoed continuations.

Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
This commit is contained in:
2026-05-11 22:46:38 +02:00
parent d3a67f11b3
commit cb0b07b2ff
10 changed files with 712 additions and 331 deletions

View File

@@ -1,10 +1,10 @@
use thiserror::Error;
use axum::{
http::{StatusCode, HeaderValue, header},
http::{header, HeaderValue, StatusCode},
response::{IntoResponse, Response},
Json,
};
use serde_json::json;
use thiserror::Error;
pub type Result<T> = std::result::Result<T, AppError>;
@@ -31,7 +31,10 @@ pub enum AppError {
/// Returned when a job is submitted but the model is not yet loaded.
/// Carries the current state tag and recommended Retry-After seconds.
#[error("model not ready: {state}")]
ModelNotReady { state: String, retry_after_secs: u64 },
ModelNotReady {
state: String,
retry_after_secs: u64,
},
}
impl AppError {
@@ -59,13 +62,20 @@ impl IntoResponse for AppError {
}
AppError::Internal(m) => {
tracing::error!(error = %m, "internal error");
(StatusCode::INTERNAL_SERVER_ERROR, Json(json!({ "error": m }))).into_response()
(
StatusCode::INTERNAL_SERVER_ERROR,
Json(json!({ "error": m })),
)
.into_response()
}
AppError::OutOfMemory(m) => {
tracing::warn!(error = %m, "GPU out of memory during model load");
(StatusCode::SERVICE_UNAVAILABLE, Json(json!({ "error": m }))).into_response()
}
AppError::ModelNotReady { state, retry_after_secs } => {
AppError::ModelNotReady {
state,
retry_after_secs,
} => {
let body = Json(json!({
"error": "model_not_ready",
"state": state,
@@ -117,17 +127,25 @@ mod tests {
#[tokio::test]
async fn test_model_not_ready_response_has_retry_after_header() {
let err = AppError::ModelNotReady { state: "loading".into(), retry_after_secs: 10 };
let err = AppError::ModelNotReady {
state: "loading".into(),
retry_after_secs: 10,
};
let resp = err.into_response();
assert_eq!(resp.status(), StatusCode::SERVICE_UNAVAILABLE);
let retry_after = resp.headers().get(header::RETRY_AFTER)
let retry_after = resp
.headers()
.get(header::RETRY_AFTER)
.expect("Retry-After header missing");
assert_eq!(retry_after, "10");
}
#[tokio::test]
async fn test_model_not_ready_response_body() {
let err = AppError::ModelNotReady { state: "unloaded".into(), retry_after_secs: 30 };
let err = AppError::ModelNotReady {
state: "unloaded".into(),
retry_after_secs: 30,
};
let resp = err.into_response();
let bytes = to_bytes(resp.into_body(), usize::MAX).await.unwrap();
let v: serde_json::Value = serde_json::from_slice(&bytes).unwrap();
@@ -138,21 +156,21 @@ mod tests {
#[tokio::test]
async fn test_model_not_ready_loading_retry_after_10() {
let err = AppError::ModelNotReady { state: "loading".into(), retry_after_secs: 10 };
let err = AppError::ModelNotReady {
state: "loading".into(),
retry_after_secs: 10,
};
let resp = err.into_response();
assert_eq!(
resp.headers().get(header::RETRY_AFTER).unwrap(),
"10"
);
assert_eq!(resp.headers().get(header::RETRY_AFTER).unwrap(), "10");
}
#[tokio::test]
async fn test_model_not_ready_unloaded_retry_after_30() {
let err = AppError::ModelNotReady { state: "unloaded".into(), retry_after_secs: 30 };
let err = AppError::ModelNotReady {
state: "unloaded".into(),
retry_after_secs: 30,
};
let resp = err.into_response();
assert_eq!(
resp.headers().get(header::RETRY_AFTER).unwrap(),
"30"
);
assert_eq!(resp.headers().get(header::RETRY_AFTER).unwrap(), "30");
}
}