fix(worker): collapse incremental segments
Normalize rolling partial-hypothesis chains before final job persistence so downstream clients receive stable transcript segments instead of echoed continuations.

Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
This commit is contained in:
54
src/error.rs
54
src/error.rs
@@ -1,10 +1,10 @@
|
||||
use thiserror::Error;
|
||||
use axum::{
|
||||
http::{StatusCode, HeaderValue, header},
|
||||
http::{header, HeaderValue, StatusCode},
|
||||
response::{IntoResponse, Response},
|
||||
Json,
|
||||
};
|
||||
use serde_json::json;
|
||||
use thiserror::Error;
|
||||
|
||||
pub type Result<T> = std::result::Result<T, AppError>;
|
||||
|
||||
@@ -31,7 +31,10 @@ pub enum AppError {
|
||||
/// Returned when a job is submitted but the model is not yet loaded.
|
||||
/// Carries the current state tag and recommended Retry-After seconds.
|
||||
#[error("model not ready: {state}")]
|
||||
ModelNotReady { state: String, retry_after_secs: u64 },
|
||||
ModelNotReady {
|
||||
state: String,
|
||||
retry_after_secs: u64,
|
||||
},
|
||||
}
|
||||
|
||||
impl AppError {
|
||||
@@ -59,13 +62,20 @@ impl IntoResponse for AppError {
|
||||
}
|
||||
AppError::Internal(m) => {
|
||||
tracing::error!(error = %m, "internal error");
|
||||
(StatusCode::INTERNAL_SERVER_ERROR, Json(json!({ "error": m }))).into_response()
|
||||
(
|
||||
StatusCode::INTERNAL_SERVER_ERROR,
|
||||
Json(json!({ "error": m })),
|
||||
)
|
||||
.into_response()
|
||||
}
|
||||
AppError::OutOfMemory(m) => {
|
||||
tracing::warn!(error = %m, "GPU out of memory during model load");
|
||||
(StatusCode::SERVICE_UNAVAILABLE, Json(json!({ "error": m }))).into_response()
|
||||
}
|
||||
AppError::ModelNotReady { state, retry_after_secs } => {
|
||||
AppError::ModelNotReady {
|
||||
state,
|
||||
retry_after_secs,
|
||||
} => {
|
||||
let body = Json(json!({
|
||||
"error": "model_not_ready",
|
||||
"state": state,
|
||||
@@ -117,17 +127,25 @@ mod tests {
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_model_not_ready_response_has_retry_after_header() {
|
||||
let err = AppError::ModelNotReady { state: "loading".into(), retry_after_secs: 10 };
|
||||
let err = AppError::ModelNotReady {
|
||||
state: "loading".into(),
|
||||
retry_after_secs: 10,
|
||||
};
|
||||
let resp = err.into_response();
|
||||
assert_eq!(resp.status(), StatusCode::SERVICE_UNAVAILABLE);
|
||||
let retry_after = resp.headers().get(header::RETRY_AFTER)
|
||||
let retry_after = resp
|
||||
.headers()
|
||||
.get(header::RETRY_AFTER)
|
||||
.expect("Retry-After header missing");
|
||||
assert_eq!(retry_after, "10");
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_model_not_ready_response_body() {
|
||||
let err = AppError::ModelNotReady { state: "unloaded".into(), retry_after_secs: 30 };
|
||||
let err = AppError::ModelNotReady {
|
||||
state: "unloaded".into(),
|
||||
retry_after_secs: 30,
|
||||
};
|
||||
let resp = err.into_response();
|
||||
let bytes = to_bytes(resp.into_body(), usize::MAX).await.unwrap();
|
||||
let v: serde_json::Value = serde_json::from_slice(&bytes).unwrap();
|
||||
@@ -138,21 +156,21 @@ mod tests {
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_model_not_ready_loading_retry_after_10() {
|
||||
let err = AppError::ModelNotReady { state: "loading".into(), retry_after_secs: 10 };
|
||||
let err = AppError::ModelNotReady {
|
||||
state: "loading".into(),
|
||||
retry_after_secs: 10,
|
||||
};
|
||||
let resp = err.into_response();
|
||||
assert_eq!(
|
||||
resp.headers().get(header::RETRY_AFTER).unwrap(),
|
||||
"10"
|
||||
);
|
||||
assert_eq!(resp.headers().get(header::RETRY_AFTER).unwrap(), "10");
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_model_not_ready_unloaded_retry_after_30() {
|
||||
let err = AppError::ModelNotReady { state: "unloaded".into(), retry_after_secs: 30 };
|
||||
let err = AppError::ModelNotReady {
|
||||
state: "unloaded".into(),
|
||||
retry_after_secs: 30,
|
||||
};
|
||||
let resp = err.into_response();
|
||||
assert_eq!(
|
||||
resp.headers().get(header::RETRY_AFTER).unwrap(),
|
||||
"30"
|
||||
);
|
||||
assert_eq!(resp.headers().get(header::RETRY_AFTER).unwrap(), "30");
|
||||
}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user