use axum::{ http::{header, HeaderValue, StatusCode}, response::{IntoResponse, Response}, Json, }; use serde_json::json; use thiserror::Error; pub type Result = std::result::Result; #[derive(Debug, Error)] pub enum AppError { #[error("not found: {0}")] NotFound(String), #[error("bad request: {0}")] BadRequest(String), #[error("conflict: {0}")] Conflict(String), #[error("internal error: {0}")] Internal(String), /// Returned when `whisper_init_state` or `cudaMalloc` fails due to /// insufficient VRAM. The worker uses this to distinguish a recoverable /// VRAM-pressure failure from a hard internal error. #[error("out of GPU memory: {0}")] OutOfMemory(String), /// Returned when a job is submitted but the model is not yet loaded. /// Carries the current state tag and recommended Retry-After seconds. #[error("model not ready: {state}")] ModelNotReady { state: String, retry_after_secs: u64, }, } impl AppError { /// Returns true if the error string contains patterns emitted by /// whisper.cpp / GGML when a CUDA memory allocation fails. pub fn is_oom(msg: &str) -> bool { msg.contains("cudaMalloc failed") || msg.contains("out of memory") || msg.contains("CUDA error: out of memory") || msg.contains("alloc_buffer") } } impl IntoResponse for AppError { fn into_response(self) -> Response { match self { AppError::NotFound(m) => { (StatusCode::NOT_FOUND, Json(json!({ "error": m }))).into_response() } AppError::BadRequest(m) => { (StatusCode::BAD_REQUEST, Json(json!({ "error": m }))).into_response() } AppError::Conflict(m) => { (StatusCode::CONFLICT, Json(json!({ "error": m }))).into_response() } AppError::Internal(m) => { tracing::error!(error = %m, "internal error"); ( StatusCode::INTERNAL_SERVER_ERROR, Json(json!({ "error": m })), ) .into_response() } AppError::OutOfMemory(m) => { tracing::warn!(error = %m, "GPU out of memory during model load"); (StatusCode::SERVICE_UNAVAILABLE, Json(json!({ "error": m }))).into_response() } AppError::ModelNotReady { state, retry_after_secs, } => { let body = Json(json!({ "error": "model_not_ready", "state": state, "retry_after_secs": retry_after_secs, })); let mut resp = (StatusCode::SERVICE_UNAVAILABLE, body).into_response(); resp.headers_mut().insert( header::RETRY_AFTER, HeaderValue::from_str(&retry_after_secs.to_string()) .unwrap_or(HeaderValue::from_static("30")), ); resp } } } } // ── Unit tests ─────────────────────────────────────────────────────────────── #[cfg(test)] mod tests { use super::*; use axum::body::to_bytes; #[test] fn test_is_oom_cuda_malloc() { assert!(AppError::is_oom("cudaMalloc failed: out of memory")); } #[test] fn test_is_oom_alloc_buffer() { // Exact message from ggml_backend_cuda_buffer_type_alloc_buffer assert!(AppError::is_oom( "ggml_backend_cuda_buffer_type_alloc_buffer: allocating 2951.01 MiB on device 0: cudaMalloc failed: out of memory" )); } #[test] fn test_is_oom_generic_out_of_memory() { assert!(AppError::is_oom("CUDA error: out of memory")); } #[test] fn test_is_oom_other_error() { assert!(!AppError::is_oom("failed to open model file")); assert!(!AppError::is_oom("invalid model format")); assert!(!AppError::is_oom("")); } #[tokio::test] async fn test_model_not_ready_response_has_retry_after_header() { let err = AppError::ModelNotReady { state: "loading".into(), retry_after_secs: 10, }; let resp = err.into_response(); assert_eq!(resp.status(), StatusCode::SERVICE_UNAVAILABLE); let retry_after = resp .headers() .get(header::RETRY_AFTER) .expect("Retry-After header missing"); assert_eq!(retry_after, "10"); } #[tokio::test] async fn test_model_not_ready_response_body() { let err = AppError::ModelNotReady { state: "unloaded".into(), retry_after_secs: 30, }; let resp = err.into_response(); let bytes = to_bytes(resp.into_body(), usize::MAX).await.unwrap(); let v: serde_json::Value = serde_json::from_slice(&bytes).unwrap(); assert_eq!(v["error"], "model_not_ready"); assert_eq!(v["state"], "unloaded"); assert_eq!(v["retry_after_secs"], 30); } #[tokio::test] async fn test_model_not_ready_loading_retry_after_10() { let err = AppError::ModelNotReady { state: "loading".into(), retry_after_secs: 10, }; let resp = err.into_response(); assert_eq!(resp.headers().get(header::RETRY_AFTER).unwrap(), "10"); } #[tokio::test] async fn test_model_not_ready_unloaded_retry_after_30() { let err = AppError::ModelNotReady { state: "unloaded".into(), retry_after_secs: 30, }; let resp = err.into_response(); assert_eq!(resp.headers().get(header::RETRY_AFTER).unwrap(), "30"); } }