Files
whisper-rtx2080/src/error.rs
Giancarmine Salucci cb0b07b2ff
All checks were successful
Build & Push Docker Image / test (push) Successful in 6m20s
Build & Push Docker Image / build-and-push (push) Successful in 6m29s
fix(worker): collapse incremental segments
Normalize rolling partial-hypothesis chains before final job persistence so downstream clients receive stable transcript segments instead of echoed continuations.

Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
2026-05-11 22:46:38 +02:00

177 lines
5.8 KiB
Rust

use axum::{
http::{header, HeaderValue, StatusCode},
response::{IntoResponse, Response},
Json,
};
use serde_json::json;
use thiserror::Error;
/// Convenience alias for `std::result::Result` with the error type fixed to
/// [`AppError`].
pub type Result<T> = std::result::Result<T, AppError>;
/// Application error type.
///
/// The `Display` text of each variant comes from its `#[error(...)]`
/// attribute. The `IntoResponse` impl in this file turns every variant into an
/// HTTP status code plus a JSON body.
#[derive(Debug, Error)]
pub enum AppError {
/// Rendered as `404 Not Found`; the string is sent back as the `error` field
/// of the JSON body.
#[error("not found: {0}")]
NotFound(String),
/// Rendered as `400 Bad Request`; the string is sent back as the `error`
/// field of the JSON body.
#[error("bad request: {0}")]
BadRequest(String),
/// Rendered as `409 Conflict`; the string is sent back as the `error` field
/// of the JSON body.
#[error("conflict: {0}")]
Conflict(String),
/// Rendered as `500 Internal Server Error`. The string is logged at error
/// level and is also sent back as the `error` field of the JSON body.
#[error("internal error: {0}")]
Internal(String),
/// Returned when `whisper_init_state` or `cudaMalloc` fails due to
/// insufficient VRAM. The worker uses this to distinguish a recoverable
/// VRAM-pressure failure from a hard internal error.
///
/// Rendered as `503 Service Unavailable` and logged at warn level.
#[error("out of GPU memory: {0}")]
OutOfMemory(String),
/// Returned when a job is submitted but the model is not yet loaded.
/// Carries the current state tag and recommended Retry-After seconds.
///
/// Rendered as `503 Service Unavailable` with a `Retry-After` header.
#[error("model not ready: {state}")]
ModelNotReady {
/// State tag; echoed in the `state` field of the JSON body and in the
/// `Display` text.
state: String,
/// Seconds the client should wait; sent both as the `Retry-After` header
/// and as the `retry_after_secs` field of the JSON body.
retry_after_secs: u64,
},
}
impl AppError {
    /// Reports whether `msg` looks like a CUDA memory-allocation failure as
    /// emitted by whisper.cpp / GGML.
    ///
    /// The check is a case-sensitive substring search: the function returns
    /// `true` as soon as `msg` contains any one of the known markers, and
    /// `false` when none of them occurs (including for an empty string).
    pub fn is_oom(msg: &str) -> bool {
        // Substrings that identify an allocation failure; one hit is enough.
        const OOM_MARKERS: [&str; 4] = [
            "cudaMalloc failed",
            "out of memory",
            "CUDA error: out of memory",
            "alloc_buffer",
        ];

        OOM_MARKERS.iter().any(|&marker| msg.contains(marker))
    }
}
impl IntoResponse for AppError {
fn into_response(self) -> Response {
match self {
AppError::NotFound(m) => {
(StatusCode::NOT_FOUND, Json(json!({ "error": m }))).into_response()
}
AppError::BadRequest(m) => {
(StatusCode::BAD_REQUEST, Json(json!({ "error": m }))).into_response()
}
AppError::Conflict(m) => {
(StatusCode::CONFLICT, Json(json!({ "error": m }))).into_response()
}
AppError::Internal(m) => {
tracing::error!(error = %m, "internal error");
(
StatusCode::INTERNAL_SERVER_ERROR,
Json(json!({ "error": m })),
)
.into_response()
}
AppError::OutOfMemory(m) => {
tracing::warn!(error = %m, "GPU out of memory during model load");
(StatusCode::SERVICE_UNAVAILABLE, Json(json!({ "error": m }))).into_response()
}
AppError::ModelNotReady {
state,
retry_after_secs,
} => {
let body = Json(json!({
"error": "model_not_ready",
"state": state,
"retry_after_secs": retry_after_secs,
}));
let mut resp = (StatusCode::SERVICE_UNAVAILABLE, body).into_response();
resp.headers_mut().insert(
header::RETRY_AFTER,
HeaderValue::from_str(&retry_after_secs.to_string())
.unwrap_or(HeaderValue::from_static("30")),
);
resp
}
}
}
}
// ── Unit tests ───────────────────────────────────────────────────────────────
#[cfg(test)]
mod tests {
    use super::*;
    use axum::body::to_bytes;

    /// Drains the response body and parses it as JSON.
    async fn body_json(resp: Response) -> serde_json::Value {
        let bytes = to_bytes(resp.into_body(), usize::MAX).await.unwrap();
        serde_json::from_slice(&bytes).unwrap()
    }

    // ── is_oom ───────────────────────────────────────────────────────────────

    #[test]
    fn test_is_oom_cuda_malloc() {
        assert!(AppError::is_oom("cudaMalloc failed: out of memory"));
        // The marker must also match without the "out of memory" suffix.
        assert!(AppError::is_oom("cudaMalloc failed"));
    }

    #[test]
    fn test_is_oom_alloc_buffer() {
        // Exact message from ggml_backend_cuda_buffer_type_alloc_buffer
        assert!(AppError::is_oom(
            "ggml_backend_cuda_buffer_type_alloc_buffer: allocating 2951.01 MiB on device 0: cudaMalloc failed: out of memory"
        ));
        // Same prefix without the other markers, so `alloc_buffer` is what matches.
        assert!(AppError::is_oom(
            "ggml_backend_cuda_buffer_type_alloc_buffer: allocating 2951.01 MiB on device 0"
        ));
    }

    #[test]
    fn test_is_oom_generic_out_of_memory() {
        assert!(AppError::is_oom("CUDA error: out of memory"));
        assert!(AppError::is_oom("out of memory"));
    }

    #[test]
    fn test_is_oom_other_error() {
        assert!(!AppError::is_oom("failed to open model file"));
        assert!(!AppError::is_oom("invalid model format"));
        assert!(!AppError::is_oom(""));
    }

    // ── Display ──────────────────────────────────────────────────────────────

    #[test]
    fn test_display_messages() {
        assert_eq!(AppError::NotFound("job 1".into()).to_string(), "not found: job 1");
        assert_eq!(AppError::BadRequest("x".into()).to_string(), "bad request: x");
        assert_eq!(AppError::Conflict("x".into()).to_string(), "conflict: x");
        assert_eq!(AppError::Internal("x".into()).to_string(), "internal error: x");
        assert_eq!(
            AppError::OutOfMemory("x".into()).to_string(),
            "out of GPU memory: x"
        );
        let not_ready = AppError::ModelNotReady {
            state: "loading".into(),
            retry_after_secs: 10,
        };
        assert_eq!(not_ready.to_string(), "model not ready: loading");
    }

    // ── Message-only variants ────────────────────────────────────────────────

    #[test]
    fn test_simple_variants_status_codes() {
        let cases = vec![
            (AppError::NotFound("m".into()), StatusCode::NOT_FOUND),
            (AppError::BadRequest("m".into()), StatusCode::BAD_REQUEST),
            (AppError::Conflict("m".into()), StatusCode::CONFLICT),
            (
                AppError::Internal("m".into()),
                StatusCode::INTERNAL_SERVER_ERROR,
            ),
            (
                AppError::OutOfMemory("m".into()),
                StatusCode::SERVICE_UNAVAILABLE,
            ),
        ];
        for (err, expected) in cases {
            let resp = err.into_response();
            assert_eq!(resp.status(), expected);
            // Only `ModelNotReady` is expected to advertise a retry delay.
            assert!(resp.headers().get(header::RETRY_AFTER).is_none());
        }
    }

    #[tokio::test]
    async fn test_simple_variant_body_carries_message() {
        let resp = AppError::BadRequest("missing field".into()).into_response();
        assert_eq!(resp.status(), StatusCode::BAD_REQUEST);
        let v = body_json(resp).await;
        assert_eq!(v["error"], "missing field");
    }

    // ── ModelNotReady ────────────────────────────────────────────────────────

    #[tokio::test]
    async fn test_model_not_ready_response_has_retry_after_header() {
        let err = AppError::ModelNotReady {
            state: "loading".into(),
            retry_after_secs: 10,
        };
        let resp = err.into_response();
        assert_eq!(resp.status(), StatusCode::SERVICE_UNAVAILABLE);
        let retry_after = resp
            .headers()
            .get(header::RETRY_AFTER)
            .expect("Retry-After header missing");
        assert_eq!(retry_after, "10");
    }

    #[tokio::test]
    async fn test_model_not_ready_response_body() {
        let err = AppError::ModelNotReady {
            state: "unloaded".into(),
            retry_after_secs: 30,
        };
        let v = body_json(err.into_response()).await;
        assert_eq!(v["error"], "model_not_ready");
        assert_eq!(v["state"], "unloaded");
        assert_eq!(v["retry_after_secs"], 30);
    }

    #[tokio::test]
    async fn test_model_not_ready_loading_retry_after_10() {
        let err = AppError::ModelNotReady {
            state: "loading".into(),
            retry_after_secs: 10,
        };
        let resp = err.into_response();
        assert_eq!(resp.headers().get(header::RETRY_AFTER).unwrap(), "10");
    }

    #[tokio::test]
    async fn test_model_not_ready_unloaded_retry_after_30() {
        let err = AppError::ModelNotReady {
            state: "unloaded".into(),
            retry_after_secs: 30,
        };
        let resp = err.into_response();
        assert_eq!(resp.headers().get(header::RETRY_AFTER).unwrap(), "30");
    }
}