// whisper-rtx2080/src/error.rs

use axum::{
    http::{header, HeaderValue, StatusCode},
    response::{IntoResponse, Response},
    Json,
};
use serde_json::json;
use thiserror::Error;

/// Shorthand for `std::result::Result` with [`AppError`] as the error type.
pub type Result<T> = std::result::Result<T, AppError>;

/// Errors surfaced by the application.
///
/// The `Display` text of each variant comes from its `#[error(...)]`
/// attribute. The `IntoResponse` impl in this file turns every variant into
/// an HTTP status plus a JSON body; for the single-`String` variants the body
/// carries the inner message (not the `Display` text) under the `"error"` key.
#[derive(Debug, Error)]
pub enum AppError {
    /// Rendered as HTTP 404 (Not Found).
    #[error("not found: {0}")]
    NotFound(String),

    /// Rendered as HTTP 400 (Bad Request).
    #[error("bad request: {0}")]
    BadRequest(String),

    /// Rendered as HTTP 409 (Conflict).
    #[error("conflict: {0}")]
    Conflict(String),

    /// Rendered as HTTP 500 (Internal Server Error); the message is also
    /// logged at `error` level when the response is built.
    #[error("internal error: {0}")]
    Internal(String),

    /// Returned when `whisper_init_state` or `cudaMalloc` fails due to
    /// insufficient VRAM. The worker uses this to distinguish a recoverable
    /// VRAM-pressure failure from a hard internal error.
    ///
    /// Rendered as HTTP 503 (Service Unavailable); the message is also logged
    /// at `warn` level when the response is built.
    #[error("out of GPU memory: {0}")]
    OutOfMemory(String),

    /// Returned when a job is submitted but the model is not yet loaded.
    /// Carries the current state tag and recommended Retry-After seconds.
    ///
    /// Rendered as HTTP 503 (Service Unavailable) with a `Retry-After` header
    /// taken from `retry_after_secs`; both fields are echoed in the JSON body.
    #[error("model not ready: {state}")]
    ModelNotReady {
        /// Model state tag, echoed as `"state"` in the response body.
        state: String,
        /// Seconds the client should wait before retrying.
        retry_after_secs: u64,
    },
}

impl AppError {
    /// Reports whether `msg` looks like a whisper.cpp / GGML CUDA
    /// allocation failure.
    ///
    /// The check is a case-sensitive substring search; an empty message
    /// never matches.
    pub fn is_oom(msg: &str) -> bool {
        // "out of memory" also covers the longer "CUDA error: out of memory"
        // form, so that one needs no entry of its own.
        const OOM_MARKERS: [&str; 3] = ["cudaMalloc failed", "out of memory", "alloc_buffer"];

        OOM_MARKERS.iter().any(|marker| msg.contains(marker))
    }
}

impl IntoResponse for AppError {
    fn into_response(self) -> Response {
        match self {
            AppError::NotFound(m) => {
                (StatusCode::NOT_FOUND, Json(json!({ "error": m }))).into_response()
            }
            AppError::BadRequest(m) => {
                (StatusCode::BAD_REQUEST, Json(json!({ "error": m }))).into_response()
            }
            AppError::Conflict(m) => {
                (StatusCode::CONFLICT, Json(json!({ "error": m }))).into_response()
            }
            AppError::Internal(m) => {
                tracing::error!(error = %m, "internal error");
                (
                    StatusCode::INTERNAL_SERVER_ERROR,
                    Json(json!({ "error": m })),
                )
                    .into_response()
            }
            AppError::OutOfMemory(m) => {
                tracing::warn!(error = %m, "GPU out of memory during model load");
                (StatusCode::SERVICE_UNAVAILABLE, Json(json!({ "error": m }))).into_response()
            }
            AppError::ModelNotReady {
                state,
                retry_after_secs,
            } => {
                let body = Json(json!({
                    "error":            "model_not_ready",
                    "state":            state,
                    "retry_after_secs": retry_after_secs,
                }));
                let mut resp = (StatusCode::SERVICE_UNAVAILABLE, body).into_response();
                resp.headers_mut().insert(
                    header::RETRY_AFTER,
                    HeaderValue::from_str(&retry_after_secs.to_string())
                        .unwrap_or(HeaderValue::from_static("30")),
                );
                resp
            }
        }
    }
}

// ── Unit tests ───────────────────────────────────────────────────────────────

#[cfg(test)]
mod tests {
    use super::*;
    use axum::body::to_bytes;

    /// Builds the `ModelNotReady` error exercised by the response tests.
    fn model_not_ready(state: &str, retry_after_secs: u64) -> AppError {
        AppError::ModelNotReady {
            state: state.into(),
            retry_after_secs,
        }
    }

    #[test]
    fn test_is_oom_cuda_malloc() {
        let msg = "cudaMalloc failed: out of memory";
        assert!(AppError::is_oom(msg));
    }

    #[test]
    fn test_is_oom_alloc_buffer() {
        // Exact message from ggml_backend_cuda_buffer_type_alloc_buffer
        let msg = "ggml_backend_cuda_buffer_type_alloc_buffer: allocating 2951.01 MiB on device 0: cudaMalloc failed: out of memory";
        assert!(AppError::is_oom(msg));
    }

    #[test]
    fn test_is_oom_generic_out_of_memory() {
        let msg = "CUDA error: out of memory";
        assert!(AppError::is_oom(msg));
    }

    #[test]
    fn test_is_oom_other_error() {
        // None of these carry an OOM marker, including the empty message.
        let unrelated = ["failed to open model file", "invalid model format", ""];
        for msg in unrelated {
            assert!(!AppError::is_oom(msg));
        }
    }

    #[tokio::test]
    async fn test_model_not_ready_response_has_retry_after_header() {
        let resp = model_not_ready("loading", 10).into_response();
        assert_eq!(resp.status(), StatusCode::SERVICE_UNAVAILABLE);

        let headers = resp.headers();
        let retry_after = headers
            .get(header::RETRY_AFTER)
            .expect("Retry-After header missing");
        assert_eq!(retry_after, "10");
    }

    #[tokio::test]
    async fn test_model_not_ready_response_body() {
        let resp = model_not_ready("unloaded", 30).into_response();

        // Drain the whole body and parse it back as JSON.
        let body = resp.into_body();
        let bytes = to_bytes(body, usize::MAX).await.unwrap();
        let v: serde_json::Value = serde_json::from_slice(&bytes).unwrap();

        assert_eq!(v["error"], "model_not_ready");
        assert_eq!(v["state"], "unloaded");
        assert_eq!(v["retry_after_secs"], 30);
    }

    #[tokio::test]
    async fn test_model_not_ready_loading_retry_after_10() {
        let resp = model_not_ready("loading", 10).into_response();
        let retry_after = resp.headers().get(header::RETRY_AFTER).unwrap();
        assert_eq!(retry_after, "10");
    }

    #[tokio::test]
    async fn test_model_not_ready_unloaded_retry_after_30() {
        let resp = model_not_ready("unloaded", 30).into_response();
        let retry_after = resp.headers().get(header::RETRY_AFTER).unwrap();
        assert_eq!(retry_after, "30");
    }
}