feat: GPU-accelerated Whisper API for RTX 2080 (sm_75)

- Pure Rust: Axum 0.7 + whisper-rs 0.13 (CUDA FFI) - Async job queue with SSE progress streaming - Webhook delivery with 5x exponential backoff - Disk-persisted job state (survives restarts) - Anti-hallucination params: no_speech_thold, entropy_thold, suppress_blank - CUDA sm_75 flags: GGML_CUDA_FORCE_MMQ, GGML_CUDA_GRAPHS, GGML_CUDA_FA_ALL_QUANTS - Configurable via env: CUDA_DEVICE, WHISPER_MODEL_PATH, PORT, DATA_DIR - Gitea Actions CI: build + push to git.sal.giize.com registry - Multi-stage Dockerfile with customizable CUDA_VERSION ARG Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
2026-05-05 22:47:24 +02:00
commit 16cb6ca661
18 changed files with 1898 additions and 0 deletions
--- a/src/error.rs
+++ b/src/error.rs
@@ -0,0 +1,39 @@
+use thiserror::Error;
+use axum::{
+    http::StatusCode,
+    response::{IntoResponse, Response},
+    Json,
+};
+use serde_json::json;
+
+pub type Result<T> = std::result::Result<T, AppError>;
+
+#[derive(Debug, Error)]
+pub enum AppError {
+    #[error("not found: {0}")]
+    NotFound(String),
+
+    #[error("bad request: {0}")]
+    BadRequest(String),
+
+    #[error("conflict: {0}")]
+    Conflict(String),
+
+    #[error("internal error: {0}")]
+    Internal(String),
+}
+
+impl IntoResponse for AppError {
+    fn into_response(self) -> Response {
+        let (status, message) = match &self {
+            AppError::NotFound(m)   => (StatusCode::NOT_FOUND,             m.clone()),
+            AppError::BadRequest(m) => (StatusCode::BAD_REQUEST,           m.clone()),
+            AppError::Conflict(m)   => (StatusCode::CONFLICT,              m.clone()),
+            AppError::Internal(m)   => (StatusCode::INTERNAL_SERVER_ERROR, m.clone()),
+        };
+
+        tracing::error!(status = status.as_u16(), error = %message);
+
+        (status, Json(json!({ "error": message }))).into_response()
+    }
+}