Files
whisper-rtx2080/Cargo.toml
mozempk 16cb6ca661
All checks were successful
Build & Push Docker Image / build-and-push (push) Successful in 11m13s
feat: GPU-accelerated Whisper API for RTX 2080 (sm_75)
- Pure Rust: Axum 0.7 + whisper-rs 0.13 (CUDA FFI)
- Async job queue with SSE progress streaming
- Webhook delivery with 5x exponential backoff
- Disk-persisted job state (survives restarts)
- Anti-hallucination params: no_speech_thold, entropy_thold, suppress_blank
- CUDA sm_75 flags: GGML_CUDA_FORCE_MMQ, GGML_CUDA_GRAPHS, GGML_CUDA_FA_ALL_QUANTS
- Configurable via env: CUDA_DEVICE, WHISPER_MODEL_PATH, PORT, DATA_DIR
- Gitea Actions CI: build + push to git.sal.giize.com registry
- Multi-stage Dockerfile with customizable CUDA_VERSION ARG

Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
2026-05-05 22:47:24 +02:00

53 lines
1.5 KiB
TOML

# Package identity for the `whisper-server` crate.
[package]
name = "whisper-server"
version = "0.1.0"
# Rust language edition used to compile this crate.
edition = "2021"

# Binary target declared explicitly: the executable is named after the
# package and its entry point is src/main.rs.
[[bin]]
name = "whisper-server"
path = "src/main.rs"

# Dependencies are grouped by role; entries are sorted alphabetically within
# each group. Version strings are plain SemVer (implicit caret) requirements.
[dependencies]
# HTTP server stack
axum = { version = "0.7", features = ["multipart"] }
axum-extra = { version = "0.9", features = ["typed-header"] }
tower = "0.4"
tower-http = { version = "0.5", features = ["cors", "limit", "trace"] }
# Async runtime and stream helpers
async-stream = "0.3"
futures = "0.3"
tokio = { version = "1", features = ["full"] }
tokio-stream = { version = "0.1", features = ["sync"] }
tokio-util = { version = "0.7", features = ["io"] }
# Whisper inference (built with the `cuda` feature enabled)
whisper-rs = { version = "0.13", features = ["cuda"] }
# Serialization
serde = { version = "1", features = ["derive"] }
serde_json = "1"
# OpenAPI / Swagger
utoipa = { version = "4", features = ["axum_extras", "uuid"] }
utoipa-swagger-ui = { version = "7", features = ["axum"] }
# HTTP client (webhooks); default features are disabled and TLS comes from
# the `rustls-tls` feature
reqwest = { version = "0.12", default-features = false, features = ["json", "rustls-tls"] }
# Logging / tracing
tracing = "0.1"
tracing-subscriber = { version = "0.3", features = ["env-filter", "json"] }
# Error handling
anyhow = "1"
thiserror = "1"
# Utilities
bytes = "1"
chrono = { version = "0.4", features = ["serde"] }
dashmap = "6"
num_cpus = "1"
tempfile = "3"
uuid = { version = "1", features = ["serde", "v4"] }

# Settings applied to `cargo build --release`.
[profile.release]
# A single codegen unit per crate.
codegen-units = 1
# "Thin" link-time optimization.
lto = "thin"
# Highest numeric optimization level.
opt-level = 3
# Strip symbols from the produced binary.
strip = "symbols"