feat: GPU-accelerated Whisper API for RTX 2080 (sm_75)
All checks were successful
Build & Push Docker Image / build-and-push (push) Successful in 11m13s
All checks were successful
Build & Push Docker Image / build-and-push (push) Successful in 11m13s
- Pure Rust: Axum 0.7 + whisper-rs 0.13 (CUDA FFI)
- Async job queue with SSE progress streaming
- Webhook delivery with 5x exponential backoff
- Disk-persisted job state (survives restarts)
- Anti-hallucination params: no_speech_thold, entropy_thold, suppress_blank
- CUDA sm_75 flags: GGML_CUDA_FORCE_MMQ, GGML_CUDA_GRAPHS, GGML_CUDA_FA_ALL_QUANTS
- Configurable via env: CUDA_DEVICE, WHISPER_MODEL_PATH, PORT, DATA_DIR
- Gitea Actions CI: build + push to git.sal.giize.com registry
- Multi-stage Dockerfile with customizable CUDA_VERSION ARG

Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
This commit is contained in:
52
Cargo.toml
Normal file
52
Cargo.toml
Normal file
@@ -0,0 +1,52 @@
|
||||
# Package metadata for the whisper-server crate: name, version, and the
# Rust edition the sources are compiled with.
[package]
|
||||
name = "whisper-server"
|
||||
version = "0.1.0"
|
||||
edition = "2021"
|
||||
|
||||
# Binary target: an executable named `whisper-server` whose entry point is
# src/main.rs. The target name is the same as the package name.
[[bin]]
|
||||
name = "whisper-server"
|
||||
path = "src/main.rs"
|
||||
|
||||
# Dependencies of the whisper-server crate, grouped by purpose.
[dependencies]
|
||||
# Web framework stack: axum and axum-extra, the Tokio runtime crates, and
# Tower / tower-http.
|
||||
# axum's `multipart` feature is enabled.
axum = { version = "0.7", features = ["multipart"] }
|
||||
# Only the `typed-header` feature of axum-extra is requested.
axum-extra = { version = "0.9", features = ["typed-header"] }
|
||||
# Tokio with its `full` feature set.
tokio = { version = "1", features = ["full"] }
|
||||
# NOTE(review): `sync` is presumably needed for stream wrappers around
# tokio::sync channels; confirm against the code that uses it.
tokio-stream = { version = "0.1", features = ["sync"] }
|
||||
tower = { version = "0.4" }
|
||||
# tower-http features enabled: `cors`, `trace`, and `limit`.
tower-http = { version = "0.5", features = ["cors", "trace", "limit"] }
|
||||
|
||||
# Whisper inference: whisper-rs built with its `cuda` feature enabled.
# NOTE(review): building this feature presumably requires a CUDA toolchain
# in the build environment; confirm against the Dockerfile / CI setup.
|
||||
whisper-rs = { version = "0.13", features = ["cuda"] }
|
||||
|
||||
# Serialisation: serde with the `derive` feature enabled, plus serde_json
# for JSON.
|
||||
serde = { version = "1", features = ["derive"] }
|
||||
serde_json = "1"
|
||||
|
||||
# OpenAPI / Swagger: utoipa with its `axum_extras` and `uuid` features, and
# utoipa-swagger-ui with its `axum` feature.
# NOTE(review): the major versions of these two crates are paired upstream;
# bump them together and check compatibility when upgrading.
|
||||
utoipa = { version = "4", features = ["axum_extras", "uuid"] }
|
||||
utoipa-swagger-ui = { version = "7", features = ["axum"] }
|
||||
|
||||
# HTTP client (webhooks). reqwest's default features are disabled; only
# `json` and `rustls-tls` are enabled, so TLS comes from the rustls-based
# feature rather than from the default feature set.
|
||||
reqwest = { version = "0.12", default-features = false, features = ["json", "rustls-tls"] }
|
||||
|
||||
# Utilities: identifiers, logging, error handling, temporary files,
# timestamps, and async / concurrency helpers.
|
||||
# uuid with the `v4` and `serde` features enabled.
uuid = { version = "1", features = ["v4", "serde"] }
|
||||
tracing = "0.1"
|
||||
# tracing-subscriber with the `env-filter` and `json` features enabled.
tracing-subscriber = { version = "0.3", features = ["env-filter", "json"] }
|
||||
anyhow = "1"
|
||||
thiserror = "1"
|
||||
tempfile = "3"
|
||||
num_cpus = "1"
|
||||
# chrono with the `serde` feature enabled.
chrono = { version = "0.4", features = ["serde"] }
|
||||
# tokio-util with only the `io` feature enabled.
tokio-util = { version = "0.7", features = ["io"] }
|
||||
futures = "0.3"
|
||||
async-stream = "0.3"
|
||||
bytes = "1"
|
||||
dashmap = "6"
|
||||
|
||||
# Release build profile, applied to `cargo build --release`.
[profile.release]
|
||||
# Optimisation level 3.
opt-level = 3
|
||||
# "thin" link-time optimisation (as opposed to "fat" or disabled).
lto = "thin"
|
||||
# Compile each crate as a single code generation unit.
codegen-units = 1
|
||||
# Strip symbols from the produced binary.
strip = "symbols"
|
||||
Reference in New Issue
Block a user