All checks were successful
Build & Push Docker Image / build-and-push (push) Successful in 11m13s
- Pure Rust: Axum 0.7 + whisper-rs 0.13 (CUDA FFI)
- Async job queue with SSE progress streaming
- Webhook delivery with 5x exponential backoff
- Disk-persisted job state (survives restarts)
- Anti-hallucination params: no_speech_thold, entropy_thold, suppress_blank
- CUDA sm_75 flags: GGML_CUDA_FORCE_MMQ, GGML_CUDA_GRAPHS, GGML_CUDA_FA_ALL_QUANTS
- Configurable via env: CUDA_DEVICE, WHISPER_MODEL_PATH, PORT, DATA_DIR
- Gitea Actions CI: build + push to git.sal.giize.com registry
- Multi-stage Dockerfile with customizable CUDA_VERSION ARG

Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
53 lines
1.4 KiB
YAML
53 lines
1.4 KiB
YAML
# Compose definition for the Whisper transcription service.
# Builds the image from the local Dockerfile, reserves one NVIDIA GPU,
# publishes the HTTP port, and keeps job state and model files in
# named volumes.
services:
  whisper:
    image: whisper-rtx2080:latest
    build:
      context: .
      dockerfile: Dockerfile
      args:
        # ── CUDA / base image ─────────────────────────────────────
        # CUDNN_TAG has to match the CUDA major version:
        #   CUDA 12.x: CUDNN_TAG = "cudnn"
        #   CUDA 11.x: CUDNN_TAG = "cudnn8"
        # Values are quoted so version numbers stay strings.
        CUDA_VERSION: "12.4.1"
        CUDNN_TAG: "cudnn"
        UBUNTU_VERSION: "22.04"

    # ── GPU access (requires NVIDIA Container Toolkit on host) ───
    deploy:
      resources:
        reservations:
          devices:
            - driver: nvidia
              count: 1
              capabilities: [gpu]

    # host:container — quoted so the mapping is always read as a string
    ports:
      - "8080:8080"

    volumes:
      # Job state — survives container restarts
      - whisper-data:/data
      # Model cache — avoids re-downloading large-v3 on every start
      - whisper-models:/models

    environment:
      PORT: "8080"
      RUST_LOG: "info"
      # Located on the whisper-data mount declared above
      DATA_DIR: "/data"
      WHISPER_MODEL: "large-v3"
      # Located on the whisper-models mount declared above
      WHISPER_MODEL_PATH: "/models/ggml-large-v3.bin"

    restart: unless-stopped

    # Probes the /health endpoint from inside the container.
    # NOTE(review): this needs curl in the runtime image, and the port is
    # hard-coded here — keep it in sync with PORT above. Confirm both.
    healthcheck:
      test: ["CMD", "curl", "-sf", "http://localhost:8080/health"]
      interval: 30s
      timeout: 10s
      retries: 3
      # Give the server time to load the model on first start
      start_period: 90s

# Named volumes mounted by the whisper service (/data and /models).
# An empty mapping means "use the default volume configuration".
volumes:
  whisper-data: {}
  whisper-models: {}