All checks were successful
Build & Push Docker Image / build-and-push (push) Successful in 11m13s
- Pure Rust: Axum 0.7 + whisper-rs 0.13 (CUDA FFI) - Async job queue with SSE progress streaming - Webhook delivery with 5x exponential backoff - Disk-persisted job state (survives restarts) - Anti-hallucination params: no_speech_thold, entropy_thold, suppress_blank - CUDA sm_75 flags: GGML_CUDA_FORCE_MMQ, GGML_CUDA_GRAPHS, GGML_CUDA_FA_ALL_QUANTS - Configurable via env: CUDA_DEVICE, WHISPER_MODEL_PATH, PORT, DATA_DIR - Gitea Actions CI: build + push to git.sal.giize.com registry - Multi-stage Dockerfile with customizable CUDA_VERSION ARG Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
130 lines
6.3 KiB
Docker
130 lines
6.3 KiB
Docker
# ============================================================
#  whisper-rtx2080 — Multi-stage Dockerfile
#  Optimised for NVIDIA RTX 2080 (Turing, sm_75, 8 GB VRAM)
# ============================================================
#
# Build-arg reference:
#
#   CUDA_VERSION     CUDA toolkit version     default: 12.4.1
#   CUDNN_TAG        cuDNN tag suffix         default: cudnn
#                    (newer images, e.g. 12.4.1 → "cudnn"; older images carry a
#                     versioned suffix, e.g. 12.1.0 / 11.8.0 → "cudnn8" — check
#                     the nvidia/cuda tag list for the version you pick)
#   UBUNTU_VERSION   Ubuntu base version      default: 22.04
#
# Examples:
#   docker build -t whisper-rtx2080 .
#   docker build --build-arg CUDA_VERSION=12.1.0 --build-arg CUDNN_TAG=cudnn8 -t whisper-rtx2080:cu121 .
#   docker build --build-arg CUDA_VERSION=11.8.0 --build-arg CUDNN_TAG=cudnn8 --build-arg UBUNTU_VERSION=20.04 -t whisper-rtx2080:cu118 .

# ── Global build arguments ───────────────────────────────────────────────────
# Declared before the first FROM, so they are only visible to FROM lines.
# A stage that needs one of these values must redeclare it with a bare `ARG`.
ARG CUDA_VERSION=12.4.1
ARG CUDNN_TAG=cudnn
ARG UBUNTU_VERSION=22.04

# ╔══════════════════════════════════════════════════════════╗
# ║  STAGE 1 — builder                                       ║
# ║  Full CUDA devel image + Rust toolchain                  ║
# ║  Compiles whisper.cpp (CUDA kernels) + Rust binary       ║
# ╚══════════════════════════════════════════════════════════╝
FROM nvidia/cuda:${CUDA_VERSION}-${CUDNN_TAG}-devel-ubuntu${UBUNTU_VERSION} AS builder

# Redeclared without a default so the value is inherited from the global ARG
# instead of duplicating "12.4.1" in a second place.
ARG CUDA_VERSION

# Build-only: an ARG is visible to RUN steps but is not baked into the image env.
ARG DEBIAN_FRONTEND=noninteractive

# Rust toolchain channel/version handed to rustup. Pin to an exact release
# (e.g. --build-arg RUST_TOOLCHAIN=1.79.0) for reproducible builds.
ARG RUST_TOOLCHAIN=stable

# Major version of the LLVM that `libclang-dev` installs on the chosen Ubuntu
# base. 14 matches Ubuntu 22.04; other bases ship a different major version
# (presumably 10 on Ubuntu 20.04 — verify before building with
# UBUNTU_VERSION=20.04).
ARG LLVM_VERSION=14

# Fail a RUN step when any command in a pipeline fails (needed for the
# `curl | sh` rustup install below, which /bin/sh would otherwise let pass
# even if the download failed).
SHELL ["/bin/bash", "-o", "pipefail", "-c"]

# ── System build dependencies ────────────────────────────────────────────────
# libavformat-dev / libavcodec-dev are ffmpeg headers: not strictly needed at
# build time, but installing them avoids surprises.
RUN apt-get update && apt-get install -y --no-install-recommends \
        build-essential \
        ca-certificates \
        clang \
        cmake \
        curl \
        git \
        libavcodec-dev \
        libavformat-dev \
        libclang-dev \
        pkg-config \
    && rm -rf /var/lib/apt/lists/*

# ── Rust toolchain ───────────────────────────────────────────────────────────
ENV RUSTUP_HOME=/usr/local/rustup \
    CARGO_HOME=/usr/local/cargo \
    PATH=/usr/local/cargo/bin:$PATH

RUN curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs \
        | sh -s -- -y --default-toolchain "${RUST_TOOLCHAIN}" --profile minimal \
    && rustup component add rustfmt

# ── whisper.cpp sources ──────────────────────────────────────────────────────
# No explicit clone step: the whisper-rs build script is expected to provide
# and compile whisper.cpp itself. This stage only exports the CUDA-related
# settings as environment variables.
# NOTE(review): confirm that the pinned whisper-rs version's build.rs really
# forwards GGML_* variables to CMake; if it only forwards certain prefixes,
# some of the flags below are silently ignored.

# ── CUDA architecture flags for RTX 2080 (sm_75) ────────────────────────────
ENV GGML_CUDA=ON \
    CMAKE_CUDA_ARCHITECTURES=75 \
    GGML_CUDA_FORCE_MMQ=ON \
    GGML_CUDA_GRAPHS=ON \
    GGML_CUDA_FA_ALL_QUANTS=ON \
    GGML_CUDA_F16=ON

# Tell whisper-rs / cmake where nvcc lives, and where libclang is installed.
ENV CUDA_PATH=/usr/local/cuda \
    LIBCLANG_PATH=/usr/lib/llvm-${LLVM_VERSION}/lib

# ── Copy source and build ────────────────────────────────────────────────────
WORKDIR /build
# `Cargo.lock*` copies the lockfile when the build context has one, so
# dependency versions match what was committed; the glob keeps the COPY valid
# when no lockfile exists.
COPY Cargo.toml Cargo.lock* ./
COPY src/ ./src/

# Build in release mode — LTO + single codegen unit (see Cargo.toml profile).
# The cache mounts (BuildKit) exist only during this RUN step, so the binary
# must be copied out of /build/target before the step ends.
RUN --mount=type=cache,target=/usr/local/cargo/registry \
    --mount=type=cache,target=/build/target \
    cargo build --release \
    && cp target/release/whisper-server /usr/local/bin/whisper-server

# ╔══════════════════════════════════════════════════════════╗
# ║  STAGE 2 — runtime                                       ║
# ║  Minimal CUDA runtime image — no build tools             ║
# ╚══════════════════════════════════════════════════════════╝
FROM nvidia/cuda:${CUDA_VERSION}-${CUDNN_TAG}-runtime-ubuntu${UBUNTU_VERSION} AS runtime

# Redeclared without a default so the value is inherited from the global ARG.
ARG CUDA_VERSION

# Build-only: keeps apt non-interactive during the build without leaving
# DEBIAN_FRONTEND set in the environment of the running container.
ARG DEBIAN_FRONTEND=noninteractive

# ── Runtime dependencies only ────────────────────────────────────────────────
# curl is required by the HEALTHCHECK below; without it the probe cannot run
# and the container would be reported as unhealthy.
RUN apt-get update && apt-get install -y --no-install-recommends \
        ca-certificates \
        curl \
        ffmpeg \
    && rm -rf /var/lib/apt/lists/*

# ── NVIDIA container runtime ─────────────────────────────────────────────────
ENV NVIDIA_VISIBLE_DEVICES=all \
    NVIDIA_DRIVER_CAPABILITIES=compute,utility \
    CUDA_DEVICE_ORDER=PCI_BUS_ID

# ── GGML VRAM tuning for RTX 2080 ────────────────────────────────────────────
# NOTE(review): GGML_CUDA_NO_VMM looks like a build-time CMake option of ggml;
# confirm that anything actually reads it from the process environment at
# runtime. Kept as-is so the image's environment stays unchanged.
ENV GGML_CUDA_NO_VMM=0

# ── Application defaults (all overridable at runtime) ────────────────────────
ENV PORT=8080 \
    RUST_LOG=info \
    DATA_DIR=/data \
    WHISPER_MODEL=large-v3 \
    WHISPER_MODEL_PATH=/models/ggml-large-v3.bin

# ── Binary ───────────────────────────────────────────────────────────────────
# --chmod sets the executable bit during the copy, avoiding a separate
# `RUN chmod` layer that would store the binary a second time.
COPY --from=builder --chmod=0755 /usr/local/bin/whisper-server /app/whisper-server

# ── Volumes & ports ──────────────────────────────────────────────────────────
# Directories are created before VOLUME so the mount points exist in the image.
RUN mkdir -p /data /models
VOLUME ["/data", "/models"]
# EXPOSE is documentation only and reflects the default PORT above.
EXPOSE 8080

# Shell-form CMD: ${PORT} is expanded by the shell inside the running
# container, so a PORT override passed to `docker run -e` is honoured.
HEALTHCHECK --interval=30s --timeout=10s --start-period=60s --retries=3 \
    CMD curl -sf "http://localhost:${PORT}/health" || exit 1

# NOTE(review): the container runs as root because no USER is set. Switching to
# a dedicated non-root user is recommended, but it changes ownership
# requirements for the /data and /models mounts, so it is left as a follow-up.
ENTRYPOINT ["/app/whisper-server"]