# syntax=docker/dockerfile:1
# ============================================================
# whisper-rtx2080 — Multi-stage Dockerfile
# Optimised for NVIDIA RTX 2080 (Turing, sm_75, 8 GB VRAM)
# ============================================================
#
# Stages:
#   builder  — CUDA devel image + Rust toolchain; compiles the server binary
#   tester   — runs `cargo test --release` against CUDA stub libraries
#   runtime  — CUDA runtime image carrying only the binary and ffmpeg
#
# Requires BuildKit (cache mounts and COPY --chmod are used below).
#
# Build-arg reference:
#
#   CUDA_VERSION     CUDA toolkit version                    default: 12.4.1
#   CUDNN_TAG        cuDNN tag suffix                        default: cudnn
#                    (the default 12.4.1 tag uses "cudnn"; the 12.1.0 and
#                     11.8.0 examples below use "cudnn8")
#   UBUNTU_VERSION   Ubuntu base version                     default: 22.04
#   LLVM_VERSION     LLVM major version installed by the     default: 14
#                    distro's libclang-dev; selects LIBCLANG_PATH
#                    (Ubuntu 22.04 → 14, Ubuntu 20.04 → 10)
#   RUST_TOOLCHAIN   rustup toolchain to install             default: stable
#
# Examples:
#   docker build -t whisper-rtx2080 .
#   docker build --build-arg CUDA_VERSION=12.1.0 --build-arg CUDNN_TAG=cudnn8 -t whisper-rtx2080:cu121 .
#   docker build --build-arg CUDA_VERSION=11.8.0 --build-arg CUDNN_TAG=cudnn8 --build-arg UBUNTU_VERSION=20.04 --build-arg LLVM_VERSION=10 -t whisper-rtx2080:cu118 .

# Global ARGs: visible to FROM lines only; stages re-declare what they need.
ARG CUDA_VERSION=12.4.1
ARG CUDNN_TAG=cudnn
ARG UBUNTU_VERSION=22.04

# ╔══════════════════════════════════════════════════════════╗
# ║  STAGE 1 — builder                                       ║
# ║  Full CUDA devel image + Rust toolchain                  ║
# ║  Compiles whisper.cpp (CUDA kernels) + Rust binary       ║
# ╚══════════════════════════════════════════════════════════╝
FROM nvidia/cuda:${CUDA_VERSION}-${CUDNN_TAG}-devel-ubuntu${UBUNTU_VERSION} AS builder

# Re-declared without a value so the stage inherits the global default.
ARG CUDA_VERSION
# Build-time only: keeps apt non-interactive without persisting into the image env.
ARG DEBIAN_FRONTEND=noninteractive

# Fail a RUN when any command in a pipeline fails (needed for `curl | sh` below).
SHELL ["/bin/bash", "-o", "pipefail", "-c"]

# ── System build dependencies ────────────────────────────────────────────────
# libavformat-dev / libavcodec-dev: ffmpeg headers (not strictly needed at
# build time, but avoids surprises).
RUN apt-get update && apt-get install -y --no-install-recommends \
        build-essential \
        ca-certificates \
        clang \
        cmake \
        curl \
        git \
        libavcodec-dev \
        libavformat-dev \
        libclang-dev \
        pkg-config \
    && rm -rf /var/lib/apt/lists/*

# ── Rust toolchain ───────────────────────────────────────────────────────────
ARG RUST_TOOLCHAIN=stable
ENV RUSTUP_HOME=/usr/local/rustup \
    CARGO_HOME=/usr/local/cargo \
    PATH=/usr/local/cargo/bin:$PATH

RUN curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs \
      | sh -s -- -y --default-toolchain "${RUST_TOOLCHAIN}" --profile minimal \
    && rustup component add rustfmt

# ── whisper.cpp sources ──────────────────────────────────────────────────────
# No explicit clone step: whisper-rs builds whisper.cpp automatically via its
# build script (it pins a specific commit via its build.rs).
# We only need to ensure the CUDA flags are forwarded through env vars.

# ── CUDA architecture flags for RTX 2080 (sm_75) ────────────────────────────
# These are picked up by whisper-rs's build.rs when it invokes cmake internally.
# CUDA_PATH tells whisper-rs / cmake where nvcc lives.
# LIBCLANG_PATH must match the LLVM version installed by libclang-dev on the
# selected Ubuntu release, hence the LLVM_VERSION build arg.
ARG LLVM_VERSION=14
ENV GGML_CUDA=ON \
    CMAKE_CUDA_ARCHITECTURES=75 \
    GGML_CUDA_FORCE_MMQ=ON \
    GGML_CUDA_GRAPHS=ON \
    GGML_CUDA_FA_ALL_QUANTS=ON \
    GGML_CUDA_F16=ON \
    CUDA_PATH=/usr/local/cuda \
    LIBCLANG_PATH=/usr/lib/llvm-${LLVM_VERSION}/lib

# ── Copy source and build ────────────────────────────────────────────────────
WORKDIR /build

# NOTE(review): only Cargo.toml is copied, so dependency versions are resolved
# at build time; copy Cargo.lock as well if the repository tracks one.
COPY Cargo.toml ./
COPY src/ ./src/

# Build in release mode — LTO + single codegen unit (see Cargo.toml profile).
# /build/target is a cache mount and is not part of the image layer, so the
# binary is copied out to /usr/local/bin within the same RUN.
RUN --mount=type=cache,target=/usr/local/cargo/registry \
    --mount=type=cache,target=/build/target \
    cargo build --release \
    && cp target/release/whisper-server /usr/local/bin/whisper-server

# ╔══════════════════════════════════════════════════════════╗
# ║  STAGE 1b — tester                                       ║
# ║  Runs unit tests against the release build artifacts     ║
# ║  Uses CUDA stubs so tests run without a physical GPU     ║
# ║                                                          ║
# ║  Usage:                                                  ║
# ║    docker build --target tester .                        ║
# ╚══════════════════════════════════════════════════════════╝
FROM builder AS tester

# libcuda.so.1 stub — satisfies the dynamic linker without a real driver
RUN ln -sf /usr/local/cuda/lib64/stubs/libcuda.so \
           /usr/local/cuda/lib64/stubs/libcuda.so.1

# Reuse the same cache mounts so no recompilation is needed
RUN --mount=type=cache,target=/usr/local/cargo/registry \
    --mount=type=cache,target=/build/target \
    LD_LIBRARY_PATH=/usr/local/cuda/lib64/stubs \
    cargo test --release

# ╔══════════════════════════════════════════════════════════╗
# ║  STAGE 2 — runtime                                       ║
# ║  Minimal CUDA runtime image — no build tools             ║
# ╚══════════════════════════════════════════════════════════╝
FROM nvidia/cuda:${CUDA_VERSION}-${CUDNN_TAG}-runtime-ubuntu${UBUNTU_VERSION} AS runtime

# Re-declared without a value so the stage inherits the global default.
ARG CUDA_VERSION
# Build-time only: keeps apt non-interactive without persisting into the runtime env.
ARG DEBIAN_FRONTEND=noninteractive

# ── Runtime dependencies only ────────────────────────────────────────────────
# curl is required by the HEALTHCHECK at the bottom of this stage.
RUN apt-get update && apt-get install -y --no-install-recommends \
        ca-certificates \
        curl \
        ffmpeg \
    && rm -rf /var/lib/apt/lists/*

# ── NVIDIA container runtime ─────────────────────────────────────────────────
ENV NVIDIA_VISIBLE_DEVICES=all \
    NVIDIA_DRIVER_CAPABILITIES=compute,utility \
    CUDA_DEVICE_ORDER=PCI_BUS_ID

# ── GGML VRAM tuning for RTX 2080 ────────────────────────────────────────────
# Intended to keep the CUDA allocator from fragmenting the 8 GB pool.
# NOTE(review): GGML_CUDA_NO_VMM is normally a build-time CMake option in ggml;
# confirm that the server actually reads it from the environment at runtime.
ENV GGML_CUDA_NO_VMM=0

# ── Application defaults (all overridable at runtime) ────────────────────────
ENV PORT=8080 \
    RUST_LOG=info \
    DATA_DIR=/data \
    WHISPER_MODEL=large-v3 \
    WHISPER_MODEL_PATH=/models/ggml-large-v3.bin

# ── Binary ───────────────────────────────────────────────────────────────────
# --chmod sets the executable bit in the COPY layer itself, avoiding a second
# layer that would duplicate the binary.
COPY --from=builder --chmod=0755 /usr/local/bin/whisper-server /app/whisper-server

# ── Volumes & ports ──────────────────────────────────────────────────────────
# Directories are created before VOLUME so they exist in the image.
# NOTE(review): the container runs as root; consider a dedicated USER once the
# ownership requirements of /data and /models are known.
RUN mkdir -p /data /models
VOLUME ["/data", "/models"]

# Documents the default PORT; publishing is still done with `docker run -p`.
EXPOSE 8080

# Shell form on purpose: ${PORT} is expanded inside the container at check time,
# so a runtime override of PORT is honoured.
HEALTHCHECK --interval=30s --timeout=10s --start-period=60s --retries=3 \
    CMD curl -sf http://localhost:${PORT}/health || exit 1

ENTRYPOINT ["/app/whisper-server"]