services:
  # The whisper service: image built from the local Dockerfile, with one
  # NVIDIA GPU reserved and port 8080 published to the host.
  whisper:
    image: whisper-rtx2080:latest
    restart: unless-stopped

    build:
      context: .
      dockerfile: Dockerfile
      # Build arguments selecting the CUDA / base image.
      # CUDNN_TAG convention as noted by the author (confirm against the
      # base image's published tags before changing CUDA_VERSION):
      #   CUDA 12.x -> "cudnn"
      #   CUDA 11.x -> "cudnn8"
      args:
        CUDA_VERSION: "12.4.1"
        CUDNN_TAG: "cudnn"
        UBUNTU_VERSION: "22.04"

    ports:
      - "8080:8080"

    environment:
      PORT: "8080"
      RUST_LOG: "info"
      DATA_DIR: "/data"
      WHISPER_MODEL: "large-v3"
      WHISPER_MODEL_PATH: "/models/ggml-large-v3.bin"

    volumes:
      # Job state lives here so it survives container restarts.
      - whisper-data:/data
      # Model cache, so large-v3 is not downloaded again on every start.
      - whisper-models:/models

    # GPU access; the host needs the NVIDIA Container Toolkit installed.
    deploy:
      resources:
        reservations:
          devices:
            - driver: nvidia
              count: 1
              capabilities:
                - gpu

    healthcheck:
      # Probes the /health endpoint on port 8080 from inside the container.
      test:
        - "CMD"
        - "curl"
        - "-sf"
        - "http://localhost:8080/health"
      interval: 30s
      timeout: 10s
      retries: 3
      # Grace period so the server can load the model on first start.
      start_period: 90s

# Named volumes referenced by the whisper service. No driver or options are
# specified for either volume in this file.
volumes:
  # Mounted at /data in the whisper service (job state).
  whisper-data:
  # Mounted at /models in the whisper service (model cache).
  whisper-models: