Files
trueref/Dockerfile.gpu
moze c5f950c2c0
Some checks failed
Build and publish Docker image / Build and push (push) Failing after 1m27s
Initial commit: trueref v0.1.0-SNAPSHOT
Java 21 / Spring Boot 3.5.3 multi-module Maven project.
Hybrid BM25+HNSW search with RRF, cross-encoder reranker,
ONNX Runtime 1.22.0 (CPU + CUDA 12 GPU variants).
2026-05-06 00:49:16 +02:00

70 lines
2.8 KiB
Docker

# ─── Build stage ──────────────────────────────────────────────────────────────
# Compiles the Maven project with a Temurin 21 JDK. Later stages pull the built
# jar out of this stage by its name (`builder`).
FROM eclipse-temurin:21-jdk-jammy AS builder

# Maven is installed from the Ubuntu archive; the apt package lists are removed
# in the same layer so they never become part of the stage's filesystem.
RUN apt-get update \
    && apt-get install -y --no-install-recommends maven \
    && rm -rf /var/lib/apt/lists/*

WORKDIR /build
COPY . .

# `COPY . .` invalidates this layer on every source change, so without a
# persistent local repository each rebuild would download all dependencies
# again. The BuildKit cache mount keeps /root/.m2 on the build host between
# builds without adding it to any image layer; `sharing=locked` serialises
# concurrent builds so they do not write to the repository at the same time.
# Requires BuildKit (the default builder in current Docker releases).
#   -B         batch mode: no interactive prompts, no colour codes in CI logs
#   -q         only errors are printed
#   -T 1C      one build thread per CPU core
RUN --mount=type=cache,target=/root/.m2,sharing=locked \
    mvn -B -q package -DskipTests -T 1C
# ─── Runtime stage (NVIDIA GPU / CUDA 12 + cuDNN 9) ──────────────────────────
# Base image nvidia/cuda:12.4.1-cudnn-runtime-ubuntu22.04 provides:
#   * the CUDA 12.4 runtime libraries (libcuda.so, libcublas, etc.)
#   * cuDNN 9 (cu12 build), which the ONNX Runtime CUDA execution provider needs
#
# What the Docker host must supply:
#   * an NVIDIA GPU driver ≥ 550 (CUDA 12.4 compatible)
#   * nvidia-container-toolkit, installed and configured
#
# Example invocation: docker run --gpus all --device /dev/nvidia0 ...
FROM nvidia/cuda:12.4.1-cudnn-runtime-ubuntu22.04

# OCI image metadata, declared in a single instruction.
LABEL org.opencontainers.image.title="TrueRef (GPU)" \
      org.opencontainers.image.description="Self-hosted documentation retrieval platform for AI coding assistants (NVIDIA GPU / CUDA 12 variant)" \
      org.opencontainers.image.url="https://git.sal.giize.com/mozempk/trueref" \
      org.opencontainers.image.source="https://git.sal.giize.com/mozempk/trueref"
# Install Eclipse Temurin 21 JRE onto the CUDA base image.
#
# The Adoptium signing key is first saved to a temporary file and then
# de-armored, instead of being piped straight into gpg: under the default
# /bin/sh a pipeline only reports the exit status of its last command, so a
# failed or truncated download would not reliably stop the build.
#
# ca-certificates is listed explicitly because both wget and apt need it to
# verify the HTTPS endpoints. apt-transport-https is not installed: on Ubuntu
# 22.04 it is only a transitional package, HTTPS support is built into apt.
RUN apt-get update \
    && apt-get install -y --no-install-recommends \
        ca-certificates \
        gnupg \
        wget \
    && wget -q -O /tmp/adoptium.asc https://packages.adoptium.net/artifactory/api/gpg/key/public \
    && gpg --output /usr/share/keyrings/adoptium.gpg --dearmor /tmp/adoptium.asc \
    && rm -f /tmp/adoptium.asc \
    && echo "deb [signed-by=/usr/share/keyrings/adoptium.gpg] https://packages.adoptium.net/artifactory/deb jammy main" \
        > /etc/apt/sources.list.d/adoptium.list \
    && apt-get update \
    && apt-get install -y --no-install-recommends temurin-21-jre \
    && rm -rf /var/lib/apt/lists/*
# Application layout: the jar produced by the `builder` stage is placed in
# /app, and /data is declared as a volume.
# NOTE(review): no USER instruction is set, so the process presumably runs as
# the base image's default user (root) — confirm whether that is intended.
WORKDIR /app
COPY --from=builder /build/trueref-bootstrap/target/trueref.jar /app/trueref.jar
VOLUME ["/data"]

# Runtime defaults (override with `docker run -e NAME=value`):
#   TRUEREF_HOME       forwarded to the application as --trueref.home
#   TRUEREF_PORT       forwarded as --server.port
#   TRUEREF_GPU        physical GPU index visible inside the container
#                      (0 after --gpus all remapping)
#   TRUEREF_MEM_LIMIT  0 = unbounded arena; set to e.g. 8589934592 (8 GiB)
#                      on shared hosts
#   JAVA_OPTS          extra JVM options, empty by default
#   CUDA_DEVICE_ORDER  PCI_BUS_ID ensures nvidia-smi numbering matches CUDA
#                      runtime numbering
ENV TRUEREF_HOME=/data \
    TRUEREF_PORT=18080 \
    TRUEREF_GPU=0 \
    TRUEREF_MEM_LIMIT=0 \
    JAVA_OPTS="" \
    CUDA_DEVICE_ORDER=PCI_BUS_ID

# Documents the default listening port (matches TRUEREF_PORT above).
EXPOSE 18080
# Start the JVM through `sh -c` so the environment variables are expanded when
# the container starts (an exec-form ENTRYPOINT performs no expansion itself).
# `exec` replaces the shell with the java process, so java receives the
# signals sent to the container. The trailing "--" becomes $0 of the shell
# script, which makes any CMD / `docker run` arguments available as "$@".
#
# ${JAVA_OPTS} is intentionally left unquoted so that several JVM options split
# into separate arguments. Every other expansion is quoted so that a value
# containing whitespace or glob characters (e.g. a TRUEREF_HOME path with a
# space) is still passed to the application as one argument.
ENTRYPOINT ["sh", "-c", \
    "exec java \
        --enable-native-access=ALL-UNNAMED \
        --add-modules=jdk.incubator.vector \
        ${JAVA_OPTS} \
        -jar /app/trueref.jar \
        --server.port=\"${TRUEREF_PORT}\" \
        --trueref.home=\"${TRUEREF_HOME}\" \
        --trueref.embedding.gpu-device-id=\"${TRUEREF_GPU}\" \
        --trueref.embedding.gpu-mem-limit-bytes=\"${TRUEREF_MEM_LIMIT}\" \
        \"$@\"", "--"]