Initial commit: trueref v0.1.0-SNAPSHOT
Some checks failed
Build and publish Docker image / Build and push (push) Failing after 1m27s
Some checks failed
Build and publish Docker image / Build and push (push) Failing after 1m27s
Java 21 / Spring Boot 3.5.3 multi-module Maven project. Hybrid BM25+HNSW search with RRF, cross-encoder reranker, ONNX Runtime 1.22.0 (CPU + CUDA 12 GPU variants).
This commit is contained in:
69
Dockerfile.gpu
Normal file
69
Dockerfile.gpu
Normal file
@@ -0,0 +1,69 @@
|
||||
# ─── Build stage ──────────────────────────────────────────────────────────────
# Produces the application jar with a full JDK 21; nothing from this stage other
# than the jar copied out of it later reaches the runtime image.
FROM eclipse-temurin:21-jdk-jammy AS builder

# Maven is taken from the distribution's apt repository. The package index is
# removed in the same layer so it does not add to the layer's size.
RUN apt-get update && \
    apt-get install --yes --no-install-recommends maven && \
    rm -rf /var/lib/apt/lists/*

# The whole build context is copied in and built from /build.
WORKDIR /build
COPY . .

# Quiet build, one Maven worker thread per CPU core, test execution skipped.
RUN mvn --quiet --threads 1C -DskipTests package
|
||||
|
||||
# ─── Runtime stage (NVIDIA GPU / CUDA 12 + cuDNN 9) ──────────────────────────
# Base image: nvidia/cuda:12.4.1-cudnn-runtime-ubuntu22.04, which provides
#   * the CUDA 12.4 runtime libraries (libcuda.so, libcublas, etc.)
#   * cuDNN 9 (cu12 build), needed by the ONNX Runtime CUDA execution provider
#
# What the Docker host must supply:
#   * an NVIDIA GPU driver ≥ 550 (CUDA 12.4 compatible)
#   * nvidia-container-toolkit, installed and configured
#
# Example invocation: docker run --gpus all --device /dev/nvidia0 ...
FROM nvidia/cuda:12.4.1-cudnn-runtime-ubuntu22.04

# OCI image metadata, declared in a single LABEL instruction.
LABEL org.opencontainers.image.title="TrueRef (GPU)" \
      org.opencontainers.image.description="Self-hosted documentation retrieval platform for AI coding assistants (NVIDIA GPU / CUDA 12 variant)" \
      org.opencontainers.image.url="https://git.sal.giize.com/mozempk/trueref" \
      org.opencontainers.image.source="https://git.sal.giize.com/mozempk/trueref"
|
||||
|
||||
# Install the Eclipse Temurin 21 JRE onto the CUDA base image from the Adoptium
# apt repository.
#
# The repository signing key is downloaded to a temporary file and dearmored
# from there instead of being piped straight into gpg: RUN uses `sh -c` without
# pipefail, so in a pipeline only gpg's exit status would be checked and a
# failed download would not stop the build at the step that actually failed.
#
# ca-certificates is listed explicitly because the HTTPS download and the HTTPS
# apt source both need it and --no-install-recommends will not pull it in as a
# recommendation. apt-transport-https is not installed: on jammy it is only a
# transitional package, HTTPS support being built into apt itself.
RUN apt-get update \
 && apt-get install -y --no-install-recommends \
      ca-certificates \
      gnupg \
      wget \
 && wget -q -O /tmp/adoptium.asc https://packages.adoptium.net/artifactory/api/gpg/key/public \
 && gpg --output /usr/share/keyrings/adoptium.gpg --dearmor /tmp/adoptium.asc \
 && rm -f /tmp/adoptium.asc \
 && echo "deb [signed-by=/usr/share/keyrings/adoptium.gpg] https://packages.adoptium.net/artifactory/deb jammy main" \
      > /etc/apt/sources.list.d/adoptium.list \
 && apt-get update \
 && apt-get install -y --no-install-recommends temurin-21-jre \
 && rm -rf /var/lib/apt/lists/*
|
||||
|
||||
# /app holds the application jar taken from the builder stage; the destination
# below is relative to this working directory.
WORKDIR /app
COPY --from=builder /build/trueref-bootstrap/target/trueref.jar ./trueref.jar

# /data is declared as a volume; it is also the default value of TRUEREF_HOME.
# NOTE(review): no USER instruction is set in this stage, so the process runs as
# the base image's default user (presumably root) — consider a dedicated
# non-root user once ownership of existing /data volumes has been checked.
VOLUME ["/data"]
|
||||
|
||||
# Runtime defaults, consumed by the ENTRYPOINT command line:
#   TRUEREF_HOME       value passed as --trueref.home (the /data volume)
#   TRUEREF_PORT       value passed as --server.port
#   TRUEREF_GPU        physical GPU index as seen inside the container
#                      (0 after --gpus all remapping)
#   TRUEREF_MEM_LIMIT  0 = unbounded arena; on shared hosts set a byte count,
#                      e.g. 8589934592 (8 GiB)
#   JAVA_OPTS          extra JVM options, empty by default
#   CUDA_DEVICE_ORDER  PCI_BUS_ID makes CUDA runtime numbering match nvidia-smi
ENV TRUEREF_HOME=/data \
    TRUEREF_PORT=18080 \
    TRUEREF_GPU=0 \
    TRUEREF_MEM_LIMIT=0 \
    JAVA_OPTS="" \
    CUDA_DEVICE_ORDER=PCI_BUS_ID

# Documents the default TRUEREF_PORT; it does not publish the port by itself.
EXPOSE 18080
|
||||
|
||||
# Start the JVM through `sh -c` so the environment variables are expanded when
# the container starts; `exec` replaces the shell so java receives signals
# directly. The trailing "--" becomes $0 of the inline script, which lets "$@"
# forward any arguments given after the image name on `docker run`.
#
# Each single-valued setting is expanded inside double quotes so that a value
# containing whitespace or glob characters (for example a TRUEREF_HOME path
# with a space) stays one argument. JAVA_OPTS is intentionally left unquoted:
# it may carry several options that must be split into separate words, and it
# must disappear entirely when empty.
ENTRYPOINT ["sh", "-c", \
  "exec java \
    --enable-native-access=ALL-UNNAMED \
    --add-modules=jdk.incubator.vector \
    ${JAVA_OPTS} \
    -jar /app/trueref.jar \
    --server.port=\"${TRUEREF_PORT}\" \
    --trueref.home=\"${TRUEREF_HOME}\" \
    --trueref.embedding.gpu-device-id=\"${TRUEREF_GPU}\" \
    --trueref.embedding.gpu-mem-limit-bytes=\"${TRUEREF_MEM_LIMIT}\" \
    \"$@\"", "--"]
|
||||
Reference in New Issue
Block a user