diff --git a/compose.yaml b/compose.yaml
index 8178f88..f106b65 100644
--- a/compose.yaml
+++ b/compose.yaml
@@ -67,19 +67,26 @@ x-server: &server
       soft: -1
       hard: -1
   restart: unless-stopped
-  entrypoint: ["/bin/sh", "-c"]
-  command: |
-    exec /app/llama-server \
-      --model "/models/$$MODEL_FILE" \
-      --host 0.0.0.0 --port 8080 \
-      --n-gpu-layers $$N_GPU_LAYERS \
-      --ctx-size $$CTX_SIZE \
-      --threads $$THREADS --threads-batch $$THREADS_BATCH \
-      --batch-size $$BATCH_SIZE --ubatch-size $$UBATCH_SIZE \
-      --cache-type-k $$CACHE_TYPE_K --cache-type-v $$CACHE_TYPE_V \
-      --cont-batching --parallel $$PARALLEL \
-      $$EXTRA_ARGS \
-      --log-disable
+  # NOTE: command must be a list with the shell as explicit elements — do NOT use
+  # `entrypoint: ["/bin/sh","-c"]` + `command: |` block scalar, because Compose
+  # shlex-splits the block scalar into a list and Docker then passes only "exec"
+  # as the -c argument (the rest become $0, $1 … → instant exit 0).
+  entrypoint: []
+  command:
+    - /bin/sh
+    - -c
+    - |
+      exec /app/llama-server \
+        --model "/models/$$MODEL_FILE" \
+        --host 0.0.0.0 --port 8080 \
+        --n-gpu-layers $$N_GPU_LAYERS \
+        --ctx-size $$CTX_SIZE \
+        --threads $$THREADS --threads-batch $$THREADS_BATCH \
+        --batch-size $$BATCH_SIZE --ubatch-size $$UBATCH_SIZE \
+        --cache-type-k $$CACHE_TYPE_K --cache-type-v $$CACHE_TYPE_V \
+        --cont-batching --parallel $$PARALLEL \
+        $$EXTRA_ARGS \
+        --log-disable
   networks:
     llama-net:
       aliases: [llama-current]