From 322364e6fc7e2fd524412a3e7810e0db5226b017 Mon Sep 17 00:00:00 2001 From: Giancarmine Salucci Date: Wed, 6 May 2026 23:31:12 +0200 Subject: [PATCH] compose: fix server command structure (critical bug) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Compose shlex-splits any string-form 'command:' (including a 'command: |' block scalar) into an argv list, regardless of the entrypoint. Combined with 'entrypoint: ["/bin/sh","-c"]', Docker therefore runs '/bin/sh -c exec /app/llama-server ...', where 'exec' is the entire -c script, '/app/llama-server' becomes $0 and the remaining words become $1, $2, .... A bare 'exec' with no command is a no-op in sh, so the script ends and the container exits 0 immediately → 37-restart crash-loop, no server. Fix: use 'entrypoint: []' and 'command: [/bin/sh, -c, <|block>]' so the full shell command is passed as a single list element — list-form commands are not split further by Compose. --- compose.yaml | 33 ++++++++++++++++++++------------- 1 file changed, 20 insertions(+), 13 deletions(-) diff --git a/compose.yaml b/compose.yaml index 8178f88..f106b65 100644 --- a/compose.yaml +++ b/compose.yaml @@ -67,19 +67,26 @@ x-server: &server soft: -1 hard: -1 restart: unless-stopped - entrypoint: ["/bin/sh", "-c"] - command: | - exec /app/llama-server \ - --model "/models/$$MODEL_FILE" \ - --host 0.0.0.0 --port 8080 \ - --n-gpu-layers $$N_GPU_LAYERS \ - --ctx-size $$CTX_SIZE \ - --threads $$THREADS --threads-batch $$THREADS_BATCH \ - --batch-size $$BATCH_SIZE --ubatch-size $$UBATCH_SIZE \ - --cache-type-k $$CACHE_TYPE_K --cache-type-v $$CACHE_TYPE_V \ - --cont-batching --parallel $$PARALLEL \ - $$EXTRA_ARGS \ - --log-disable + # NOTE: command must be a list with the shell as explicit elements — do NOT use + # `entrypoint: ["/bin/sh","-c"]` + `command: |` block scalar, because Compose + # shlex-splits the block scalar into a list and Docker then passes only "exec" + # as the -c argument (the rest become $0, $1 … → instant exit 0).
+ entrypoint: [] + command: + - /bin/sh + - -c + - | + exec /app/llama-server \ + --model "/models/$$MODEL_FILE" \ + --host 0.0.0.0 --port 8080 \ + --n-gpu-layers $$N_GPU_LAYERS \ + --ctx-size $$CTX_SIZE \ + --threads $$THREADS --threads-batch $$THREADS_BATCH \ + --batch-size $$BATCH_SIZE --ubatch-size $$UBATCH_SIZE \ + --cache-type-k $$CACHE_TYPE_K --cache-type-v $$CACHE_TYPE_V \ + --cont-batching --parallel $$PARALLEL \ + $$EXTRA_ARGS \ + --log-disable networks: llama-net: aliases: [llama-current]