diff --git a/compose.yaml b/compose.yaml
index 8178f88..f106b65 100644
--- a/compose.yaml
+++ b/compose.yaml
@@ -67,19 +67,26 @@ x-server: &server
       soft: -1
       hard: -1
   restart: unless-stopped
-  entrypoint: ["/bin/sh", "-c"]
-  command: |
-    exec /app/llama-server \
-      --model "/models/$$MODEL_FILE" \
-      --host 0.0.0.0 --port 8080 \
-      --n-gpu-layers $$N_GPU_LAYERS \
-      --ctx-size $$CTX_SIZE \
-      --threads $$THREADS --threads-batch $$THREADS_BATCH \
-      --batch-size $$BATCH_SIZE --ubatch-size $$UBATCH_SIZE \
-      --cache-type-k $$CACHE_TYPE_K --cache-type-v $$CACHE_TYPE_V \
-      --cont-batching --parallel $$PARALLEL \
-      $$EXTRA_ARGS \
-      --log-disable
+  # NOTE: pass the shell and its script as explicit `command` list items. Do NOT
+  # pair `entrypoint: ["/bin/sh", "-c"]` with a `command: |` block scalar: Compose
+  # shlex-splits a string command into words, so `sh -c` receives only "exec" as
+  # its script and the rest become $0, $1, … (the container exits 0 immediately).
+  entrypoint: []  # drop the image ENTRYPOINT; command is the full argv
+  command:
+    - /bin/sh
+    - -c
+    - |  # single sh -c script; $$ is Compose's escape for a literal $
+      exec /app/llama-server \
+        --model "/models/$$MODEL_FILE" \
+        --host 0.0.0.0 --port 8080 \
+        --n-gpu-layers $$N_GPU_LAYERS \
+        --ctx-size $$CTX_SIZE \
+        --threads $$THREADS --threads-batch $$THREADS_BATCH \
+        --batch-size $$BATCH_SIZE --ubatch-size $$UBATCH_SIZE \
+        --cache-type-k $$CACHE_TYPE_K --cache-type-v $$CACHE_TYPE_V \
+        --cont-batching --parallel $$PARALLEL \
+        $$EXTRA_ARGS \
+        --log-disable
   networks:
     llama-net:
       aliases: [llama-current]