llama+compose: fix bigctx startup timing
- compose: increase start_period for bigctx services - gemma4-e4b-bigctx: 60s -> 150s (5 GiB model + warmup + 163840 ctx takes ~90-120s) - gemma4-e2b-bigctx: 60s -> 120s (large ctx 393216 allocation) - smollm3/qwen3-4b bigctx: 60s -> 90s - llama: extend health poll from 30x2s=60s to 75x2s=150s - llama: require 3 consecutive unhealthy before giving up (avoids false positives during Docker start_period window)
This commit is contained in:
@@ -183,7 +183,7 @@ services:
|
|||||||
env_file: envs/.env.smollm3-3b-bigctx
|
env_file: envs/.env.smollm3-3b-bigctx
|
||||||
healthcheck:
|
healthcheck:
|
||||||
<<: *hc
|
<<: *hc
|
||||||
start_period: 60s
|
start_period: 90s
|
||||||
|
|
||||||
llama-gemma4-e2b-bigctx:
|
llama-gemma4-e2b-bigctx:
|
||||||
image: local/llama-cpp-turboquant:server-cuda-sm75-mmq
|
image: local/llama-cpp-turboquant:server-cuda-sm75-mmq
|
||||||
@@ -192,7 +192,7 @@ services:
|
|||||||
env_file: envs/.env.gemma4-e2b-bigctx
|
env_file: envs/.env.gemma4-e2b-bigctx
|
||||||
healthcheck:
|
healthcheck:
|
||||||
<<: *hc
|
<<: *hc
|
||||||
start_period: 60s
|
start_period: 120s # large ctx (393216) allocation takes extra time
|
||||||
|
|
||||||
llama-gemma4-e4b-bigctx:
|
llama-gemma4-e4b-bigctx:
|
||||||
image: local/llama-cpp-turboquant:server-cuda-sm75-mmq
|
image: local/llama-cpp-turboquant:server-cuda-sm75-mmq
|
||||||
@@ -201,7 +201,7 @@ services:
|
|||||||
env_file: envs/.env.gemma4-e4b-bigctx
|
env_file: envs/.env.gemma4-e4b-bigctx
|
||||||
healthcheck:
|
healthcheck:
|
||||||
<<: *hc
|
<<: *hc
|
||||||
start_period: 60s
|
start_period: 150s # 5 GiB model + warmup + 163840 ctx takes ~90-120s
|
||||||
|
|
||||||
llama-qwen3-4b-bigctx:
|
llama-qwen3-4b-bigctx:
|
||||||
image: local/llama-cpp-turboquant:server-cuda-sm75-mmq
|
image: local/llama-cpp-turboquant:server-cuda-sm75-mmq
|
||||||
@@ -210,7 +210,7 @@ services:
|
|||||||
env_file: envs/.env.qwen3-4b-bigctx
|
env_file: envs/.env.qwen3-4b-bigctx
|
||||||
healthcheck:
|
healthcheck:
|
||||||
<<: *hc
|
<<: *hc
|
||||||
start_period: 60s
|
start_period: 90s
|
||||||
|
|
||||||
# ── OPEN WEBUI ─────────────────────────────────────────────────────────────
|
# ── OPEN WEBUI ─────────────────────────────────────────────────────────────
|
||||||
# Separate profile — add to any running model:
|
# Separate profile — add to any running model:
|
||||||
|
|||||||
16
llama
16
llama
@@ -165,21 +165,33 @@ cmd_start() {
|
|||||||
|
|
||||||
echo ""
|
echo ""
|
||||||
info "Waiting for health check..."
|
info "Waiting for health check..."
|
||||||
|
# bigctx / heavy models can take >2 min to load: poll up to 150s
|
||||||
|
local max_polls=75
|
||||||
local i=0
|
local i=0
|
||||||
while [[ $i -lt 30 ]]; do
|
local consecutive_unhealthy=0
|
||||||
|
while [[ $i -lt $max_polls ]]; do
|
||||||
local status
|
local status
|
||||||
status=$(docker inspect llama_server --format '{{.State.Health.Status}}' 2>/dev/null || echo "starting")
|
status=$(docker inspect llama_server --format '{{.State.Health.Status}}' 2>/dev/null || echo "starting")
|
||||||
if [[ "$status" == "healthy" ]]; then
|
if [[ "$status" == "healthy" ]]; then
|
||||||
|
echo ""
|
||||||
ok "Server is healthy → http://localhost:8080"
|
ok "Server is healthy → http://localhost:8080"
|
||||||
[[ "$webui" == "1" ]] && ok "Open WebUI → http://localhost:3000"
|
[[ "$webui" == "1" ]] && ok "Open WebUI → http://localhost:3000"
|
||||||
return 0
|
return 0
|
||||||
elif [[ "$status" == "unhealthy" ]]; then
|
elif [[ "$status" == "unhealthy" ]]; then
|
||||||
|
(( consecutive_unhealthy++ )) || true
|
||||||
|
# Only give up after 3 consecutive unhealthy — avoids false positives
|
||||||
|
# during start_period when Docker hasn't run healthchecks yet
|
||||||
|
if [[ $consecutive_unhealthy -ge 3 ]]; then
|
||||||
|
echo ""
|
||||||
warn "Server reported unhealthy. Check logs: ./llama logs"
|
warn "Server reported unhealthy. Check logs: ./llama logs"
|
||||||
return 1
|
return 1
|
||||||
fi
|
fi
|
||||||
|
else
|
||||||
|
consecutive_unhealthy=0
|
||||||
|
fi
|
||||||
echo -n "."
|
echo -n "."
|
||||||
sleep 2
|
sleep 2
|
||||||
(( i++ ))
|
(( i++ )) || true
|
||||||
done
|
done
|
||||||
echo ""
|
echo ""
|
||||||
warn "Still starting (health check pending). Try: ./llama status"
|
warn "Still starting (health check pending). Try: ./llama status"
|
||||||
|
|||||||
Reference in New Issue
Block a user