All checks were successful
Build & Push Docker Image / build-and-push (push) Successful in 8m41s
- Model starts unloaded (lazy); loads on first job or POST /model/load
- Auto-unloads after IDLE_TIMEOUT_SECS (default 300) of inactivity
- POST /model/unload for immediate manual release
- GPU-busy detection: on VRAM OOM, enters WaitingForGpu and retries
every GPU_POLL_INTERVAL_SECS (default 30) indefinitely
- POST /jobs when unloaded → 503 + Retry-After header, triggers load
- AppError::OutOfMemory and AppError::ModelNotReady variants
- WorkerCmd channel (SyncSender<WorkerCmd>) replaces bare tx_req channel
- Idle timer via recv_timeout(1s) tick inside OS thread (no extra thread)
- Model lifecycle events broadcast via tokio broadcast channel (SSE + webhooks)
- webhook_registry: all clients that ever submitted a webhook_url receive
model_ready and model_unloaded webhooks
- GPU warmup retained on every (re)load
New routes:
GET /model/status — current state + VRAM stats
POST /model/load — trigger load (idempotent)
POST /model/unload — immediate unload
GET /model/events — SSE stream of model lifecycle events
New env vars:
IDLE_TIMEOUT_SECS (default 300)
GPU_POLL_INTERVAL_SECS (default 30)
Tests:
tests/test_model_lifecycle.sh — 18 integration tests (full state machine,
SSE events, webhooks, concurrency, unload-during-load)
tests/test_idle_timeout.sh — 5 tests with short IDLE_TIMEOUT_SECS=5
test_all.sh updated: loads model before job submission, asserts
model_state in /health, adds POST /model/unload at end
Docs:
docs/USAGE.md: model lifecycle section, new env vars, 503 retry pattern,
updated /health response shape
Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
247 lines
8.3 KiB
Bash
Executable File
247 lines
8.3 KiB
Bash
Executable File
#!/usr/bin/env bash
# tests/test_idle_timeout.sh
#
# Integration tests for the idle-timeout auto-unload feature.
# REQUIRES the server to be started with a short idle timeout:
#
#   IDLE_TIMEOUT_SECS=5 ./whisper-server
#   # or via Docker:
#   docker run -e IDLE_TIMEOUT_SECS=5 ...
#
# The default idle timeout is 5 minutes; these tests use a 5-second window
# to keep the suite fast.

set -euo pipefail

# Configuration, overridable from the environment:
#   WHISPER_BASE_URL            base URL of the server under test
#   EXPECTED_IDLE_TIMEOUT_SECS  idle timeout the server was started with
#   TEST_AUDIO                  optional audio file; Test 5 is skipped if empty
BASE="${WHISPER_BASE_URL:-http://localhost:8080}"
IDLE_TIMEOUT="${EXPECTED_IDLE_TIMEOUT_SECS:-5}"
AUDIO="${TEST_AUDIO:-}"

# IDLE_TIMEOUT is used in shell arithmetic further down, so reject anything
# that is not a plain non-negative integer here instead of failing mid-run.
case "$IDLE_TIMEOUT" in
  *[!0-9]*)
    echo "EXPECTED_IDLE_TIMEOUT_SECS must be a non-negative integer, got '$IDLE_TIMEOUT'" >&2
    exit 2
    ;;
esac

# ANSI colour escape sequences (stored unexpanded; interpreted when printed).
GREEN='\033[0;32m'; RED='\033[0;31m'; YELLOW='\033[0;33m'; NC='\033[0m'

# Running totals reported in the final summary.
PASS=0; FAIL=0

# Print a green "[PASS]" line for message $1 and increment the PASS counter.
# Only the colour variables are escape-expanded; the message is printed as-is.
ok() {
  printf '%b[PASS]%b %s\n' "${GREEN}" "${NC}" "$1"
  PASS=$((PASS + 1))
}
# Print a red "[FAIL]" line for message $1 and increment the FAIL counter.
# Only the colour variables are escape-expanded; the message is printed as-is.
fail() {
  printf '%b[FAIL]%b %s\n' "${RED}" "${NC}" "$1"
  FAIL=$((FAIL + 1))
}
# Print a yellow "[SKIP]" line for message $1. Skips are not counted.
skip() {
  printf '%b[SKIP]%b %s\n' "${YELLOW}" "${NC}" "$1"
}
# Print an indented, uncoloured informational line for message $1.
info() {
  printf ' %s\n' "$1"
}

# Banner: show the effective configuration and remind the operator that the
# idle timeout has to be configured on the server side as well.
printf '%s\n' \
  "=== Idle Timeout Tests ===" \
  " BASE: $BASE" \
  " IDLE_TIMEOUT_SECS: $IDLE_TIMEOUT (must be configured on the server)" \
  "" \
  "NOTE: These tests require the server to be running with IDLE_TIMEOUT_SECS=$IDLE_TIMEOUT" \
  ""

# ── Helpers ──────────────────────────────────────────────────────────────────

# Print the "state" field of GET $BASE/model/status on stdout.
# If the request fails or the response cannot be parsed, prints "unknown" and
# still returns 0: callers capture the output with $(...) under `set -e`, and
# a non-zero status there would abort the run before the summary is printed.
get_state() {
  local body state
  body=$(curl -sf "$BASE/model/status") || { echo "unknown"; return 0; }
  state=$(printf '%s' "$body" \
    | python3 -c "import sys,json; print(json.load(sys.stdin)['state'])") \
    || { echo "unknown"; return 0; }
  printf '%s\n' "$state"
}

# Make sure the model is loaded.
# Returns 0 immediately if the reported state is already "ready". Otherwise
# sends POST $BASE/model/load and polls the state once per second, returning 0
# as soon as it becomes "ready" and 1 if that has not happened after 180 s.
# The short poll interval keeps the gap between "model became ready" and
# "caller resumes" small; the callers' idle-timeout waits are only a few
# seconds long, so a coarse poll would eat into their timing budget.
ensure_ready() {
  local -r timeout_secs=180 poll_secs=1
  local state elapsed=0

  state=$(get_state) || state=""
  if [ "$state" = "ready" ]; then
    return 0
  fi

  # Best effort: a failed load request is not fatal by itself because the
  # polling loop below decides the outcome.
  curl -sf -X POST "$BASE/model/load" > /dev/null || true

  while [ "$elapsed" -lt "$timeout_secs" ]; do
    sleep "$poll_secs"
    elapsed=$((elapsed + poll_secs))
    state=$(get_state) || state=""
    if [ "$state" = "ready" ]; then
      return 0
    fi
  done
  return 1
}

# Send POST $BASE/model/unload and then pause for two seconds.
# A failing request is deliberately ignored; the function always returns 0.
ensure_unloaded() {
  curl -sf -X POST "$BASE/model/unload" > /dev/null || true
  sleep 2
}

# ── TEST 1: Load model, then wait for idle unload ────────────────────────────
echo "--- Test 1: Idle timeout triggers auto-unload ---"

ensure_unloaded
if ensure_ready; then
  WAIT_SECS=$((IDLE_TIMEOUT + 3))
  info "Model is ready. Waiting $WAIT_SECS seconds (idle timeout=$IDLE_TIMEOUT + 3s buffer)..."
  sleep "$WAIT_SECS"

  # Fall back to a sentinel so a failed status query is reported as a test
  # failure instead of aborting the script under `set -e`.
  STATE=$(get_state) || STATE="unknown"
  if [ "$STATE" = "unloaded" ]; then
    ok "T1: model auto-unloaded after ${IDLE_TIMEOUT}s idle"
  else
    fail "T1: expected unloaded after idle timeout, got $STATE"
    info "Is the server running with IDLE_TIMEOUT_SECS=$IDLE_TIMEOUT?"
  fi
else
  # Without a loaded model there is nothing that could time out, so the
  # unload assertion is skipped rather than reported as a second failure.
  fail "T1: model load failed"
fi

# ── TEST 2: model_unloaded webhook fires on idle timeout ─────────────────────
echo ""
echo "--- Test 2: model_unloaded webhook fires on idle timeout ---"

ensure_unloaded

# Private scratch file for the receiver. A fixed name under /tmp could still
# hold the event of an earlier run and produce a false pass.
WH_EVENT_FILE=$(mktemp)
WH_PID=""
# Make sure the background receiver and scratch file do not outlive the
# script if it aborts while this test is running.
trap 'kill "$WH_PID" 2>/dev/null || true; rm -f -- "$WH_EVENT_FILE"' EXIT

# Start webhook receiver: stores the JSON body of the most recent POST in the
# file given as its first argument.
python3 - "$WH_EVENT_FILE" <<'PYEOF' &
import http.server, json, sys, signal

EVENT_FILE = sys.argv[1]

class H(http.server.BaseHTTPRequestHandler):
    def do_POST(self):
        n = int(self.headers.get('Content-Length', 0))
        body = json.loads(self.rfile.read(n))
        with open(EVENT_FILE, 'w') as f:
            json.dump(body, f)
        self.send_response(200); self.end_headers()
    def log_message(self, *a): pass

signal.signal(signal.SIGTERM, lambda *_: sys.exit(0))
http.server.HTTPServer(('', 9995), H).serve_forever()
PYEOF
WH_PID=$!
sleep 1

# Register webhook via a job submission (will 503 since unloaded).
# The payload is a bounded chunk of random bytes and webhook_url is sent
# first: uploading /dev/urandom directly never reaches end-of-file, so any
# form field placed after it would never be transmitted.
# Failure is expected here (503), hence the deliberate `|| true`.
head -c 65536 /dev/urandom \
  | curl -sf -X POST "$BASE/jobs" \
      -F "webhook_url=http://localhost:9995/wh" \
      -F "audio=@-;filename=urandom;type=audio/wav" \
      --max-time 5 > /dev/null 2>&1 || true

# Load model, then wait for idle timeout
if ensure_ready; then
  T2_LOADED=1
  WAIT_SECS=$((IDLE_TIMEOUT + 5))
  info "Waiting ${WAIT_SECS}s for idle timeout..."
  sleep "$WAIT_SECS"
else
  T2_LOADED=0
  fail "T2: model load failed"
fi

kill "$WH_PID" 2>/dev/null || true
wait "$WH_PID" 2>/dev/null || true
trap - EXIT

if [ "$T2_LOADED" -eq 1 ]; then
  # mktemp created the file empty; non-empty means an event was recorded.
  if [ -s "$WH_EVENT_FILE" ]; then
    EVENT_TYPE=$(python3 -c "import json,sys; print(json.load(open(sys.argv[1])).get('type','?'))" "$WH_EVENT_FILE") \
      || EVENT_TYPE="?"
    if [ "$EVENT_TYPE" = "model_unloaded" ]; then
      ok "T2: model_unloaded webhook fired on idle timeout"
    else
      fail "T2: webhook type=$EVENT_TYPE (expected model_unloaded)"
    fi
  else
    fail "T2: no webhook received within timeout"
  fi
fi
rm -f -- "$WH_EVENT_FILE"

# ── TEST 3: Job submission after idle timeout → 503 → triggers reload ─────────
echo ""
echo "--- Test 3: Job triggers reload after idle unload ---"

ensure_unloaded
if ensure_ready; then
  # Wait for auto-unload
  WAIT_SECS=$((IDLE_TIMEOUT + 3))
  info "Waiting ${WAIT_SECS}s for idle unload..."
  sleep "$WAIT_SECS"

  STATE=$(get_state) || STATE="unknown"
  [ "$STATE" = "unloaded" ] || info "Note: state=$STATE (expected unloaded)"

  # Submit job → 503, triggers reload.
  # The payload is a bounded chunk of random bytes (reading /dev/urandom
  # directly never reaches end-of-file). The response body is not inspected,
  # so it is discarded. The fallback is applied outside the substitution:
  # on failure curl has already printed "000" via -w, and echoing a second
  # "000" inside the substitution would yield "000000".
  HTTP=$(head -c 65536 /dev/urandom \
    | curl -s -o /dev/null -w "%{http_code}" -X POST "$BASE/jobs" \
        -F "audio=@-;filename=urandom;type=audio/wav" \
        --max-time 5 2>/dev/null) || HTTP="000"

  if [ "$HTTP" = "503" ]; then
    ok "T3a: POST /jobs → 503 after idle unload"
  else
    skip "T3a: POST /jobs returned $HTTP (model may have reloaded)"
  fi

  # State should be loading or ready (reload triggered by job submission)
  sleep 2
  STATE=$(get_state) || STATE="unknown"
  if [ "$STATE" = "loading" ] || [ "$STATE" = "ready" ]; then
    ok "T3b: reload triggered by job submission ($STATE)"
  else
    fail "T3b: expected loading/ready, got $STATE"
  fi
else
  # The reload checks are only meaningful after a successful load followed
  # by an idle unload, so they are skipped when the initial load failed.
  fail "T3: initial load failed"
fi

# ── TEST 4: Model stays ready for part of the timeout, then unloads ──────────
# Waits 60% of the idle timeout and expects the model to still be ready, then
# waits out the remainder (plus a buffer) and expects it to be unloaded.
# NOTE(review): the banner below mentions completed jobs, but this test does
# not submit one; Test 5 covers the job case.
echo ""
echo "--- Test 4: Idle timer resets with each completed job ---"

ensure_unloaded
if ensure_ready; then
  # 60% rather than "timeout - 1": ensure_ready only polls periodically, so
  # the model may already have been idle for a while when it returns.
  HALF_WAIT=$((IDLE_TIMEOUT * 6 / 10))
  if [ "$HALF_WAIT" -lt 1 ]; then
    HALF_WAIT=1
  fi
  info "Waiting ${HALF_WAIT}s (less than idle timeout)..."
  sleep "$HALF_WAIT"

  STATE=$(get_state) || STATE="unknown"
  if [ "$STATE" = "ready" ]; then
    ok "T4a: model still ready after ${HALF_WAIT}s (less than ${IDLE_TIMEOUT}s timeout)"
  else
    fail "T4a: model unexpectedly $STATE after only ${HALF_WAIT}s"
  fi

  # Wait for full unload
  REMAINING=$((IDLE_TIMEOUT - HALF_WAIT + 3))
  info "Waiting another ${REMAINING}s for full idle unload..."
  sleep "$REMAINING"

  STATE=$(get_state) || STATE="unknown"
  if [ "$STATE" = "unloaded" ]; then
    ok "T4b: model unloaded after total > ${IDLE_TIMEOUT}s idle"
  else
    fail "T4b: expected unloaded, got $STATE"
  fi
else
  fail "T4: model load failed"
fi

# ── TEST 5: Job resets idle timer ─────────────────────────────────────────────
echo ""
echo "--- Test 5: Completing a job resets the idle timer ---"

if [ -z "$AUDIO" ]; then
  skip "T5: TEST_AUDIO not set — skipping timer-reset test"
else
  ensure_unloaded

  JOB_ID=""
  if ensure_ready; then
    # Submit a job. The fallbacks keep a rejected request or an unparsable
    # response from aborting the script under `set -e`, so the failure is
    # reported through fail() instead.
    SUBMIT=$(curl -sf -X POST "$BASE/jobs" \
      -F "audio=@${AUDIO};type=audio/wav" \
      -F "task=transcribe" 2>&1) || SUBMIT=""
    JOB_ID=$(printf '%s' "$SUBMIT" \
      | python3 -c "import sys,json; print(json.load(sys.stdin)['job_id'])" 2>/dev/null) \
      || JOB_ID=""
    if [ -z "$JOB_ID" ]; then
      fail "T5: job submission failed"
    fi
  else
    fail "T5: model load failed"
  fi

  if [ -n "$JOB_ID" ]; then
    # Wait for job to finish: poll every 5 s until the status is done/failed
    # or roughly 300 s have passed.
    elapsed=0
    STATUS="unknown"
    while [ "$elapsed" -le 300 ]; do
      sleep 5
      elapsed=$((elapsed + 5))
      STATUS=$(curl -sf "$BASE/jobs/$JOB_ID" \
        | python3 -c "import sys,json; print(json.load(sys.stdin)['status'])" 2>/dev/null) \
        || STATUS="unknown"
      case "$STATUS" in
        done|failed) break ;;
      esac
    done
    info "Job finished in ${elapsed}s with status=$STATUS"

    # Now wait IDLE_TIMEOUT - 2 seconds — should still be ready
    SAFE_WAIT=$((IDLE_TIMEOUT - 2))
    if [ "$SAFE_WAIT" -lt 1 ]; then
      SAFE_WAIT=1
    fi
    info "Waiting ${SAFE_WAIT}s after job completion (less than idle timeout)..."
    sleep "$SAFE_WAIT"

    STATE=$(get_state) || STATE="unknown"
    if [ "$STATE" = "ready" ]; then
      ok "T5a: model still ready ${SAFE_WAIT}s after job completion"
    else
      fail "T5a: model unexpectedly $STATE after job"
    fi

    # Wait for idle timeout
    REMAINING=$((IDLE_TIMEOUT - SAFE_WAIT + 3))
    info "Waiting ${REMAINING}s more for idle unload..."
    sleep "$REMAINING"

    STATE=$(get_state) || STATE="unknown"
    if [ "$STATE" = "unloaded" ]; then
      ok "T5b: model auto-unloaded after idle period post-job"
    else
      fail "T5b: expected unloaded, got $STATE"
    fi
  fi
fi

# ── Summary ────────────────────────────────────────────────────────────────────
# Print the totals and exit with status 1 if any check failed.
echo ""
echo "=========================================="
echo " Results: ${PASS} passed, ${FAIL} failed"
echo "=========================================="
if [ "$FAIL" -eq 0 ]; then
  echo -e "${GREEN}ALL PASSED${NC}"
else
  echo -e "${RED}FAILURES: $FAIL${NC}"
  exit 1
fi