Files
whisper-rtx2080/tests/test_model_lifecycle.sh
mozempk b191fbe200
All checks were successful
Build & Push Docker Image / build-and-push (push) Successful in 8m41s
feat: dynamic model loading/unloading with GPU polling
- Model starts unloaded (lazy); loads on first job or POST /model/load
- Auto-unloads after IDLE_TIMEOUT_SECS (default 300) of inactivity
- POST /model/unload for immediate manual release
- GPU-busy detection: on VRAM OOM, enters WaitingForGpu and retries
  every GPU_POLL_INTERVAL_SECS (default 30) indefinitely
- POST /jobs when unloaded → 503 + Retry-After header, triggers load
- AppError::OutOfMemory and AppError::ModelNotReady variants
- WorkerCmd channel (SyncSender<WorkerCmd>) replaces bare tx_req channel
- Idle timer via recv_timeout(1s) tick inside OS thread (no extra thread)
- Model lifecycle events broadcast via tokio broadcast channel (SSE + webhooks)
- webhook_registry: all clients that ever submitted a webhook_url receive
  model_ready and model_unloaded webhooks
- GPU warmup retained on every (re)load

New routes:
  GET  /model/status  — current state + VRAM stats
  POST /model/load    — trigger load (idempotent)
  POST /model/unload  — immediate unload
  GET  /model/events  — SSE stream of model lifecycle events

New env vars:
  IDLE_TIMEOUT_SECS       (default 300)
  GPU_POLL_INTERVAL_SECS  (default 30)

Tests:
  tests/test_model_lifecycle.sh — 18 integration tests (full state machine,
    SSE events, webhooks, concurrency, unload-during-load)
  tests/test_idle_timeout.sh    — 5 tests with short IDLE_TIMEOUT_SECS=5
  test_all.sh updated: loads model before job submission, asserts
    model_state in /health, adds POST /model/unload at end

Docs:
  docs/USAGE.md: model lifecycle section, new env vars, 503 retry pattern,
    updated /health response shape

Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
2026-05-08 17:57:20 +02:00

471 lines
18 KiB
Bash
Executable File

#!/usr/bin/env bash
# tests/test_model_lifecycle.sh
#
# Integration tests for dynamic model loading/unloading.
# Requires a running whisper-server with GPU access.
#
# Usage:
# WHISPER_BASE_URL=http://localhost:8080 bash tests/test_model_lifecycle.sh
#
# Tests are designed to be independent; each section that needs a specific
# state resets it explicitly at the start.
# Strict mode: stop on unhandled errors, unset variables and failed pipeline stages.
set -o errexit -o nounset -o pipefail

# Server under test; override with WHISPER_BASE_URL.
BASE=${WHISPER_BASE_URL:-http://localhost:8080}
# Optional audio file for the job-submission test; empty disables that test.
AUDIO=${TEST_AUDIO:-}

# ANSI colour escapes (stored unexpanded; the reporting helpers expand them).
GREEN='\033[0;32m'
RED='\033[0;31m'
YELLOW='\033[0;33m'
NC='\033[0m'

# Running tallies of passed and failed checks.
PASS=0
FAIL=0
# Record a passing check: print "[PASS] <message>" in green and increment PASS.
#   $1 - message to display (backslash escapes in it are expanded)
ok() {
  printf '%b\n' "${GREEN}[PASS]${NC} $1"
  PASS=$((PASS + 1))
}
# Record a failing check: print "[FAIL] <message>" in red and increment FAIL.
#   $1 - message to display (backslash escapes in it are expanded)
fail() {
  printf '%b\n' "${RED}[FAIL]${NC} $1"
  FAIL=$((FAIL + 1))
}
# Report a skipped check: print "[SKIP] <message>" in yellow. No counter changes.
#   $1 - message to display (backslash escapes in it are expanded)
skip() {
  printf '%b\n' "${YELLOW}[SKIP]${NC} $1"
}
# Print an informational line: the message prefixed with one space, verbatim.
#   $1 - message to display
info() {
  printf ' %s\n' "$1"
}
# Announce the run and the server being exercised, followed by a blank line.
printf '%s\n' "=== Model Lifecycle Integration Tests ===" " BASE: $BASE" ""
# ── Helpers ──────────────────────────────────────────────────────────────────
# Print the "state" field of the JSON returned by GET $BASE/model/status.
# Exits non-zero when the JSON cannot be read or has no "state" key.
get_state() {
  local extract_state="import sys,json; print(json.load(sys.stdin)['state'])"
  curl -sf "$BASE/model/status" | python3 -c "$extract_state"
}
# Ask the server to unload the model and give it time to settle.
# Posts /model/unload, waits 2s, then reads the state once; if it is not yet
# "unloaded" a warning is printed and a further 5s is waited (no re-check).
ensure_unloaded() {
  local current
  curl -sf -X POST "$BASE/model/unload" > /dev/null
  sleep 2
  current=$(get_state)
  [ "$current" = "unloaded" ] && return 0
  echo " WARNING: expected unloaded, got $current — waiting 5s"
  sleep 5
}
# Make sure the model is loaded.
# Returns 0 immediately when the state is already "ready". Otherwise posts
# /model/load and polls the state every 5s; returns 0 once it is "ready", or
# prints a TIMEOUT line and returns 1 after more than 180s of polling.
ensure_ready() {
  local current waited=0
  current=$(get_state)
  if [ "$current" = "ready" ]; then
    return 0
  fi

  curl -sf -X POST "$BASE/model/load" > /dev/null
  while :; do
    sleep 5
    waited=$((waited + 5))
    current=$(get_state)
    if [ "$current" = "ready" ]; then
      return 0
    fi
    if [ "$waited" -gt 180 ]; then
      echo " TIMEOUT: model did not become ready"
      return 1
    fi
  done
}
# Poll the model state every 2s until it equals the wanted value.
#   $1 - state to wait for
#   $2 - maximum seconds to wait (default 120)
# Returns 0 when the state matches, 1 once the time budget is used up.
# The state is always read at least once, after the first 2s sleep.
poll_state_transition() {
  local wanted=$1
  local limit=${2:-120}
  local waited=0 seen
  while :; do
    sleep 2
    waited=$((waited + 2))
    seen=$(get_state)
    if [ "$seen" = "$wanted" ]; then
      return 0
    fi
    if [ "$waited" -ge "$limit" ]; then
      return 1
    fi
  done
}
# ── TEST 1: Startup state is unloaded ────────────────────────────────────────
# Force an unload, then confirm the server reports the "unloaded" state.
echo "--- Test 1: Startup state is unloaded (or after explicit unload) ---"
ensure_unloaded
STATE=$(get_state)
case "$STATE" in
  unloaded) ok "T1: state=unloaded after explicit unload" ;;
  *) fail "T1: expected unloaded, got $STATE" ;;
esac
# ── TEST 2: POST /model/load returns 202 ─────────────────────────────────────
printf '\n%s\n' "--- Test 2: POST /model/load returns 202 ---"
ensure_unloaded
# Only the status code is captured; the response body is discarded.
HTTP=$(curl -s -o /dev/null -w "%{http_code}" -X POST "$BASE/model/load")
case "$HTTP" in
  202) ok "T2: POST /model/load → 202 Accepted" ;;
  *) fail "T2: expected 202, got $HTTP" ;;
esac
# Cancel the in-progress load to clean up (best effort, failures ignored).
curl -sf -X POST "$BASE/model/unload" > /dev/null || true
sleep 2
# ── TEST 3: State transitions to loading/ready after load trigger ─────────────
printf '\n%s\n' "--- Test 3: State transitions to loading (not stuck at unloaded) ---"
ensure_unloaded
curl -sf -X POST "$BASE/model/load" > /dev/null
# One second after the trigger the state must have left "unloaded".
sleep 1
STATE=$(get_state)
case "$STATE" in
  loading | ready) ok "T3: state transitioned to $STATE (not stuck at unloaded)" ;;
  *) fail "T3: expected loading or ready, got $STATE" ;;
esac
# ── TEST 4: Model reaches ready state and loaded_at is set ───────────────────
echo ""
echo "--- Test 4: Model reaches ready state with loaded_at timestamp ---"
# Already loading from T3 — wait for ready
if ! poll_state_transition "ready" 180; then
  fail "T4: model did not become ready within 3 minutes"
else
  # A failed status request should surface as a T4 failure instead of
  # aborting the whole run under `set -e`.
  STATUS_JSON=$(curl -sf "$BASE/model/status") || STATUS_JSON=""
  # Python prints JSON null as "None", so comparing the output with "null" in
  # the shell never matches. Map a missing or null loaded_at to an empty
  # string and test for emptiness instead.
  LOADED_AT=$(printf '%s' "$STATUS_JSON" | python3 -c "import sys,json; v=json.load(sys.stdin).get('loaded_at'); print('' if v is None else v)" 2>/dev/null || echo "")
  if [ -n "$LOADED_AT" ]; then
    ok "T4: model=ready, loaded_at=$LOADED_AT"
  else
    fail "T4: model ready but loaded_at is missing or null"
  fi
fi
# ── TEST 5: Idempotent load — POST /model/load when ready returns 200 ─────────
printf '\n%s\n' "--- Test 5: POST /model/load when already ready → 200 ---"
if ! ensure_ready; then
  fail "T5: could not load model"
fi
HTTP=$(curl -s -o /dev/null -w "%{http_code}" -X POST "$BASE/model/load")
STATE=$(get_state)
# Both 200 and 202 are accepted, provided the model is still ready afterwards.
case "$STATE:$HTTP" in
  ready:200 | ready:202) ok "T5: idempotent load → $HTTP, state stays ready" ;;
  *) fail "T5: expected 200 and ready, got HTTP=$HTTP state=$STATE" ;;
esac
# ── TEST 6: Job accepted when ready (segments > 0) ────────────────────────────
echo ""
echo "--- Test 6: Job accepted when model is ready ---"
if [ -z "$AUDIO" ]; then
  skip "T6: TEST_AUDIO not set — skipping job submission test"
else
  ensure_ready || { fail "T6: model load failed"; }
  # No -f and a trailing `|| true`: a rejected upload must reach the fail
  # branch below (with the server's error body in the message) rather than
  # abort the script under `set -e`.
  SUBMIT=$(curl -s -X POST "$BASE/jobs" -F "audio=@${AUDIO};type=audio/wav" -F "task=transcribe" 2>&1) || true
  JOB_ID=$(printf '%s' "$SUBMIT" | python3 -c "import sys,json; print(json.load(sys.stdin)['job_id'])" 2>/dev/null || echo "")
  if [ -n "$JOB_ID" ]; then
    ok "T6: job accepted, id=$JOB_ID"
    # Poll every 10s until the job is done or failed; give up once more than
    # 600s have elapsed. A failed poll is recorded as "unknown" and retried.
    elapsed=0
    STATUS=""
    while true; do
      sleep 10; elapsed=$((elapsed+10))
      STATUS=$(curl -sf "$BASE/jobs/$JOB_ID" | python3 -c "import sys,json; print(json.load(sys.stdin)['status'])" 2>/dev/null) || STATUS="unknown"
      [ "$STATUS" = "done" ] && break
      [ "$STATUS" = "failed" ] && break
      [ "$elapsed" -gt 600 ] && break
    done
    if [ "$STATUS" != "done" ]; then
      # Report failed or timed-out jobs as such instead of "done but 0 segments".
      fail "T6b: job did not finish (status=$STATUS after ${elapsed}s)"
    else
      SEGS=$(curl -sf "$BASE/jobs/$JOB_ID" | python3 -c "import sys,json; d=json.load(sys.stdin); print(len(d.get('segments',[])))" 2>/dev/null) || SEGS=0
      if [ "$SEGS" -gt 0 ]; then
        ok "T6b: job done with $SEGS segments"
      else
        fail "T6b: job done but 0 segments"
      fi
    fi
  else
    fail "T6: job submission failed: $SUBMIT"
  fi
fi
# ── TEST 7: POST /model/unload → state=unloaded ───────────────────────────────
printf '\n%s\n' "--- Test 7: POST /model/unload ---"
ensure_ready || fail "T7: model load failed"
curl -sf -X POST "$BASE/model/unload" > /dev/null
# Allow the unload three seconds to take effect before checking.
sleep 3
STATE=$(get_state)
case "$STATE" in
  unloaded) ok "T7: POST /model/unload → state=unloaded" ;;
  *) fail "T7: expected unloaded after unload, got $STATE" ;;
esac
# ── TEST 8: POST /jobs when unloaded → 503 + Retry-After ─────────────────────
echo ""
echo "--- Test 8: POST /jobs when unloaded → 503 + Retry-After ---"
ensure_unloaded
# A single request supplies the status code (-w), headers (-D) and body (-o).
# Fetching the header with a second `curl -I ... -F` request does not work:
# curl rejects mixing -I (HEAD) with -F (multipart POST), and the first
# rejected job may already have moved the server out of the unloaded state.
T8_BODY_FILE=$(mktemp)
T8_HEADER_FILE=$(mktemp)
# The payload is random bytes, not valid audio; that is fine here because the
# request is expected to be rejected with 503.
# `|| true` plus the default below avoids appending "000" to a status code
# that curl has already printed before exiting non-zero.
HTTP=$(curl -s -o "$T8_BODY_FILE" -D "$T8_HEADER_FILE" -w "%{http_code}" -X POST "$BASE/jobs" \
  -F "audio=@/dev/urandom;type=audio/wav" \
  --max-time 5 2>/dev/null) || true
HTTP=${HTTP:-000}
# If the model auto-loads it might start processing; check for 503 first
if [ "$HTTP" = "503" ]; then
  # First Retry-After header value (case-insensitive), trailing CR removed.
  RETRY_AFTER=$(awk 'tolower($1) == "retry-after:" { v = $2; sub(/\r$/, "", v); print v; exit }' "$T8_HEADER_FILE")
  BODY=$(cat "$T8_BODY_FILE" 2>/dev/null || echo "{}")
  HAS_STATE=$(printf '%s' "$BODY" | python3 -c "import sys,json; d=json.load(sys.stdin); print('state' in d)" 2>/dev/null || echo "False")
  HAS_RETRY=$(printf '%s' "$BODY" | python3 -c "import sys,json; d=json.load(sys.stdin); print('retry_after_secs' in d)" 2>/dev/null || echo "False")
  if [ "$HAS_STATE" = "True" ] && [ "$HAS_RETRY" = "True" ]; then
    ok "T8: 503 with state + retry_after_secs in body"
  else
    fail "T8: 503 but body missing state/retry_after_secs. body=$BODY"
  fi
  if [ -n "$RETRY_AFTER" ]; then
    ok "T8b: Retry-After header present: $RETRY_AFTER"
  else
    fail "T8b: Retry-After header missing from 503 response"
  fi
else
  skip "T8: got HTTP $HTTP (model may have loaded before check) — skipping"
fi
rm -f "$T8_BODY_FILE" "$T8_HEADER_FILE"
# ── TEST 9: Rejected job triggers load ────────────────────────────────────────
printf '\n%s\n' "--- Test 9: Job rejection triggers model load ---"
ensure_unloaded
# Send a job (we expect 503); its outcome is deliberately ignored.
t9_submit=(curl -sf -X POST "$BASE/jobs" -F "audio=@/dev/urandom;type=audio/wav" --max-time 5)
"${t9_submit[@]}" > /dev/null 2>&1 || true
sleep 2
STATE=$(get_state)
case "$STATE" in
  loading | ready) ok "T9: model started loading after job rejection ($STATE)" ;;
  *) fail "T9: expected loading/ready after job rejection, got $STATE" ;;
esac
# Stop the load to clean up (best effort).
curl -sf -X POST "$BASE/model/unload" > /dev/null || true
sleep 2
# ── TEST 10: Retry-After values ───────────────────────────────────────────────
printf '\n%s\n' "--- Test 10: Retry-After values match state ---"
ensure_unloaded
# Unloaded → Retry-After: 30
# -i keeps the response headers in the captured output so they can be parsed.
RESP_UNLOADED=$(curl -si -X POST "$BASE/jobs" -F "audio=@/dev/urandom;type=audio/wav" --max-time 5 2>/dev/null || echo "")
RA_UNLOADED=$(grep -i "retry-after" <<<"$RESP_UNLOADED" | awk '{print $2}' | tr -d '\r' || echo "")
# Any other value is a skip, not a failure: a load may already be in progress.
if [ "$RA_UNLOADED" = "30" ]; then
  ok "T10a: Retry-After=30 when unloaded"
else
  skip "T10a: Retry-After=$RA_UNLOADED (expected 30) — model may have started loading"
fi
# ── TEST 11: Retry-After=10 during loading ────────────────────────────────────
printf '\n%s\n' "--- Test 11: Retry-After=10 when loading ---"
ensure_unloaded
curl -sf -X POST "$BASE/model/load" > /dev/null
sleep 1 # In loading state
STATE=$(get_state)
if [ "$STATE" != "loading" ]; then
  # The load finished (or never started) too quickly to observe "loading".
  skip "T11: model already $STATE — can't test loading state Retry-After"
else
  RESP_LOADING=$(curl -si -X POST "$BASE/jobs" -F "audio=@/dev/urandom;type=audio/wav" --max-time 5 2>/dev/null || echo "")
  RA_LOADING=$(grep -i "retry-after" <<<"$RESP_LOADING" | awk '{print $2}' | tr -d '\r' || echo "")
  if [ "$RA_LOADING" = "10" ]; then
    ok "T11: Retry-After=10 when loading"
  else
    fail "T11: expected Retry-After=10, got '$RA_LOADING' (state=$STATE)"
  fi
fi
# ── TEST 12: 503 body schema validation ──────────────────────────────────────
echo ""
echo "--- Test 12: 503 body schema validation ---"
ensure_unloaded
# No -f here: with --fail curl discards the body of an HTTP error response,
# which would leave nothing but the "{}" fallback to validate.
BODY=$(curl -s -X POST "$BASE/jobs" -F "audio=@/dev/urandom;type=audio/wav" --max-time 5 2>/dev/null) || true
[ -n "$BODY" ] || BODY="{}"
# The body reaches Python through the environment, not by interpolation into
# the program text, so quotes or backslashes in it cannot break the check.
# Running the checker as the `if` condition keeps a schema failure from
# aborting the script under `set -e` before it can be reported.
if T12_BODY="$BODY" python3 - <<'PYCHECK'
import json, os, sys
body = json.loads(os.environ['T12_BODY'])
required = {'error', 'state', 'retry_after_secs'}
missing = required - set(body.keys())
if missing:
    print(f"MISSING: {missing}")
    sys.exit(1)
assert body['error'] == 'model_not_ready', f"error={body['error']}"
assert isinstance(body['retry_after_secs'], int), f"retry_after_secs not int: {body['retry_after_secs']}"
print("schema ok")
PYCHECK
then
  ok "T12: 503 body has correct schema"
else
  fail "T12: 503 body schema invalid"
fi
# ── TEST 13: GET /health has model_state field ────────────────────────────────
printf '\n%s\n' "--- Test 13: GET /health has model_state ---"
HEALTH=$(curl -sf "$BASE/health")
# Prints "True" when the health document has a top-level model_state key.
HAS_MODEL_STATE=$(python3 -c "import sys,json; d=json.load(sys.stdin); print('model_state' in d)" <<<"$HEALTH")
if [ "$HAS_MODEL_STATE" = "True" ]; then
  ok "T13: /health has model_state"
else
  fail "T13: /health missing model_state"
fi
# ── TEST 14: SSE /model/events delivers model_ready event ─────────────────────
echo ""
echo "--- Test 14: GET /model/events SSE delivers model_ready ---"
ensure_unloaded
# Collect SSE events for up to 3 minutes
# (template ends in X's so it also works with non-GNU mktemp)
SSE_LOG=$(mktemp /tmp/sse_events_XXXXXX)
curl -sN --max-time 180 "$BASE/model/events" > "$SSE_LOG" &
SSE_PID=$!
sleep 1
# Trigger load
curl -sf -X POST "$BASE/model/load" > /dev/null
poll_state_transition "ready" 180 || true
sleep 2
kill "$SSE_PID" 2>/dev/null || true
wait "$SSE_PID" 2>/dev/null || true
if grep -q "model_loading" "$SSE_LOG" 2>/dev/null; then
  ok "T14a: SSE received model_loading event"
else
  fail "T14a: SSE did not receive model_loading event"
fi
if grep -q "model_ready" "$SSE_LOG" 2>/dev/null; then
  ok "T14b: SSE received model_ready event"
else
  fail "T14b: SSE did not receive model_ready event"
fi
# Now unload to get model_unloaded event.
# The listener must be connected BEFORE the unload is issued. The first
# listener has already been stopped, and one started after the unload would
# miss the event.
SSE_LOG2=$(mktemp /tmp/sse_events_XXXXXX)
curl -sN --max-time 10 "$BASE/model/events" > "$SSE_LOG2" &
SSE_PID2=$!
sleep 1
curl -sf -X POST "$BASE/model/unload" > /dev/null
sleep 2
kill "$SSE_PID2" 2>/dev/null || true
wait "$SSE_PID2" 2>/dev/null || true
if grep -q "model_unloaded" "$SSE_LOG" 2>/dev/null || grep -q "model_unloaded" "$SSE_LOG2" 2>/dev/null; then
  ok "T14c: SSE received model_unloaded event"
else
  fail "T14c: SSE did not receive model_unloaded event"
fi
rm -f "$SSE_LOG" "$SSE_LOG2"
# ── TEST 15: model_ready webhook fires after load ──────────────────────────────
echo ""
echo "--- Test 15: model_ready webhook ---"
ensure_unloaded
# Start webhook receiver. It overwrites WEBHOOK_LOG with the most recent JSON
# payload it receives. Using a private mktemp file instead of a fixed /tmp
# name means a leftover from an earlier run cannot pass for a fresh delivery.
WEBHOOK_LOG=$(mktemp /tmp/webhook_log_XXXXXX)
# Small finite stand-in for an audio upload (random bytes, not valid audio).
T15_DUMMY=$(mktemp /tmp/t15_audio_XXXXXX)
head -c 1024 /dev/urandom > "$T15_DUMMY"
WEBHOOK_OUT="$WEBHOOK_LOG" python3 - <<'PYEOF' &
import http.server, json, os, signal, sys
OUT = os.environ['WEBHOOK_OUT']
class H(http.server.BaseHTTPRequestHandler):
    def do_POST(self):
        n = int(self.headers.get('Content-Length', 0))
        body = json.loads(self.rfile.read(n))
        with open(OUT, 'w') as f:
            json.dump(body, f)
        self.send_response(200); self.end_headers()
    def log_message(self, *a): pass
signal.signal(signal.SIGTERM, lambda *_: sys.exit(0))
http.server.HTTPServer(('', 9998), H).serve_forever()
PYEOF
WBOOK_PID=$!
sleep 1
# Register a webhook via a (doomed) job submission — this registers the URL
# even though the model is unloaded (and the job will 503).
# webhook_url goes first and the audio part is finite: a form field placed
# after an endless /dev/urandom stream would never be transmitted.
curl -sf -X POST "$BASE/jobs" \
  -F "webhook_url=http://localhost:9998/wh" \
  -F "audio=@${T15_DUMMY};type=audio/wav" \
  --max-time 5 > /dev/null 2>&1 || true
# Now load the model. Failures are tolerated so the receiver is always
# stopped below instead of being orphaned by a `set -e` abort.
curl -sf -X POST "$BASE/model/load" > /dev/null || true
poll_state_transition "ready" 180 || true
sleep 3
kill "$WBOOK_PID" 2>/dev/null || true
wait "$WBOOK_PID" 2>/dev/null || true
# mktemp created the file, so test for content (-s) rather than existence.
if [ -s "$WEBHOOK_LOG" ]; then
  EVENT_TYPE=$(python3 -c "import json,sys; d=json.load(open(sys.argv[1])); print(d.get('type','?'))" "$WEBHOOK_LOG" 2>/dev/null) || EVENT_TYPE="?"
  if [ "$EVENT_TYPE" = "model_ready" ]; then
    ok "T15: model_ready webhook fired"
  else
    fail "T15: webhook fired but type=$EVENT_TYPE (expected model_ready)"
  fi
else
  fail "T15: model_ready webhook not received within timeout"
fi
rm -f "$WEBHOOK_LOG" "$T15_DUMMY"
# ── TEST 16: model_unloaded webhook fires ─────────────────────────────────────
echo ""
echo "--- Test 16: model_unloaded webhook ---"
# Reset to a known state first, as every section is meant to do, so the
# registration request below is the rejected-job case and not a real job.
ensure_unloaded
# The receiver overwrites this private file with the most recent payload; a
# fixed /tmp name could carry a stale result over from an earlier run.
T16_WEBHOOK_FILE=$(mktemp /tmp/t16_webhook_XXXXXX)
# Small finite stand-in for an audio upload (random bytes, not valid audio).
T16_DUMMY=$(mktemp /tmp/t16_audio_XXXXXX)
head -c 1024 /dev/urandom > "$T16_DUMMY"
WEBHOOK_OUT="$T16_WEBHOOK_FILE" python3 - <<'PYEOF' &
import http.server, json, os, signal, sys
OUT = os.environ['WEBHOOK_OUT']
class H(http.server.BaseHTTPRequestHandler):
    def do_POST(self):
        n = int(self.headers.get('Content-Length', 0))
        body = json.loads(self.rfile.read(n))
        with open(OUT, 'w') as f:
            json.dump(body, f)
        self.send_response(200); self.end_headers()
    def log_message(self, *a): pass
signal.signal(signal.SIGTERM, lambda *_: sys.exit(0))
http.server.HTTPServer(('', 9997), H).serve_forever()
PYEOF
WBOOK2_PID=$!
sleep 1
# Register webhook URL. webhook_url goes first and the audio part is finite:
# a form field placed after an endless /dev/urandom stream would never be
# transmitted.
curl -sf -X POST "$BASE/jobs" \
  -F "webhook_url=http://localhost:9997/wh" \
  -F "audio=@${T16_DUMMY};type=audio/wav" \
  --max-time 5 > /dev/null 2>&1 || true
# Guarded so a load timeout is reported instead of aborting under `set -e`
# and leaving the receiver process running.
ensure_ready || fail "T16: model load failed"
# Unload
curl -sf -X POST "$BASE/model/unload" > /dev/null || true
sleep 5
kill "$WBOOK2_PID" 2>/dev/null || true
wait "$WBOOK2_PID" 2>/dev/null || true
# mktemp created the file, so test for content (-s) rather than existence.
if [ -s "$T16_WEBHOOK_FILE" ]; then
  EVENT_TYPE=$(python3 -c "import json,sys; d=json.load(open(sys.argv[1])); print(d.get('type','?'))" "$T16_WEBHOOK_FILE" 2>/dev/null) || EVENT_TYPE="?"
  if [ "$EVENT_TYPE" = "model_unloaded" ]; then
    ok "T16: model_unloaded webhook fired"
  else
    fail "T16: webhook type=$EVENT_TYPE (expected model_unloaded)"
  fi
else
  fail "T16: model_unloaded webhook not received"
fi
rm -f "$T16_WEBHOOK_FILE" "$T16_DUMMY"
# ── TEST 17: Concurrent load requests — single load, stable ready ─────────────
printf '\n%s\n' "--- Test 17: Concurrent POST /model/load requests ---"
ensure_unloaded
# Send 3 concurrent load requests, then wait for all of them to return.
for _ in 1 2 3; do
  curl -sf -X POST "$BASE/model/load" > /dev/null &
done
wait
poll_state_transition "ready" 180 || true
STATE=$(get_state)
if [ "$STATE" = "ready" ]; then
  ok "T17: concurrent loads handled cleanly, state=ready"
else
  fail "T17: expected ready after concurrent loads, got $STATE"
fi
# ── TEST 18: POST /model/unload during loading → clean unloaded ───────────────
printf '\n%s\n' "--- Test 18: POST /model/unload during loading ---"
ensure_unloaded
curl -sf -X POST "$BASE/model/load" > /dev/null
sleep 1 # Hopefully still in loading state
curl -sf -X POST "$BASE/model/unload" > /dev/null
# Allow time for the unload to propagate
sleep 5
STATE=$(get_state)
case "$STATE" in
  unloaded)
    ok "T18: unload during loading → clean unloaded"
    ;;
  ready)
    # Load completed before unload arrived — immediately unload
    curl -sf -X POST "$BASE/model/unload" > /dev/null
    sleep 3
    STATE=$(get_state)
    if [ "$STATE" = "unloaded" ]; then
      ok "T18: load completed then unloaded (race condition OK)"
    else
      fail "T18: state=$STATE after load+unload"
    fi
    ;;
  *)
    fail "T18: unexpected state after unload-during-load: $STATE"
    ;;
esac
# ── Summary ────────────────────────────────────────────────────────────────────
# Print the pass/fail totals; exit with status 1 when any check failed.
printf '\n%s\n' "=========================================="
echo " Results: ${PASS} passed, ${FAIL} failed"
echo "=========================================="
if [ "$FAIL" -eq 0 ]; then
  echo -e "${GREEN}ALL PASSED${NC}"
else
  echo -e "${RED}FAILURES: $FAIL${NC}"
  exit 1
fi