#!/usr/bin/env bash
# tests/test_model_lifecycle.sh
#
# Integration tests for dynamic model loading/unloading.
# Requires a running whisper-server with GPU access, plus curl and python3.
#
# Usage:
#   WHISPER_BASE_URL=http://localhost:8080 bash tests/test_model_lifecycle.sh
#
# Environment:
#   WHISPER_BASE_URL  Base URL of the server (default: http://localhost:8080).
#   TEST_AUDIO        Optional path to an audio file; Test 6 is skipped without it.
#
# Tests are designed to be independent; each section that needs a specific
# state resets it explicitly at the start.
#
# Exit status: 0 when every test passed, 1 otherwise.

set -euo pipefail

BASE="${WHISPER_BASE_URL:-http://localhost:8080}"
AUDIO="${TEST_AUDIO:-}"

GREEN='\033[0;32m'; RED='\033[0;31m'; YELLOW='\033[0;33m'; NC='\033[0m'
PASS=0; FAIL=0

# Result reporters. ok/fail also bump the counters printed in the summary.
ok()   { echo -e "${GREEN}[PASS]${NC} $1"; PASS=$((PASS+1)); }
fail() { echo -e "${RED}[FAIL]${NC} $1"; FAIL=$((FAIL+1)); }
skip() { echo -e "${YELLOW}[SKIP]${NC} $1"; }
info() { echo " $1"; }

echo "=== Model Lifecycle Integration Tests ==="
echo " BASE: $BASE"
echo ""

# Fail early with a readable message instead of letting `set -e` kill the
# script silently on the first `curl -s` call.
for tool in curl python3; do
  if ! command -v "$tool" > /dev/null; then
    echo "ERROR: required tool '$tool' not found in PATH" >&2
    exit 1
  fi
done
if ! curl -sf --max-time 10 "$BASE/model/status" > /dev/null; then
  echo "ERROR: cannot reach $BASE/model/status — is the server running?" >&2
  exit 1
fi

# ── Helpers ──────────────────────────────────────────────────────────────────

# Print the "state" field of GET /model/status.
# Prints "unknown" (and still returns 0) when the request or the JSON parse
# fails, so a transient error shows up as a failed assertion rather than
# aborting the whole suite under `set -e`.
get_state() {
  local state
  if state=$(curl -sf --max-time 10 "$BASE/model/status" \
      | python3 -c "import sys,json; print(json.load(sys.stdin)['state'])" 2>/dev/null); then
    printf '%s\n' "$state"
  else
    printf 'unknown\n'
  fi
}

# Print the value of HTTP header $1 (case-insensitive name) found in the raw
# response/header text on stdin; prints an empty line when it is absent.
# Only lines that *start* with "<name>:" match, so other headers that merely
# mention the name are ignored.
header_value() {
  local want
  want="$(printf '%s' "$1" | tr '[:upper:]' '[:lower:]'):"
  awk -v want="$want" '
    found == "" && tolower(substr($0, 1, length(want))) == want {
      found = substr($0, length(want) + 1)
      gsub(/^[ \t]+|[ \t\r]+$/, "", found)
    }
    END { print found }'
}

# Ask the server to unload the model and give it time to settle.
# Best effort: always returns 0; prints a warning if the state is not
# "unloaded" after 2s and then waits a further 5s.
ensure_unloaded() {
  # A non-2xx answer must not abort the suite; the state check below reports it.
  curl -sf -X POST "$BASE/model/unload" > /dev/null || true
  sleep 2
  local s
  s=$(get_state)
  if [ "$s" != "unloaded" ]; then
    echo " WARNING: expected unloaded, got $s — waiting 5s"
    sleep 5
  fi
}

# Make sure the model is in state "ready", triggering a load if needed.
# Returns 0 once ready, 1 if it is still not ready after ~3 minutes.
ensure_ready() {
  local state elapsed=0
  state=$(get_state)
  if [ "$state" = "ready" ]; then return 0; fi
  curl -sf -X POST "$BASE/model/load" > /dev/null || true
  while true; do
    sleep 5; elapsed=$((elapsed+5))
    state=$(get_state)
    if [ "$state" = "ready" ]; then return 0; fi
    if [ "$elapsed" -gt 180 ]; then
      echo " TIMEOUT: model did not become ready"
      return 1
    fi
  done
}

# Poll every 2s until the model state equals $1.
# $2 is the timeout in seconds (default 120). Returns 0 on match, 1 on timeout.
poll_state_transition() {
  local target="$1" max_secs="${2:-120}"
  local elapsed=0 s
  while true; do
    sleep 2; elapsed=$((elapsed+2))
    s=$(get_state)
    if [ "$s" = "$target" ]; then return 0; fi
    if [ "$elapsed" -ge "$max_secs" ]; then return 1; fi
  done
}

# ── TEST 1: Startup state is unloaded ────────────────────────────────────────
echo "--- Test 1: Startup state is unloaded (or after explicit unload) ---"
ensure_unloaded
STATE=$(get_state)
if [ "$STATE" = "unloaded" ]; then
  ok "T1: state=unloaded after explicit unload"
else
  fail "T1: expected unloaded, got $STATE"
fi

# ── TEST 2: POST /model/load returns 202 ─────────────────────────────────────
echo ""
echo "--- Test 2: POST /model/load returns 202 ---"
ensure_unloaded
# curl still prints %{http_code} (000) when the transfer fails, so only the
# exit status needs neutralising here.
HTTP=$(curl -s -o /dev/null -w "%{http_code}" -X POST "$BASE/model/load" || true)
if [ "$HTTP" = "202" ]; then
  ok "T2: POST /model/load → 202 Accepted"
else
  fail "T2: expected 202, got $HTTP"
fi
# Cancel the in-progress load to clean up
curl -sf -X POST "$BASE/model/unload" > /dev/null || true
sleep 2

# ── TEST 3: State transitions to loading/ready after load trigger ─────────────
echo ""
echo "--- Test 3: State transitions to loading (not stuck at unloaded) ---"
ensure_unloaded
curl -sf -X POST "$BASE/model/load" > /dev/null || true
sleep 1
STATE=$(get_state)
if [ "$STATE" = "loading" ] || [ "$STATE" = "ready" ]; then
  ok "T3: state transitioned to $STATE (not stuck at unloaded)"
else
  fail "T3: expected loading or ready, got $STATE"
fi

# ── TEST 4: Model reaches ready state and loaded_at is set ───────────────────
echo ""
echo "--- Test 4: Model reaches ready state with loaded_at timestamp ---"
# Already loading from T3 — wait for ready
if ! poll_state_transition "ready" 180; then
  fail "T4: model did not become ready within 3 minutes"
else
  STATUS_JSON=$(curl -sf "$BASE/model/status" || echo "{}")
  # A JSON null becomes Python None, which would print as "None"; map it to
  # MISSING explicitly so a null timestamp is reported as a failure.
  LOADED_AT=$(echo "$STATUS_JSON" \
    | python3 -c "import sys,json; v=json.load(sys.stdin).get('loaded_at'); print('MISSING' if v is None else v)" 2>/dev/null \
    || echo "MISSING")
  if [ "$LOADED_AT" != "MISSING" ] && [ "$LOADED_AT" != "null" ] && [ -n "$LOADED_AT" ]; then
    ok "T4: model=ready, loaded_at=$LOADED_AT"
  else
    fail "T4: model ready but loaded_at is missing or null"
  fi
fi

# ── TEST 5: Idempotent load — POST /model/load when ready returns 200 ─────────
echo ""
echo "--- Test 5: POST /model/load when already ready → 200 ---"
ensure_ready || { fail "T5: could not load model"; }
HTTP=$(curl -s -o /dev/null -w "%{http_code}" -X POST "$BASE/model/load" || true)
STATE=$(get_state)
if [ "$HTTP" = "200" ] && [ "$STATE" = "ready" ]; then
  ok "T5: idempotent load → 200, state stays ready"
elif [ "$HTTP" = "202" ] && [ "$STATE" = "ready" ]; then
  # 202 is tolerated as long as the model stays ready.
  ok "T5: idempotent load → 202, state stays ready"
else
  fail "T5: expected 200 and ready, got HTTP=$HTTP state=$STATE"
fi

# ── TEST 6: Job accepted when ready (segments > 0) ────────────────────────────
echo ""
echo "--- Test 6: Job accepted when model is ready ---"
if [ -z "$AUDIO" ]; then
  skip "T6: TEST_AUDIO not set — skipping job submission test"
else
  ensure_ready || { fail "T6: model load failed"; }
  SUBMIT=$(curl -sf -X POST "$BASE/jobs" -F "audio=@${AUDIO};type=audio/wav" -F "task=transcribe" 2>&1 || true)
  JOB_ID=$(echo "$SUBMIT" | python3 -c "import sys,json; print(json.load(sys.stdin)['job_id'])" 2>/dev/null || echo "")
  if [ -n "$JOB_ID" ]; then
    ok "T6: job accepted, id=$JOB_ID"
    # Poll to done (or failed), giving up after ~10 minutes.
    elapsed=0
    STATUS="unknown"
    while true; do
      sleep 10; elapsed=$((elapsed+10))
      STATUS=$(curl -sf "$BASE/jobs/$JOB_ID" \
        | python3 -c "import sys,json; print(json.load(sys.stdin)['status'])" 2>/dev/null) || STATUS="unknown"
      if [ "$STATUS" = "done" ] || [ "$STATUS" = "failed" ]; then break; fi
      if [ "$elapsed" -gt 600 ]; then break; fi
    done
    SEGS=$(curl -sf "$BASE/jobs/$JOB_ID" \
      | python3 -c "import sys,json; d=json.load(sys.stdin); print(len(d.get('segments',[])))" 2>/dev/null) || SEGS=0
    if [ "$STATUS" != "done" ]; then
      # Do not claim "done" when the job failed or the poll timed out.
      fail "T6b: job did not finish (status=$STATUS after ${elapsed}s)"
    elif [ "$SEGS" -gt 0 ]; then
      ok "T6b: job done with $SEGS segments"
    else
      fail "T6b: job done but 0 segments"
    fi
  else
    fail "T6: job submission failed: $SUBMIT"
  fi
fi

# ── TEST 7: POST /model/unload → state=unloaded ───────────────────────────────
echo ""
echo "--- Test 7: POST /model/unload ---"
ensure_ready || { fail "T7: model load failed"; }
curl -sf -X POST "$BASE/model/unload" > /dev/null || true
sleep 3
STATE=$(get_state)
if [ "$STATE" = "unloaded" ]; then
  ok "T7: POST /model/unload → state=unloaded"
else
  fail "T7: expected unloaded after unload, got $STATE"
fi

# ── TEST 8: POST /jobs when unloaded → 503 + Retry-After ─────────────────────
echo ""
echo "--- Test 8: POST /jobs when unloaded → 503 + Retry-After ---"
ensure_unloaded
T8_BODY_FILE=$(mktemp "${TMPDIR:-/tmp}/t8_body_XXXXXX")
T8_HDR_FILE=$(mktemp "${TMPDIR:-/tmp}/t8_headers_XXXXXX")
# Submit a tiny dummy payload (won't be valid audio but that's ok for this test).
# Status, headers and body are all captured from this ONE request:
#   * curl refuses to combine -I (HEAD) with -F (multipart POST), so a separate
#     "-sI ... -F" probe can never return the Retry-After header;
#   * -w output is printed even when curl exits non-zero, so `|| true` is used
#     rather than echoing a second status code.
HTTP=$(curl -s -o "$T8_BODY_FILE" -D "$T8_HDR_FILE" -w "%{http_code}" -X POST "$BASE/jobs" \
  -F "audio=@/dev/urandom;type=audio/wav" \
  --max-time 5 2>/dev/null || true)
HTTP="${HTTP:-000}"
# If the model auto-loads it might start processing; check for 503 first
if [ "$HTTP" = "503" ]; then
  RETRY_AFTER=$(header_value "Retry-After" < "$T8_HDR_FILE")
  BODY=$(cat "$T8_BODY_FILE" 2>/dev/null || echo "{}")
  HAS_STATE=$(echo "$BODY" | python3 -c "import sys,json; d=json.load(sys.stdin); print('state' in d)" 2>/dev/null || echo "False")
  HAS_RETRY=$(echo "$BODY" | python3 -c "import sys,json; d=json.load(sys.stdin); print('retry_after_secs' in d)" 2>/dev/null || echo "False")
  if [ "$HAS_STATE" = "True" ] && [ "$HAS_RETRY" = "True" ]; then
    ok "T8: 503 with state + retry_after_secs in body"
  else
    fail "T8: 503 but body missing state/retry_after_secs. body=$BODY"
  fi
  if [ -n "$RETRY_AFTER" ]; then
    ok "T8b: Retry-After header present: $RETRY_AFTER"
  else
    fail "T8b: Retry-After header missing from 503 response"
  fi
else
  skip "T8: got HTTP $HTTP (model may have loaded before check) — skipping"
fi
rm -f -- "$T8_BODY_FILE" "$T8_HDR_FILE"

# ── TEST 9: Rejected job triggers load ────────────────────────────────────────
echo ""
echo "--- Test 9: Job rejection triggers model load ---"
ensure_unloaded
# Send a job (we expect 503)
curl -sf -X POST "$BASE/jobs" \
  -F "audio=@/dev/urandom;type=audio/wav" \
  --max-time 5 > /dev/null 2>&1 || true
sleep 2
STATE=$(get_state)
if [ "$STATE" = "loading" ] || [ "$STATE" = "ready" ]; then
  ok "T9: model started loading after job rejection ($STATE)"
else
  fail "T9: expected loading/ready after job rejection, got $STATE"
fi
# Stop the load to clean up
curl -sf -X POST "$BASE/model/unload" > /dev/null || true
sleep 2

# ── TEST 10: Retry-After values ───────────────────────────────────────────────
echo ""
echo "--- Test 10: Retry-After values match state ---"
ensure_unloaded
# Unloaded → Retry-After: 30
RESP_UNLOADED=$(curl -si -X POST "$BASE/jobs" -F "audio=@/dev/urandom;type=audio/wav" --max-time 5 2>/dev/null || true)
RA_UNLOADED=$(printf '%s\n' "$RESP_UNLOADED" | header_value "Retry-After")
if [ "$RA_UNLOADED" = "30" ]; then
  ok "T10a: Retry-After=30 when unloaded"
else
  skip "T10a: Retry-After=$RA_UNLOADED (expected 30) — model may have started loading"
fi

# ── TEST 11: Retry-After=10 during loading ────────────────────────────────────
echo ""
echo "--- Test 11: Retry-After=10 when loading ---"
ensure_unloaded
curl -sf -X POST "$BASE/model/load" > /dev/null || true
sleep 1
# In loading state
STATE=$(get_state)
if [ "$STATE" = "loading" ]; then
  RESP_LOADING=$(curl -si -X POST "$BASE/jobs" -F "audio=@/dev/urandom;type=audio/wav" --max-time 5 2>/dev/null || true)
  RA_LOADING=$(printf '%s\n' "$RESP_LOADING" | header_value "Retry-After")
  if [ "$RA_LOADING" = "10" ]; then
    ok "T11: Retry-After=10 when loading"
  else
    fail "T11: expected Retry-After=10, got '$RA_LOADING' (state=$STATE)"
  fi
else
  skip "T11: model already $STATE — can't test loading state Retry-After"
fi

# ── TEST 12: 503 body schema validation ──────────────────────────────────────
echo ""
echo "--- Test 12: 503 body schema validation ---"
ensure_unloaded
# No -f here: with --fail curl discards the body of the 503 response, which is
# exactly the document this test needs to inspect.
BODY=$(curl -s -X POST "$BASE/jobs" -F "audio=@/dev/urandom;type=audio/wav" --max-time 5 2>/dev/null || true)
if [ -z "$BODY" ]; then BODY="{}"; fi
# NOTE(review): the original inline validator was lost from this copy of the
# script (the text after `python3 - <` is missing). The check below is a
# minimal reconstruction that only requires the two keys Test 8 looks for;
# restore the full schema from version control if it was stricter.
if SCHEMA_ERR=$(printf '%s' "$BODY" | python3 -c '
import json, sys
try:
    doc = json.load(sys.stdin)
except ValueError as exc:
    sys.exit("not valid JSON: %s" % exc)
if not isinstance(doc, dict):
    sys.exit("top-level value is not a JSON object")
missing = [k for k in ("state", "retry_after_secs") if k not in doc]
if missing:
    sys.exit("missing keys: " + ", ".join(missing))
' 2>&1); then
  ok "T12: 503 body contains state + retry_after_secs"
else
  fail "T12: 503 body failed schema validation ($SCHEMA_ERR). body=$BODY"
fi

# ── TEST 13 ──────────────────────────────────────────────────────────────────
# NOTE(review): Test 13 is missing entirely from this copy of the script (lost
# together with the Test 12 validator). Restore it from version control.
echo ""
skip "T13: test body missing from this copy of the script — restore from version control"

# ── TEST 14: SSE lifecycle events ────────────────────────────────────────────
# NOTE(review): the section banner and the first listener's setup were also
# lost; everything down to `SSE_PID=$!` is reconstructed from the surviving
# second listener below. Confirm the banner text and the --max-time value.
echo ""
echo "--- Test 14: SSE model lifecycle events ---"
ensure_unloaded
SSE_LOG=$(mktemp "${TMPDIR:-/tmp}/sse_events_XXXXXX")
# Must outlive the load below (poll timeout 180s plus the sleeps around it).
curl -sN --max-time 200 "$BASE/model/events" > "$SSE_LOG" &
SSE_PID=$!
sleep 1
# Trigger load
curl -sf -X POST "$BASE/model/load" > /dev/null || true
poll_state_transition "ready" 180 || true
sleep 2
kill "$SSE_PID" 2>/dev/null || true
wait "$SSE_PID" 2>/dev/null || true

if grep -q "model_loading" "$SSE_LOG" 2>/dev/null; then
  ok "T14a: SSE received model_loading event"
else
  fail "T14a: SSE did not receive model_loading event"
fi
if grep -q "model_ready" "$SSE_LOG" 2>/dev/null; then
  ok "T14b: SSE received model_ready event"
else
  fail "T14b: SSE did not receive model_ready event"
fi

# Now unload to get model_unloaded event.
# The listener is started BEFORE the unload request: the event is emitted as
# soon as the unload is issued, so a listener attached afterwards can miss it.
SSE_LOG2=$(mktemp "${TMPDIR:-/tmp}/sse_events_XXXXXX")
curl -sN --max-time 10 "$BASE/model/events" > "$SSE_LOG2" &
SSE_PID2=$!
sleep 1
curl -sf -X POST "$BASE/model/unload" > /dev/null || true
# (Test 14, continued) Let the second SSE listener collect events, then stop it.
sleep 2
kill "$SSE_PID2" 2>/dev/null || true
wait "$SSE_PID2" 2>/dev/null || true

# model_unloaded fires immediately on unload command
if grep -q "model_unloaded" "$SSE_LOG" 2>/dev/null || grep -q "model_unloaded" "$SSE_LOG2" 2>/dev/null; then
  ok "T14c: SSE received model_unloaded event"
else
  fail "T14c: SSE did not receive model_unloaded event"
fi
rm -f "$SSE_LOG" "$SSE_LOG2"

# ── Webhook test helpers ─────────────────────────────────────────────────────

WEBHOOK_LOG=""; WEBHOOK_LOG2=""
WBOOK_PID=""; WBOOK2_PID=""
WEBHOOK_RECEIVER_PID=""

# Start a background HTTP server that records webhook deliveries.
# Arguments: $1 - TCP port to listen on
#            $2 - file to append to; one line per POST, holding the payload's
#                 "type" field ("?" when the body is not a JSON object with one)
# Outputs:   sets the global WEBHOOK_RECEIVER_PID to the server's PID. (A
#            global is used because capturing via $(...) would block on the
#            background process.)
# Every event is appended rather than overwriting a single file, so an
# unrelated later delivery cannot hide the event a test is waiting for.
start_webhook_receiver() {
  python3 - "$1" "$2" <<'PYEOF' &
import http.server, json, signal, sys

port, log_path = int(sys.argv[1]), sys.argv[2]

class H(http.server.BaseHTTPRequestHandler):
    def do_POST(self):
        n = int(self.headers.get('Content-Length', 0))
        raw = self.rfile.read(n)
        try:
            body = json.loads(raw)
            event_type = body.get('type', '?') if isinstance(body, dict) else '?'
        except ValueError:
            event_type = '?'
        with open(log_path, 'a') as f:
            f.write('%s\n' % event_type)
        self.send_response(200); self.end_headers()
    def log_message(self, *a): pass

signal.signal(signal.SIGTERM, lambda *_: sys.exit(0))
http.server.HTTPServer(('', port), H).serve_forever()
PYEOF
  WEBHOOK_RECEIVER_PID=$!
}

# EXIT handler: stop any receiver that is still running and remove the event
# logs, so an aborted run does not leave a process bound to port 9998/9997 or
# stale files that would confuse the next run.
cleanup_webhook_tests() {
  local pid path
  for pid in "$WBOOK_PID" "$WBOOK2_PID"; do
    if [ -n "$pid" ]; then kill "$pid" 2>/dev/null || true; fi
  done
  for path in "$WEBHOOK_LOG" "$WEBHOOK_LOG2"; do
    if [ -n "$path" ]; then rm -f -- "$path"; fi
  done
}
trap cleanup_webhook_tests EXIT

# ── TEST 15: model_ready webhook fires after load ──────────────────────────────
echo ""
echo "--- Test 15: model_ready webhook ---"
ensure_unloaded
# Start webhook receiver
WEBHOOK_LOG=$(mktemp "${TMPDIR:-/tmp}/webhook_log_XXXXXX")
start_webhook_receiver 9998 "$WEBHOOK_LOG"
WBOOK_PID=$WEBHOOK_RECEIVER_PID
sleep 1
if ! kill -0 "$WBOOK_PID" 2>/dev/null; then
  # Typically means the port is already in use.
  WBOOK_PID=""
  fail "T15: webhook receiver failed to start on port 9998"
else
  # Register a webhook via a (doomed) job submission — this registers the URL
  # even though the model is unloaded (and the job will 503)
  curl -sf -X POST "$BASE/jobs" \
    -F "audio=@/dev/urandom;type=audio/wav" \
    -F "webhook_url=http://localhost:9998/wh" \
    --max-time 5 > /dev/null 2>&1 || true
  # Now load the model
  curl -sf -X POST "$BASE/model/load" > /dev/null || true
  poll_state_transition "ready" 180 || true
  sleep 3
  kill "$WBOOK_PID" 2>/dev/null || true
  wait "$WBOOK_PID" 2>/dev/null || true
  WBOOK_PID=""
  if [ -s "$WEBHOOK_LOG" ]; then
    if grep -qx "model_ready" "$WEBHOOK_LOG"; then
      ok "T15: model_ready webhook fired"
    else
      EVENT_TYPE=$(sort -u "$WEBHOOK_LOG" | paste -sd, -)
      fail "T15: webhook fired but type=$EVENT_TYPE (expected model_ready)"
    fi
  else
    fail "T15: model_ready webhook not received within timeout"
  fi
fi

# ── TEST 16: model_unloaded webhook fires ─────────────────────────────────────
echo ""
echo "--- Test 16: model_unloaded webhook ---"
WEBHOOK_LOG2=$(mktemp "${TMPDIR:-/tmp}/webhook_log_XXXXXX")
start_webhook_receiver 9997 "$WEBHOOK_LOG2"
WBOOK2_PID=$WEBHOOK_RECEIVER_PID
sleep 1
if ! kill -0 "$WBOOK2_PID" 2>/dev/null; then
  WBOOK2_PID=""
  fail "T16: webhook receiver failed to start on port 9997"
else
  # Register webhook URL
  # NOTE(review): the model is normally still ready here (left so by Test 15),
  # so this dummy job may be accepted rather than rejected — confirm that the
  # URL is registered in both cases.
  curl -sf -X POST "$BASE/jobs" \
    -F "audio=@/dev/urandom;type=audio/wav" \
    -F "webhook_url=http://localhost:9997/wh" \
    --max-time 5 > /dev/null 2>&1 || true
  # Report a load timeout as a test failure instead of letting `set -e` abort
  # the script with the receiver still running.
  ensure_ready || { fail "T16: model load failed"; }
  # Unload
  curl -sf -X POST "$BASE/model/unload" > /dev/null || true
  sleep 5
  kill "$WBOOK2_PID" 2>/dev/null || true
  wait "$WBOOK2_PID" 2>/dev/null || true
  WBOOK2_PID=""
  if [ -s "$WEBHOOK_LOG2" ]; then
    if grep -qx "model_unloaded" "$WEBHOOK_LOG2"; then
      ok "T16: model_unloaded webhook fired"
    else
      EVENT_TYPE=$(sort -u "$WEBHOOK_LOG2" | paste -sd, -)
      fail "T16: webhook type=$EVENT_TYPE (expected model_unloaded)"
    fi
  else
    fail "T16: model_unloaded webhook not received"
  fi
fi

# ── TEST 17: Concurrent load requests — single load, stable ready ─────────────
echo ""
echo "--- Test 17: Concurrent POST /model/load requests ---"
ensure_unloaded
# Send 3 concurrent load requests
curl -sf -X POST "$BASE/model/load" > /dev/null &
curl -sf -X POST "$BASE/model/load" > /dev/null &
curl -sf -X POST "$BASE/model/load" > /dev/null &
# Bare `wait` returns 0 regardless of the individual curl exit codes; the
# outcome is judged from the resulting model state below.
wait
poll_state_transition "ready" 180 || true
STATE=$(get_state)
if [ "$STATE" = "ready" ]; then
  ok "T17: concurrent loads handled cleanly, state=ready"
else
  fail "T17: expected ready after concurrent loads, got $STATE"
fi

# ── TEST 18: POST /model/unload during loading → clean unloaded ───────────────
echo ""
echo "--- Test 18: POST /model/unload during loading ---"
ensure_unloaded
curl -sf -X POST "$BASE/model/load" > /dev/null || true
sleep 1
# Hopefully still in loading state
curl -sf -X POST "$BASE/model/unload" > /dev/null || true
# Allow time for the unload to propagate
sleep 5
STATE=$(get_state)
if [ "$STATE" = "unloaded" ]; then
  ok "T18: unload during loading → clean unloaded"
elif [ "$STATE" = "ready" ]; then
  # Load completed before unload arrived — immediately unload
  curl -sf -X POST "$BASE/model/unload" > /dev/null || true
  sleep 3
  STATE=$(get_state)
  if [ "$STATE" = "unloaded" ]; then
    ok "T18: load completed then unloaded (race condition OK)"
  else
    fail "T18: state=$STATE after load+unload"
  fi
else
  fail "T18: unexpected state after unload-during-load: $STATE"
fi

# ── Summary ────────────────────────────────────────────────────────────────────
echo ""
echo "=========================================="
echo " Results: ${PASS} passed, ${FAIL} failed"
echo "=========================================="
if [ "$FAIL" -eq 0 ]; then
  echo -e "${GREEN}ALL PASSED${NC}"
else
  echo -e "${RED}FAILURES: $FAIL${NC}"
  exit 1
fi