Files
whisper-rtx2080/tests/test_idle_timeout.sh
mozempk b191fbe200
All checks were successful
Build & Push Docker Image / build-and-push (push) Successful in 8m41s
feat: dynamic model loading/unloading with GPU polling
- Model starts unloaded (lazy); loads on first job or POST /model/load
- Auto-unloads after IDLE_TIMEOUT_SECS (default 300) of inactivity
- POST /model/unload for immediate manual release
- GPU-busy detection: on VRAM OOM, enters WaitingForGpu and retries
  every GPU_POLL_INTERVAL_SECS (default 30) indefinitely
- POST /jobs when unloaded → 503 + Retry-After header, triggers load
- AppError::OutOfMemory and AppError::ModelNotReady variants
- WorkerCmd channel (SyncSender<WorkerCmd>) replaces bare tx_req channel
- Idle timer via recv_timeout(1s) tick inside OS thread (no extra thread)
- Model lifecycle events broadcast via tokio broadcast channel (SSE + webhooks)
- webhook_registry: all clients that ever submitted a webhook_url receive
  model_ready and model_unloaded webhooks
- GPU warmup retained on every (re)load

New routes:
  GET  /model/status  — current state + VRAM stats
  POST /model/load    — trigger load (idempotent)
  POST /model/unload  — immediate unload
  GET  /model/events  — SSE stream of model lifecycle events

New env vars:
  IDLE_TIMEOUT_SECS       (default 300)
  GPU_POLL_INTERVAL_SECS  (default 30)

Tests:
  tests/test_model_lifecycle.sh — 18 integration tests (full state machine,
    SSE events, webhooks, concurrency, unload-during-load)
  tests/test_idle_timeout.sh    — 5 tests with short IDLE_TIMEOUT_SECS=5
  test_all.sh updated: loads model before job submission, asserts
    model_state in /health, adds POST /model/unload at end

Docs:
  docs/USAGE.md: model lifecycle section, new env vars, 503 retry pattern,
    updated /health response shape

Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
2026-05-08 17:57:20 +02:00

247 lines
8.3 KiB
Bash
Executable File

#!/usr/bin/env bash
# tests/test_idle_timeout.sh
#
# Integration tests for the idle-timeout auto-unload feature.
# REQUIRES the server to be started with a short idle timeout:
#
# IDLE_TIMEOUT_SECS=5 ./whisper-server
# # or via Docker:
# docker run -e IDLE_TIMEOUT_SECS=5 ...
#
# The default idle timeout is 5 minutes; these tests use a 5-second window
# to keep the suite fast.
# Strict mode: stop on unhandled errors, unset variables and failed pipeline
# stages.
set -euo pipefail

# Base URL of the server under test (override with WHISPER_BASE_URL).
BASE="${WHISPER_BASE_URL:-http://localhost:8080}"
# Idle timeout, in seconds, that the server is expected to be running with
# (override with EXPECTED_IDLE_TIMEOUT_SECS).
IDLE_TIMEOUT="${EXPECTED_IDLE_TIMEOUT_SECS:-5}"
# Optional path to an audio file (TEST_AUDIO); Test 5 is skipped when empty.
AUDIO="${TEST_AUDIO:-}"

# IDLE_TIMEOUT feeds $(( ... )) arithmetic and sleep durations further down.
# Reject anything that is not a positive decimal integer up front: a word
# would be evaluated as a variable name, a leading zero as octal, and 0 would
# yield a negative sleep.
case "$IDLE_TIMEOUT" in
  '' | *[!0-9]* | 0*)
    printf 'error: EXPECTED_IDLE_TIMEOUT_SECS must be a positive integer without leading zeros, got "%s"\n' \
      "$IDLE_TIMEOUT" >&2
    exit 2
    ;;
esac
# ANSI colour escapes, stored with literal backslashes and expanded by
# printf's %b when a line is printed.
GREEN='\033[0;32m'; RED='\033[0;31m'; YELLOW='\033[0;33m'; NC='\033[0m'
# Running tallies of passed / failed checks.
PASS=0; FAIL=0

# ok MESSAGE — print a green [PASS] line and count one passed check.
# The message goes through %s so backslashes in it (for example in a value
# echoed back from the server) are printed verbatim rather than expanded.
ok() {
  printf '%b[PASS]%b %s\n' "$GREEN" "$NC" "$1"
  PASS=$((PASS + 1))
}

# fail MESSAGE — print a red [FAIL] line and count one failed check.
fail() {
  printf '%b[FAIL]%b %s\n' "$RED" "$NC" "$1"
  FAIL=$((FAIL + 1))
}

# skip MESSAGE — print a yellow [SKIP] line; neither tally changes.
skip() {
  printf '%b[SKIP]%b %s\n' "$YELLOW" "$NC" "$1"
}

# info MESSAGE — print an uncoloured, space-prefixed progress line.
info() {
  printf ' %s\n' "$1"
}
# Run banner: show the target URL and the idle timeout the server must have
# been started with, one argument per output line.
printf '%s\n' \
  "=== Idle Timeout Tests ===" \
  " BASE: $BASE" \
  " IDLE_TIMEOUT_SECS: $IDLE_TIMEOUT (must be configured on the server)" \
  "" \
  "NOTE: These tests require the server to be running with IDLE_TIMEOUT_SECS=$IDLE_TIMEOUT" \
  ""
# ── Helpers ──────────────────────────────────────────────────────────────────
#######################################
# Print the "state" field of GET $BASE/model/status.
# Globals:   BASE (read)
# Outputs:   the state string on stdout
# Returns:   0 on success; non-zero if the request fails or the response
#            cannot be parsed
#######################################
get_state() {
  local body
  # Fetch first: when the request fails we stop here instead of handing empty
  # input to the JSON parser, which would only add a traceback to the output.
  body=$(curl -sf "$BASE/model/status") || return 1
  printf '%s' "$body" \
    | python3 -c "import sys,json; print(json.load(sys.stdin)['state'])"
}
#######################################
# Make sure the model reports state "ready".
# If it is not ready yet, POST /model/load once and then poll the state every
# 3 seconds.
# Globals:   BASE (read)
# Returns:   0 once the state is "ready"; 1 when more than 180 seconds of
#            polling have passed without reaching it
#######################################
ensure_ready() {
  local current waited=0

  current=$(get_state)
  if [[ "$current" == "ready" ]]; then
    return 0
  fi

  curl -sf -X POST "$BASE/model/load" > /dev/null

  while :; do
    sleep 3
    waited=$((waited + 3))
    current=$(get_state)
    if [[ "$current" == "ready" ]]; then
      return 0
    fi
    if (( waited > 180 )); then
      return 1
    fi
  done
}
#######################################
# Ask the server to unload the model, then pause for 2 seconds.
# The request is best-effort: a failing POST is ignored on purpose.
# Globals:   BASE (read)
#######################################
ensure_unloaded() {
  if ! curl -sf -X POST "$BASE/model/unload" > /dev/null; then
    : # deliberately ignored
  fi
  sleep 2
}
# ── TEST 1: Load model, leave it idle, then expect an automatic unload ────────
echo "--- Test 1: Idle timeout triggers auto-unload ---"
ensure_unloaded
# Only run the idle check when the load succeeded. A model that never loaded
# would also report "unloaded" after the wait and produce a false PASS.
if ensure_ready; then
  WAIT_SECS=$((IDLE_TIMEOUT + 3))
  info "Model is ready. Waiting $WAIT_SECS seconds (idle timeout=$IDLE_TIMEOUT + 3s buffer)..."
  sleep "$WAIT_SECS"
  STATE=$(get_state)
  if [ "$STATE" = "unloaded" ]; then
    ok "T1: model auto-unloaded after ${IDLE_TIMEOUT}s idle"
  else
    fail "T1: expected unloaded after idle timeout, got $STATE"
    info "Is the server running with IDLE_TIMEOUT_SECS=$IDLE_TIMEOUT?"
  fi
else
  fail "T1: model load failed"
fi
# ── TEST 2: model_unloaded webhook fires on idle timeout ─────────────────────
echo ""
echo "--- Test 2: model_unloaded webhook fires on idle timeout ---"
ensure_unloaded

# The receiver stores the most recent webhook body in a file inside a fresh
# private directory, so a file left behind by an earlier run can never be
# mistaken for an event from this one.
WH_DIR=$(mktemp -d)
WH_EVENT_FILE="$WH_DIR/event.json"

# Start webhook receiver (listens on port 9995, overwrites the event file on
# every POST, exits cleanly on SIGTERM).
IDLE_WH_EVENT_FILE="$WH_EVENT_FILE" python3 - <<'PYEOF' &
import http.server, json, os, signal, sys

EVENT_FILE = os.environ['IDLE_WH_EVENT_FILE']

class H(http.server.BaseHTTPRequestHandler):
    def do_POST(self):
        n = int(self.headers.get('Content-Length', 0))
        body = json.loads(self.rfile.read(n))
        with open(EVENT_FILE, 'w') as f:
            json.dump(body, f)
        self.send_response(200); self.end_headers()

    def log_message(self, *a): pass

signal.signal(signal.SIGTERM, lambda *_: sys.exit(0))
http.server.HTTPServer(('', 9995), H).serve_forever()
PYEOF
WH_PID=$!
sleep 1

# Register webhook via a job submission (will 503 since unloaded).
# The outcome of this request is irrelevant, hence the blanket "|| true".
curl -sf -X POST "$BASE/jobs" \
  -F "audio=@/dev/urandom;type=audio/wav" \
  -F "webhook_url=http://localhost:9995/wh" \
  --max-time 5 > /dev/null 2>&1 || true

# Load model, then wait for the idle timeout.
T2_LOADED=0
if ensure_ready; then
  T2_LOADED=1
  WAIT_SECS=$((IDLE_TIMEOUT + 5))
  info "Waiting ${WAIT_SECS}s for idle timeout..."
  sleep "$WAIT_SECS"
else
  fail "T2: model load failed"
fi

# Stop the receiver on every path; it may already be gone, so never let a
# failing kill/wait abort the script under set -e.
kill "$WH_PID" 2>/dev/null || true
wait "$WH_PID" 2>/dev/null || true

if [ "$T2_LOADED" -eq 1 ]; then
  if [ -f "$WH_EVENT_FILE" ]; then
    EVENT_TYPE=$(python3 -c "import json,sys; print(json.load(open(sys.argv[1])).get('type','?'))" \
      "$WH_EVENT_FILE" 2>/dev/null) || EVENT_TYPE="?"
    if [ "$EVENT_TYPE" = "model_unloaded" ]; then
      ok "T2: model_unloaded webhook fired on idle timeout"
    else
      fail "T2: webhook type=$EVENT_TYPE (expected model_unloaded)"
    fi
  else
    fail "T2: no webhook received within timeout"
  fi
fi
rm -rf -- "${WH_DIR:?}"
# ── TEST 3: Job submission after idle timeout → 503 → triggers reload ─────────
echo ""
echo "--- Test 3: Job triggers reload after idle unload ---"
ensure_unloaded
# The remaining steps only make sense once the model has actually been loaded.
if ensure_ready; then
  # Wait for auto-unload
  WAIT_SECS=$((IDLE_TIMEOUT + 3))
  info "Waiting ${WAIT_SECS}s for idle unload..."
  sleep "$WAIT_SECS"
  STATE=$(get_state)
  [ "$STATE" = "unloaded" ] || info "Note: state=$STATE (expected unloaded)"

  # Submit job → 503, triggers reload.
  # curl writes %{http_code} even when the transfer itself fails (for example
  # when --max-time cuts off the endless /dev/urandom upload), so the status
  # is captured as-is and curl's exit code is ignored. Appending a fallback
  # inside the substitution would glue it onto the real code ("503000").
  # The response body is not inspected, so it is discarded.
  HTTP=$(curl -s -o /dev/null -w "%{http_code}" -X POST "$BASE/jobs" \
    -F "audio=@/dev/urandom;type=audio/wav" \
    --max-time 5 2>/dev/null) || true
  HTTP="${HTTP:-000}"
  if [ "$HTTP" = "503" ]; then
    ok "T3a: POST /jobs → 503 after idle unload"
  else
    skip "T3a: POST /jobs returned $HTTP (model may have reloaded)"
  fi

  # State should be loading or ready (reload triggered by job submission)
  sleep 2
  STATE=$(get_state)
  if [ "$STATE" = "loading" ] || [ "$STATE" = "ready" ]; then
    ok "T3b: reload triggered by job submission ($STATE)"
  else
    fail "T3b: expected loading/ready, got $STATE"
  fi
else
  fail "T3: initial load failed"
fi
# ── TEST 4: Model stays loaded until the idle timeout has fully elapsed ───────
# No job is submitted here: the test waits IDLE_TIMEOUT - 1 seconds, expects
# the model to still be ready, then waits past the timeout and expects it to
# be unloaded.
# NOTE(review): ensure_ready polls every 3 seconds, so the model may already
# have been idle for a few seconds when the wait starts; with a very short
# timeout T4a could be timing-sensitive — confirm against the server's timer.
echo ""
echo "--- Test 4: Idle timer resets with each completed job ---"
ensure_unloaded
# Skip the timing checks when the load failed; otherwise T4b would pass on a
# model that was never loaded.
if ensure_ready; then
  HALF_WAIT=$((IDLE_TIMEOUT - 1))
  info "Waiting ${HALF_WAIT}s (less than idle timeout)..."
  sleep "$HALF_WAIT"
  STATE=$(get_state)
  if [ "$STATE" = "ready" ]; then
    ok "T4a: model still ready after ${HALF_WAIT}s (less than ${IDLE_TIMEOUT}s timeout)"
  else
    fail "T4a: model unexpectedly $STATE after only ${HALF_WAIT}s"
  fi

  # Wait for full unload
  REMAINING=$((IDLE_TIMEOUT - HALF_WAIT + 3))
  info "Waiting another ${REMAINING}s for full idle unload..."
  sleep "$REMAINING"
  STATE=$(get_state)
  if [ "$STATE" = "unloaded" ]; then
    ok "T4b: model unloaded after total > ${IDLE_TIMEOUT}s idle"
  else
    fail "T4b: expected unloaded, got $STATE"
  fi
else
  fail "T4: model load failed"
fi
# ── TEST 5: Job resets idle timer ─────────────────────────────────────────────
# Needs a real audio file (TEST_AUDIO). Submits a job, waits for it to
# finish, then checks that the model is still ready shortly afterwards and
# unloaded once the idle timeout has passed.
echo ""
echo "--- Test 5: Completing a job resets the idle timer ---"
if [ -z "$AUDIO" ]; then
  skip "T5: TEST_AUDIO not set — skipping timer-reset test"
else
  ensure_unloaded
  if ! ensure_ready; then
    fail "T5: model load failed"
  else
    # Submit a job. A failing curl must reach the "submission failed" branch
    # below instead of aborting the whole script under set -e.
    SUBMIT=$(curl -sf -X POST "$BASE/jobs" \
      -F "audio=@${AUDIO};type=audio/wav" \
      -F "task=transcribe" 2>&1) || SUBMIT=""
    JOB_ID=$(echo "$SUBMIT" | python3 -c "import sys,json; print(json.load(sys.stdin)['job_id'])" 2>/dev/null || echo "")
    if [ -z "$JOB_ID" ]; then
      fail "T5: job submission failed"
    else
      # Wait for job to finish. Poll every second: the interval has to stay
      # well below the idle timeout, otherwise the model can already be
      # unloaded by the time completion is noticed.
      POLL_SECS=1
      elapsed=0
      STATUS=""
      while [ "$elapsed" -lt 300 ]; do
        sleep "$POLL_SECS"
        elapsed=$((elapsed + POLL_SECS))
        # A single failed poll is tolerated; the next iteration retries.
        STATUS=$(curl -sf "$BASE/jobs/$JOB_ID" \
          | python3 -c "import sys,json; print(json.load(sys.stdin)['status'])" 2>/dev/null) || STATUS=""
        if [ "$STATUS" = "done" ] || [ "$STATUS" = "failed" ]; then
          break
        fi
      done

      if [ "$STATUS" != "done" ] && [ "$STATUS" != "failed" ]; then
        fail "T5: job did not finish within ${elapsed}s (last status=${STATUS:-unknown})"
      else
        info "Job finished in ${elapsed}s with status=$STATUS"

        # Now wait IDLE_TIMEOUT - 2 seconds — should still be ready
        SAFE_WAIT=$((IDLE_TIMEOUT - 2))
        if [ "$SAFE_WAIT" -lt 1 ]; then
          SAFE_WAIT=1
        fi
        info "Waiting ${SAFE_WAIT}s after job completion (less than idle timeout)..."
        sleep "$SAFE_WAIT"
        STATE=$(get_state)
        if [ "$STATE" = "ready" ]; then
          ok "T5a: model still ready ${SAFE_WAIT}s after job completion"
        else
          fail "T5a: model unexpectedly $STATE after job"
        fi

        # Wait for idle timeout
        REMAINING=$((IDLE_TIMEOUT - SAFE_WAIT + 3))
        info "Waiting ${REMAINING}s more for idle unload..."
        sleep "$REMAINING"
        STATE=$(get_state)
        if [ "$STATE" = "unloaded" ]; then
          ok "T5b: model auto-unloaded after idle period post-job"
        else
          fail "T5b: expected unloaded, got $STATE"
        fi
      fi
    fi
  fi
fi
# ── Summary ────────────────────────────────────────────────────────────────────
# Print the pass/fail tallies; the script exits with status 1 when at least
# one check failed.
printf '%s\n' \
  "" \
  "==========================================" \
  " Results: ${PASS} passed, ${FAIL} failed" \
  "=========================================="
if [ "$FAIL" -eq 0 ]; then
  echo -e "${GREEN}ALL PASSED${NC}"
else
  echo -e "${RED}FAILURES: $FAIL${NC}"
  exit 1
fi