Files
whisper-rtx2080/test_all.sh
mozempk b191fbe200
All checks were successful
Build & Push Docker Image / build-and-push (push) Successful in 8m41s
feat: dynamic model loading/unloading with GPU polling
- Model starts unloaded (lazy); loads on first job or POST /model/load
- Auto-unloads after IDLE_TIMEOUT_SECS (default 300) of inactivity
- POST /model/unload for immediate manual release
- GPU-busy detection: on VRAM OOM, enters WaitingForGpu and retries
  every GPU_POLL_INTERVAL_SECS (default 30) indefinitely
- POST /jobs when unloaded → 503 + Retry-After header, triggers load
- AppError::OutOfMemory and AppError::ModelNotReady variants
- WorkerCmd channel (SyncSender<WorkerCmd>) replaces bare tx_req channel
- Idle timer via recv_timeout(1s) tick inside OS thread (no extra thread)
- Model lifecycle events broadcast via tokio broadcast channel (SSE + webhooks)
- webhook_registry: all clients that ever submitted a webhook_url receive
  model_ready and model_unloaded webhooks
- GPU warmup retained on every (re)load

New routes:
  GET  /model/status  — current state + VRAM stats
  POST /model/load    — trigger load (idempotent)
  POST /model/unload  — immediate unload
  GET  /model/events  — SSE stream of model lifecycle events

New env vars:
  IDLE_TIMEOUT_SECS       (default 300)
  GPU_POLL_INTERVAL_SECS  (default 30)

Tests:
  tests/test_model_lifecycle.sh — 18 integration tests (full state machine,
    SSE events, webhooks, concurrency, unload-during-load)
  tests/test_idle_timeout.sh    — 5 tests with short IDLE_TIMEOUT_SECS=5
  test_all.sh updated: loads model before job submission, asserts
    model_state in /health, adds POST /model/unload at end

Docs:
  docs/USAGE.md: model lifecycle section, new env vars, 503 retry pattern,
    updated /health response shape

Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
2026-05-08 17:57:20 +02:00

230 lines
7.9 KiB
Bash
Executable File
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
#!/usr/bin/env bash
set -euo pipefail

# ── Config — override via env vars ───────────────────────────────────────────
# Base URL of the API under test; WHISPER_BASE_URL replaces the default.
BASE="${WHISPER_BASE_URL:-http://localhost:8080}"
# Audio file uploaded by the job steps; TEST_AUDIO replaces the default.
AUDIO="${TEST_AUDIO:-/home/moze/Sources/youtube-transcriber/docker/tmp/audio-b2167046-a236-4fcd-b739-78177542fd23.wav}"

# ANSI colour sequences, stored with literal backslashes and expanded only
# when a message is printed.
GREEN='\033[0;32m'
RED='\033[0;31m'
NC='\033[0m'

# Number of failed checks so far; the final summary exits non-zero if > 0.
FAILS=0
# Print a green [PASS] line followed by all arguments joined by spaces.
ok() {
  local msg="$*"
  printf '%b\n' "${GREEN}[PASS]${NC} ${msg}"
}

# Print a red [FAIL] line followed by all arguments, then bump the global
# FAILS counter.
fail() {
  local msg="$*"
  printf '%b\n' "${RED}[FAIL]${NC} ${msg}"
  FAILS=$((FAILS + 1))
}
# Banner: show which server and which audio file this run targets.
printf '%s\n' \
  "=== Whisper API test suite ===" \
  " BASE : $BASE" \
  " AUDIO : $AUDIO" \
  ""
# ── Step 1: /health must return JSON with status == "ok" and a model_state key ──
echo "=== 1. GET /health ==="
# `|| HEALTH=""` stops `set -e` from killing the script silently when the
# server is unreachable; the empty body then fails the assertions below and
# is recorded through fail().
HEALTH=$(curl -sf "$BASE/health") || HEALTH=""
# Pretty-printing is for the log only, so a non-JSON body must not abort here.
python3 -m json.tool <<< "$HEALTH" || true
# The response is passed on stdin only. It is never spliced into the Python
# source, so quotes, backslashes or newlines in the body cannot break parsing.
if python3 -c "
import sys, json
d = json.load(sys.stdin)
assert d['status'] == 'ok', f'status={d[\"status\"]}'
assert 'model_state' in d, 'model_state field missing from health response'
" <<< "$HEALTH"; then
  ok "health ok + model_state present"
else
  fail "health check"
fi
echo ""
# ── Step 2: the docs page must be served and mention "swagger" ──
echo "=== 2. GET /docs (Swagger UI reachable) ==="
# Fetch the whole body before searching it. Piping curl straight into
# `grep -q` under `pipefail` can fail spuriously: grep exits on the first
# match, curl then hits a write error, and the pipeline reports failure.
DOCS_BODY=$(curl -sf "$BASE/docs") || DOCS_BODY=""
if grep -qi "swagger" <<< "$DOCS_BODY"; then
  ok "swagger UI reachable"
else
  fail "swagger UI"
fi
echo ""
# ── Step 3: start a throw-away HTTP server on :9999 that logs webhook POSTs ──
echo "=== 3. Webhook receiver (background Python HTTP server) ==="
# Use an unpredictable temp file instead of a fixed /tmp name.
WEBHOOK_SCRIPT=$(mktemp /tmp/webhook_receiver_XXXXXX)
WEBHOOK_PID=""

# Stop the receiver and remove its script on every exit path, including
# aborts triggered by `set -e`, so no server is left bound to the port.
cleanup_webhook() {
  if [[ -n "$WEBHOOK_PID" ]]; then
    kill "$WEBHOOK_PID" 2>/dev/null || true
  fi
  rm -f -- "$WEBHOOK_SCRIPT"
  return 0
}
trap cleanup_webhook EXIT

# Quoted delimiter: the Python source is written verbatim, with no shell
# expansion inside it.
cat > "$WEBHOOK_SCRIPT" << 'PYEOF'
import http.server, json, sys, signal

class H(http.server.BaseHTTPRequestHandler):
    def do_POST(self):
        # Read exactly Content-Length bytes and log a one-line summary.
        n = int(self.headers.get('Content-Length', 0))
        body = self.rfile.read(n)
        data = json.loads(body)
        print(f"\n[WEBHOOK] status={data.get('status')} segments={len(data.get('segments', []))}", flush=True)
        self.send_response(200)
        self.end_headers()

    # Silence the default per-request access log.
    def log_message(self, *a): pass

# Exit cleanly when the test script sends SIGTERM.
signal.signal(signal.SIGTERM, lambda *_: sys.exit(0))
print("[WEBHOOK] listening on :9999", flush=True)
http.server.HTTPServer(('', 9999), H).serve_forever()
PYEOF
python3 "$WEBHOOK_SCRIPT" &
WEBHOOK_PID=$!
sleep 1
# Confirm the server survived start-up before claiming it is running.
if kill -0 "$WEBHOOK_PID" 2>/dev/null; then
  echo "Webhook receiver started (PID $WEBHOOK_PID)"
else
  fail "webhook receiver failed to start (is port 9999 already in use?)"
fi
echo ""
# ── Step 4: /model/status must answer with JSON that carries a `state` key ──
printf '%s\n' "=== 4. GET /model/status — expect unloaded on fresh start ==="
MODEL_STATUS=$(curl -sf "$BASE/model/status")
# Pretty-print the raw response for the log.
python3 -m json.tool <<< "$MODEL_STATUS"
# Only the presence of `state` is asserted; its value is printed, not checked.
if python3 -c "
import json, sys
payload = json.load(sys.stdin)
assert 'state' in payload, 'state field missing from /model/status'
print(f' model state: {payload[\"state\"]}')
" <<< "$MODEL_STATUS"; then
  ok "/model/status has state field"
else
  fail "/model/status schema"
fi
printf '\n'
# ── Step 5: ask the server to load the model ──
echo "=== 5. POST /model/load — trigger model load ==="
# Report PASS only when curl actually succeeded (`-f` turns HTTP errors into
# a non-zero exit). Testing the assignment inside `if` also keeps `set -e`
# from aborting silently on a rejected request.
if LOAD_RESP=$(curl -sf -X POST "$BASE/model/load"); then
  echo "$LOAD_RESP"
  ok "POST /model/load accepted"
else
  fail "POST /model/load rejected or server unreachable"
fi
echo ""
# ── Step 6: poll /model/status every 5 s until state == "ready" ──
echo "=== 6. Poll /model/status until ready (max 3 min) ==="
LOAD_ELAPSED=0
while true; do
  sleep 5
  LOAD_ELAPSED=$((LOAD_ELAPSED + 5))
  # One failed request or unparsable body is treated as "not ready yet"
  # rather than aborting the suite through `set -e`.
  MS=$(curl -sf "$BASE/model/status") || MS=""
  STATE=$(python3 -c "import sys,json; print(json.load(sys.stdin)['state'])" \
    <<< "$MS" 2>/dev/null) || STATE="unknown"
  echo " [${LOAD_ELAPSED}s] model_state=${STATE}"
  if [[ "$STATE" == "ready" ]]; then
    ok "model loaded and ready in ${LOAD_ELAPSED}s"
    break
  fi
  # `>=` makes the loop give up at exactly 180 s, matching the stated limit.
  if (( LOAD_ELAPSED >= 180 )); then
    fail "model failed to load within 3 minutes"
    break
  fi
done
echo ""
# ── Step 7: deleting the all-zero job id must yield HTTP 404 ──
printf '%s\n' "=== 7. DELETE a non-existent job → 404 ==="
# Body is discarded; only the numeric HTTP status is captured.
STATUS=$(curl -s -o /dev/null -w "%{http_code}" -X DELETE \
  "$BASE/jobs/00000000-0000-0000-0000-000000000000")
if [[ "$STATUS" == "404" ]]; then
  ok "DELETE unknown job → 404"
else
  fail "expected 404, got $STATUS"
fi
printf '\n'
# ── Step 8: upload the audio file as a transcription job and capture JOB_ID ──
echo "=== 8. POST /jobs — submit audio ==="
# Every later step needs JOB_ID, so failures here stop the run. Each one is
# reported through fail() first: with `curl -s` the script would otherwise
# die under `set -e` without printing any reason.
if [[ ! -r "$AUDIO" ]]; then
  fail "audio file not readable: $AUDIO (set TEST_AUDIO to a WAV file)"
  exit 1
fi
if ! SUBMIT=$(curl -sf -X POST "$BASE/jobs" \
  -F "audio=@${AUDIO};type=audio/wav" \
  -F "task=transcribe" \
  -F "webhook_url=http://localhost:9999/webhook"); then
  fail "POST /jobs rejected or server unreachable"
  exit 1
fi
echo "$SUBMIT"
if ! JOB_ID=$(python3 -c "import sys,json; print(json.load(sys.stdin)['job_id'])" <<< "$SUBMIT"); then
  fail "POST /jobs response has no job_id"
  exit 1
fi
ok "submitted job $JOB_ID"
echo ""
# ── Step 9: right after submission the job must be queued or running ──
printf '%s\n' "=== 9. GET /jobs/{id} immediately after submit ==="
JOB=$(curl -sf "$BASE/jobs/$JOB_ID")
if python3 -c "
import json, sys
job = json.load(sys.stdin)
assert job['status'] in ('queued', 'running'), f'unexpected status: {job[\"status\"]}'
" <<< "$JOB"; then
  ok "status is queued/running"
else
  fail "initial status check"
fi
printf '\n'
# ── Step 10: tail the job's SSE stream in the background while polling ──
printf '%s\n' "=== 10. SSE stream (observe first 30 events then detach) ==="
printf '%s\n' "Subscribing to SSE stream for $JOB_ID"
# Output is capped at 60 lines and the request at 90 seconds.
# NOTE(review): after a backgrounded pipeline `$!` is presumably the PID of
# the last command (head), so killing SSE_PID may leave curl to end on its
# own via --max-time — confirm.
curl -sN --max-time 90 "$BASE/jobs/$JOB_ID/stream" \
  | head -60 &
SSE_PID=$!
printf '\n'
# ── Step 11: poll the job every 15 s until it is done, failed, or times out ──
echo "=== 11. Poll until done (max 20 min) ==="
ELAPSED=0
while true; do
  sleep 15
  ELAPSED=$((ELAPSED + 15))
  # A failed request or unparsable body counts as "still unknown" for this
  # tick instead of aborting the whole suite through `set -e`.
  JOB=$(curl -sf "$BASE/jobs/$JOB_ID") || JOB=""
  STATUS="unknown"
  PROGRESS=0
  # Extract both fields with one interpreter start per poll.
  if PARSED=$(python3 -c "
import sys, json
d = json.load(sys.stdin)
print(d['status'], d.get('progress', 0))
" <<< "$JOB" 2>/dev/null); then
    STATUS=${PARSED%% *}
    PROGRESS=${PARSED#* }
  fi
  echo " [${ELAPSED}s] status=$STATUS progress=${PROGRESS}%"
  if [[ "$STATUS" == "done" ]]; then
    ok "job finished in ${ELAPSED}s"
    break
  elif [[ "$STATUS" == "failed" ]]; then
    # Dump the full job record to help diagnose the failure.
    python3 -m json.tool <<< "$JOB" || true
    fail "job failed"
    break
  fi
  # `>=` makes the loop give up at exactly 1200 s, matching the stated limit.
  if (( ELAPSED >= 1200 )); then
    fail "timeout after 20 minutes"
    break
  fi
done
# Best effort: the SSE reader may already have exited on its own.
kill "$SSE_PID" 2>/dev/null || true
echo ""
# ── Step 12: fetch the finished job and sanity-check its segments ──
echo "=== 12. Inspect transcription quality ==="
# An unreachable server leaves RESULT empty; the checker then fails on the
# invalid JSON and the step is recorded as a FAIL instead of aborting.
RESULT=$(curl -sf "$BASE/jobs/$JOB_ID") || RESULT=""
# The JSON goes through a temp file because stdin is taken by the heredoc
# that carries the Python program.
TMPJSON=$(mktemp /tmp/whisper_test_XXXXXX.json)
printf '%s\n' "$RESULT" > "$TMPJSON"
# Capture the exit status with `||`: under `set -e` a bare failing command
# would abort before a following `PYEXIT=$?` ran, skipping both the cleanup
# and the fail() call.
PYEXIT=0
python3 - "$TMPJSON" << 'PYCHECK' || PYEXIT=$?
import sys, json, re

with open(sys.argv[1]) as f:
    data = json.load(f)

segments = data.get("segments", [])
print(f" Language : {data.get('language')}")
print(f" Duration : {data.get('duration_secs')}s")
print(f" Segments : {len(segments)}")

# No segments at all is the only condition that fails this step.
if not segments:
    print(" ✗ ZERO SEGMENTS — transcription likely failed silently")
    sys.exit(1)

issues = []
for i, seg in enumerate(segments):
    text = seg.get("text", "")
    words = text.strip().split()
    # First half of the words repeated verbatim as the second half.
    if len(words) >= 6:
        half = len(words) // 2
        if words[:half] == words[half:half+half]:
            issues.append(f" [seg {i}] REPETITION LOOP: {text[:80]}")
    # Any three-word phrase occurring more than once within the segment.
    phrases = re.findall(r'(\b\w+ \w+ \w+\b)', text)
    if len(phrases) != len(set(phrases)) and len(phrases) > 4:
        issues.append(f" [seg {i}] DUPLICATE PHRASE: {text[:80]}")
    if not text.strip():
        issues.append(f" [seg {i}] BLANK SEGMENT")

# Quality issues are reported as warnings; they do not change the exit code.
if issues:
    print("\n ⚠ Quality issues found:")
    for iss in issues[:10]:
        print(iss)
else:
    print("\n ✓ No repetition loops or blank segments detected")

print("\n Sample output (first 5 segments):")
for seg in segments[:5]:
    # Separate start and end so the two numbers do not run together.
    print(f" [{seg['start']:.1f}-{seg['end']:.1f}] {seg['text'][:100]}")
PYCHECK
rm -f -- "$TMPJSON"
if [[ "$PYEXIT" -eq 0 ]]; then
  ok "quality check passed"
else
  fail "quality check"
fi
echo ""
# ── Step 13: deleting the finished job; 409 passes, anything else is only logged ──
printf '%s\n' "=== 13. DELETE completed job → 409 Conflict ==="
DEL_STATUS=$(curl -s -o /dev/null -w "%{http_code}" -X DELETE \
  "$BASE/jobs/$JOB_ID")
if [[ "$DEL_STATUS" == "409" ]]; then
  ok "DELETE completed job → 409 Conflict (expected)"
else
  # Informational only: a different status is not counted as a failure.
  echo " [INFO] DELETE returned $DEL_STATUS"
fi
printf '\n'
# ── Step 14: submit a second job, delete it after one second, read its status ──
printf '%s\n' "=== 14. Submit + cancel a queued job ==="
JOB2=$(curl -sf -X POST "$BASE/jobs" \
  -F "audio=@${AUDIO};type=audio/wav" \
  -F "language=en" \
  -F "task=transcribe")
JOB2_ID=$(python3 -c "import sys,json; print(json.load(sys.stdin)['job_id'])" <<< "$JOB2")
sleep 1
# The DELETE response is discarded; the follow-up GET shows what happened.
curl -s -X DELETE "$BASE/jobs/$JOB2_ID" > /dev/null
CANCEL_STATUS=$(curl -sf "$BASE/jobs/$JOB2_ID" \
  | python3 -c "import sys,json; print(json.load(sys.stdin)['status'])")
if [[ "$CANCEL_STATUS" == "cancelled" ]]; then
  ok "cancel works → status=cancelled"
else
  # Informational only: any other status is logged, not counted as a failure.
  echo " [INFO] cancel status: $CANCEL_STATUS (may be running — worker ignores cancel mid-chunk)"
fi
printf '\n'
# ── Step 15: request an unload, wait two seconds, then read the state back ──
printf '%s\n' "=== 15. POST /model/unload ==="
UNLOAD_RESP=$(curl -sf -X POST "$BASE/model/unload")
printf '%s\n' "$UNLOAD_RESP"
sleep 2
UNLOAD_STATE=$(curl -sf "$BASE/model/status" \
  | python3 -c "import sys,json; print(json.load(sys.stdin)['state'])")
if [[ "$UNLOAD_STATE" == "unloaded" ]]; then
  ok "model unloaded → state=unloaded"
else
  # Informational only: any other state is logged, not counted as a failure.
  echo " [INFO] state after unload: $UNLOAD_STATE"
fi
printf '\n'
# ── Step 16: give pending webhooks three seconds, then stop the receiver ──
printf '%s\n' "=== 16. Verify webhook fired ==="
sleep 3
# NOTE(review): nothing here asserts that a webhook actually arrived; the
# PASS below only reports that the receiver was told to stop. Any received
# webhook shows up as a "[WEBHOOK]" line earlier in the output.
kill "$WEBHOOK_PID" 2>/dev/null || true
ok "webhook server stopped"
printf '\n'
# Final verdict: green banner when no check failed, otherwise a red banner
# with the failure count and exit status 1.
case "$FAILS" in
  0)
    printf '%b\n' "${GREEN}=== ALL TESTS PASSED ===${NC}"
    ;;
  *)
    printf '%b\n' "${RED}=== $FAILS TEST(S) FAILED ===${NC}"
    exit 1
    ;;
esac