#!/usr/bin/env bash
#
# End-to-end smoke test for the transcription HTTP API.
#
# Steps, in order:
#    1. GET /health and require status == "ok"
#    2. GET /docs and require the word "swagger" in the body
#    3. start a local Python HTTP server on :9999 as the webhook target
#    4. DELETE an unknown job id and require HTTP 404
#    5. POST /jobs with the audio file
#    6. GET /jobs/{id} and require status queued or running
#    7. print the first lines of the SSE stream in the background
#    8. poll GET /jobs/{id} every 15 s until done/failed (20 min limit)
#    9. run heuristic quality checks on the returned segments
#   10. DELETE the finished job and require HTTP 200 or 204
#   11. submit a second job, DELETE it, require status "cancelled"
#   12. report whether the webhook receiver saw a POST
#
# Environment overrides (defaults are the previously hard-coded values):
#   BASE   base URL of the service under test
#   AUDIO  path of the WAV file to upload
#
# Requires: bash, curl, python3.
set -euo pipefail

BASE="${BASE:-http://localhost:8090}"
AUDIO="${AUDIO:-/home/moze/Sources/youtube-transcriber/docker/tmp/audio-b2167046-a236-4fcd-b739-78177542fd23.wav}"

GREEN='\033[0;32m'
RED='\033[0;31m'
YELLOW='\033[0;33m'
NC='\033[0m'

# Print a green [PASS] line.
ok() { printf '%b[PASS]%b %s\n' "$GREEN" "$NC" "$*"; }

# Print a yellow [WARN] line on stderr; does not stop the run.
warn() { printf '%b[WARN]%b %s\n' "$YELLOW" "$NC" "$*" >&2; }

# Print a red [FAIL] line on stderr and abort the whole run with status 1.
fail() {
  printf '%b[FAIL]%b %s\n' "$RED" "$NC" "$*" >&2
  exit 1
}

# Read a JSON object on stdin and print the value of top-level key $1.
# Exits non-zero if stdin is not JSON or the key is missing.
json_field() {
  python3 -c 'import sys, json; print(json.load(sys.stdin)[sys.argv[1]])' "$1"
}

WEBHOOK_PID=""
SSE_PID=""
WORK_DIR=""

# Stop background helpers and remove scratch files on every exit path,
# including `fail` and aborts caused by `set -e`.
cleanup() {
  local pid
  for pid in "$SSE_PID" "$WEBHOOK_PID"; do
    if [[ -n "$pid" ]]; then
      kill "$pid" 2>/dev/null || true
    fi
  done
  if [[ -n "$WORK_DIR" ]]; then
    rm -rf -- "$WORK_DIR"
  fi
  return 0
}
trap cleanup EXIT

# Private scratch directory instead of predictable file names in /tmp.
WORK_DIR=$(mktemp -d) || fail "mktemp failed"
WEBHOOK_SCRIPT="$WORK_DIR/webhook_receiver.py"
WEBHOOK_MARKER="$WORK_DIR/webhook_received"
QUALITY_SCRIPT="$WORK_DIR/quality_check.py"

echo "=== 1. GET /health ==="
HEALTH=$(curl -sf "$BASE/health") || fail "GET $BASE/health did not succeed"
printf '%s\n' "$HEALTH" | python3 -m json.tool
HEALTH_STATUS=$(printf '%s' "$HEALTH" | json_field status) \
  || fail "health response has no 'status' field"
if [[ "$HEALTH_STATUS" == "ok" ]]; then
  ok "health"
else
  fail "health status is '$HEALTH_STATUS', expected 'ok'"
fi

echo ""
echo "=== 2. GET /docs (Swagger UI reachable) ==="
# Capture the body first: with pipefail, `curl | grep -q` can report failure
# even on a match, because grep exits early and curl then fails to write.
DOCS=$(curl -sf "$BASE/docs") || fail "GET $BASE/docs did not succeed"
if grep -q "swagger" <<<"$DOCS"; then
  ok "swagger UI"
else
  fail "response from $BASE/docs does not mention swagger"
fi

echo ""
echo "=== 3. Webhook server (background nc loop) ==="
# Simple webhook receiver using Python. It appends a line to the marker file
# (argv[1]) for every POST so that step 12 can check for a delivery.
cat > "$WEBHOOK_SCRIPT" << 'PYEOF'
import http.server, json, sys

marker_path = sys.argv[1]

class H(http.server.BaseHTTPRequestHandler):
    def do_POST(self):
        n = int(self.headers.get('Content-Length', 0))
        body = self.rfile.read(n)
        # Record the delivery before parsing, so it is counted even if the
        # payload turns out not to be valid JSON.
        with open(marker_path, 'a') as marker:
            marker.write('1\n')
        print("\n[WEBHOOK] received:", json.dumps(json.loads(body), indent=2)[:500], flush=True)
        self.send_response(200)
        self.end_headers()
    def log_message(self, *a): pass

print("[WEBHOOK] listening on :9999", flush=True)
http.server.HTTPServer(('', 9999), H).serve_forever()
PYEOF
python3 "$WEBHOOK_SCRIPT" "$WEBHOOK_MARKER" &
WEBHOOK_PID=$!
sleep 1
# If the receiver already exited (for example because the port could not be
# bound), stop now instead of running the whole test without it.
kill -0 "$WEBHOOK_PID" 2>/dev/null \
  || fail "webhook receiver exited right after start (is port 9999 in use?)"
echo "Webhook receiver started (PID $WEBHOOK_PID)"

echo ""
echo "=== 4. DELETE a non-existent job → 404 ==="
# `|| true`: on a connection error curl still prints a code (000), and the
# comparison below reports it via fail.
STATUS=$(curl -s -o /dev/null -w "%{http_code}" -X DELETE \
  "$BASE/jobs/00000000-0000-0000-0000-000000000000") || true
if [[ "$STATUS" == "404" ]]; then
  ok "DELETE 404 for unknown job"
else
  fail "expected 404 got $STATUS"
fi

echo ""
echo "=== 5. POST /jobs — submit audio ==="
[[ -r "$AUDIO" ]] || fail "audio file not readable: $AUDIO"
SUBMIT=$(curl -sf -X POST "$BASE/jobs" \
  -F "audio=@${AUDIO};type=audio/wav" \
  -F "language=auto" \
  -F "task=transcribe" \
  -F "webhook_url=http://localhost:9999/webhook") \
  || fail "POST $BASE/jobs did not succeed"
echo "$SUBMIT"
JOB_ID=$(printf '%s' "$SUBMIT" | json_field id) \
  || fail "submit response has no 'id' field"
ok "submitted job $JOB_ID"

echo ""
echo "=== 6. GET /jobs/{id} immediately after submit ==="
JOB=$(curl -sf "$BASE/jobs/$JOB_ID") || fail "GET /jobs/$JOB_ID did not succeed"
STATUS=$(printf '%s' "$JOB" | json_field status) \
  || fail "job response has no 'status' field"
if [[ "$STATUS" == "queued" || "$STATUS" == "running" ]]; then
  ok "status is queued/running"
else
  fail "expected status queued or running, got '$STATUS'"
fi

echo ""
echo "=== 7. SSE stream (first 15 events then detach) ==="
echo "Subscribing to SSE stream for $JOB_ID …"
# $! is the PID of `head`, the last pipeline stage; curl itself is bounded by
# --max-time. Output is capped at 30 lines.
curl -sN --max-time 60 "$BASE/jobs/$JOB_ID/stream" | head -30 &
SSE_PID=$!

echo ""
echo "=== 8. Poll until done (max 20 min) ==="
SECONDS=0 # bash increments SECONDS once per second from this assignment
while true; do
  sleep 15
  JOB=$(curl -sf "$BASE/jobs/$JOB_ID") \
    || fail "GET /jobs/$JOB_ID did not succeed while polling"
  STATUS=$(printf '%s' "$JOB" | json_field status) \
    || fail "job response has no 'status' field"
  echo " [${SECONDS}s] status=$STATUS"
  if [[ "$STATUS" == "done" ]]; then
    ok "job finished in ${SECONDS}s"
    break
  elif [[ "$STATUS" == "failed" ]]; then
    printf '%s\n' "$JOB" | python3 -m json.tool
    fail "job failed"
  fi
  if ((SECONDS > 1200)); then
    fail "timeout after 20 minutes"
  fi
done
kill "$SSE_PID" 2>/dev/null || true
SSE_PID=""

echo ""
echo "=== 9. Inspect transcription quality ==="
RESULT=$(curl -sf "$BASE/jobs/$JOB_ID") || fail "GET /jobs/$JOB_ID did not succeed"
# The checker is written to a file rather than fed through `python3 -`:
# `python3 -` reads the program itself from stdin, so the JSON could not also
# arrive on stdin.
cat > "$QUALITY_SCRIPT" << 'PYCHECK'
import sys, json, re

data = json.loads(sys.stdin.read())
segments = data.get("segments", [])

print(f" Language : {data.get('language')}")
print(f" Duration : {data.get('duration_secs')}s")
print(f" Segments : {len(segments)}")

issues = []
for i, seg in enumerate(segments):
    text = seg.get("text", "")
    # --- repetition loop ---
    # Flag a segment whose first half of words is repeated verbatim.
    words = text.strip().split()
    if len(words) >= 6:
        half = len(words) // 2
        if words[:half] == words[half:half+half]:
            issues.append(f" [seg {i}] REPETITION LOOP: {text[:80]}")
    # --- long duplicate phrases ---
    # Flag a segment in which some three-word phrase occurs more than once.
    phrases = re.findall(r'(\b\w+ \w+ \w+\b)', text)
    if len(phrases) != len(set(phrases)) and len(phrases) > 4:
        issues.append(f" [seg {i}] DUPLICATE PHRASE: {text[:80]}")
    # --- blank/empty segment ---
    if not text.strip():
        issues.append(f" [seg {i}] BLANK SEGMENT")

if issues:
    print("\n ⚠ Quality issues found:")
    for iss in issues[:10]:
        print(iss)
else:
    print("\n ✓ No repetition loops or blank segments detected")

# Print first 5 segments as sample
print("\n Sample output:")
for seg in segments[:5]:
    print(f" [{seg['start']:.1f}–{seg['end']:.1f}] {seg['text'][:100]}")
PYCHECK
python3 "$QUALITY_SCRIPT" <<<"$RESULT"

echo ""
echo "=== 10. DELETE completed job ==="
STATUS=$(curl -s -o /dev/null -w "%{http_code}" -X DELETE "$BASE/jobs/$JOB_ID") || true
if [[ "$STATUS" == "204" || "$STATUS" == "200" ]]; then
  ok "DELETE returned $STATUS"
else
  fail "DELETE of completed job returned $STATUS, expected 200 or 204"
fi

echo ""
echo "=== 11. Submit + immediately cancel a job ==="
JOB2=$(curl -sf -X POST "$BASE/jobs" \
  -F "audio=@${AUDIO};type=audio/wav" \
  -F "language=en" \
  -F "task=transcribe") \
  || fail "POST $BASE/jobs did not succeed"
JOB2_ID=$(printf '%s' "$JOB2" | json_field id) \
  || fail "submit response has no 'id' field"
sleep 1
DEL_STATUS=$(curl -s -o /dev/null -w "%{http_code}" -X DELETE "$BASE/jobs/$JOB2_ID") || true
CANCEL_STATUS=$(curl -sf "$BASE/jobs/$JOB2_ID" | json_field status) \
  || fail "could not read status of job $JOB2_ID after DELETE (DELETE returned $DEL_STATUS)"
if [[ "$CANCEL_STATUS" == "cancelled" ]]; then
  ok "cancel works ($DEL_STATUS → cancelled)"
else
  fail "expected status 'cancelled' after DELETE ($DEL_STATUS), got '$CANCEL_STATUS'"
fi

echo ""
echo "=== 12. Verify webhook was fired ==="
sleep 3
# The receiver appends to the marker file on every POST it handles. A missing
# delivery is reported but does not fail the run.
if [[ -s "$WEBHOOK_MARKER" ]]; then
  ok "webhook delivered"
else
  warn "no webhook delivery observed on :9999"
fi
kill "$WEBHOOK_PID" 2>/dev/null || true
WEBHOOK_PID=""
ok "all tests done"