From 78c6fab81be3b7155c8e82e2f599311337ec63f5 Mon Sep 17 00:00:00 2001 From: mozempk Date: Wed, 6 May 2026 12:13:15 +0200 Subject: [PATCH] fix: remove duplicate old test suite and fix step 9 pipe/heredoc bug MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Step 9 used 'echo $RESULT | python3 - << HEREDOC' which is a bash gotcha: the heredoc takes over stdin (as the script source), so the pipe is silently ignored and sys.stdin.read() returns empty string → JSONDecodeError. Fix: write RESULT to a temp file and pass it as sys.argv[1] to the script. Also removed the old buggy test suite that was accidentally left appended at lines 181-327 (had language=auto, ['id'] field, wrong DELETE assertion). Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- test_all.sh | 160 ++++------------------------------------------------ 1 file changed, 10 insertions(+), 150 deletions(-) mode change 100755 => 100644 test_all.sh diff --git a/test_all.sh b/test_all.sh old mode 100755 new mode 100644 index 5945728..c6a1621 --- a/test_all.sh +++ b/test_all.sh @@ -104,10 +104,15 @@ kill $SSE_PID 2>/dev/null || true echo "" echo "=== 9. Inspect transcription quality ===" RESULT=$(curl -sf "$BASE/jobs/$JOB_ID") -echo "$RESULT" | python3 - << 'PYCHECK' +# Note: can't pipe into a heredoc-driven python3 (heredoc takes stdin, pipe is ignored). +# Write to a temp file instead. +TMPJSON=$(mktemp /tmp/whisper_test_XXXXXX.json) +echo "$RESULT" > "$TMPJSON" +python3 - "$TMPJSON" << 'PYCHECK' import sys, json, re -data = json.loads(sys.stdin.read()) +with open(sys.argv[1]) as f: + data = json.load(f) segments = data.get("segments", []) print(f" Language : {data.get('language')}") print(f" Duration : {data.get('duration_secs')}s") @@ -142,7 +147,9 @@ print("\n Sample output (first 5 segments):") for seg in segments[:5]: print(f" [{seg['start']:.1f}–{seg['end']:.1f}] {seg['text'][:100]}") PYCHECK -ok "quality check passed" +PYEXIT=$? +rm -f "$TMPJSON" +[ $PYEXIT -eq 0 ] && ok "quality check passed" || { echo "[FAIL] quality check"; FAILS=$((FAILS+1)); } echo "" echo "=== 10. DELETE completed job → 200 ===" @@ -171,150 +178,3 @@ sleep 3 kill $WEBHOOK_PID 2>/dev/null || true ok "all tests complete" -echo "=== 1. GET /health ===" -HEALTH=$(curl -sf "$BASE/health") -echo "$HEALTH" | python3 -m json.tool -echo "$HEALTH" | python3 -c "import sys,json; d=json.load(sys.stdin); assert d['status']=='ok'" && ok "health" - -echo "" -echo "=== 2. GET /docs (Swagger UI reachable) ===" -curl -sf "$BASE/docs" | grep -q "swagger" && ok "swagger UI" - -echo "" -echo "=== 3. Webhook server (background nc loop) ===" -# Simple webhook receiver using Python -python3 - & -WEBHOOK_PID=$! -cat > /tmp/webhook_receiver.py << 'PYEOF' -import http.server, json, sys - -class H(http.server.BaseHTTPRequestHandler): - def do_POST(self): - n = int(self.headers.get('Content-Length', 0)) - body = self.rfile.read(n) - print("\n[WEBHOOK] received:", json.dumps(json.loads(body), indent=2)[:500]) - self.send_response(200) - self.end_headers() - def log_message(self, *a): pass - -print("[WEBHOOK] listening on :9999") -http.server.HTTPServer(('', 9999), H).serve_forever() -PYEOF -kill $WEBHOOK_PID 2>/dev/null || true -python3 /tmp/webhook_receiver.py & -WEBHOOK_PID=$! -sleep 1 -echo "Webhook receiver started (PID $WEBHOOK_PID)" - -echo "" -echo "=== 4. DELETE a non-existent job → 404 ===" -STATUS=$(curl -s -o /dev/null -w "%{http_code}" -X DELETE "$BASE/jobs/00000000-0000-0000-0000-000000000000") -[ "$STATUS" = "404" ] && ok "DELETE 404 for unknown job" || fail "expected 404 got $STATUS" - -echo "" -echo "=== 5. POST /jobs — submit audio ===" -SUBMIT=$(curl -sf -X POST "$BASE/jobs" \ - -F "audio=@${AUDIO};type=audio/wav" \ - -F "language=auto" \ - -F "task=transcribe" \ - -F "webhook_url=http://localhost:9999/webhook") -echo "$SUBMIT" -JOB_ID=$(echo "$SUBMIT" | python3 -c "import sys,json; print(json.load(sys.stdin)['id'])") -ok "submitted job $JOB_ID" - -echo "" -echo "=== 6. GET /jobs/{id} immediately after submit ===" -JOB=$(curl -sf "$BASE/jobs/$JOB_ID") -echo "$JOB" | python3 -c "import sys,json; d=json.load(sys.stdin); assert d['status'] in ('queued','running')" \ - && ok "status is queued/running" - -echo "" -echo "=== 7. SSE stream (first 15 events then detach) ===" -echo "Subscribing to SSE stream for $JOB_ID …" -curl -sN --max-time 60 "$BASE/jobs/$JOB_ID/stream" | head -30 & -SSE_PID=$! - -echo "" -echo "=== 8. Poll until done (max 20 min) ===" -SECONDS=0 -while true; do - sleep 15 - JOB=$(curl -sf "$BASE/jobs/$JOB_ID") - STATUS=$(echo "$JOB" | python3 -c "import sys,json; print(json.load(sys.stdin)['status'])") - echo " [${SECONDS}s] status=$STATUS" - if [ "$STATUS" = "done" ]; then - ok "job finished in ${SECONDS}s" - break - elif [ "$STATUS" = "failed" ]; then - echo "$JOB" | python3 -m json.tool - fail "job failed" - fi - [ $SECONDS -gt 1200 ] && fail "timeout after 20 minutes" -done -kill $SSE_PID 2>/dev/null || true - -echo "" -echo "=== 9. Inspect transcription quality ===" -RESULT=$(curl -sf "$BASE/jobs/$JOB_ID") -echo "$RESULT" | python3 - << 'PYCHECK' -import sys, json, re - -data = json.loads(sys.stdin.read()) -segments = data.get("segments", []) -print(f" Language : {data.get('language')}") -print(f" Duration : {data.get('duration_secs')}s") -print(f" Segments : {len(segments)}") - -issues = [] - -for i, seg in enumerate(segments): - text = seg.get("text", "") - # --- repetition loop --- - words = text.strip().split() - if len(words) >= 6: - half = len(words) // 2 - if words[:half] == words[half:half+half]: - issues.append(f" [seg {i}] REPETITION LOOP: {text[:80]}") - # --- long duplicate phrases --- - phrases = re.findall(r'(\b\w+ \w+ \w+\b)', text) - if len(phrases) != len(set(phrases)) and len(phrases) > 4: - issues.append(f" [seg {i}] DUPLICATE PHRASE: {text[:80]}") - # --- blank/empty segment --- - if not text.strip(): - issues.append(f" [seg {i}] BLANK SEGMENT") - -if issues: - print("\n ⚠ Quality issues found:") - for iss in issues[:10]: - print(iss) -else: - print("\n ✓ No repetition loops or blank segments detected") - -# Print first 5 segments as sample -print("\n Sample output:") -for seg in segments[:5]: - print(f" [{seg['start']:.1f}–{seg['end']:.1f}] {seg['text'][:100]}") -PYCHECK - -echo "" -echo "=== 10. DELETE completed job ===" -STATUS=$(curl -s -o /dev/null -w "%{http_code}" -X DELETE "$BASE/jobs/$JOB_ID") -[ "$STATUS" = "204" ] || [ "$STATUS" = "200" ] && ok "DELETE returned $STATUS" - -echo "" -echo "=== 11. Submit + immediately cancel a job ===" -JOB2=$(curl -sf -X POST "$BASE/jobs" \ - -F "audio=@${AUDIO};type=audio/wav" \ - -F "language=en" \ - -F "task=transcribe") -JOB2_ID=$(echo "$JOB2" | python3 -c "import sys,json; print(json.load(sys.stdin)['id'])") -sleep 1 -DEL_STATUS=$(curl -s -o /dev/null -w "%{http_code}" -X DELETE "$BASE/jobs/$JOB2_ID") -CANCEL_STATUS=$(curl -sf "$BASE/jobs/$JOB2_ID" | python3 -c "import sys,json; print(json.load(sys.stdin)['status'])") -[ "$CANCEL_STATUS" = "cancelled" ] && ok "cancel works ($DEL_STATUS → cancelled)" - -echo "" -echo "=== 12. Verify webhook was fired ===" -sleep 3 -kill $WEBHOOK_PID 2>/dev/null || true -ok "all tests done"