feat: GPU-accelerated Whisper API for RTX 2080 (sm_75)
All checks were successful
Build & Push Docker Image / build-and-push (push) Successful in 11m13s

- Pure Rust: Axum 0.7 + whisper-rs 0.13 (CUDA FFI)
- Async job queue with SSE progress streaming
- Webhook delivery with 5x exponential backoff
- Disk-persisted job state (survives restarts)
- Anti-hallucination params: no_speech_thold, entropy_thold, suppress_blank
- CUDA sm_75 flags: GGML_CUDA_FORCE_MMQ, GGML_CUDA_GRAPHS, GGML_CUDA_FA_ALL_QUANTS
- Configurable via env: CUDA_DEVICE, WHISPER_MODEL_PATH, PORT, DATA_DIR
- Gitea Actions CI: build + push to git.sal.giize.com registry
- Multi-stage Dockerfile with customizable CUDA_VERSION ARG

Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
This commit is contained in:
mozempk
2026-05-05 22:47:24 +02:00
commit 16cb6ca661
18 changed files with 1898 additions and 0 deletions

155
test_all.sh Executable file
View File

@@ -0,0 +1,155 @@
#!/usr/bin/env bash
#
# End-to-end smoke test for the Whisper transcription HTTP API.
#
# Environment overrides (defaults keep the original behaviour):
#   BASE   base URL of the running API server
#   AUDIO  path to the WAV file uploaded in the submit/cancel steps
set -euo pipefail

BASE="${BASE:-http://localhost:8090}"
AUDIO="${AUDIO:-/home/moze/Sources/youtube-transcriber/docker/tmp/audio-b2167046-a236-4fcd-b739-78177542fd23.wav}"

# ANSI colour escapes; expanded by printf's %b in ok/fail below.
GREEN='\033[0;32m'
RED='\033[0;31m'
NC='\033[0m'

# ok MESSAGE... — print a green [PASS] line.
# The message goes through %s so backslashes in it are printed literally.
ok() {
  printf '%b[PASS]%b %s\n' "$GREEN" "$NC" "$*"
}

# fail MESSAGE... — print a red [FAIL] line and abort the script with status 1.
fail() {
  printf '%b[FAIL]%b %s\n' "$RED" "$NC" "$*"
  exit 1
}
# Step 1: the health endpoint must answer with JSON whose "status" is "ok".
echo "=== 1. GET /health ==="
HEALTH=$(curl -sf "$BASE/health") || fail "GET /health failed (is the server running at $BASE?)"
printf '%s\n' "$HEALTH" | python3 -m json.tool || fail "/health did not return valid JSON"
# Explicit if/else: a failing command on the left of '&&' is exempt from
# 'set -e', so the original 'assert ... && ok' silently passed on failure.
if printf '%s\n' "$HEALTH" \
  | python3 -c "import sys,json; d=json.load(sys.stdin); assert d['status']=='ok'"; then
  ok "health"
else
  fail "health status is not 'ok'"
fi
echo ""
# Step 2: the docs page must be reachable and mention "swagger".
echo "=== 2. GET /docs (Swagger UI reachable) ==="
# Capture the page first: with 'pipefail', 'curl | grep -q' can fail
# spuriously when grep exits on the first match and curl gets SIGPIPE.
DOCS=$(curl -sf "$BASE/docs") || fail "GET /docs failed"
if grep -q "swagger" <<<"$DOCS"; then
  ok "swagger UI"
else
  fail "/docs response does not mention swagger"
fi
echo ""
# Step 3: start a throw-away HTTP server on :9999 that prints every POST body
# it receives, so webhook deliveries show up in this script's output.
echo "=== 3. Webhook server (background Python receiver) ==="
WEBHOOK_PID=""
# mktemp instead of a fixed /tmp name: no clobbering, no symlink games.
WEBHOOK_SCRIPT=$(mktemp "${TMPDIR:-/tmp}/webhook_receiver.XXXXXX") || fail "mktemp failed"

# Runs on every exit path (including 'fail'), so the receiver never stays
# behind holding port 9999 and the temp script is always removed.
cleanup_webhook() {
  if [[ -n "$WEBHOOK_PID" ]]; then
    kill "$WEBHOOK_PID" 2>/dev/null || true
  fi
  rm -f -- "$WEBHOOK_SCRIPT"
}
trap cleanup_webhook EXIT

cat > "$WEBHOOK_SCRIPT" << 'PYEOF'
import http.server, json, sys


class H(http.server.BaseHTTPRequestHandler):
    def do_POST(self):
        n = int(self.headers.get('Content-Length', 0))
        body = self.rfile.read(n)
        print("\n[WEBHOOK] received:", json.dumps(json.loads(body), indent=2)[:500])
        self.send_response(200)
        self.end_headers()

    def log_message(self, *a):
        pass


print("[WEBHOOK] listening on :9999")
http.server.HTTPServer(('', 9999), H).serve_forever()
PYEOF

# -u: unbuffered stdout, so received webhooks are visible even when this
# script's output is piped and the receiver is later killed.
python3 -u "$WEBHOOK_SCRIPT" &
WEBHOOK_PID=$!
sleep 1
# A bind failure (e.g. port already in use) makes the receiver exit at once.
kill -0 "$WEBHOOK_PID" 2>/dev/null || fail "webhook receiver failed to start (is port 9999 in use?)"
echo "Webhook receiver started (PID $WEBHOOK_PID)"
echo ""
# Step 4: deleting a job id that does not exist must yield HTTP 404.
echo "=== 4. DELETE a non-existent job → 404 ==="
# '|| true': on a connection error curl exits non-zero but still prints 000;
# let the comparison below report it instead of 'set -e' aborting silently.
STATUS=$(curl -s -o /dev/null -w "%{http_code}" -X DELETE \
  "$BASE/jobs/00000000-0000-0000-0000-000000000000") || true
if [[ "$STATUS" == "404" ]]; then
  ok "DELETE 404 for unknown job"
else
  fail "expected 404 got $STATUS"
fi
echo ""
# Step 5: upload the audio fixture as a new transcription job and remember
# the returned job id in JOB_ID for the following steps.
echo "=== 5. POST /jobs — submit audio ==="
[[ -f "$AUDIO" ]] || fail "audio fixture not found: $AUDIO"
SUBMIT=$(curl -sf -X POST "$BASE/jobs" \
  -F "audio=@${AUDIO};type=audio/wav" \
  -F "language=auto" \
  -F "task=transcribe" \
  -F "webhook_url=http://localhost:9999/webhook") || fail "POST /jobs failed"
echo "$SUBMIT"
JOB_ID=$(printf '%s\n' "$SUBMIT" \
  | python3 -c "import sys,json; print(json.load(sys.stdin)['id'])") \
  || fail "POST /jobs response contains no job id"
ok "submitted job $JOB_ID"
echo ""
# Step 6: right after submission the job must be either queued or running.
echo "=== 6. GET /jobs/{id} immediately after submit ==="
JOB=$(curl -sf "$BASE/jobs/$JOB_ID") || fail "GET /jobs/$JOB_ID failed"
# if/else rather than 'assert ... && ok': the latter is exempt from 'set -e'
# and would let an unexpected status pass without any output.
if printf '%s\n' "$JOB" \
  | python3 -c "import sys,json; d=json.load(sys.stdin); assert d['status'] in ('queued','running')"; then
  ok "status is queued/running"
else
  fail "status is neither queued nor running"
fi
echo ""
# Step 7: follow the job's SSE stream in the background while the script
# carries on with polling.
echo "=== 7. SSE stream (first 15 events then detach) ==="
echo "Subscribing to SSE stream for $JOB_ID"
# curl: -s silent, -N no output buffering, --max-time caps the request at 60 s.
# head -30 ends the pipeline after 30 lines — presumably 15 events at two
# lines each (data line + blank separator); TODO confirm against the server.
curl -sN --max-time 60 "$BASE/jobs/$JOB_ID/stream" | head -30 &
# In bash, $! for a background pipeline is the PID of its last command
# (head here); that is the process the later 'kill $SSE_PID' targets.
SSE_PID=$!
echo ""
# Step 8: poll the job every 15 s until it reaches a terminal status or
# 20 minutes have elapsed.
echo "=== 8. Poll until done (max 20 min) ==="
SECONDS=0 # bash built-in: counts seconds since this assignment
while true; do
  sleep 15
  JOB=$(curl -sf "$BASE/jobs/$JOB_ID") || fail "GET /jobs/$JOB_ID failed while polling"
  STATUS=$(printf '%s\n' "$JOB" \
    | python3 -c "import sys,json; print(json.load(sys.stdin)['status'])") \
    || fail "job response has no status field"
  echo " [${SECONDS}s] status=$STATUS"
  case "$STATUS" in
    done)
      ok "job finished in ${SECONDS}s"
      break
      ;;
    failed | cancelled)
      # Terminal states: stop now instead of spinning until the timeout.
      printf '%s\n' "$JOB" | python3 -m json.tool || true
      fail "job $STATUS"
      ;;
  esac
  if ((SECONDS > 1200)); then
    fail "timeout after 20 minutes"
  fi
done
# The SSE follower may already have exited on its own; ignore that case.
kill "$SSE_PID" 2>/dev/null || true
echo ""
# Step 9: fetch the finished job and print a heuristic quality report
# (repetition loops, duplicated phrases, blank segments) plus a sample.
echo "=== 9. Inspect transcription quality ==="
RESULT=$(curl -sf "$BASE/jobs/$JOB_ID") || fail "GET /jobs/$JOB_ID failed"

# The checker is kept in a variable and run with 'python3 -c' so that stdin
# stays free for the JSON. The previous 'echo ... | python3 - <<HEREDOC' form
# could never work: the heredoc replaces the pipe as stdin, Python consumes it
# as the program, and sys.stdin.read() then returns an empty string.
# 'read -d ""' returns non-zero at end of input, hence the '|| true'.
IFS= read -r -d '' QUALITY_CHECK << 'PYCHECK' || true
import sys, json, re

data = json.loads(sys.stdin.read())
segments = data.get("segments", [])
print(f" Language : {data.get('language')}")
print(f" Duration : {data.get('duration_secs')}s")
print(f" Segments : {len(segments)}")

issues = []
for i, seg in enumerate(segments):
    text = seg.get("text", "")

    # --- repetition loop: first half of the words repeated as second half ---
    words = text.strip().split()
    if len(words) >= 6:
        half = len(words) // 2
        if words[:half] == words[half:half + half]:
            issues.append(f" [seg {i}] REPETITION LOOP: {text[:80]}")

    # --- long duplicate phrases: any three-word phrase occurring twice ---
    phrases = re.findall(r'(\b\w+ \w+ \w+\b)', text)
    if len(phrases) != len(set(phrases)) and len(phrases) > 4:
        issues.append(f" [seg {i}] DUPLICATE PHRASE: {text[:80]}")

    # --- blank/empty segment ---
    if not text.strip():
        issues.append(f" [seg {i}] BLANK SEGMENT")

if issues:
    print("\n ⚠ Quality issues found:")
    for iss in issues[:10]:
        print(iss)
else:
    print("\n ✓ No repetition loops or blank segments detected")

# Print first 5 segments as sample
print("\n Sample output:")
for seg in segments[:5]:
    print(f" [{seg['start']:.1f} → {seg['end']:.1f}] {seg['text'][:100]}")
PYCHECK

printf '%s\n' "$RESULT" | python3 -c "$QUALITY_CHECK" \
  || fail "transcription quality inspection crashed"
echo ""
# Step 10: deleting the completed job must succeed with HTTP 204 or 200.
echo "=== 10. DELETE completed job ==="
# '|| true': on a connection error curl still prints 000; report it below.
STATUS=$(curl -s -o /dev/null -w "%{http_code}" -X DELETE "$BASE/jobs/$JOB_ID") || true
# The original 'A || B && ok' had no failure branch, so any other status
# code slipped through unnoticed.
if [[ "$STATUS" == "204" || "$STATUS" == "200" ]]; then
  ok "DELETE returned $STATUS"
else
  fail "expected 204 or 200 got $STATUS"
fi
echo ""
# Step 11: submit a second job, DELETE it one second later, and expect the
# job to report status "cancelled" afterwards.
echo "=== 11. Submit + immediately cancel a job ==="
JOB2=$(curl -sf -X POST "$BASE/jobs" \
  -F "audio=@${AUDIO};type=audio/wav" \
  -F "language=en" \
  -F "task=transcribe") || fail "POST /jobs failed"
JOB2_ID=$(printf '%s\n' "$JOB2" \
  | python3 -c "import sys,json; print(json.load(sys.stdin)['id'])") \
  || fail "POST /jobs response contains no job id"
sleep 1
# '|| true': on a connection error curl still prints 000 for the message below.
DEL_STATUS=$(curl -s -o /dev/null -w "%{http_code}" -X DELETE "$BASE/jobs/$JOB2_ID") || true
CANCEL_STATUS=$(curl -sf "$BASE/jobs/$JOB2_ID" \
  | python3 -c "import sys,json; print(json.load(sys.stdin)['status'])") \
  || fail "could not read status of job $JOB2_ID after DELETE (HTTP $DEL_STATUS)"
if [[ "$CANCEL_STATUS" == "cancelled" ]]; then
  ok "cancel works ($DEL_STATUS → cancelled)"
else
  fail "expected status 'cancelled' got '$CANCEL_STATUS' (DELETE returned $DEL_STATUS)"
fi
echo ""
# Step 12: leave a short grace period for a late webhook delivery, then stop
# the background receiver.
echo "=== 12. Verify webhook was fired ==="
sleep 3
kill "$WEBHOOK_PID" 2>/dev/null || true
# Reap the child so the shell does not report it as terminated later.
wait "$WEBHOOK_PID" 2>/dev/null || true
# Nothing here inspects what the receiver got, so say so instead of implying
# that delivery was checked.
echo "NOTE: webhook delivery is not asserted automatically; look for a '[WEBHOOK] received:' line above."
ok "all tests done"