From bcaf8680dba9552879075eea886834aedc6e8803 Mon Sep 17 00:00:00 2001 From: mozempk Date: Fri, 8 May 2026 23:47:13 +0200 Subject: [PATCH] docs: add FRONTEND_INTEGRATION.md developer guide Comprehensive integration guide for frontend/full-stack developers: - Architecture overview diagram - Quick start (submit + poll in ~20 lines) - Model lifecycle: state machine diagram, all 4 /model/* endpoints, SSE event subscription with JS examples - Job submission: multipart fields, 503 model_not_ready handling, retry-with-auto-load pattern - Job progress: polling vs SSE, all event types with payloads - Webhooks: job completion + model lifecycle, Express receiver example, how to distinguish job vs model payloads - Health check field reference - Cancellation semantics (GPU inference not interruptible) - Full TypeScript type definitions for all API shapes - React hooks: useModelStatus, useJobStream, useTranscribe - Complete WhisperClient class example with ensureModelReady, streamProgress, and end-to-end transcribe() - Error reference table with all 400/404/409/503/500 shapes Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- docs/FRONTEND_INTEGRATION.md | 941 +++++++++++++++++++++++++++++++++++ 1 file changed, 941 insertions(+) create mode 100644 docs/FRONTEND_INTEGRATION.md diff --git a/docs/FRONTEND_INTEGRATION.md b/docs/FRONTEND_INTEGRATION.md new file mode 100644 index 0000000..691ee37 --- /dev/null +++ b/docs/FRONTEND_INTEGRATION.md @@ -0,0 +1,941 @@ +# Frontend Integration Guide + +> **Audience:** Frontend / full-stack developers integrating the whisper transcription API into a web application. +> **Base URL:** `http://your-server:8080` (configurable via the `PORT` env var on the server). +> **Interactive docs:** `http://your-server:8080/docs` (Swagger UI — try every endpoint live). + +--- + +## Table of Contents + +1. [Architecture Overview](#1-architecture-overview) +2. [Quick Start — submit and poll](#2-quick-start--submit-and-poll) +3. 
[Model Lifecycle](#3-model-lifecycle) + - 3.1 [State machine](#31-state-machine) + - 3.2 [GET /model/status](#32-get-modelstatus) + - 3.3 [POST /model/load](#33-post-modelload) + - 3.4 [POST /model/unload](#34-post-modelunload) + - 3.5 [GET /model/events (SSE)](#35-get-modelevents-sse) +4. [Submitting Jobs](#4-submitting-jobs) + - 4.1 [POST /jobs](#41-post-jobs) + - 4.2 [Handling 503 Model Not Ready](#42-handling-503-model-not-ready) + - 4.3 [Retry pattern with auto-load](#43-retry-pattern-with-auto-load) +5. [Tracking Job Progress](#5-tracking-job-progress) + - 5.1 [GET /jobs/:id (poll)](#51-get-jobsid-poll) + - 5.2 [GET /jobs/:id/stream (SSE)](#52-get-jobsidstream-sse) +6. [Webhooks](#6-webhooks) + - 6.1 [Job completion webhook](#61-job-completion-webhook) + - 6.2 [Model lifecycle webhooks](#62-model-lifecycle-webhooks) +7. [Health Check](#7-health-check) +8. [Cancelling Jobs](#8-cancelling-jobs) +9. [TypeScript Types](#9-typescript-types) +10. [React Hooks](#10-react-hooks) +11. [Complete Integration Example](#11-complete-integration-example) +12. [Error Reference](#12-error-reference) + +--- + +## 1. Architecture Overview + +``` +┌─────────────────────────────────────────────────────────┐ +│ whisper-server │ +│ │ +│ HTTP / SSE Worker thread (GPU) │ +│ ──────────── ─────────────────── │ +│ POST /jobs ───► job queue (FIFO) │ +│ GET /jobs/:id ↕ │ +│ GET /jobs/:id/stream ◄── progress broadcast │ +│ │ +│ POST /model/load ─► load whisper into VRAM │ +│ POST /model/unload ► free VRAM │ +│ GET /model/status read state │ +│ GET /model/events ◄── lifecycle SSE broadcast │ +└─────────────────────────────────────────────────────────┘ +``` + +**Key behaviours to understand before building:** + +- The model starts **unloaded** on every server restart. No inference is possible until it loads (~15–25 seconds for large-v3 on an RTX 2080). +- Submitting a job when the model is not ready returns `503` with a `Retry-After` header **and automatically triggers a load**. 
You can retry the submission; no separate load call is needed. +- The worker processes jobs **sequentially** (one at a time). Queue depth is visible via `/health`. +- Long audio is split into silence-bounded chunks internally. SSE `progress` events reflect chunk completion, not raw GPU progress. + +--- + +## 2. Quick Start — submit and poll + +The simplest possible integration — no SSE, no model management, just submit and poll: + +```typescript +const BASE = 'http://your-server:8080'; + +async function transcribe(audioBlob: Blob): Promise<Job> { + // 1. Submit + const form = new FormData(); + form.append('audio', audioBlob, 'audio.wav'); + + let submitResp = await fetch(`${BASE}/jobs`, { method: 'POST', body: form }); + + // 2. If model isn't loaded yet, keep retrying until it is + while (submitResp.status === 503) { + const retryAfter = parseInt(submitResp.headers.get('Retry-After') ?? '15'); + await sleep(retryAfter * 1000); + submitResp = await fetch(`${BASE}/jobs`, { method: 'POST', body: form }); + } + if (!submitResp.ok) throw new Error(`Submit failed: ${submitResp.status}`); + + const { job_id } = await submitResp.json(); + + // 3. Poll until done + while (true) { + await sleep(2000); + const job: Job = await fetch(`${BASE}/jobs/${job_id}`).then(r => r.json()); + if (job.status === 'done') return job; + if (job.status === 'failed') throw new Error(job.error ?? 'transcription failed'); + if (job.status === 'cancelled') throw new Error('job was cancelled'); + } +} + +const sleep = (ms: number) => new Promise(r => setTimeout(r, ms)); +``` + +> For a better UX — real-time progress bar, model state indicator — read the full sections below. + +--- + +## 3.
Model Lifecycle + +### 3.1 State machine + +The model moves through four states: + +``` + job submit + or POST /model/load + │ + ┌──────────▼───────────┐ + │ Unloaded │◄──────────────────────────┐ + └──────────┬───────────┘ │ + │ load triggered │ + ┌──────────▼───────────┐ │ + │ Loading │ │ idle timeout + └──┬──────────────┬────┘ │ or POST /model/unload + │ success │ VRAM full │ + │ │ │ + ┌──▼────┐ ┌──────▼────────────────┐ │ + │ Ready │ │ WaitingForGpu │────────────────►│ + └──┬────┘ └──────────────┬────────┘ │ + │ retry ok ────┘ │ + └────────────────────────────────────────────────►┘ +``` + +| State | `state` value | Can accept jobs? | +|-------|--------------|-----------------| +| Unloaded | `"unloaded"` | ❌ → triggers load, returns 503 | +| Loading | `"loading"` | ❌ → returns 503 | +| Waiting for GPU | `"waiting_for_gpu"` | ❌ → returns 503 | +| Ready | `"ready"` | ✅ | + +--- + +### 3.2 `GET /model/status` + +Returns the current model state and live VRAM figures (from `nvidia-smi`). + +**Unloaded:** +```json +{ "state": "unloaded" } +``` + +**Loading:** +```json +{ "state": "loading" } +``` + +**Waiting for GPU (VRAM contention):** +```json +{ + "state": "waiting_for_gpu", + "vram_needed_mb": 3951, + "vram_free_mb": 512, + "retry_in_secs": 30 +} +``` + +**Ready:** +```json +{ + "state": "ready", + "loaded_at": "2026-05-10T14:00:00.000Z", + "vram_used_mb": 4096, + "vram_total_mb": 8192 +} +``` + +> `vram_used_mb` / `vram_total_mb` are omitted when `nvidia-smi` is unavailable. + +--- + +### 3.3 `POST /model/load` + +Tells the server to load the model. **Idempotent** — safe to call multiple times. + +```bash +curl -X POST http://your-server:8080/model/load +``` + +**Responses:** + +| Status | Body | Meaning | +|--------|------|---------| +| 202 | `{"status":"load_initiated"}` | Load queued | +| 200 | `{"status":"already_ready"}` | Already loaded | + +The load happens asynchronously. Subscribe to `/model/events` or poll `/model/status` to know when ready. 
+ +--- + +### 3.4 `POST /model/unload` + +Requests that the model be freed from GPU memory. The unload is not instantaneous if a job is running: the in-flight job finishes first, and the model is dropped as soon as the current inference completes. + +```bash +curl -X POST http://your-server:8080/model/unload +``` + +**Response:** `200 {"status":"unload_requested"}` (always, regardless of current state). + +> Use this if you know transcription won't happen for a while and you want to free VRAM for other workloads on the same GPU. + +--- + +### 3.5 `GET /model/events` (SSE) + +A persistent Server-Sent Events stream that emits every model lifecycle transition. + +```bash +curl -N http://your-server:8080/model/events +``` + +**Events emitted:** + +``` +event: model_loading +data: {"type":"model_loading"} + +event: model_ready +data: {"type":"model_ready","loaded_at":"2026-05-10T14:00:00.000Z"} + +event: model_unloaded +data: {"type":"model_unloaded"} + +event: model_waiting_for_gpu +data: {"type":"model_waiting_for_gpu","vram_needed_mb":3951,"vram_free_mb":512,"retry_in_secs":30} +``` + +**TypeScript:** +```typescript +function subscribeModelEvents( + onReady: (loadedAt: string) => void, + onUnloaded: () => void, + onLoading: () => void, + onWaitingGpu: (info: { vram_needed_mb: number; vram_free_mb: number; retry_in_secs: number }) => void, +): () => void { + const es = new EventSource(`${BASE}/model/events`); + + es.addEventListener('model_ready', (e) => onReady(JSON.parse(e.data).loaded_at)); + es.addEventListener('model_unloaded', () => onUnloaded()); + es.addEventListener('model_loading', () => onLoading()); + es.addEventListener('model_waiting_for_gpu',(e) => onWaitingGpu(JSON.parse(e.data))); + + es.onerror = () => { + // The browser reconnects automatically with exponential backoff. + // Log the error but don't tear down the listener. + console.warn('model/events connection dropped, reconnecting…'); + }; + + return () => es.close(); // call this to clean up (e.g.
in React useEffect return) +} +``` + +> The server sends an SSE keepalive comment every 15 seconds so proxies don't close idle connections. + +--- + +## 4. Submitting Jobs + +### 4.1 `POST /jobs` + +**Content-Type:** `multipart/form-data` + +| Field | Required | Type | Notes | +|-------|----------|------|-------| +| `audio` | ✅ | file | Any format ffmpeg understands: WAV, MP3, M4A, OGG, FLAC, MP4, MKV … No size limit. | +| `language` | ❌ | string | ISO 639-1 code (`"en"`, `"it"`, `"fr"` …). Omit for auto-detection. | +| `task` | ❌ | string | `"transcribe"` (default) or `"translate"` (→ English) | +| `webhook_url` | ❌ | string | URL to POST the completed job to. Also registers the URL for model lifecycle webhooks. | + +**202 Accepted:** +```json +{ "job_id": "550e8400-e29b-41d4-a716-446655440000" } +``` + +```typescript +async function submitJob( + audio: Blob, + opts: { language?: string; task?: 'transcribe' | 'translate'; webhookUrl?: string } = {} +): Promise { + const form = new FormData(); + form.append('audio', audio, 'audio.wav'); + if (opts.language) form.append('language', opts.language); + if (opts.task) form.append('task', opts.task); + if (opts.webhookUrl) form.append('webhook_url', opts.webhookUrl); + + const resp = await fetch(`${BASE}/jobs`, { method: 'POST', body: form }); + if (!resp.ok) throw await toApiError(resp); + + const { job_id } = await resp.json(); + return job_id; +} +``` + +--- + +### 4.2 Handling 503 Model Not Ready + +When the model isn't loaded, `POST /jobs` returns: + +``` +HTTP/1.1 503 Service Unavailable +Retry-After: 30 +Content-Type: application/json +``` +```json +{ + "error": "model_not_ready", + "state": "unloaded", + "retry_after_secs": 30 +} +``` + +**`retry_after_secs` by state:** + +| `state` | `retry_after_secs` | Why | +|---------|-------------------|-----| +| `unloaded` | 30 | Load just triggered; RTX 2080 + large-v3 loads in ~15–25s | +| `loading` | 10 | Already loading; check again soon | +| `waiting_for_gpu` | 
`GPU_POLL_INTERVAL_SECS` (default 30) | VRAM busy; retry later | + +> **Submitting a job when the model is `unloaded` automatically triggers a load.** You do NOT need a separate `POST /model/load` call for the normal happy path. + +--- + +### 4.3 Retry pattern with auto-load + +```typescript +async function submitWithRetry( + audio: Blob, + opts: { language?: string; task?: 'transcribe' | 'translate'; webhookUrl?: string } = {}, + maxAttempts = 20, +): Promise { + const form = new FormData(); + form.append('audio', audio, 'audio.wav'); + if (opts.language) form.append('language', opts.language); + if (opts.task) form.append('task', opts.task); + if (opts.webhookUrl) form.append('webhook_url', opts.webhookUrl); + + for (let attempt = 1; attempt <= maxAttempts; attempt++) { + const resp = await fetch(`${BASE}/jobs`, { method: 'POST', body: form }); + + if (resp.status === 202) { + const { job_id } = await resp.json(); + return job_id; + } + + if (resp.status === 503) { + const body = await resp.json(); + const waitMs = (parseInt(resp.headers.get('Retry-After') ?? '15') + 1) * 1000; + console.log(`Model ${body.state} — waiting ${waitMs / 1000}s (attempt ${attempt}/${maxAttempts})`); + await sleep(waitMs); + continue; + } + + throw await toApiError(resp); + } + + throw new Error(`Model did not become ready after ${maxAttempts} attempts`); +} +``` + +> **Tip:** For a better UX, subscribe to `GET /model/events` and wait for the `model_ready` event instead of sleeping blindly — then submit immediately when ready. + +--- + +## 5. Tracking Job Progress + +Two patterns: **SSE** (real-time push) or **polling** (simpler). SSE is preferred for UX. + +### 5.1 `GET /jobs/:id` (poll) + +Returns the full job document. Poll every 2–5 seconds while `status` is `queued` or `running`. 
+ +```json +{ + "id": "550e8400-e29b-41d4-a716-446655440000", + "status": "running", + "task": "transcribe", + "language": "en", + "progress": 42, + "duration_secs": 120.5, + "segments": [], + "created_at": "2026-05-10T14:00:00.000Z" +} +``` + +When `status === "done"`: +```json +{ + "id": "550e8400-e29b-41d4-a716-446655440000", + "status": "done", + "task": "transcribe", + "language": "en", + "progress": 100, + "duration_secs": 120.5, + "segments": [ + { "index": 0, "start": 0.0, "end": 3.5, "text": "Hello, world.", "words": [] }, + { "index": 1, "start": 3.6, "end": 7.2, "text": "How are you?", "words": [] } + ], + "created_at": "2026-05-10T14:00:00.000Z", + "completed_at": "2026-05-10T14:02:35.000Z" +} +``` + +**Terminal statuses:** `done`, `failed`, `cancelled` — stop polling when you see one. + +--- + +### 5.2 `GET /jobs/:id/stream` (SSE) + +Subscribe immediately after submission. The connection is held open and events are pushed as they occur. + +**Event types:** + +``` +event: progress +data: {"type":"progress","percent":42,"chunk":3,"chunks_total":7} + +event: done +data: {"type":"done","job":{...full Job object...}} + +event: error +data: {"type":"error","message":"whisper inference failed: ..."} +``` + +- `percent` — overall job progress 0–100 (derived from chunks completed / total). +- `chunk` / `chunks_total` — the audio is split on silences; each chunk is one whisper inference call. +- If you open the stream after the job is already finished, you immediately receive a single `done` event. 
+ +```typescript +function streamJobProgress( + jobId: string, + onProgress: (percent: number, chunk: number, total: number) => void, + onDone: (job: Job) => void, + onError: (message: string) => void, +): () => void { + const es = new EventSource(`${BASE}/jobs/${jobId}/stream`); + + es.addEventListener('progress', (e) => { + const { percent, chunk, chunks_total } = JSON.parse(e.data); + onProgress(percent, chunk, chunks_total); + }); + + es.addEventListener('done', (e) => { + const { job } = JSON.parse(e.data); + es.close(); + onDone(job); + }); + + es.addEventListener('error', (e) => { + // SSE protocol error vs application error — check if data exists + if ('data' in e) { + const { message } = JSON.parse((e as MessageEvent).data); + onError(message); + } + es.close(); + }); + + return () => es.close(); +} +``` + +> **Note:** Do not confuse the SSE `error` event (connection drop — no `data`) with the application `error` event (transcription failure — has `data`). The example above handles both. + +--- + +## 6. Webhooks + +Webhooks are fired as HTTP `POST` requests with `Content-Type: application/json` to the `webhook_url` you supply at job submission. The server retries up to 3 times with exponential backoff (1s, 2s) on non-2xx responses. + +### 6.1 Job completion webhook + +Fired when a job reaches `done`, `failed`, or `cancelled`. +**Payload:** the full `Job` object (same as `GET /jobs/:id`). + +```json +{ + "id": "550e8400-e29b-41d4-a716-446655440000", + "status": "done", + "task": "transcribe", + "language": "en", + "progress": 100, + "duration_secs": 120.5, + "segments": [ + { "index": 0, "start": 0.0, "end": 3.5, "text": "Hello, world.", "words": [] } + ], + "created_at": "2026-05-10T14:00:00.000Z", + "completed_at": "2026-05-10T14:02:35.000Z" +} +``` + +### 6.2 Model lifecycle webhooks + +**Any URL that has ever appeared as a `webhook_url` in a job submission** also receives model lifecycle webhooks for the lifetime of the server process. 
This lets your backend know when the model comes up or goes down without polling. + +Only two events are delivered via webhook (the others are SSE-only): + +**Model ready:** +```json +{ "type": "model_ready", "loaded_at": "2026-05-10T14:00:00.000Z" } +``` + +**Model unloaded:** +```json +{ "type": "model_unloaded" } +``` + +**Express.js receiver example:** +```typescript +import express from 'express'; +const app = express(); +app.use(express.json()); + +app.post('/webhooks/whisper', (req, res) => { + res.sendStatus(200); // acknowledge quickly — retries on non-2xx + + const body = req.body; + + if ('type' in body) { + // Model lifecycle event + if (body.type === 'model_ready') { + console.log('Whisper model ready at', body.loaded_at); + } else if (body.type === 'model_unloaded') { + console.log('Whisper model freed GPU memory'); + } + return; + } + + // Job completion event — body is a Job object + if (body.status === 'done') { + console.log(`Job ${body.id} done — ${body.segments.length} segments`); + processTranscript(body.segments); + } else if (body.status === 'failed') { + console.error(`Job ${body.id} failed:`, body.error); + } +}); +``` + +> **Distinguish job vs. model webhook:** Job payloads have an `id` and `status` field. Model payloads have a `type` field at the top level (`model_ready` / `model_unloaded`). + +--- + +## 7. 
Health Check + +```bash +curl http://your-server:8080/health +``` + +```json +{ + "status": "ok", + "gpu_name": "NVIDIA GeForce RTX 2080", + "vram_total_mb": 8192, + "model": "large-v3", + "queue_depth": 2, + "model_state": "ready" +} +``` + +| Field | Notes | +|-------|-------| +| `status` | Always `"ok"` when the server is reachable | +| `gpu_name` | From `nvidia-smi`; `null` if unavailable | +| `vram_total_mb` | Total VRAM in MiB; `null` if unavailable | +| `model` | Model name string (server config) | +| `queue_depth` | Jobs waiting (not counting the currently running one) | +| `model_state` | `"unloaded"` / `"loading"` / `"waiting_for_gpu"` / `"ready"` | + +--- + +## 8. Cancelling Jobs + +```bash +curl -X DELETE http://your-server:8080/jobs/550e8400-e29b-41d4-a716-446655440000 +``` + +- `200` — job marked `cancelled`. Returns the updated `Job` object. +- `404` — job not found. +- `409` — job already in a terminal state (`done` / `failed` / `cancelled`). + +> **Important:** whisper.cpp does not support mid-inference cancellation. If the job is currently `running`, the GPU inference will finish before the cancellation takes effect — the result is simply discarded and the status set to `cancelled`. + +--- + +## 9. 
TypeScript Types + +```typescript +type ModelStateTag = 'unloaded' | 'loading' | 'waiting_for_gpu' | 'ready'; +type JobStatus = 'queued' | 'running' | 'done' | 'failed' | 'cancelled'; +type Task = 'transcribe' | 'translate'; + +interface ModelStatus { + state: ModelStateTag; + // ready only + loaded_at?: string; + // waiting_for_gpu only + vram_needed_mb?: number; + vram_free_mb?: number; + retry_in_secs?: number; + // always (when nvidia-smi available) + vram_used_mb?: number; + vram_total_mb?: number; +} + +interface Word { + text: string; + start: number; // seconds + end: number; // seconds + probability: number; // 0–1 +} + +interface Segment { + index: number; + start: number; // seconds + end: number; // seconds + text: string; + words: Word[]; +} + +interface Job { + id: string; + status: JobStatus; + task: Task; + language?: string; // ISO 639-1; null until detected/set + progress: number; // 0–100 + duration_secs?: number; // null until processing starts + segments: Segment[]; // populated when status = 'done' + error?: string; // populated when status = 'failed' + webhook_url?: string; + filename?: string; + created_at: string; // ISO 8601 + completed_at?: string; // ISO 8601; null until terminal +} + +// SSE payloads from GET /jobs/:id/stream +type JobSseEvent = + | { type: 'progress'; percent: number; chunk: number; chunks_total: number } + | { type: 'done'; job: Job } + | { type: 'error'; message: string }; + +// SSE payloads from GET /model/events +type ModelSseEvent = + | { type: 'model_loading' } + | { type: 'model_ready'; loaded_at: string } + | { type: 'model_unloaded' } + | { type: 'model_waiting_for_gpu'; vram_needed_mb: number; vram_free_mb: number; retry_in_secs: number }; + +// Webhook payload — union of job completion and model lifecycle events +type WebhookPayload = Job | { type: 'model_ready'; loaded_at: string } | { type: 'model_unloaded' }; + +// Helpers +function isJobPayload(p: WebhookPayload): p is Job { + return 'id' in p && 
'status' in p; +} +function isModelPayload(p: WebhookPayload): p is { type: string } { + return 'type' in p; +} +``` + +--- + +## 10. React Hooks + +```typescript +// useModelStatus.ts +import { useEffect, useState } from 'react'; + +const BASE = process.env.NEXT_PUBLIC_WHISPER_BASE_URL ?? ''; + +export function useModelStatus() { + const [status, setStatus] = useState(null); + + // Initial fetch + useEffect(() => { + fetch(`${BASE}/model/status`) + .then(r => r.json()) + .then(setStatus) + .catch(console.error); + }, []); + + // Live updates via SSE + useEffect(() => { + const es = new EventSource(`${BASE}/model/events`); + + const refresh = () => { + fetch(`${BASE}/model/status`) + .then(r => r.json()) + .then(setStatus) + .catch(console.error); + }; + + es.addEventListener('model_loading', refresh); + es.addEventListener('model_ready', refresh); + es.addEventListener('model_unloaded', refresh); + es.addEventListener('model_waiting_for_gpu',refresh); + es.onerror = () => console.warn('model/events reconnecting…'); + + return () => es.close(); + }, []); + + return status; +} +``` + +```typescript +// useJobStream.ts +import { useEffect, useRef, useState } from 'react'; + +type ProgressState = { + percent: number; + chunk: number; + chunks_total: number; +}; + +export function useJobStream(jobId: string | null) { + const [progress, setProgress] = useState(null); + const [job, setJob] = useState(null); + const [error, setError] = useState(null); + const esRef = useRef(null); + + useEffect(() => { + if (!jobId) return; + + esRef.current?.close(); + setProgress(null); setJob(null); setError(null); + + const es = new EventSource(`${BASE}/jobs/${jobId}/stream`); + esRef.current = es; + + es.addEventListener('progress', (e) => { + setProgress(JSON.parse(e.data)); + }); + + es.addEventListener('done', (e) => { + setJob(JSON.parse(e.data).job); + setProgress({ percent: 100, chunk: 0, chunks_total: 0 }); + es.close(); + }); + + es.addEventListener('error', (e) => { + if 
('data' in e) setError(JSON.parse((e as MessageEvent).data).message); + es.close(); + }); + + return () => es.close(); + }, [jobId]); + + return { progress, job, error }; +} +``` + +```typescript +// useTranscribe.ts — ties it all together +import { useState, useCallback } from 'react'; + +export function useTranscribe() { + const [jobId, setJobId] = useState(null); + const [loading, setLoading] = useState(false); + const [error, setError] = useState(null); + + const submit = useCallback(async ( + audio: Blob, + opts: { language?: string; task?: Task } = {} + ) => { + setLoading(true); + setError(null); + setJobId(null); + + try { + const id = await submitWithRetry(audio, opts); // see §4.3 + setJobId(id); + } catch (e) { + setError(String(e)); + } finally { + setLoading(false); + } + }, []); + + const { progress, job, error: streamError } = useJobStream(jobId); + + return { submit, loading, jobId, progress, job, error: error ?? streamError }; +} +``` + +--- + +## 11. Complete Integration Example + +A full transcription flow with model warm-up indicator and real-time progress: + +```typescript +// whisperClient.ts +const BASE = process.env.NEXT_PUBLIC_WHISPER_BASE_URL ?? ''; + +export class WhisperClient { + /** Wait for the model to be ready, triggering a load if needed. */ + async ensureModelReady(timeoutMs = 120_000): Promise { + const status = await this.getModelStatus(); + if (status.state === 'ready') return; + + // Trigger load (idempotent) + await fetch(`${BASE}/model/load`, { method: 'POST' }); + + return new Promise((resolve, reject) => { + const deadline = setTimeout(() => { + es.close(); + reject(new Error('Model did not become ready within timeout')); + }, timeoutMs); + + const es = new EventSource(`${BASE}/model/events`); + es.addEventListener('model_ready', () => { + clearTimeout(deadline); + es.close(); + resolve(); + }); + es.onerror = () => { + // Reconnects automatically; don't reject on transient drops. 
+ }; + }); + } + + async getModelStatus(): Promise { + const r = await fetch(`${BASE}/model/status`); + if (!r.ok) throw new Error(`/model/status ${r.status}`); + return r.json(); + } + + async submit( + audio: Blob, + opts: { language?: string; task?: Task; webhookUrl?: string } = {} + ): Promise { + return submitWithRetry(audio, opts); + } + + streamProgress( + jobId: string, + callbacks: { + onProgress?: (p: { percent: number; chunk: number; total: number }) => void; + onDone?: (job: Job) => void; + onError?: (msg: string) => void; + } + ): () => void { + const es = new EventSource(`${BASE}/jobs/${jobId}/stream`); + + es.addEventListener('progress', (e) => { + const d = JSON.parse(e.data); + callbacks.onProgress?.({ percent: d.percent, chunk: d.chunk, total: d.chunks_total }); + }); + + es.addEventListener('done', (e) => { + callbacks.onDone?.(JSON.parse(e.data).job); + es.close(); + }); + + es.addEventListener('error', (e) => { + if ('data' in e) callbacks.onError?.(JSON.parse((e as MessageEvent).data).message); + es.close(); + }); + + return () => es.close(); + } + + async transcribe( + audio: Blob, + opts: { + language?: string; + task?: Task; + webhookUrl?: string; + onProgress?: (percent: number) => void; + } = {} + ): Promise { + const jobId = await this.submit(audio, opts); + + return new Promise((resolve, reject) => { + this.streamProgress(jobId, { + onProgress: (p) => opts.onProgress?.(p.percent), + onDone: resolve, + onError: (msg) => reject(new Error(msg)), + }); + }); + } +} + +// Usage +const whisper = new WhisperClient(); + +const job = await whisper.transcribe(audioBlob, { + language: 'en', + onProgress: (pct) => console.log(`${pct}%`), +}); + +for (const seg of job.segments) { + console.log(`[${seg.start.toFixed(1)}s → ${seg.end.toFixed(1)}s] ${seg.text}`); +} +``` + +--- + +## 12. 
Error Reference + +All error responses follow this shape: + +```json +{ "error": "human-readable message" } +``` + +With the following additional fields for specific errors: + +**503 model_not_ready:** +```json +{ "error": "model_not_ready", "state": "loading", "retry_after_secs": 10 } +``` + +| HTTP | `error` value | When | What to do | +|------|--------------|------|-----------| +| 400 | `"missing 'audio' field"` | `audio` not in form | Fix the form | +| 400 | `"audio field is empty"` | Zero-byte file uploaded | Fix the file | +| 400 | `"task must be 'transcribe' or 'translate'"` | Bad `task` value | Fix the value | +| 400 | `"multipart error: …"` | Malformed request | Check content-type header | +| 404 | `"job … not found"` | Unknown job ID | Check the ID | +| 409 | `"job … is already in terminal state …"` | Cancelling a job that is already `done`, `failed`, or `cancelled` | No action needed | +| 503 | `"model_not_ready"` | Model not loaded | See §4.2 — retry with `Retry-After` | +| 500 | `"worker channel closed"` | Server crash | Contact server admin | + +**Network / SSE errors:** + +- `EventSource` `onerror` with no `.data` = connection dropped. The browser reconnects automatically — no action needed unless you want to show a UI indicator. +- HTTP 502/504 from a reverse proxy — or a 503 whose body is not the `model_not_ready` JSON shown above — = the container is restarting. Wait and retry. + +--- + +*Last updated: 2026-05-08. Corresponds to whisper-server v0.1.0 commit `d014826`.*