From bcaf8680dba9552879075eea886834aedc6e8803 Mon Sep 17 00:00:00 2001
From: mozempk <moze@sal.giize.com>
Date: Fri, 8 May 2026 23:47:13 +0200
Subject: [PATCH] docs: add FRONTEND_INTEGRATION.md developer guide

Comprehensive integration guide for frontend/full-stack developers:

- Architecture overview diagram
- Quick start (submit + poll in ~20 lines)
- Model lifecycle: state machine diagram, all 4 /model/* endpoints,
  SSE event subscription with JS examples
- Job submission: multipart fields, 503 model_not_ready handling,
  retry-with-auto-load pattern
- Job progress: polling vs SSE, all event types with payloads
- Webhooks: job completion + model lifecycle, Express receiver example,
  how to distinguish job vs model payloads
- Health check field reference
- Cancellation semantics (GPU inference not interruptible)
- Full TypeScript type definitions for all API shapes
- React hooks: useModelStatus, useJobStream, useTranscribe
- Complete WhisperClient class example with ensureModelReady,
  streamProgress, and end-to-end transcribe()
- Error reference table with all 400/404/409/503/500 shapes

Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
---
 docs/FRONTEND_INTEGRATION.md | 941 +++++++++++++++++++++++++++++++++++
 1 file changed, 941 insertions(+)
 create mode 100644 docs/FRONTEND_INTEGRATION.md

diff --git a/docs/FRONTEND_INTEGRATION.md b/docs/FRONTEND_INTEGRATION.md
new file mode 100644
index 0000000..691ee37
--- /dev/null
+++ b/docs/FRONTEND_INTEGRATION.md
@@ -0,0 +1,941 @@
+# Frontend Integration Guide
+
+> **Audience:** Frontend / full-stack developers integrating the whisper transcription API into a web application.  
+> **Base URL:** `http://your-server:8080` (configurable via the `PORT` env var on the server).  
+> **Interactive docs:** `http://your-server:8080/docs` (Swagger UI — try every endpoint live).
+
+---
+
+## Table of Contents
+
+1. [Architecture Overview](#1-architecture-overview)
+2. [Quick Start — submit and poll](#2-quick-start--submit-and-poll)
+3. [Model Lifecycle](#3-model-lifecycle)
+   - 3.1 [State machine](#31-state-machine)
+   - 3.2 [GET /model/status](#32-get-modelstatus)
+   - 3.3 [POST /model/load](#33-post-modelload)
+   - 3.4 [POST /model/unload](#34-post-modelunload)
+   - 3.5 [GET /model/events (SSE)](#35-get-modelevents-sse)
+4. [Submitting Jobs](#4-submitting-jobs)
+   - 4.1 [POST /jobs](#41-post-jobs)
+   - 4.2 [Handling 503 Model Not Ready](#42-handling-503-model-not-ready)
+   - 4.3 [Retry pattern with auto-load](#43-retry-pattern-with-auto-load)
+5. [Tracking Job Progress](#5-tracking-job-progress)
+   - 5.1 [GET /jobs/:id (poll)](#51-get-jobsid-poll)
+   - 5.2 [GET /jobs/:id/stream (SSE)](#52-get-jobsidstream-sse)
+6. [Webhooks](#6-webhooks)
+   - 6.1 [Job completion webhook](#61-job-completion-webhook)
+   - 6.2 [Model lifecycle webhooks](#62-model-lifecycle-webhooks)
+7. [Health Check](#7-health-check)
+8. [Cancelling Jobs](#8-cancelling-jobs)
+9. [TypeScript Types](#9-typescript-types)
+10. [React Hooks](#10-react-hooks)
+11. [Complete Integration Example](#11-complete-integration-example)
+12. [Error Reference](#12-error-reference)
+
+---
+
+## 1. Architecture Overview
+
+```
+┌─────────────────────────────────────────────────────────┐
+│  whisper-server                                         │
+│                                                         │
+│  HTTP / SSE          Worker thread (GPU)                │
+│  ────────────        ───────────────────                │
+│  POST /jobs    ───►  job queue (FIFO)                   │
+│  GET  /jobs/:id      ↕                                  │
+│  GET  /jobs/:id/stream ◄── progress broadcast           │
+│                                                         │
+│  POST /model/load  ─►  load whisper into VRAM           │
+│  POST /model/unload ►  free VRAM                        │
+│  GET  /model/status    read state                       │
+│  GET  /model/events ◄── lifecycle SSE broadcast         │
+└─────────────────────────────────────────────────────────┘
+```
+
+**Key behaviours to understand before building:**
+
+- The model starts **unloaded** on every server restart. No inference is possible until it loads (~15–25 seconds for large-v3 on an RTX 2080).
+- Submitting a job when the model is not ready returns `503` with a `Retry-After` header **and automatically triggers a load**. You can retry the submission; no separate load call is needed.
+- The worker processes jobs **sequentially** (one at a time). Queue depth is visible via `/health`.
+- Long audio is split into silence-bounded chunks internally. SSE `progress` events reflect chunk completion, not raw GPU progress.
+
+---
+
+## 2. Quick Start — submit and poll
+
+The simplest possible integration — no SSE, no model management, just submit and poll:
+
+```typescript
+const BASE = 'http://your-server:8080';
+
+async function transcribe(audioBlob: Blob): Promise<Job> {
+  // 1. Submit
+  const form = new FormData();
+  form.append('audio', audioBlob, 'audio.wav');
+
+  let submitResp = await fetch(`${BASE}/jobs`, { method: 'POST', body: form });
+
+  // 2. If model isn't loaded yet, keep retrying until it is
+  while (submitResp.status === 503) {
+    const retryAfter = parseInt(submitResp.headers.get('Retry-After') ?? '15');
+    await sleep(retryAfter * 1000);
+    submitResp = await fetch(`${BASE}/jobs`, { method: 'POST', body: form });
+  }
+  if (!submitResp.ok) throw new Error(`Submit failed: ${submitResp.status}`);
+
+  const { job_id } = await submitResp.json();
+
+  // 3. Poll until done
+  while (true) {
+    await sleep(2000);
+    const job: Job = await fetch(`${BASE}/jobs/${job_id}`).then(r => r.json());
+    if (job.status === 'done')      return job;
+    if (job.status === 'failed')    throw new Error(job.error ?? 'transcription failed');
+    if (job.status === 'cancelled') throw new Error('job was cancelled');
+  }
+}
+
+const sleep = (ms: number) => new Promise(r => setTimeout(r, ms));
+```
+
+> For a better UX — real-time progress bar, model state indicator — read the full sections below.
+
+---
+
+## 3. Model Lifecycle
+
+### 3.1 State machine
+
+The model moves through four states:
+
+```
+          job submit
+         or POST /model/load
+               │
+   ┌──────────▼───────────┐
+   │       Unloaded        │◄──────────────────────────┐
+   └──────────┬───────────┘                            │
+              │ load triggered                         │
+   ┌──────────▼───────────┐                            │
+   │        Loading        │                            │ idle timeout
+   └──┬──────────────┬────┘                            │ or POST /model/unload
+      │ success      │ VRAM full                       │
+      │              │                                  │
+   ┌──▼────┐  ┌──────▼────────────────┐                │
+   │ Ready │  │   WaitingForGpu       │────────────────►│
+   └──┬────┘  └──────────────┬────────┘                │
+      │         retry ok ────┘                         │
+      └────────────────────────────────────────────────►┘
+```
+
+| State | `state` value | Can accept jobs? |
+|-------|--------------|-----------------|
+| Unloaded | `"unloaded"` | ❌ → triggers load, returns 503 |
+| Loading | `"loading"` | ❌ → returns 503 |
+| Waiting for GPU | `"waiting_for_gpu"` | ❌ → returns 503 |
+| Ready | `"ready"` | ✅ |
+
+---
+
+### 3.2 `GET /model/status`
+
+Returns the current model state and live VRAM figures (from `nvidia-smi`).
+
+**Unloaded:**
+```json
+{ "state": "unloaded" }
+```
+
+**Loading:**
+```json
+{ "state": "loading" }
+```
+
+**Waiting for GPU (VRAM contention):**
+```json
+{
+  "state": "waiting_for_gpu",
+  "vram_needed_mb": 3951,
+  "vram_free_mb": 512,
+  "retry_in_secs": 30
+}
+```
+
+**Ready:**
+```json
+{
+  "state": "ready",
+  "loaded_at": "2026-05-10T14:00:00.000Z",
+  "vram_used_mb": 4096,
+  "vram_total_mb": 8192
+}
+```
+
+> `vram_used_mb` / `vram_total_mb` are omitted when `nvidia-smi` is unavailable.
+
+---
+
+### 3.3 `POST /model/load`
+
+Tells the server to load the model. **Idempotent** — safe to call multiple times.
+
+```bash
+curl -X POST http://your-server:8080/model/load
+```
+
+**Responses:**
+
+| Status | Body | Meaning |
+|--------|------|---------|
+| 202 | `{"status":"load_initiated"}` | Load queued |
+| 200 | `{"status":"already_ready"}` | Already loaded |
+
+The load happens asynchronously. Subscribe to `/model/events` or poll `/model/status` to find out when the model is ready.
+
+---
+
+### 3.4 `POST /model/unload`
+
+Asks the server to free the model from GPU memory. The unload is not instantaneous when a job is running: in-flight jobs finish first, and the model is dropped after the current inference completes.
+
+```bash
+curl -X POST http://your-server:8080/model/unload
+```
+
+**Response:** `200 {"status":"unload_requested"}` (always, regardless of current state).
+
+> Use this if you know transcription won't happen for a while and you want to free VRAM for other workloads on the same GPU.
+
+---
+
+### 3.5 `GET /model/events` (SSE)
+
+A persistent Server-Sent Events stream that emits every model lifecycle transition.
+
+```bash
+curl -N http://your-server:8080/model/events
+```
+
+**Events emitted:**
+
+```
+event: model_loading
+data: {"type":"model_loading"}
+
+event: model_ready
+data: {"type":"model_ready","loaded_at":"2026-05-10T14:00:00.000Z"}
+
+event: model_unloaded
+data: {"type":"model_unloaded"}
+
+event: model_waiting_for_gpu
+data: {"type":"model_waiting_for_gpu","vram_needed_mb":3951,"vram_free_mb":512,"retry_in_secs":30}
+```
+
+**TypeScript:**
+```typescript
+function subscribeModelEvents(
+  onReady:       (loadedAt: string) => void,
+  onUnloaded:    () => void,
+  onLoading:     () => void,
+  onWaitingGpu:  (info: { vram_needed_mb: number; vram_free_mb: number; retry_in_secs: number }) => void,
+): () => void {
+  const es = new EventSource(`${BASE}/model/events`);
+
+  es.addEventListener('model_ready',          (e) => onReady(JSON.parse(e.data).loaded_at));
+  es.addEventListener('model_unloaded',       ()  => onUnloaded());
+  es.addEventListener('model_loading',        ()  => onLoading());
+  es.addEventListener('model_waiting_for_gpu',(e) => onWaitingGpu(JSON.parse(e.data)));
+
+  es.onerror = () => {
+    // The browser reconnects automatically after a short delay; the exact
+    // retry timing is browser-defined. Log the error but don't tear down the listener.
+    console.warn('model/events connection dropped, reconnecting…');
+  };
+
+  return () => es.close(); // call this to clean up (e.g. in React useEffect return)
+}
+```
+
+> The server sends an SSE keepalive comment every 15 seconds so proxies don't close idle connections.
+
+---
+
+## 4. Submitting Jobs
+
+### 4.1 `POST /jobs`
+
+**Content-Type:** `multipart/form-data`
+
+| Field | Required | Type | Notes |
+|-------|----------|------|-------|
+| `audio` | ✅ | file | Any format ffmpeg understands: WAV, MP3, M4A, OGG, FLAC, MP4, MKV … No size limit. |
+| `language` | ❌ | string | ISO 639-1 code (`"en"`, `"it"`, `"fr"` …). Omit for auto-detection. |
+| `task` | ❌ | string | `"transcribe"` (default) or `"translate"` (→ English) |
+| `webhook_url` | ❌ | string | URL to POST the completed job to. Also registers the URL for model lifecycle webhooks. |
+
+**202 Accepted:**
+```json
+{ "job_id": "550e8400-e29b-41d4-a716-446655440000" }
+```
+
+```typescript
+async function submitJob(
+  audio: Blob,
+  opts: { language?: string; task?: 'transcribe' | 'translate'; webhookUrl?: string } = {}
+): Promise<string> {
+  const form = new FormData();
+  form.append('audio', audio, 'audio.wav');
+  if (opts.language)   form.append('language', opts.language);
+  if (opts.task)       form.append('task', opts.task);
+  if (opts.webhookUrl) form.append('webhook_url', opts.webhookUrl);
+
+  const resp = await fetch(`${BASE}/jobs`, { method: 'POST', body: form });
+  if (!resp.ok) throw await toApiError(resp);
+
+  const { job_id } = await resp.json();
+  return job_id;
+}
+```
+
+---
+
+### 4.2 Handling 503 Model Not Ready
+
+When the model isn't loaded, `POST /jobs` returns:
+
+```
+HTTP/1.1 503 Service Unavailable
+Retry-After: 30
+Content-Type: application/json
+```
+```json
+{
+  "error": "model_not_ready",
+  "state": "unloaded",
+  "retry_after_secs": 30
+}
+```
+
+**`retry_after_secs` by state:**
+
+| `state` | `retry_after_secs` | Why |
+|---------|-------------------|-----|
+| `unloaded` | 30 | Load just triggered; RTX 2080 + large-v3 loads in ~15–25s |
+| `loading` | 10 | Already loading; check again soon |
+| `waiting_for_gpu` | `GPU_POLL_INTERVAL_SECS` (default 30) | VRAM busy; retry later |
+
+> **Submitting a job when the model is `unloaded` automatically triggers a load.** You do NOT need a separate `POST /model/load` call for the normal happy path.
+
+---
+
+### 4.3 Retry pattern with auto-load
+
+```typescript
+async function submitWithRetry(
+  audio: Blob,
+  opts: { language?: string; task?: 'transcribe' | 'translate'; webhookUrl?: string } = {},
+  maxAttempts = 20,
+): Promise<string> {
+  const form = new FormData();
+  form.append('audio', audio, 'audio.wav');
+  if (opts.language)   form.append('language', opts.language);
+  if (opts.task)       form.append('task', opts.task);
+  if (opts.webhookUrl) form.append('webhook_url', opts.webhookUrl);
+
+  for (let attempt = 1; attempt <= maxAttempts; attempt++) {
+    const resp = await fetch(`${BASE}/jobs`, { method: 'POST', body: form });
+
+    if (resp.status === 202) {
+      const { job_id } = await resp.json();
+      return job_id;
+    }
+
+    if (resp.status === 503) {
+      const body = await resp.json();
+      const waitMs = (parseInt(resp.headers.get('Retry-After') ?? '15') + 1) * 1000;
+      console.log(`Model ${body.state} — waiting ${waitMs / 1000}s (attempt ${attempt}/${maxAttempts})`);
+      await sleep(waitMs);
+      continue;
+    }
+
+    throw await toApiError(resp);
+  }
+
+  throw new Error(`Model did not become ready after ${maxAttempts} attempts`);
+}
+```
+
+> **Tip:** For a better UX, subscribe to `GET /model/events` and wait for the `model_ready` event instead of sleeping blindly — then submit immediately when ready.
+
+---
+
+## 5. Tracking Job Progress
+
+Two patterns: **SSE** (real-time push) or **polling** (simpler). SSE is preferred for UX.
+
+### 5.1 `GET /jobs/:id` (poll)
+
+Returns the full job document. Poll every 2–5 seconds while `status` is `queued` or `running`.
+
+```json
+{
+  "id": "550e8400-e29b-41d4-a716-446655440000",
+  "status": "running",
+  "task": "transcribe",
+  "language": "en",
+  "progress": 42,
+  "duration_secs": 120.5,
+  "segments": [],
+  "created_at": "2026-05-10T14:00:00.000Z"
+}
+```
+
+When `status === "done"`:
+```json
+{
+  "id": "550e8400-e29b-41d4-a716-446655440000",
+  "status": "done",
+  "task": "transcribe",
+  "language": "en",
+  "progress": 100,
+  "duration_secs": 120.5,
+  "segments": [
+    { "index": 0, "start": 0.0, "end": 3.5, "text": "Hello, world.", "words": [] },
+    { "index": 1, "start": 3.6, "end": 7.2, "text": "How are you?", "words": [] }
+  ],
+  "created_at": "2026-05-10T14:00:00.000Z",
+  "completed_at": "2026-05-10T14:02:35.000Z"
+}
+```
+
+**Terminal statuses:** `done`, `failed`, `cancelled` — stop polling when you see one.
+
+---
+
+### 5.2 `GET /jobs/:id/stream` (SSE)
+
+Subscribe immediately after submission. The connection is held open and events are pushed as they occur.
+
+**Event types:**
+
+```
+event: progress
+data: {"type":"progress","percent":42,"chunk":3,"chunks_total":7}
+
+event: done
+data: {"type":"done","job":{...full Job object...}}
+
+event: error
+data: {"type":"error","message":"whisper inference failed: ..."}
+```
+
+- `percent` — overall job progress 0–100 (derived from chunks completed / total).
+- `chunk` / `chunks_total` — the audio is split on silences; each chunk is one whisper inference call.
+- If you open the stream after the job is already finished, you immediately receive a single `done` event.
+
+```typescript
+function streamJobProgress(
+  jobId: string,
+  onProgress: (percent: number, chunk: number, total: number) => void,
+  onDone:     (job: Job) => void,
+  onError:    (message: string) => void,
+): () => void {
+  const es = new EventSource(`${BASE}/jobs/${jobId}/stream`);
+
+  es.addEventListener('progress', (e) => {
+    const { percent, chunk, chunks_total } = JSON.parse(e.data);
+    onProgress(percent, chunk, chunks_total);
+  });
+
+  es.addEventListener('done', (e) => {
+    const { job } = JSON.parse(e.data);
+    es.close();
+    onDone(job);
+  });
+
+  es.addEventListener('error', (e) => {
+    // SSE protocol error vs application error — check if data exists
+    if ('data' in e) {
+      const { message } = JSON.parse((e as MessageEvent).data);
+      onError(message);
+    }
+    es.close();
+  });
+
+  return () => es.close();
+}
+```
+
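+> **Note:** Do not confuse the SSE `error` event (connection drop — no `data`) with the application `error` event (transcription failure — has `data`). The example above handles both: an application error is forwarded to `onError`, while a connection drop just closes the stream without invoking any callback (closing also stops the browser's automatic reconnection).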
+
+---
+
+## 6. Webhooks
+
+Webhooks are fired as HTTP `POST` requests with `Content-Type: application/json` to the `webhook_url` you supply at job submission. The server retries up to 3 times with exponential backoff (1s, 2s) on non-2xx responses.
+
+### 6.1 Job completion webhook
+
+Fired when a job reaches `done`, `failed`, or `cancelled`.  
+**Payload:** the full `Job` object (same as `GET /jobs/:id`).
+
+```json
+{
+  "id": "550e8400-e29b-41d4-a716-446655440000",
+  "status": "done",
+  "task": "transcribe",
+  "language": "en",
+  "progress": 100,
+  "duration_secs": 120.5,
+  "segments": [
+    { "index": 0, "start": 0.0, "end": 3.5, "text": "Hello, world.", "words": [] }
+  ],
+  "created_at": "2026-05-10T14:00:00.000Z",
+  "completed_at": "2026-05-10T14:02:35.000Z"
+}
+```
+
+### 6.2 Model lifecycle webhooks
+
+**Any URL that has ever appeared as a `webhook_url` in a job submission** also receives model lifecycle webhooks for the lifetime of the server process. This lets your backend know when the model comes up or goes down without polling.
+
+Only two events are delivered via webhook (the others are SSE-only):
+
+**Model ready:**
+```json
+{ "type": "model_ready", "loaded_at": "2026-05-10T14:00:00.000Z" }
+```
+
+**Model unloaded:**
+```json
+{ "type": "model_unloaded" }
+```
+
+**Express.js receiver example:**
+```typescript
+import express from 'express';
+const app = express();
+app.use(express.json());
+
+app.post('/webhooks/whisper', (req, res) => {
+  res.sendStatus(200); // acknowledge quickly — retries on non-2xx
+
+  const body = req.body;
+
+  if ('type' in body) {
+    // Model lifecycle event
+    if (body.type === 'model_ready') {
+      console.log('Whisper model ready at', body.loaded_at);
+    } else if (body.type === 'model_unloaded') {
+      console.log('Whisper model freed GPU memory');
+    }
+    return;
+  }
+
+  // Job completion event — body is a Job object
+  if (body.status === 'done') {
+    console.log(`Job ${body.id} done — ${body.segments.length} segments`);
+    processTranscript(body.segments);
+  } else if (body.status === 'failed') {
+    console.error(`Job ${body.id} failed:`, body.error);
+  }
+});
+```
+
+> **Distinguish job vs. model webhook:** Job payloads have `id` and `status` fields. Model payloads have a top-level `type` field (`model_ready` / `model_unloaded`).
+
+---
+
+## 7. Health Check
+
+```bash
+curl http://your-server:8080/health
+```
+
+```json
+{
+  "status": "ok",
+  "gpu_name": "NVIDIA GeForce RTX 2080",
+  "vram_total_mb": 8192,
+  "model": "large-v3",
+  "queue_depth": 2,
+  "model_state": "ready"
+}
+```
+
+| Field | Notes |
+|-------|-------|
+| `status` | Always `"ok"` when the server is reachable |
+| `gpu_name` | From `nvidia-smi`; `null` if unavailable |
+| `vram_total_mb` | Total VRAM in MiB; `null` if unavailable |
+| `model` | Model name string (server config) |
+| `queue_depth` | Jobs waiting (not counting the currently running one) |
+| `model_state` | `"unloaded"` / `"loading"` / `"waiting_for_gpu"` / `"ready"` |
+
+---
+
+## 8. Cancelling Jobs
+
+```bash
+curl -X DELETE http://your-server:8080/jobs/550e8400-e29b-41d4-a716-446655440000
+```
+
+- `200` — job marked `cancelled`. Returns the updated `Job` object.
+- `404` — job not found.
+- `409` — job already in a terminal state (`done` / `failed` / `cancelled`).
+
+> **Important:** whisper.cpp does not support mid-inference cancellation. If the job is currently `running`, the GPU inference will finish before the cancellation takes effect — the result is simply discarded and the status set to `cancelled`.
+
+---
+
+## 9. TypeScript Types
+
+```typescript
+type ModelStateTag = 'unloaded' | 'loading' | 'waiting_for_gpu' | 'ready';
+type JobStatus     = 'queued' | 'running' | 'done' | 'failed' | 'cancelled';
+type Task          = 'transcribe' | 'translate';
+
+interface ModelStatus {
+  state: ModelStateTag;
+  // ready only
+  loaded_at?: string;
+  // waiting_for_gpu only
+  vram_needed_mb?: number;
+  vram_free_mb?:   number;
+  retry_in_secs?:  number;
+  // always (when nvidia-smi available)
+  vram_used_mb?:   number;
+  vram_total_mb?:  number;
+}
+
+interface Word {
+  text:        string;
+  start:       number; // seconds
+  end:         number; // seconds
+  probability: number; // 0–1
+}
+
+interface Segment {
+  index: number;
+  start: number; // seconds
+  end:   number; // seconds
+  text:  string;
+  words: Word[];
+}
+
+interface Job {
+  id:            string;
+  status:        JobStatus;
+  task:          Task;
+  language?:     string;     // ISO 639-1; null until detected/set
+  progress:      number;     // 0–100
+  duration_secs?: number;   // null until processing starts
+  segments:      Segment[];  // populated when status = 'done'
+  error?:        string;     // populated when status = 'failed'
+  webhook_url?:  string;
+  filename?:     string;
+  created_at:    string;     // ISO 8601
+  completed_at?: string;     // ISO 8601; null until terminal
+}
+
+// SSE payloads from GET /jobs/:id/stream
+type JobSseEvent =
+  | { type: 'progress'; percent: number; chunk: number; chunks_total: number }
+  | { type: 'done';     job: Job }
+  | { type: 'error';    message: string };
+
+// SSE payloads from GET /model/events
+type ModelSseEvent =
+  | { type: 'model_loading' }
+  | { type: 'model_ready';           loaded_at: string }
+  | { type: 'model_unloaded' }
+  | { type: 'model_waiting_for_gpu'; vram_needed_mb: number; vram_free_mb: number; retry_in_secs: number };
+
+// Webhook payload — union of job completion and model lifecycle events
+type WebhookPayload = Job | { type: 'model_ready'; loaded_at: string } | { type: 'model_unloaded' };
+
+// Helpers
+function isJobPayload(p: WebhookPayload): p is Job {
+  return 'id' in p && 'status' in p;
+}
+function isModelPayload(p: WebhookPayload): p is { type: string } {
+  return 'type' in p;
+}
+```
+
+---
+
+## 10. React Hooks
+
+```typescript
+// useModelStatus.ts
+import { useEffect, useState } from 'react';
+
+const BASE = process.env.NEXT_PUBLIC_WHISPER_BASE_URL ?? '';
+
+export function useModelStatus() {
+  const [status, setStatus] = useState<ModelStatus | null>(null);
+
+  // Initial fetch
+  useEffect(() => {
+    fetch(`${BASE}/model/status`)
+      .then(r => r.json())
+      .then(setStatus)
+      .catch(console.error);
+  }, []);
+
+  // Live updates via SSE
+  useEffect(() => {
+    const es = new EventSource(`${BASE}/model/events`);
+
+    const refresh = () => {
+      fetch(`${BASE}/model/status`)
+        .then(r => r.json())
+        .then(setStatus)
+        .catch(console.error);
+    };
+
+    es.addEventListener('model_loading',        refresh);
+    es.addEventListener('model_ready',          refresh);
+    es.addEventListener('model_unloaded',       refresh);
+    es.addEventListener('model_waiting_for_gpu',refresh);
+    es.onerror = () => console.warn('model/events reconnecting…');
+
+    return () => es.close();
+  }, []);
+
+  return status;
+}
+```
+
+```typescript
+// useJobStream.ts
+import { useEffect, useRef, useState } from 'react';
+
+type ProgressState = {
+  percent: number;
+  chunk: number;
+  chunks_total: number;
+};
+
+export function useJobStream(jobId: string | null) {
+  const [progress, setProgress] = useState<ProgressState | null>(null);
+  const [job,      setJob]      = useState<Job | null>(null);
+  const [error,    setError]    = useState<string | null>(null);
+  const esRef = useRef<EventSource | null>(null);
+
+  useEffect(() => {
+    if (!jobId) return;
+
+    esRef.current?.close();
+    setProgress(null); setJob(null); setError(null);
+
+    const es = new EventSource(`${BASE}/jobs/${jobId}/stream`);
+    esRef.current = es;
+
+    es.addEventListener('progress', (e) => {
+      setProgress(JSON.parse(e.data));
+    });
+
+    es.addEventListener('done', (e) => {
+      setJob(JSON.parse(e.data).job);
+      setProgress({ percent: 100, chunk: 0, chunks_total: 0 });
+      es.close();
+    });
+
+    es.addEventListener('error', (e) => {
+      if ('data' in e) setError(JSON.parse((e as MessageEvent).data).message);
+      es.close();
+    });
+
+    return () => es.close();
+  }, [jobId]);
+
+  return { progress, job, error };
+}
+```
+
+```typescript
+// useTranscribe.ts — ties it all together
+import { useState, useCallback } from 'react';
+
+export function useTranscribe() {
+  const [jobId,  setJobId]  = useState<string | null>(null);
+  const [loading, setLoading] = useState(false);
+  const [error,  setError]  = useState<string | null>(null);
+
+  const submit = useCallback(async (
+    audio: Blob,
+    opts: { language?: string; task?: Task } = {}
+  ) => {
+    setLoading(true);
+    setError(null);
+    setJobId(null);
+
+    try {
+      const id = await submitWithRetry(audio, opts); // see §4.3
+      setJobId(id);
+    } catch (e) {
+      setError(String(e));
+    } finally {
+      setLoading(false);
+    }
+  }, []);
+
+  const { progress, job, error: streamError } = useJobStream(jobId);
+
+  return { submit, loading, jobId, progress, job, error: error ?? streamError };
+}
+```
+
+---
+
+## 11. Complete Integration Example
+
+A full transcription flow with model warm-up indicator and real-time progress:
+
+```typescript
+// whisperClient.ts
+const BASE = process.env.NEXT_PUBLIC_WHISPER_BASE_URL ?? '';
+
+export class WhisperClient {
+  /** Wait for the model to be ready, triggering a load if needed. */
+  async ensureModelReady(timeoutMs = 120_000): Promise<void> {
+    const status = await this.getModelStatus();
+    if (status.state === 'ready') return;
+
+    // Trigger load (idempotent)
+    await fetch(`${BASE}/model/load`, { method: 'POST' });
+
+    return new Promise((resolve, reject) => {
+      const deadline = setTimeout(() => {
+        es.close();
+        reject(new Error('Model did not become ready within timeout'));
+      }, timeoutMs);
+
+      const es = new EventSource(`${BASE}/model/events`);
+      es.addEventListener('model_ready', () => {
+        clearTimeout(deadline);
+        es.close();
+        resolve();
+      });
+      es.onerror = () => {
+        // Reconnects automatically; don't reject on transient drops.
+      };
+    });
+  }
+
+  async getModelStatus(): Promise<ModelStatus> {
+    const r = await fetch(`${BASE}/model/status`);
+    if (!r.ok) throw new Error(`/model/status ${r.status}`);
+    return r.json();
+  }
+
+  async submit(
+    audio: Blob,
+    opts: { language?: string; task?: Task; webhookUrl?: string } = {}
+  ): Promise<string> {
+    return submitWithRetry(audio, opts);
+  }
+
+  streamProgress(
+    jobId: string,
+    callbacks: {
+      onProgress?: (p: { percent: number; chunk: number; total: number }) => void;
+      onDone?:     (job: Job) => void;
+      onError?:    (msg: string) => void;
+    }
+  ): () => void {
+    const es = new EventSource(`${BASE}/jobs/${jobId}/stream`);
+
+    es.addEventListener('progress', (e) => {
+      const d = JSON.parse(e.data);
+      callbacks.onProgress?.({ percent: d.percent, chunk: d.chunk, total: d.chunks_total });
+    });
+
+    es.addEventListener('done', (e) => {
+      callbacks.onDone?.(JSON.parse(e.data).job);
+      es.close();
+    });
+
+    es.addEventListener('error', (e) => {
+      if ('data' in e) callbacks.onError?.(JSON.parse((e as MessageEvent).data).message);
+      es.close();
+    });
+
+    return () => es.close();
+  }
+
+  async transcribe(
+    audio: Blob,
+    opts: {
+      language?: string;
+      task?: Task;
+      webhookUrl?: string;
+      onProgress?: (percent: number) => void;
+    } = {}
+  ): Promise<Job> {
+    const jobId = await this.submit(audio, opts);
+
+    return new Promise((resolve, reject) => {
+      this.streamProgress(jobId, {
+        onProgress: (p) => opts.onProgress?.(p.percent),
+        onDone:     resolve,
+        onError:    (msg) => reject(new Error(msg)),
+      });
+    });
+  }
+}
+
+// Usage
+const whisper = new WhisperClient();
+
+const job = await whisper.transcribe(audioBlob, {
+  language: 'en',
+  onProgress: (pct) => console.log(`${pct}%`),
+});
+
+for (const seg of job.segments) {
+  console.log(`[${seg.start.toFixed(1)}s → ${seg.end.toFixed(1)}s] ${seg.text}`);
+}
+```
+
+---
+
+## 12. Error Reference
+
+All error responses follow this shape:
+
+```json
+{ "error": "human-readable message" }
+```
+
+With the following additions for specific errors:
+
+**503 model_not_ready:**
+```json
+{ "error": "model_not_ready", "state": "loading", "retry_after_secs": 10 }
+```
+
+| HTTP | `error` value | When | What to do |
+|------|--------------|------|-----------|
+| 400 | `"missing 'audio' field"` | `audio` not in form | Fix the form |
+| 400 | `"audio field is empty"` | Zero-byte file uploaded | Fix the file |
+| 400 | `"task must be 'transcribe' or 'translate'"` | Bad `task` value | Fix the value |
+| 400 | `"multipart error: …"` | Malformed request | Check content-type header |
+| 404 | `"job … not found"` | Unknown job ID | Check the ID |
+| 409 | `"job … is already in terminal state …"` | Cancelling a finished job | No action needed |
+| 503 | `"model_not_ready"` | Model not loaded | See §4.2 — retry with `Retry-After` |
+| 500 | `"worker channel closed"` | Server crash | Contact server admin |
+
+**Network / SSE errors:**
+
+- `EventSource` `onerror` with no `.data` = connection dropped. The browser reconnects automatically — no action needed unless you want to show a UI indicator.
+- HTTP 502/504 from a reverse proxy — or a 503 that does not carry the `model_not_ready` JSON body described in §4.2 — usually means the container is restarting. Wait and retry.
+
+---
+
+*Last updated: 2026-05-08. Corresponds to whisper-server v0.1.0 commit `d014826`.*