From b90d57984c0936721b1baad8890ce55268481d22 Mon Sep 17 00:00:00 2001 From: Giancarmine Salucci Date: Sat, 9 May 2026 00:08:21 +0200 Subject: [PATCH] =?UTF-8?q?feat:=20model-on-demand=20lifecycle=20=E2=80=94?= =?UTF-8?q?=20retry=20on=20503,=20live=20status=20pill,=20warming=20indica?= =?UTF-8?q?tor?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - whisper.ts: add getModelStatus(); fix submitJob() to retry on 503 using Retry-After header instead of throwing; optional onModelWaiting callback lets the pipeline surface model state to the UI during the wait - pipeline.ts: pass onModelWaiting callback → emits model_warming SSE event so the job detail page can show 'Warming up model…' while waiting - types.ts: add ModelStateTag union and ModelStatus interface - api/model/status: GET route proxies whisper /model/status (falls back to {state:'unloaded'} if whisper unreachable) - api/model/events: GET route relays whisper SSE stream to the browser; AbortController tied to request.signal cleans up on disconnect - layout.svelte: status pill is now live — initial fetch + EventSource on /api/model/events; dot colour + label reflect real model state with a pulsing animation while loading or waiting_for_gpu - jobs/[id]/+page.svelte: handle model_warming event type → show a yellow 'Warming up model…' sub-label with spinner inside the progress card - whisper.test.ts: update submitJob mocks to status:202 to match real API Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- src/lib/server/pipeline.ts | 4 +- src/lib/server/whisper.ts | 61 ++++++++++++++++++++------ src/lib/types.ts | 12 +++++ src/routes/+layout.svelte | 57 ++++++++++++++++++++++-- src/routes/api/model/events/+server.ts | 43 ++++++++++++++++++ src/routes/api/model/status/+server.ts | 14 ++++++ src/routes/jobs/[id]/+page.svelte | 23 ++++++++++ src/tests/whisper.test.ts | 6 +++ 8 files changed, 201 insertions(+), 19 deletions(-) create mode 100644 src/routes/api/model/events/+server.ts create mode 100644 src/routes/api/model/status/+server.ts diff --git a/src/lib/server/pipeline.ts b/src/lib/server/pipeline.ts index 6e3581e..cdd724a 100644 --- a/src/lib/server/pipeline.ts +++ b/src/lib/server/pipeline.ts @@ -129,7 +129,9 @@ async function runJob( emitProgress(jobId, { type: 'status', status: 'transcribing' }); const webhookUrl = `${WEBHOOK_BASE_URL}/api/webhook/${jobId}`; - const whisperJobId = await submitJob(wavPath, webhookUrl, language); + const whisperJobId = await submitJob(wavPath, webhookUrl, language, (state, retryAfterSecs) => { + emitProgress(jobId, { type: 'model_warming', state, retryAfterSecs }); + }); updateJob({ id: jobId, whisperJobId }); // ── 5. Open SSE for live progress (non-blocking relay) ─────────────── diff --git a/src/lib/server/whisper.ts b/src/lib/server/whisper.ts index 0e107fc..8591176 100644 --- a/src/lib/server/whisper.ts +++ b/src/lib/server/whisper.ts @@ -1,17 +1,34 @@ -import { execFile } from 'child_process'; -import { promisify } from 'util'; - -const execFileAsync = promisify(execFile); +import type { ModelStatus } from '$lib/types.js'; function whisperUrl() { return process.env.WHISPER_URL ?? 'http://localhost:8080'; } -/** Submit an audio file to whisper-rtx2080. Returns the whisper job id. */ +const sleep = (ms: number) => new Promise((r) => setTimeout(r, ms)); + +/** Get the current model state from whisper-rtx2080. */ +export async function getModelStatus(): Promise { + const { default: fetch } = await import('node-fetch'); + const res = await fetch(`${whisperUrl()}/model/status`, { + signal: AbortSignal.timeout(5000) + }); + if (!res.ok) throw new Error(`/model/status returned ${res.status}`); + return res.json() as Promise; +} + +/** + * Submit an audio file to whisper-rtx2080. Returns the whisper job id. + * + * Handles 503 (model not ready) transparently: retries using the + * `Retry-After` header until the model loads or maxAttempts is exhausted. + * Calls `onModelWaiting` on each 503 so the caller can surface the wait to the user. + */ export async function submitJob( wavPath: string, webhookUrl: string, - language?: string + language?: string, + onModelWaiting?: (state: string, retryAfterSecs: number) => void, + maxAttempts = 20 ): Promise { const FormData = (await import('form-data')).default; const { createReadStream } = await import('fs'); @@ -23,19 +40,35 @@ export async function submitJob( form.append('webhook_url', webhookUrl); if (language) form.append('language', language); - const res = await fetch(`${whisperUrl()}/jobs`, { - method: 'POST', - body: form, - headers: form.getHeaders() - }); + for (let attempt = 1; attempt <= maxAttempts; attempt++) { + const res = await fetch(`${whisperUrl()}/jobs`, { + method: 'POST', + body: form, + headers: form.getHeaders() + }); + + if (res.status === 202) { + const json = (await res.json()) as { job_id: string }; + return json.job_id; + } + + if (res.status === 503) { + const body = (await res.json().catch(() => ({}))) as { + state?: string; + retry_after_secs?: number; + }; + const state = body.state ?? 'unloaded'; + const waitSecs = body.retry_after_secs ?? parseInt(res.headers.get('Retry-After') ?? '15'); + onModelWaiting?.(state, waitSecs); + await sleep((waitSecs + 1) * 1000); + continue; + } - if (!res.ok) { const text = await res.text(); throw new Error(`whisper /jobs returned ${res.status}: ${text}`); } - const json = (await res.json()) as { job_id: string }; - return json.job_id; + throw new Error(`Whisper model did not become ready after ${maxAttempts} attempts`); } /** Open an SSE stream from whisper and call onProgress/onDone callbacks. */ diff --git a/src/lib/types.ts b/src/lib/types.ts index fe80083..a646182 100644 --- a/src/lib/types.ts +++ b/src/lib/types.ts @@ -1,5 +1,17 @@ export type AudioMode = 'auto' | 'standard' | 'aggressive' | 'none'; +export type ModelStateTag = 'unloaded' | 'loading' | 'waiting_for_gpu' | 'ready'; + +export interface ModelStatus { + state: ModelStateTag; + loaded_at?: string; + vram_needed_mb?: number; + vram_free_mb?: number; + retry_in_secs?: number; + vram_used_mb?: number; + vram_total_mb?: number; +} + export type JobStatus = 'pending' | 'downloading' | 'preparing' | 'transcribing' | 'processing' | 'done' | 'failed' | 'cancelled'; export interface Segment { diff --git a/src/routes/+layout.svelte b/src/routes/+layout.svelte index f8e0682..27283ef 100644 --- a/src/routes/+layout.svelte +++ b/src/routes/+layout.svelte @@ -1,9 +1,10 @@