feat: model-on-demand lifecycle — retry on 503, live status pill, warming indicator

- whisper.ts: add getModelStatus(); fix submitJob() to retry on 503 using
  Retry-After header instead of throwing; optional onModelWaiting callback
  lets the pipeline surface model state to the UI during the wait
- pipeline.ts: pass onModelWaiting callback → emits model_warming SSE event
  so the job detail page can show 'Warming up model…' while waiting
- types.ts: add ModelStateTag union and ModelStatus interface
- api/model/status: GET route proxies whisper /model/status (falls back to
  {state:'unloaded'} if whisper unreachable)
- api/model/events: GET route relays whisper SSE stream to the browser;
  AbortController tied to request.signal cleans up on disconnect
- layout.svelte: status pill is now live — initial fetch + EventSource on
  /api/model/events; dot colour + label reflect real model state with a
  pulsing animation while loading or waiting_for_gpu
- jobs/[id]/+page.svelte: handle model_warming event type → show a yellow
  'Warming up model…' sub-label with spinner inside the progress card
- whisper.test.ts: update submitJob mocks to status:202 to match real API

Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
This commit is contained in:
Giancarmine Salucci
2026-05-09 00:08:21 +02:00
parent ffd5d48c0d
commit b90d57984c
8 changed files with 201 additions and 19 deletions

View File

@@ -1,17 +1,34 @@
import { execFile } from 'child_process';
import { promisify } from 'util';
const execFileAsync = promisify(execFile);
import type { ModelStatus } from '$lib/types.js';
/**
 * Resolve the base URL of the whisper service.
 *
 * Reads `WHISPER_URL` from the environment on every call; when the variable
 * is not defined, the local default `http://localhost:8080` is returned.
 */
function whisperUrl() {
  const { WHISPER_URL: configuredUrl } = process.env;
  return configuredUrl ?? 'http://localhost:8080';
}
/** Return a promise that settles once `ms` milliseconds have elapsed on a timer. */
const sleep = (ms: number) =>
  new Promise((resolve) => {
    setTimeout(resolve, ms);
  });
/**
 * Query whisper-rtx2080 for the current model state.
 *
 * Issues a GET to `<whisper base URL>/model/status`, aborting the request
 * after 5000 ms.
 *
 * @returns The JSON body of the response, typed as `ModelStatus` (the payload
 *   is not validated at runtime).
 * @throws Error when the response status is not 2xx; request failures and the
 *   abort-on-timeout propagate from the underlying fetch call.
 */
export async function getModelStatus(): Promise<ModelStatus> {
  // node-fetch is loaded lazily, at call time, via a dynamic import.
  const nodeFetch = (await import('node-fetch')).default;
  const endpoint = `${whisperUrl()}/model/status`;
  const response = await nodeFetch(endpoint, { signal: AbortSignal.timeout(5000) });
  if (response.ok) {
    return response.json() as Promise<ModelStatus>;
  }
  throw new Error(`/model/status returned ${response.status}`);
}
/**
* Submit an audio file to whisper-rtx2080. Returns the whisper job id.
*
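* Handles 503 (model not ready) transparently: waits for the delay the server
* advises (`retry_after_secs` in the JSON body, else the `Retry-After` header,
* else 15 s) plus one extra second, then retries until the model loads or
* maxAttempts is exhausted.
* Calls `onModelWaiting` on each 503 so the caller can surface the wait to the user.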
*/
export async function submitJob(
wavPath: string,
webhookUrl: string,
language?: string
language?: string,
onModelWaiting?: (state: string, retryAfterSecs: number) => void,
maxAttempts = 20
): Promise<string> {
const FormData = (await import('form-data')).default;
const { createReadStream } = await import('fs');
@@ -23,19 +40,35 @@ export async function submitJob(
form.append('webhook_url', webhookUrl);
if (language) form.append('language', language);
const res = await fetch(`${whisperUrl()}/jobs`, {
method: 'POST',
body: form,
headers: form.getHeaders()
});
for (let attempt = 1; attempt <= maxAttempts; attempt++) {
const res = await fetch(`${whisperUrl()}/jobs`, {
method: 'POST',
body: form,
headers: form.getHeaders()
});
if (res.status === 202) {
const json = (await res.json()) as { job_id: string };
return json.job_id;
}
if (res.status === 503) {
const body = (await res.json().catch(() => ({}))) as {
state?: string;
retry_after_secs?: number;
};
const state = body.state ?? 'unloaded';
const waitSecs = body.retry_after_secs ?? parseInt(res.headers.get('Retry-After') ?? '15');
onModelWaiting?.(state, waitSecs);
await sleep((waitSecs + 1) * 1000);
continue;
}
if (!res.ok) {
const text = await res.text();
throw new Error(`whisper /jobs returned ${res.status}: ${text}`);
}
const json = (await res.json()) as { job_id: string };
return json.job_id;
throw new Error(`Whisper model did not become ready after ${maxAttempts} attempts`);
}
/** Open an SSE stream from whisper and call onProgress/onDone callbacks. */