feat: model-on-demand lifecycle — retry on 503, live status pill, warming indicator
- whisper.ts: add getModelStatus(); fix submitJob() to retry on 503 instead of
throwing, waiting for the body's retry_after_secs (falling back to the Retry-After
header, then 15 s); optional onModelWaiting callback
lets the pipeline surface model state to the UI during the wait
- pipeline.ts: pass onModelWaiting callback → emits model_warming SSE event
so the job detail page can show 'Warming up model…' while waiting
- types.ts: add ModelStateTag union and ModelStatus interface
- api/model/status: GET route proxies whisper /model/status (falls back to
{state:'unloaded'} if whisper unreachable)
- api/model/events: GET route relays whisper SSE stream to the browser;
AbortController tied to request.signal cleans up on disconnect
- layout.svelte: status pill is now live — initial fetch + EventSource on
/api/model/events; dot colour + label reflect real model state with a
pulsing animation while loading or waiting_for_gpu
- jobs/[id]/+page.svelte: handle model_warming event type → show a yellow
'Warming up model…' sub-label with spinner inside the progress card
- whisper.test.ts: update submitJob mocks to status:202 to match real API
Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
This commit is contained in:
@@ -129,7 +129,9 @@ async function runJob(
|
||||
emitProgress(jobId, { type: 'status', status: 'transcribing' });
|
||||
|
||||
const webhookUrl = `${WEBHOOK_BASE_URL}/api/webhook/${jobId}`;
|
||||
const whisperJobId = await submitJob(wavPath, webhookUrl, language);
|
||||
const whisperJobId = await submitJob(wavPath, webhookUrl, language, (state, retryAfterSecs) => {
|
||||
emitProgress(jobId, { type: 'model_warming', state, retryAfterSecs });
|
||||
});
|
||||
updateJob({ id: jobId, whisperJobId });
|
||||
|
||||
// ── 5. Open SSE for live progress (non-blocking relay) ───────────────
|
||||
|
||||
@@ -1,17 +1,34 @@
|
||||
import { execFile } from 'child_process';
|
||||
import { promisify } from 'util';
|
||||
|
||||
const execFileAsync = promisify(execFile);
|
||||
import type { ModelStatus } from '$lib/types.js';
|
||||
|
||||
/**
 * Base URL of the whisper service.
 *
 * Reads `WHISPER_URL` from the environment on every call and falls back to
 * `http://localhost:8080` only when the variable is unset (an empty string
 * is returned as-is).
 */
function whisperUrl(): string {
	const { WHISPER_URL: configured } = process.env;
	return configured ?? 'http://localhost:8080';
}
|
||||
|
||||
/** Submit an audio file to whisper-rtx2080. Returns the whisper job id. */
|
||||
/** Returns a promise that resolves once `ms` milliseconds have elapsed. */
const sleep = (ms: number) =>
	new Promise((resolve) => {
		setTimeout(resolve, ms);
	});
|
||||
|
||||
/**
 * Get the current model state from whisper-rtx2080.
 *
 * Issues a GET to `<whisper base URL>/model/status`, aborted after 5 seconds.
 *
 * @returns The parsed JSON body. It is cast to `ModelStatus` without runtime
 *   validation, so the shape is trusted as sent by the service.
 * @throws Error when the response status is not OK; network, timeout and
 *   JSON-parse failures propagate unchanged.
 */
export async function getModelStatus(): Promise<ModelStatus> {
	const { default: fetch } = await import('node-fetch');

	const endpoint = `${whisperUrl()}/model/status`;
	const timeout = AbortSignal.timeout(5000);
	const response = await fetch(endpoint, { signal: timeout });

	if (response.ok) {
		return response.json() as Promise<ModelStatus>;
	}
	throw new Error(`/model/status returned ${response.status}`);
}
|
||||
|
||||
/**
|
||||
* Submit an audio file to whisper-rtx2080. Returns the whisper job id.
|
||||
*
|
||||
* Handles 503 (model not ready) transparently: retries using the
|
||||
* `Retry-After` header until the model loads or maxAttempts is exhausted.
|
||||
* Calls `onModelWaiting` on each 503 so the caller can surface the wait to the user.
|
||||
*/
|
||||
export async function submitJob(
|
||||
wavPath: string,
|
||||
webhookUrl: string,
|
||||
language?: string
|
||||
language?: string,
|
||||
onModelWaiting?: (state: string, retryAfterSecs: number) => void,
|
||||
maxAttempts = 20
|
||||
): Promise<string> {
|
||||
const FormData = (await import('form-data')).default;
|
||||
const { createReadStream } = await import('fs');
|
||||
@@ -23,19 +40,35 @@ export async function submitJob(
|
||||
form.append('webhook_url', webhookUrl);
|
||||
if (language) form.append('language', language);
|
||||
|
||||
for (let attempt = 1; attempt <= maxAttempts; attempt++) {
|
||||
const res = await fetch(`${whisperUrl()}/jobs`, {
|
||||
method: 'POST',
|
||||
body: form,
|
||||
headers: form.getHeaders()
|
||||
});
|
||||
|
||||
if (!res.ok) {
|
||||
if (res.status === 202) {
|
||||
const json = (await res.json()) as { job_id: string };
|
||||
return json.job_id;
|
||||
}
|
||||
|
||||
if (res.status === 503) {
|
||||
const body = (await res.json().catch(() => ({}))) as {
|
||||
state?: string;
|
||||
retry_after_secs?: number;
|
||||
};
|
||||
const state = body.state ?? 'unloaded';
|
||||
const waitSecs = body.retry_after_secs ?? parseInt(res.headers.get('Retry-After') ?? '15');
|
||||
onModelWaiting?.(state, waitSecs);
|
||||
await sleep((waitSecs + 1) * 1000);
|
||||
continue;
|
||||
}
|
||||
|
||||
const text = await res.text();
|
||||
throw new Error(`whisper /jobs returned ${res.status}: ${text}`);
|
||||
}
|
||||
|
||||
const json = (await res.json()) as { job_id: string };
|
||||
return json.job_id;
|
||||
throw new Error(`Whisper model did not become ready after ${maxAttempts} attempts`);
|
||||
}
|
||||
|
||||
/** Open an SSE stream from whisper and call onProgress/onDone callbacks. */
|
||||
|
||||
@@ -1,5 +1,17 @@
|
||||
export type AudioMode = 'auto' | 'standard' | 'aggressive' | 'none';
|
||||
|
||||
/** The model lifecycle states a status payload can carry. */
export type ModelStateTag =
	| 'unloaded'
	| 'loading'
	| 'waiting_for_gpu'
	| 'ready';

/**
 * Model status payload, as parsed from the whisper `/model/status` response.
 *
 * Only `state` is required. NOTE(review): which optional fields accompany
 * which state is not visible from this file — confirm against the whisper API.
 */
export interface ModelStatus {
	/** Current lifecycle state. */
	state: ModelStateTag;
	/** Presumably a timestamp string for when the model was loaded — TODO confirm format. */
	loaded_at?: string;
	/** Presumably seconds until the service retries loading — TODO confirm. */
	retry_in_secs?: number;
	// VRAM figures; the `_mb` suffix suggests megabytes — TODO confirm.
	vram_needed_mb?: number;
	vram_free_mb?: number;
	vram_used_mb?: number;
	vram_total_mb?: number;
}
|
||||
|
||||
export type JobStatus = 'pending' | 'downloading' | 'preparing' | 'transcribing' | 'processing' | 'done' | 'failed' | 'cancelled';
|
||||
|
||||
export interface Segment {
|
||||
|
||||
@@ -1,9 +1,10 @@
|
||||
<script lang="ts">
|
||||
import '../app.css';
|
||||
import { onMount } from 'svelte';
|
||||
import { onMount, onDestroy } from 'svelte';
|
||||
import { browser } from '$app/environment';
|
||||
import { page } from '$app/stores';
|
||||
import { accent } from '$lib/accent.js';
|
||||
import type { ModelStatus } from '$lib/types.js';
|
||||
|
||||
let { children } = $props();
|
||||
|
||||
@@ -11,8 +12,43 @@
|
||||
// The store subscriber handles everything; just subscribing here keeps it alive.
|
||||
$effect(() => { void $accent; });
|
||||
|
||||
// ── Model status ───────────────────────────────────────
|
||||
let modelStatus = $state<ModelStatus>({ state: 'unloaded' });
|
||||
let modelEs: EventSource | null = null;
|
||||
|
||||
/**
 * Re-fetch the model status from `/api/model/status` and store it in
 * `modelStatus`. Fire-and-forget: returns immediately, and any fetch or
 * JSON-parse failure is ignored so the previous status stays on screen.
 */
function refreshModelStatus() {
	void (async () => {
		try {
			const response = await fetch('/api/model/status');
			const latest = await response.json();
			modelStatus = latest as ModelStatus;
		} catch {
			// Best-effort refresh: keep showing the last known status.
		}
	})();
}
|
||||
|
||||
/**
 * (Re)open the EventSource on `/api/model/events`. Any previously opened
 * source is closed first. Each model lifecycle event triggers a status
 * refresh rather than being parsed itself.
 */
function subscribeModelEvents() {
	if (modelEs) modelEs.close();

	const source = new EventSource('/api/model/events');
	modelEs = source;

	const lifecycleEvents = [
		'model_loading',
		'model_ready',
		'model_unloaded',
		'model_waiting_for_gpu'
	];
	for (const eventName of lifecycleEvents) {
		source.addEventListener(eventName, () => refreshModelStatus());
	}

	source.onerror = () => { /* browser reconnects automatically */ };
}
|
||||
|
||||
/**
 * Presentation of the status pill per model state: dot colour, label text,
 * and whether the dot pulses.
 *
 * Keyed by the `ModelStatus` state union rather than `string`, so adding a
 * new state tag without a matching entry here is a compile error.
 */
const modelStateMeta: Record<
	ModelStatus['state'],
	{ dot: string; label: string; pulse: boolean }
> = {
	unloaded: { dot: 'var(--text-dim)', label: 'model unloaded', pulse: false },
	loading: { dot: '#f0b429', label: 'model loading…', pulse: true },
	waiting_for_gpu: { dot: '#f97316', label: 'waiting for GPU', pulse: true },
	ready: { dot: '#5dd47a', label: 'whisper-large-v3', pulse: false }
};

// The status payload is cast, not validated, so an unexpected `state` can
// still arrive at runtime; fall back to the "unloaded" presentation then.
const modelMeta = $derived(
	modelStateMeta[modelStatus.state] ?? modelStateMeta.unloaded
);
|
||||
|
||||
// Push notification setup
|
||||
onMount(async () => {
|
||||
refreshModelStatus();
|
||||
subscribeModelEvents();
|
||||
|
||||
if (!browser || !('serviceWorker' in navigator) || !('PushManager' in window)) return;
|
||||
try {
|
||||
const reg = await navigator.serviceWorker.ready;
|
||||
@@ -42,6 +78,8 @@
|
||||
}
|
||||
});
|
||||
|
||||
onDestroy(() => modelEs?.close());
|
||||
|
||||
function urlBase64ToUint8Array(base64: string): Uint8Array {
|
||||
const pad = '='.repeat((4 - (base64.length % 4)) % 4);
|
||||
const b64 = (base64 + pad).replace(/-/g, '+').replace(/_/g, '/');
|
||||
@@ -135,8 +173,12 @@
|
||||
|
||||
<!-- Status dot -->
|
||||
<div class="status-pill">
|
||||
<div class="status-dot"></div>
|
||||
<span>whisper-large-v3</span>
|
||||
<div
|
||||
class="status-dot"
|
||||
class:pulse={modelMeta.pulse}
|
||||
style="background: {modelMeta.dot}"
|
||||
></div>
|
||||
<span>{modelMeta.label}</span>
|
||||
</div>
|
||||
</nav>
|
||||
|
||||
@@ -268,8 +310,15 @@
|
||||
width: 6px;
|
||||
height: 6px;
|
||||
border-radius: 3px;
|
||||
background: #5dd47a;
|
||||
flex-shrink: 0;
|
||||
transition: background 0.4s;
|
||||
}
|
||||
.status-dot.pulse {
|
||||
animation: dot-pulse 1.4s ease-in-out infinite;
|
||||
}
|
||||
@keyframes dot-pulse {
|
||||
0%, 100% { opacity: 1; }
|
||||
50% { opacity: 0.3; }
|
||||
}
|
||||
|
||||
/* ── Main content ─────────────────────────────────────── */
|
||||
|
||||
43
src/routes/api/model/events/+server.ts
Normal file
43
src/routes/api/model/events/+server.ts
Normal file
@@ -0,0 +1,43 @@
|
||||
const WHISPER_URL = process.env.WHISPER_URL ?? 'http://localhost:8080';
|
||||
|
||||
/**
 * Relay the whisper `/model/events` SSE stream to the browser.
 *
 * Upstream bytes are forwarded unchanged. The upstream request is aborted
 * when the incoming request is aborted or when the response stream is
 * cancelled by its consumer.
 *
 * If whisper is unreachable, answers with a non-2xx status, or has no body,
 * the relayed stream is simply closed empty (still served with status 200 and
 * SSE headers).
 *
 * @param event - Request event; only `request.signal` is read.
 * @returns A streaming `text/event-stream` response.
 */
export async function GET({ request }: { request: Request }): Promise<Response> {
	const { default: fetch } = await import('node-fetch');

	const ac = new AbortController();
	if (request.signal.aborted) {
		// An 'abort' listener would never fire for a signal that is already aborted.
		ac.abort();
	} else {
		request.signal.addEventListener('abort', () => ac.abort(), { once: true });
	}

	const stream = new ReadableStream<Uint8Array>({
		async start(controller) {
			try {
				const upstream = await fetch(`${WHISPER_URL}/model/events`, {
					signal: ac.signal as AbortSignal
				});
				// Do not relay an error page as if it were an event stream.
				if (!upstream.ok || !upstream.body) return;

				for await (const chunk of upstream.body) {
					if (ac.signal.aborted) break;
					controller.enqueue(chunk instanceof Buffer ? chunk : Buffer.from(String(chunk)));
				}
			} catch {
				// upstream closed, client disconnected, or whisper unreachable — all fine
			} finally {
				// Release the upstream connection on every exit path (no-op if already done).
				ac.abort();
				try {
					// Single close point; throws if the consumer already cancelled the stream.
					controller.close();
				} catch {
					// Stream was already closed or cancelled — nothing left to do.
				}
			}
		},
		cancel() {
			ac.abort();
		}
	});

	return new Response(stream, {
		headers: {
			'Content-Type': 'text/event-stream',
			'Cache-Control': 'no-cache',
			Connection: 'keep-alive',
			'X-Accel-Buffering': 'no'
		}
	});
}
|
||||
14
src/routes/api/model/status/+server.ts
Normal file
14
src/routes/api/model/status/+server.ts
Normal file
@@ -0,0 +1,14 @@
|
||||
import { getModelStatus } from '$lib/server/whisper.js';
|
||||
|
||||
/**
 * Proxy the whisper model status as JSON.
 *
 * Never fails: if `getModelStatus()` rejects for any reason, the response
 * body is `{"state":"unloaded"}` instead.
 */
export async function GET() {
	const payload = await getModelStatus().catch(() => ({ state: 'unloaded' }));

	return new Response(JSON.stringify(payload), {
		headers: { 'Content-Type': 'application/json' }
	});
}
|
||||
@@ -13,6 +13,7 @@
|
||||
let segments = $state<Segment[]>([]);
|
||||
let error = $state('');
|
||||
let chunkInfo = $state({ chunk: 0, total: 0 });
|
||||
let modelWarming = $state<{ state: string; retryAfterSecs: number } | null>(null);
|
||||
let eventSource: EventSource | null = null;
|
||||
|
||||
const statusLabel: Record<string, string> = {
|
||||
@@ -83,8 +84,11 @@
|
||||
try {
|
||||
const data = JSON.parse(e.data);
|
||||
if (data.type === 'progress') {
|
||||
modelWarming = null;
|
||||
chunkInfo = { chunk: data.chunk ?? 0, total: data.total ?? 0 };
|
||||
if (job) job = { ...job, progress: data.progress ?? job.progress, status: 'transcribing' };
|
||||
} else if (data.type === 'model_warming') {
|
||||
modelWarming = { state: data.state ?? 'loading', retryAfterSecs: data.retryAfterSecs ?? 30 };
|
||||
} else if (data.type === 'status') {
|
||||
if (job) job = { ...job, status: data.status, progress: data.progress ?? job.progress };
|
||||
} else if (data.type === 'done') {
|
||||
@@ -215,6 +219,15 @@
|
||||
{/if}
|
||||
</div>
|
||||
|
||||
{#if modelWarming}
|
||||
<div class="warming-notice mono">
|
||||
<svg width="12" height="12" viewBox="0 0 12 12" fill="none" style="flex-shrink:0; animation: spin 1.5s linear infinite">
|
||||
<circle cx="6" cy="6" r="4.5" stroke="currentColor" stroke-width="1.4" fill="none" stroke-dasharray="14 8"/>
|
||||
</svg>
|
||||
Warming up model ({modelWarming.state.replace(/_/g, ' ')}) — retrying in {modelWarming.retryAfterSecs}s…
|
||||
</div>
|
||||
{/if}
|
||||
|
||||
<!-- Progress bar -->
|
||||
<div class="progress-bar-track">
|
||||
<div
|
||||
@@ -484,6 +497,16 @@
|
||||
color: var(--text-muted);
|
||||
}
|
||||
|
||||
.warming-notice {
|
||||
display: flex;
|
||||
align-items: center;
|
||||
gap: 8px;
|
||||
margin-top: 10px;
|
||||
font-size: 11.5px;
|
||||
color: #f0b429;
|
||||
opacity: 0.9;
|
||||
}
|
||||
|
||||
.progress-bar-track {
|
||||
height: 4px;
|
||||
border-radius: 2px;
|
||||
|
||||
@@ -31,6 +31,7 @@ describe('submitJob', () => {
|
||||
it('POSTs to /jobs and returns job_id', async () => {
|
||||
mocks.fetch.mockResolvedValue({
|
||||
ok: true,
|
||||
status: 202,
|
||||
json: () => Promise.resolve({ job_id: 'whisper-job-abc' })
|
||||
});
|
||||
const id = await submitJob('/tmp/audio.wav', 'http://host/api/webhook/job-1');
|
||||
@@ -41,6 +42,7 @@ describe('submitJob', () => {
|
||||
vi.stubEnv('WHISPER_URL', 'http://localhost:8091');
|
||||
mocks.fetch.mockResolvedValue({
|
||||
ok: true,
|
||||
status: 202,
|
||||
json: () => Promise.resolve({ job_id: 'x' })
|
||||
});
|
||||
await submitJob('/tmp/audio.wav', 'http://host/api/webhook/job-1');
|
||||
@@ -54,6 +56,7 @@ describe('submitJob', () => {
|
||||
it('includes task=transcribe in the form', async () => {
|
||||
mocks.fetch.mockResolvedValue({
|
||||
ok: true,
|
||||
status: 202,
|
||||
json: () => Promise.resolve({ job_id: 'x' })
|
||||
});
|
||||
await submitJob('/tmp/audio.wav', 'http://host/webhook');
|
||||
@@ -63,6 +66,7 @@ describe('submitJob', () => {
|
||||
it('includes webhook_url in the form', async () => {
|
||||
mocks.fetch.mockResolvedValue({
|
||||
ok: true,
|
||||
status: 202,
|
||||
json: () => Promise.resolve({ job_id: 'x' })
|
||||
});
|
||||
await submitJob('/tmp/audio.wav', 'http://192.168.1.10:3000/api/webhook/job-99');
|
||||
@@ -75,6 +79,7 @@ describe('submitJob', () => {
|
||||
it('includes language when provided', async () => {
|
||||
mocks.fetch.mockResolvedValue({
|
||||
ok: true,
|
||||
status: 202,
|
||||
json: () => Promise.resolve({ job_id: 'x' })
|
||||
});
|
||||
await submitJob('/tmp/audio.wav', 'http://host/webhook', 'en');
|
||||
@@ -84,6 +89,7 @@ describe('submitJob', () => {
|
||||
it('omits language field when not provided', async () => {
|
||||
mocks.fetch.mockResolvedValue({
|
||||
ok: true,
|
||||
status: 202,
|
||||
json: () => Promise.resolve({ job_id: 'x' })
|
||||
});
|
||||
await submitJob('/tmp/audio.wav', 'http://host/webhook');
|
||||
|
||||
Reference in New Issue
Block a user