feat: model-on-demand lifecycle — retry on 503, live status pill, warming indicator
- whisper.ts: add getModelStatus(); fix submitJob() to retry on 503 using
Retry-After header instead of throwing; optional onModelWaiting callback
lets the pipeline surface model state to the UI during the wait
- pipeline.ts: pass onModelWaiting callback → emits model_warming SSE event
so the job detail page can show 'Warming up model…' while waiting
- types.ts: add ModelStateTag union and ModelStatus interface
- api/model/status: GET route proxies whisper /model/status (falls back to
{state:'unloaded'} if whisper unreachable)
- api/model/events: GET route relays whisper SSE stream to the browser;
AbortController tied to request.signal cleans up on disconnect
- layout.svelte: status pill is now live — initial fetch + EventSource on
/api/model/events; dot colour + label reflect real model state with a
pulsing animation while loading or waiting_for_gpu
- jobs/[id]/+page.svelte: handle model_warming event type → show a yellow
'Warming up model…' sub-label with spinner inside the progress card
- whisper.test.ts: update submitJob mocks to status:202 to match real API
Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
This commit is contained in:
43
src/routes/api/model/events/+server.ts
Normal file
43
src/routes/api/model/events/+server.ts
Normal file
@@ -0,0 +1,43 @@
|
||||
// Base URL of the whisper service; read from the WHISPER_URL environment
// variable, falling back to a local instance on port 8080 when it is unset.
const WHISPER_URL = process.env.WHISPER_URL ?? 'http://localhost:8080';
|
||||
|
||||
/**
 * Relay the whisper `/model/events` SSE stream to the browser.
 *
 * The upstream request is bound to an AbortController that fires when the
 * incoming request is aborted or when the response stream is cancelled, so
 * the connection to whisper is released as soon as the client goes away.
 *
 * Failure to reach whisper, a non-2xx upstream status, or a missing upstream
 * body all result in an empty event stream that is closed immediately; the
 * handler itself never rejects because of upstream errors.
 *
 * @param event - Request event; only `request.signal` is read.
 * @returns A streaming `text/event-stream` response.
 */
export async function GET({ request }: { request: Request }): Promise<Response> {
  // Loaded lazily so the dependency is only resolved when the route is hit.
  const { default: fetch } = await import('node-fetch');

  const ac = new AbortController();
  if (request.signal.aborted) {
    // An 'abort' listener would never fire for a signal that already aborted.
    ac.abort();
  } else {
    request.signal.addEventListener('abort', () => ac.abort(), { once: true });
  }

  const stream = new ReadableStream({
    async start(controller) {
      try {
        const upstream = await fetch(`${WHISPER_URL}/model/events`, {
          signal: ac.signal as AbortSignal
        });

        // Do not forward an error payload as if it were SSE data; the
        // `finally` block below closes the stream and releases the upstream.
        if (!upstream.ok || !upstream.body) {
          return;
        }

        for await (const chunk of upstream.body) {
          if (ac.signal.aborted) break;
          controller.enqueue(chunk instanceof Buffer ? chunk : Buffer.from(String(chunk)));
        }
      } catch {
        // upstream closed, client disconnected, or whisper unreachable — all fine
      } finally {
        // Release the upstream connection on every exit path (no-op if already aborted).
        ac.abort();
        try {
          controller.close();
        } catch {
          // close() throws once the stream has been cancelled or closed already;
          // there is nothing left to do in that case.
        }
      }
    },

    cancel() {
      ac.abort();
    }
  });

  return new Response(stream, {
    headers: {
      'Content-Type': 'text/event-stream',
      'Cache-Control': 'no-cache',
      Connection: 'keep-alive',
      'X-Accel-Buffering': 'no'
    }
  });
}
|
||||
Reference in New Issue
Block a user