fix(whisper): handle model warmup events
- Ignore backend model lifecycle webhooks so model warmup does not mark jobs done early
- Parse batched SSE messages and relay model load states during submit retries

Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
This commit is contained in:
@@ -1,4 +1,32 @@
|
||||
import type { ModelStatus } from '$lib/types.js';
|
||||
import type { ModelStateTag, ModelStatus } from '$lib/types.js';
|
||||
|
||||
/**
 * The model state tags this module accepts from backend payloads.
 *
 * Built from `ModelStateTag` literals so a typo is still a compile error, but
 * exposed as a read-only set of plain strings so membership can be tested
 * against arbitrary input without a type assertion.
 */
const MODEL_STATES: ReadonlySet<string> = new Set<ModelStateTag>([
  'unloaded',
  'loading',
  'waiting_for_gpu',
  'ready'
]);

/**
 * Runtime type guard for model state tags.
 *
 * @param value - Any value, typically a field read from parsed JSON.
 * @returns `true` only when `value` is a string that exactly matches one of
 *   the known state tags; non-strings and unknown strings yield `false`.
 */
function isModelStateTag(value: unknown): value is ModelStateTag {
  return typeof value === 'string' && MODEL_STATES.has(value);
}
|
||||
|
||||
/**
 * Splits an accumulated Server-Sent Events buffer into its complete messages.
 *
 * A message is complete once it is followed by a blank line. Whatever trails
 * the last blank line is returned as `rest` so the caller can prepend it to
 * the next network chunk; this is what lets several messages arriving in one
 * read, or one message arriving across several reads, be handled uniformly.
 *
 * Only `event:` and `data:` fields are interpreted. Every other line
 * (comments, `id:`, `retry:`, ...) is ignored, and messages that carry no
 * data are dropped.
 *
 * @param buffer - Text received so far that has not yet been consumed.
 * @returns `messages`: the complete messages in arrival order, each with its
 *   event type (`''` when none was sent) and its `data:` lines joined by
 *   `\n`. `rest`: the unterminated tail, with carriage returns removed.
 */
function extractSseMessages(buffer: string): { messages: { eventType: string; data: string }[]; rest: string } {
  // Strip carriage returns so CRLF streams split on the same "\n\n" boundary
  // as LF streams, even when a "\r\n" pair is cut in half between two reads.
  const normalized = buffer.replace(/\r/g, '');
  const chunks = normalized.split('\n\n');
  // The last piece has not been terminated by a blank line yet.
  const rest = chunks.pop() ?? '';

  const messages: { eventType: string; data: string }[] = [];
  for (const chunk of chunks) {
    let eventType = '';
    const dataLines: string[] = [];

    for (const line of chunk.split('\n')) {
      if (line.startsWith('event:')) {
        eventType = line.slice(6).trim();
      } else if (line.startsWith('data:')) {
        // Remove only the single optional space after the colon. Trimming the
        // whole value would destroy indentation and trailing whitespace that
        // belong to the payload.
        const value = line.slice(5);
        dataLines.push(value.startsWith(' ') ? value.slice(1) : value);
      }
    }

    const data = dataLines.join('\n');
    // Heartbeats, comments and event-only blocks have no data to deliver.
    if (data.length > 0) {
      messages.push({ eventType, data });
    }
  }

  return { messages, rest };
}
|
||||
|
||||
function whisperUrl() {
|
||||
return process.env.WHISPER_URL ?? 'http://localhost:8080';
|
||||
@@ -22,7 +50,10 @@ export async function getModelStatus(): Promise<ModelStatus> {
|
||||
* SSE connection fails or closes without that event, so the retry loop can
|
||||
* try again without hanging indefinitely.
|
||||
*/
|
||||
async function waitForModelReady(timeoutMs: number): Promise<void> {
|
||||
async function waitForModelReady(
|
||||
timeoutMs: number,
|
||||
onStateChange?: (state: ModelStateTag) => void
|
||||
): Promise<void> {
|
||||
const { default: fetch } = await import('node-fetch');
|
||||
const ac = new AbortController();
|
||||
return new Promise((resolve) => {
|
||||
@@ -47,17 +78,18 @@ async function waitForModelReady(timeoutMs: number): Promise<void> {
|
||||
for await (const chunk of res.body) {
|
||||
if (ac.signal.aborted) break;
|
||||
buf += chunk.toString();
|
||||
const lines = buf.split('\n');
|
||||
buf = lines.pop() ?? '';
|
||||
for (const line of lines) {
|
||||
if (!line.startsWith('data:')) continue;
|
||||
const { messages, rest } = extractSseMessages(buf);
|
||||
buf = rest;
|
||||
for (const message of messages) {
|
||||
try {
|
||||
const payload = JSON.parse(line.slice(5).trim());
|
||||
const payload = JSON.parse(message.data) as { state?: unknown };
|
||||
if (!isModelStateTag(payload.state)) continue;
|
||||
if (payload.state === 'ready') {
|
||||
clearTimeout(timer);
|
||||
finish();
|
||||
return;
|
||||
}
|
||||
onStateChange?.(payload.state);
|
||||
} catch { /* ignore parse errors */ }
|
||||
}
|
||||
}
|
||||
@@ -83,7 +115,7 @@ export async function submitJob(
|
||||
wavPath: string,
|
||||
webhookUrl: string,
|
||||
language?: string,
|
||||
onModelWaiting?: (state: string, retryAfterSecs: number) => void,
|
||||
onModelWaiting?: (state: ModelStateTag, retryAfterSecs: number) => void,
|
||||
maxAttempts = 20
|
||||
): Promise<string> {
|
||||
const FormData = (await import('form-data')).default;
|
||||
@@ -116,10 +148,15 @@ export async function submitJob(
|
||||
state?: string;
|
||||
retry_after_secs?: number;
|
||||
};
|
||||
const state = body.state ?? 'unloaded';
|
||||
const state = isModelStateTag(body.state) ? body.state : 'unloaded';
|
||||
const waitSecs = body.retry_after_secs ?? parseInt(res.headers.get('Retry-After') ?? '15');
|
||||
onModelWaiting?.(state, waitSecs);
|
||||
await waitForModelReady((waitSecs + 1) * 1000);
|
||||
let lastState = state;
|
||||
await waitForModelReady((waitSecs + 1) * 1000, (nextState) => {
|
||||
if (nextState === lastState) return;
|
||||
lastState = nextState;
|
||||
onModelWaiting?.(nextState, waitSecs);
|
||||
});
|
||||
continue;
|
||||
}
|
||||
|
||||
@@ -169,30 +206,23 @@ export async function streamJob(
|
||||
let buf = '';
|
||||
for await (const chunk of res.body) {
|
||||
buf += chunk.toString();
|
||||
const lines = buf.split('\n');
|
||||
buf = lines.pop() ?? '';
|
||||
const { messages, rest } = extractSseMessages(buf);
|
||||
buf = rest;
|
||||
|
||||
let eventType = '';
|
||||
let dataLine = '';
|
||||
for (const line of lines) {
|
||||
if (line.startsWith('event:')) eventType = line.slice(6).trim();
|
||||
else if (line.startsWith('data:')) dataLine = line.slice(5).trim();
|
||||
for (const message of messages) {
|
||||
try {
|
||||
const payload = JSON.parse(message.data);
|
||||
if (payload.type === 'progress') {
|
||||
onProgress(payload.percent ?? 0, payload.chunk ?? 0, payload.total ?? 0);
|
||||
} else if (payload.type === 'done') {
|
||||
onDone();
|
||||
return;
|
||||
} else if (payload.type === 'error') {
|
||||
onError(payload.message ?? 'unknown error');
|
||||
return;
|
||||
}
|
||||
} catch { /* ignore parse errors */ }
|
||||
}
|
||||
|
||||
if (!dataLine) continue;
|
||||
|
||||
try {
|
||||
const payload = JSON.parse(dataLine);
|
||||
if (payload.type === 'progress') {
|
||||
onProgress(payload.percent ?? 0, payload.chunk ?? 0, payload.total ?? 0);
|
||||
} else if (payload.type === 'done') {
|
||||
onDone();
|
||||
return;
|
||||
} else if (payload.type === 'error') {
|
||||
onError(payload.message ?? 'unknown error');
|
||||
return;
|
||||
}
|
||||
} catch { /* ignore parse errors */ }
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user