- Ignore backend model lifecycle webhooks so model warmup does not mark jobs done early - Parse batched SSE messages and relay model load states during submit retries Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
239 lines
7.5 KiB
TypeScript
239 lines
7.5 KiB
TypeScript
import type { ModelStateTag, ModelStatus } from '$lib/types.js';
|
|
|
|
/** Every model state tag this module accepts from the whisper server. */
const MODEL_STATES: ReadonlySet<ModelStateTag> = new Set<ModelStateTag>([
	'unloaded',
	'loading',
	'waiting_for_gpu',
	'ready'
]);
|
|
|
|
/**
 * Type guard: true only when `value` is a string that is a member of
 * `MODEL_STATES`.
 */
function isModelStateTag(value: unknown): value is ModelStateTag {
	if (typeof value !== 'string') {
		return false;
	}
	return MODEL_STATES.has(value as ModelStateTag);
}
|
|
|
|
/**
 * Split an accumulated SSE buffer into complete messages plus the unfinished tail.
 *
 * All carriage returns are removed first, then the text is cut at blank lines
 * ("\n\n"). The final piece is returned as `rest` so the caller can prepend it
 * to the next network chunk. Within each complete piece, `event:` sets the
 * event type (last one wins) and every `data:` line contributes one trimmed
 * line of payload, joined with "\n". Pieces with no payload are dropped.
 */
function extractSseMessages(buffer: string): { messages: { eventType: string; data: string }[]; rest: string } {
	const frames = buffer.replace(/\r/g, '').split('\n\n');
	// The last frame has not been terminated by a blank line yet.
	const rest = frames.pop() ?? '';

	const messages: { eventType: string; data: string }[] = [];
	for (const frame of frames) {
		let eventType = '';
		const payloadLines: string[] = [];

		for (const field of frame.split('\n')) {
			if (field.startsWith('data:')) {
				payloadLines.push(field.slice('data:'.length).trim());
			} else if (field.startsWith('event:')) {
				eventType = field.slice('event:'.length).trim();
			}
		}

		const data = payloadLines.join('\n');
		if (data.length === 0) continue;
		messages.push({ eventType, data });
	}

	return { messages, rest };
}
|
|
|
|
/**
 * Base URL of the whisper server, without a trailing slash.
 *
 * Reads the WHISPER_URL environment variable. Surrounding whitespace and
 * trailing slashes are removed so callers can safely append "/path"; if the
 * variable is unset or nothing remains after that cleanup, the default
 * 'http://localhost:8080' is returned.
 */
function whisperUrl(): string {
	const configured = (process.env.WHISPER_URL ?? '').trim().replace(/\/+$/, '');
	return configured.length > 0 ? configured : 'http://localhost:8080';
}
|
|
|
|
/**
 * Fetch the current model state from whisper-rtx2080 (`GET /model/status`).
 *
 * The request is aborted after 5 seconds.
 *
 * @returns The response body parsed as JSON.
 * @throws Error when the server answers with a non-OK status.
 */
export async function getModelStatus(): Promise<ModelStatus> {
	const nodeFetch = (await import('node-fetch')).default;
	const endpoint = `${whisperUrl()}/model/status`;
	const response = await nodeFetch(endpoint, { signal: AbortSignal.timeout(5000) });
	if (response.ok) {
		return response.json() as Promise<ModelStatus>;
	}
	throw new Error(`/model/status returned ${response.status}`);
}
|
|
|
|
/**
 * Wait for the whisper model to become ready.
 *
 * Subscribes to the /model/events SSE stream and resolves as soon as a payload
 * with state:"ready" arrives. Every other recognised state is forwarded to
 * `onStateChange`. The returned promise never rejects; it resolves when the
 * first of the following happens:
 *
 * - a state:"ready" payload is received;
 * - the stream ends (or the response has no body) without that payload, so
 *   the caller's retry loop can try again immediately;
 * - `timeoutMs` elapses. This is the only exit when the connection fails or
 *   the endpoint answers with a non-OK status, which keeps the retry loop
 *   from spinning without any delay.
 *
 * @param timeoutMs Maximum time to wait, in milliseconds.
 * @param onStateChange Called with each valid non-"ready" state received.
 */
async function waitForModelReady(
	timeoutMs: number,
	onStateChange?: (state: ModelStateTag) => void
): Promise<void> {
	const { default: fetch } = await import('node-fetch');
	const ac = new AbortController();
	return new Promise((resolve) => {
		let done = false;
		// Single exit point: stops the timer, tears down the SSE request, resolves once.
		const finish = () => {
			if (done) return;
			done = true;
			clearTimeout(timer);
			ac.abort();
			resolve();
		};

		const timer = setTimeout(finish, timeoutMs);

		fetch(`${whisperUrl()}/model/events`, { signal: ac.signal as AbortSignal })
			.then(async (res) => {
				// An error response is not an event stream. Treat it like an
				// unreachable endpoint and let the fallback timer decide; the
				// pending response is aborted when `finish` runs.
				if (!res.ok) return;
				if (!res.body) return finish();

				// Streaming decoder keeps multi-byte UTF-8 sequences intact when
				// they are split across network chunks.
				const decoder = new TextDecoder();
				let buf = '';
				for await (const chunk of res.body) {
					if (ac.signal.aborted) break;
					buf += typeof chunk === 'string' ? chunk : decoder.decode(chunk, { stream: true });
					const { messages, rest } = extractSseMessages(buf);
					buf = rest;
					for (const message of messages) {
						try {
							const payload = JSON.parse(message.data) as { state?: unknown };
							if (!isModelStateTag(payload.state)) continue;
							if (payload.state === 'ready') {
								finish();
								return;
							}
							onStateChange?.(payload.state);
						} catch {
							/* ignore malformed payloads and keep listening */
						}
					}
				}
				// Stream ended without a ready state → let the caller retry immediately.
				finish();
			})
			.catch(() => {
				// SSE unreachable or aborted — the fallback timer resolves the promise.
			});
	});
}
|
|
|
|
/**
 * Submit an audio file to whisper-rtx2080. Returns the whisper job id.
 *
 * Handles 503 (model not ready) transparently: after each 503 it waits via
 * `waitForModelReady` (state:"ready" on /model/events, or the retry delay plus
 * one second, whichever comes first) and then posts again. The retry delay is
 * taken from the body's `retry_after_secs`, then the `Retry-After` header,
 * then a 15 second default.
 *
 * @param wavPath Path of the audio file to upload.
 * @param webhookUrl Sent to the server as the `webhook_url` form field.
 * @param language Optional `language` form field.
 * @param onModelWaiting Called on each 503, and again whenever a different
 *   model state is reported while waiting, so the caller can surface the wait.
 * @param maxAttempts Maximum number of POST attempts (default 20).
 * @returns The `job_id` from the server's 202 response.
 * @throws Error when the server answers with a status other than 202/503,
 *   when a 202 response carries no `job_id`, or when every attempt got a 503.
 */
export async function submitJob(
	wavPath: string,
	webhookUrl: string,
	language?: string,
	onModelWaiting?: (state: ModelStateTag, retryAfterSecs: number) => void,
	maxAttempts = 20
): Promise<string> {
	const FormData = (await import('form-data')).default;
	const { createReadStream } = await import('fs');
	const { default: fetch } = await import('node-fetch');

	for (let attempt = 1; attempt <= maxAttempts; attempt++) {
		// Recreate form with a fresh readable stream on every attempt.
		// A consumed ReadStream cannot be rewound, so reusing it across retries
		// would send an empty body to whisper after the first 503.
		const form = new FormData();
		form.append('audio', createReadStream(wavPath));
		form.append('task', 'transcribe');
		form.append('webhook_url', webhookUrl);
		if (language) form.append('language', language);

		const res = await fetch(`${whisperUrl()}/jobs`, {
			method: 'POST',
			body: form,
			headers: form.getHeaders()
		});

		if (res.status === 202) {
			const json = (await res.json()) as { job_id?: unknown } | null;
			const jobId = json?.job_id;
			// Fail here rather than hand callers an undefined id typed as string.
			if (typeof jobId !== 'string' || jobId.length === 0) {
				throw new Error('whisper /jobs returned 202 without a job_id');
			}
			return jobId;
		}

		if (res.status === 503) {
			const body = ((await res.json().catch(() => null)) ?? {}) as {
				state?: unknown;
				retry_after_secs?: unknown;
			};
			const state: ModelStateTag = isModelStateTag(body.state) ? body.state : 'unloaded';
			const waitSecs = parseRetryAfterSecs(body.retry_after_secs, res.headers.get('Retry-After'));
			onModelWaiting?.(state, waitSecs);

			// No attempt is left to benefit from the wait, so fail right away
			// instead of waiting for readiness and then giving up anyway.
			if (attempt === maxAttempts) break;

			let lastState: ModelStateTag = state;
			await waitForModelReady((waitSecs + 1) * 1000, (nextState) => {
				if (nextState === lastState) return;
				lastState = nextState;
				onModelWaiting?.(nextState, waitSecs);
			});
			continue;
		}

		const text = await res.text();
		throw new Error(`whisper /jobs returned ${res.status}: ${text}`);
	}

	throw new Error(`Whisper model did not become ready after ${maxAttempts} attempts`);
}

/**
 * Work out how many seconds to wait before retrying after a 503.
 *
 * Prefers `bodySecs` when it is a finite, non-negative number. Otherwise reads
 * the `Retry-After` header, which may be a whole number of seconds or an HTTP
 * date. Anything unusable yields `fallbackSecs`, so the result is never NaN
 * or negative (a NaN delay would make the wait timer fire immediately).
 */
function parseRetryAfterSecs(bodySecs: unknown, header: string | null, fallbackSecs = 15): number {
	if (typeof bodySecs === 'number' && Number.isFinite(bodySecs) && bodySecs >= 0) {
		return bodySecs;
	}
	const raw = header?.trim() ?? '';
	if (/^\d+$/.test(raw)) {
		return Number.parseInt(raw, 10);
	}
	// Only date-like values (they contain letters, e.g. "GMT") go through Date.parse.
	if (/[a-z]/i.test(raw)) {
		const retryAt = Date.parse(raw);
		if (!Number.isNaN(retryAt)) {
			return Math.max(0, Math.ceil((retryAt - Date.now()) / 1000));
		}
	}
	return fallbackSecs;
}
|
|
|
|
/**
 * Ask the whisper server to unload the model from VRAM (`POST /model/unload`).
 *
 * The request is aborted after 10 seconds.
 *
 * @returns The response body parsed as JSON.
 * @throws Error when the whisper server returns a non-OK status.
 */
export async function unloadModel(): Promise<{ ok: boolean }> {
	const nodeFetch = (await import('node-fetch')).default;
	const endpoint = `${whisperUrl()}/model/unload`;
	const response = await nodeFetch(endpoint, {
		method: 'POST',
		signal: AbortSignal.timeout(10000)
	});
	if (response.ok) {
		return response.json() as Promise<{ ok: boolean }>;
	}
	throw new Error(`/model/unload returned ${response.status}`);
}
|
|
|
|
/**
 * Best-effort cancellation of a queued or running job on the whisper server.
 *
 * Sends `DELETE /jobs/<id>` with a 5 second timeout. Any failure is swallowed
 * on purpose: the local job status is already set to cancelled, so there is
 * nothing useful to report to the caller.
 */
export async function cancelJob(whisperJobId: string): Promise<void> {
	try {
		const nodeFetch = (await import('node-fetch')).default;
		const jobUrl = `${whisperUrl()}/jobs/${whisperJobId}`;
		const options = { method: 'DELETE', signal: AbortSignal.timeout(5000) };
		await nodeFetch(jobUrl, options);
	} catch {
		// best-effort: ignore network errors and timeouts
	}
}
|
|
|
|
/**
 * Open the SSE stream of a whisper job and dispatch its events to callbacks.
 *
 * Each message's JSON payload is routed by its `type` field:
 * - `progress` → `onProgress(percent, chunk, total)`, missing fields default to 0;
 * - `done` → `onDone()`, then reading stops;
 * - `error` → `onError(message)` (default 'unknown error'), then reading stops.
 *
 * Payloads that are not valid JSON objects, or that carry another `type`, are
 * skipped. If the stream ends without a `done` or `error` message the promise
 * resolves without invoking either of those callbacks.
 *
 * @throws Error when the response is not OK or has no body. Exceptions thrown
 *   by the callbacks are not swallowed; they reject the returned promise.
 */
export async function streamJob(
	whisperJobId: string,
	onProgress: (percent: number, chunk: number, total: number) => void,
	onDone: () => void,
	onError: (msg: string) => void
): Promise<void> {
	const { default: fetch } = await import('node-fetch');
	const res = await fetch(`${whisperUrl()}/jobs/${whisperJobId}/stream`);
	if (!res.ok || !res.body) throw new Error(`SSE stream returned ${res.status}`);

	// Streaming decoder keeps multi-byte UTF-8 sequences intact when they are
	// split across network chunks.
	const decoder = new TextDecoder();
	let buf = '';
	for await (const chunk of res.body) {
		buf += typeof chunk === 'string' ? chunk : decoder.decode(chunk, { stream: true });
		const { messages, rest } = extractSseMessages(buf);
		buf = rest;

		for (const message of messages) {
			let parsed: unknown;
			try {
				parsed = JSON.parse(message.data);
			} catch {
				// Only malformed JSON is ignored; callback failures must surface.
				continue;
			}
			if (typeof parsed !== 'object' || parsed === null) continue;

			const payload = parsed as {
				type?: unknown;
				percent?: number;
				chunk?: number;
				total?: number;
				message?: string;
			};
			if (payload.type === 'progress') {
				onProgress(payload.percent ?? 0, payload.chunk ?? 0, payload.total ?? 0);
			} else if (payload.type === 'done') {
				onDone();
				return;
			} else if (payload.type === 'error') {
				onError(payload.message ?? 'unknown error');
				return;
			}
		}
	}
}
|
|
|
|
/**
 * Probe the whisper server's `/health` endpoint.
 *
 * @returns `true` when the server answers with an OK status within 3 seconds;
 *   `false` for any other status, a timeout, or a network failure.
 */
export async function checkHealth(): Promise<boolean> {
	let healthy = false;
	try {
		const nodeFetch = (await import('node-fetch')).default;
		const response = await nodeFetch(`${whisperUrl()}/health`, {
			signal: AbortSignal.timeout(3000)
		});
		healthy = response.ok;
	} catch {
		// Unreachable or timed out: report unhealthy.
	}
	return healthy;
}
|