// tonemark/src/lib/server/whisper.ts

import type { ModelStateTag, ModelStatus } from '$lib/types.js';

// Model state tags recognised at runtime; used to validate untyped values.
const MODEL_STATES = new Set<ModelStateTag>([
	'unloaded',
	'loading',
	'waiting_for_gpu',
	'ready'
]);

/** Runtime type guard: true only when `value` is a string contained in MODEL_STATES. */
function isModelStateTag(value: unknown): value is ModelStateTag {
	if (typeof value !== 'string') {
		return false;
	}
	return MODEL_STATES.has(value as ModelStateTag);
}

/**
 * Split a text buffer into complete SSE messages plus the unfinished tail.
 *
 * Carriage returns are removed first, then the buffer is cut on blank lines.
 * Everything after the last blank line is returned as `rest` so the caller can
 * prepend it to the next chunk. Within a message only `event:` and `data:`
 * lines are read; multiple `data:` lines are joined with a newline, and
 * messages whose joined data is empty are dropped.
 */
function extractSseMessages(buffer: string): { messages: { eventType: string; data: string }[]; rest: string } {
	const frames = buffer.replace(/\r/g, '').split('\n\n');
	// The final piece has not been terminated by a blank line yet.
	const rest = frames.pop() ?? '';

	const messages: { eventType: string; data: string }[] = [];
	for (const frame of frames) {
		let eventType = '';
		const payloadParts: string[] = [];

		for (const line of frame.split('\n')) {
			if (line.startsWith('event:')) {
				// A later event: line overrides an earlier one.
				eventType = line.slice('event:'.length).trim();
				continue;
			}
			if (line.startsWith('data:')) {
				payloadParts.push(line.slice('data:'.length).trim());
			}
		}

		const data = payloadParts.join('\n');
		if (data.length > 0) {
			messages.push({ eventType, data });
		}
	}

	return { messages, rest };
}

/**
 * Base URL of the whisper server, without a trailing slash.
 *
 * Reads WHISPER_URL from the environment on every call. An unset, empty or
 * whitespace-only value falls back to http://localhost:8080 (an empty value
 * would otherwise produce relative request URLs). Trailing slashes are removed
 * because callers append paths that already start with "/".
 */
function whisperUrl(): string {
	const configured = (process.env.WHISPER_URL ?? '').trim().replace(/\/+$/, '');
	return configured.length > 0 ? configured : 'http://localhost:8080';
}

/**
 * Fetch the model state currently reported by whisper-rtx2080.
 *
 * Sends GET /model/status with a 5 second abort timeout.
 *
 * @returns the JSON response body, cast (not validated) to ModelStatus
 * @throws Error when the response status is not ok; network, timeout and
 *         JSON parsing failures propagate unchanged
 */
export async function getModelStatus(): Promise<ModelStatus> {
	const nodeFetch = (await import('node-fetch')).default;
	const endpoint = `${whisperUrl()}/model/status`;
	const response = await nodeFetch(endpoint, { signal: AbortSignal.timeout(5000) });

	if (response.ok) {
		return (await response.json()) as ModelStatus;
	}
	throw new Error(`/model/status returned ${response.status}`);
}

/**
 * Wait for the whisper model to become ready.
 *
 * Subscribes to the /model/events SSE stream and resolves in one of these ways:
 *  - a payload with state:"ready" arrives → resolves immediately;
 *  - the stream ends (or the response has no body) without "ready" → resolves
 *    immediately so the caller can retry;
 *  - the request fails, the stream errors, or the server answers with a
 *    non-ok status → resolves when the `timeoutMs` fallback timer fires, so a
 *    broken events endpoint cannot turn the caller's retry loop into a busy loop;
 *  - `timeoutMs` elapses first → resolves and aborts the SSE request.
 *
 * @param timeoutMs upper bound on the wait, in milliseconds
 * @param onStateChange called with every valid non-"ready" state seen on the
 *        stream; exceptions it throws are swallowed together with parse errors
 */
async function waitForModelReady(
	timeoutMs: number,
	onStateChange?: (state: ModelStateTag) => void
): Promise<void> {
	const { default: fetch } = await import('node-fetch');
	const ac = new AbortController();
	return new Promise((resolve) => {
		let done = false;
		let timer: ReturnType<typeof setTimeout> | undefined;
		// Single exit point: idempotent, stops the timer and tears down the request.
		const finish = () => {
			if (done) return;
			done = true;
			clearTimeout(timer);
			ac.abort();
			resolve();
		};

		timer = setTimeout(finish, timeoutMs);

		fetch(`${whisperUrl()}/model/events`, { signal: ac.signal as AbortSignal })
			.then(async (res) => {
				if (!res.ok) {
					// An error response is not an event stream. Reading it would end at
					// once and trigger an immediate retry, so leave the fallback timer
					// in charge (same handling as an unreachable endpoint).
					return;
				}
				if (!res.body) {
					return finish();
				}
				// Streaming decoder keeps multi-byte UTF-8 sequences intact when they
				// are split across network chunks.
				const decoder = new TextDecoder();
				let buf = '';
				for await (const chunk of res.body) {
					if (ac.signal.aborted) break;
					buf += typeof chunk === 'string' ? chunk : decoder.decode(chunk, { stream: true });
					const { messages, rest } = extractSseMessages(buf);
					buf = rest;
					for (const message of messages) {
						try {
							const payload = JSON.parse(message.data) as { state?: unknown };
							if (!isModelStateTag(payload.state)) continue;
							if (payload.state === 'ready') {
								finish();
								return;
							}
							onStateChange?.(payload.state);
						} catch { /* ignore parse errors */ }
					}
				}
				// Stream ended without a "ready" state → let the caller retry immediately
				finish();
			})
			.catch(() => {
				// SSE unreachable, aborted or broken mid-stream — the fallback timer
				// resolves the promise (or already has).
			});
	});
}

/** Wait, in seconds, used when a 503 response carries no usable retry hint. */
const DEFAULT_RETRY_AFTER_SECS = 15;

/**
 * Work out how many seconds to wait after a 503.
 *
 * Preference order: a finite, non-negative numeric `retry_after_secs` from the
 * JSON body; a Retry-After header holding delay-seconds; a Retry-After header
 * holding an HTTP-date (converted to seconds from now, never negative); the
 * default. Anything unparseable falls through to the next option, so the
 * result is always a finite number ≥ 0.
 */
function resolveRetryAfterSecs(bodyValue: unknown, headerValue: string | null): number {
	if (typeof bodyValue === 'number' && Number.isFinite(bodyValue) && bodyValue >= 0) {
		return bodyValue;
	}

	const header = headerValue?.trim() ?? '';
	const delaySecs = Number.parseInt(header, 10);
	if (Number.isFinite(delaySecs) && delaySecs >= 0) {
		return delaySecs;
	}

	// HTTP-dates always contain letters (day/month names); requiring one avoids
	// Date.parse's lenient handling of bare numeric strings.
	if (/[a-z]/i.test(header)) {
		const retryAt = Date.parse(header);
		if (!Number.isNaN(retryAt)) {
			return Math.max(0, Math.ceil((retryAt - Date.now()) / 1000));
		}
	}

	return DEFAULT_RETRY_AFTER_SECS;
}

/**
 * Submit an audio file to whisper-rtx2080. Returns the whisper job id.
 *
 * Handles 503 (model not ready) transparently: retries after subscribing to
 * /model/events SSE — proceeds as soon as state:"ready" arrives, or after the
 * retry delay elapses (whichever comes first). The delay comes from the
 * response body's `retry_after_secs`, else the Retry-After header, else 15s.
 * Calls `onModelWaiting` on each 503, and again whenever a different state is
 * observed while waiting, so the caller can surface the wait to the user.
 *
 * @param wavPath path of the audio file to upload
 * @param webhookUrl sent to the server as the `webhook_url` form field
 * @param language optional `language` form field; omitted when empty
 * @param onModelWaiting progress callback (state, retry delay in seconds)
 * @param maxAttempts maximum number of POST attempts before giving up
 * @returns the `job_id` from the 202 response body
 * @throws Error for any status other than 202/503, or when every attempt got a
 *         503; network errors from fetch propagate unchanged
 */
export async function submitJob(
	wavPath: string,
	webhookUrl: string,
	language?: string,
	onModelWaiting?: (state: ModelStateTag, retryAfterSecs: number) => void,
	maxAttempts = 20
): Promise<string> {
	const FormData = (await import('form-data')).default;
	const { createReadStream } = await import('fs');
	const { default: fetch } = await import('node-fetch');

	for (let attempt = 1; attempt <= maxAttempts; attempt++) {
		// Recreate form with a fresh readable stream on every attempt.
		// A consumed ReadStream cannot be rewound, so reusing it across retries
		// would send an empty body to whisper after the first 503.
		const form = new FormData();
		form.append('audio', createReadStream(wavPath));
		form.append('task', 'transcribe');
		form.append('webhook_url', webhookUrl);
		if (language) form.append('language', language);

		const res = await fetch(`${whisperUrl()}/jobs`, {
			method: 'POST',
			body: form,
			headers: form.getHeaders()
		});

		if (res.status === 202) {
			const json = (await res.json()) as { job_id: string };
			return json.job_id;
		}

		if (res.status === 503) {
			// The body is untrusted JSON: tolerate a missing/invalid body and
			// validate each field before use.
			const body = (await res.json().catch(() => ({}))) as {
				state?: unknown;
				retry_after_secs?: unknown;
			} | null;
			const state = isModelStateTag(body?.state) ? body.state : 'unloaded';
			const waitSecs = resolveRetryAfterSecs(
				body?.retry_after_secs,
				res.headers.get('Retry-After')
			);
			onModelWaiting?.(state, waitSecs);
			let lastState = state;
			// One extra second so the server's own retry window has passed.
			await waitForModelReady((waitSecs + 1) * 1000, (nextState) => {
				if (nextState === lastState) return;
				lastState = nextState;
				onModelWaiting?.(nextState, waitSecs);
			});
			continue;
		}

		const text = await res.text();
		throw new Error(`whisper /jobs returned ${res.status}: ${text}`);
	}

	throw new Error(`Whisper model did not become ready after ${maxAttempts} attempts`);
}

/**
 * Ask the whisper server to unload the model from VRAM.
 *
 * Sends POST /model/unload with a 10 second abort timeout.
 *
 * @returns the JSON response body, cast (not validated) to `{ ok: boolean }`
 * @throws Error when the whisper server returns a non-ok status; network,
 *         timeout and JSON parsing failures propagate unchanged
 */
export async function unloadModel(): Promise<{ ok: boolean }> {
	const nodeFetch = (await import('node-fetch')).default;
	const endpoint = `${whisperUrl()}/model/unload`;
	const response = await nodeFetch(endpoint, {
		method: 'POST',
		signal: AbortSignal.timeout(10000)
	});

	if (response.ok) {
		return (await response.json()) as { ok: boolean };
	}
	throw new Error(`/model/unload returned ${response.status}`);
}

/**
 * Best-effort cancellation of a queued or running job on the whisper server.
 *
 * Sends DELETE /jobs/{id} with a 5 second abort timeout. The response is not
 * inspected and every failure is deliberately swallowed — local job status is
 * already set to cancelled.
 */
export async function cancelJob(whisperJobId: string): Promise<void> {
	const sendDelete = async (): Promise<void> => {
		const nodeFetch = (await import('node-fetch')).default;
		const endpoint = `${whisperUrl()}/jobs/${whisperJobId}`;
		await nodeFetch(endpoint, {
			method: 'DELETE',
			signal: AbortSignal.timeout(5000)
		});
	};

	try {
		await sendDelete();
	} catch {
		/* best-effort */
	}
}

/**
 * Open the SSE stream for a whisper job and dispatch its events to callbacks.
 *
 * Reads /jobs/{id}/stream until a terminal event arrives or the stream ends:
 *  - `type: "progress"` → `onProgress(percent, chunk, total)`, missing fields default to 0;
 *  - `type: "done"` → `onDone()`, then the function returns;
 *  - `type: "error"` → `onError(message)` (default "unknown error"), then returns.
 * Other event types are ignored. If the stream ends without a terminal event
 * the function resolves without calling `onDone` or `onError`.
 *
 * Note: the callbacks run inside the same try/catch that ignores malformed
 * JSON, so exceptions they throw are swallowed as well.
 *
 * @param whisperJobId id of the job on the whisper server
 * @throws Error when the response is not ok or has no body; network and stream
 *         read errors propagate unchanged
 */
export async function streamJob(
	whisperJobId: string,
	onProgress: (percent: number, chunk: number, total: number) => void,
	onDone: () => void,
	onError: (msg: string) => void
): Promise<void> {
	const { default: fetch } = await import('node-fetch');
	const res = await fetch(`${whisperUrl()}/jobs/${whisperJobId}/stream`);
	if (!res.ok || !res.body) throw new Error(`SSE stream returned ${res.status}`);

	// Streaming decoder: a multi-byte UTF-8 character split across two network
	// chunks is reassembled instead of being turned into replacement characters.
	const decoder = new TextDecoder();
	let buf = '';
	for await (const chunk of res.body) {
		buf += typeof chunk === 'string' ? chunk : decoder.decode(chunk, { stream: true });
		const { messages, rest } = extractSseMessages(buf);
		buf = rest;

		for (const message of messages) {
			try {
				const payload = JSON.parse(message.data) as {
					type?: unknown;
					percent?: number;
					chunk?: number;
					total?: number;
					message?: string;
				};
				if (payload.type === 'progress') {
					onProgress(payload.percent ?? 0, payload.chunk ?? 0, payload.total ?? 0);
				} else if (payload.type === 'done') {
					onDone();
					return;
				} else if (payload.type === 'error') {
					onError(payload.message ?? 'unknown error');
					return;
				}
			} catch { /* ignore parse errors */ }
		}
	}
}

/**
 * Probe the whisper server's /health endpoint.
 *
 * Uses a 3 second abort timeout and never throws.
 *
 * @returns true when the response status is ok; false on a non-ok status or
 *          any failure (network error, timeout, import failure)
 */
export async function checkHealth(): Promise<boolean> {
	let healthy = false;
	try {
		const nodeFetch = (await import('node-fetch')).default;
		const response = await nodeFetch(`${whisperUrl()}/health`, {
			signal: AbortSignal.timeout(3000)
		});
		healthy = response.ok;
	} catch {
		healthy = false;
	}
	return healthy;
}