diff --git a/src/tests/whisper.test.ts b/src/tests/whisper.test.ts
index a2b4c84..dfc3de0 100644
--- a/src/tests/whisper.test.ts
+++ b/src/tests/whisper.test.ts
@@ -1,4 +1,4 @@
-import { describe, it, expect, vi, afterEach } from 'vitest';
+import { describe, it, expect, vi, afterEach, beforeEach } from 'vitest';
 import { Readable } from 'stream';
 
 // ── Hoist mocks so they're available inside vi.mock() factories ───────────────
@@ -21,7 +21,7 @@ vi.mock('form-data', () => ({
 
 vi.mock('fs', () => ({ createReadStream: vi.fn(() => 'STREAM_PLACEHOLDER') }));
 
-import { submitJob, streamJob } from '$lib/server/whisper.js';
+import { submitJob, streamJob, getModelStatus } from '$lib/server/whisper.js';
 
 afterEach(() => vi.clearAllMocks());
 
@@ -107,6 +107,253 @@ describe('submitJob', () => {
 	});
 });
 
+// ── submitJob — 503 retry & model-warming behavior ───────────────────────────
+
+/** Mock of the 503 the whisper server returns while its model is not ready; Retry-After defaults to retry_after_secs. */
+function make503(state: string, retry_after_secs: number, headerRetryAfter?: string) {
+	return {
+		status: 503,
+		json: () => Promise.resolve({ error: 'model_not_ready', state, retry_after_secs }),
+		headers: {
+			get: (h: string) =>
+				h.toLowerCase() === 'retry-after' ? (headerRetryAfter ?? String(retry_after_secs)) : null
+		}
+	};
+}
+
+function make202(job_id: string) {
+	return { status: 202, json: () => Promise.resolve({ job_id }) }; // mock 202 response whose JSON body carries job_id
+}
+
+describe('submitJob — 503 retry behavior', () => {
+	beforeEach(() => vi.useFakeTimers()); // lets each test flush pending timers with vi.runAllTimersAsync()
+	afterEach(() => vi.useRealTimers()); // switch back to real timers after every test in this suite
+
+	it('calls onModelWaiting with state and retryAfterSecs on first 503', async () => {
+		mocks.fetch
+			.mockResolvedValueOnce(make503('unloaded', 30))
+			.mockResolvedValueOnce(make202('job-1'));
+
+		const onModelWaiting = vi.fn();
+		const p = submitJob('/tmp/audio.wav', 'http://host/webhook', undefined, onModelWaiting);
+		await vi.runAllTimersAsync();
+
+		await expect(p).resolves.toBe('job-1');
+		expect(onModelWaiting).toHaveBeenCalledOnce();
+		expect(onModelWaiting).toHaveBeenCalledWith('unloaded', 30);
+	});
+
+	it('retries and returns job_id once model becomes ready', async () => {
+		mocks.fetch
+			.mockResolvedValueOnce(make503('loading', 10))
+			.mockResolvedValueOnce(make202('ready-id'));
+
+		const p = submitJob('/tmp/audio.wav', 'http://host/webhook');
+		await vi.runAllTimersAsync();
+
+		await expect(p).resolves.toBe('ready-id');
+		expect(mocks.fetch).toHaveBeenCalledTimes(2);
+	});
+
+	it('calls onModelWaiting once per 503, not on success', async () => {
+		mocks.fetch
+			.mockResolvedValueOnce(make503('loading', 0))
+			.mockResolvedValueOnce(make503('loading', 0))
+			.mockResolvedValueOnce(make202('final-id'));
+
+		const onModelWaiting = vi.fn();
+		const p = submitJob('/tmp/audio.wav', 'http://host/webhook', undefined, onModelWaiting, 10);
+		await vi.runAllTimersAsync();
+
+		await expect(p).resolves.toBe('final-id');
+		expect(onModelWaiting).toHaveBeenCalledTimes(2);
+	});
+
+	it('passes the correct state for each 503 response', async () => {
+		mocks.fetch
+			.mockResolvedValueOnce(make503('unloaded', 0))
+			.mockResolvedValueOnce(make503('loading', 0))
+			.mockResolvedValueOnce(make503('waiting_for_gpu', 0))
+			.mockResolvedValueOnce(make202('job-x'));
+
+		const onModelWaiting = vi.fn();
+		const p = submitJob('/tmp/audio.wav', 'http://host/webhook', undefined, onModelWaiting, 10);
+		await vi.runAllTimersAsync();
+
+		await expect(p).resolves.toBe('job-x');
+		expect(onModelWaiting).toHaveBeenNthCalledWith(1, 'unloaded', 0);
+		expect(onModelWaiting).toHaveBeenNthCalledWith(2, 'loading', 0);
+		expect(onModelWaiting).toHaveBeenNthCalledWith(3, 'waiting_for_gpu', 0);
+	});
+
+	it('falls back to Retry-After header when body lacks retry_after_secs', async () => {
+		// The body omits retry_after_secs, so only the Retry-After header carries the delay.
+		mocks.fetch
+			.mockResolvedValueOnce({
+				status: 503,
+				json: () => Promise.resolve({ state: 'loading' }),
+				headers: { get: (h: string) => (h.toLowerCase() === 'retry-after' ? '7' : null) }
+			})
+			.mockResolvedValueOnce(make202('fallback-id'));
+
+		const onModelWaiting = vi.fn();
+		const p = submitJob('/tmp/audio.wav', 'http://host/webhook', undefined, onModelWaiting);
+		await vi.runAllTimersAsync();
+
+		await expect(p).resolves.toBe('fallback-id');
+		expect(onModelWaiting).toHaveBeenCalledWith('loading', 7);
+	});
+
+	it('falls back to 15s when both body and header are absent', async () => {
+		mocks.fetch
+			.mockResolvedValueOnce({
+				status: 503,
+				json: () => Promise.resolve({ state: 'unloaded' }),
+				headers: { get: () => null }
+			})
+			.mockResolvedValueOnce(make202('default-wait-id'));
+
+		const onModelWaiting = vi.fn();
+		const p = submitJob('/tmp/audio.wav', 'http://host/webhook', undefined, onModelWaiting);
+		await vi.runAllTimersAsync();
+
+		await expect(p).resolves.toBe('default-wait-id');
+		expect(onModelWaiting).toHaveBeenCalledWith('unloaded', 15);
+	});
+
+	it('throws after maxAttempts 503 responses', async () => {
+		mocks.fetch.mockResolvedValue(make503('loading', 0)); // every attempt receives a 503
+
+		// Attach the .rejects handler BEFORE advancing timers so the rejection is
+		// already handled by the time Vitest's unhandled-rejection detector could fire.
+		const expectation = expect(
+			submitJob('/tmp/audio.wav', 'http://host/webhook', undefined, undefined, 3) // 3 = attempt cap
+		).rejects.toThrow(/did not become ready after 3 attempts/i);
+
+		await vi.runAllTimersAsync();
+		await expectation;
+
+		expect(mocks.fetch).toHaveBeenCalledTimes(3);
+	});
+
+	it('does NOT call onModelWaiting for non-503 errors', async () => {
+		mocks.fetch.mockResolvedValue({
+			status: 500,
+			text: () => Promise.resolve('internal error')
+		});
+
+		const onModelWaiting = vi.fn();
+		await expect(
+			submitJob('/tmp/audio.wav', 'http://host/webhook', undefined, onModelWaiting)
+		).rejects.toThrow('500');
+		expect(onModelWaiting).not.toHaveBeenCalled();
+	});
+
+	it('does NOT retry on non-503 errors (throws immediately)', async () => {
+		mocks.fetch.mockResolvedValue({
+			status: 400,
+			text: () => Promise.resolve("missing 'audio' field")
+		});
+
+		await expect(
+			submitJob('/tmp/audio.wav', 'http://host/webhook', undefined, undefined, 10)
+		).rejects.toThrow('400');
+		expect(mocks.fetch).toHaveBeenCalledTimes(1);
+	});
+
+	it('works correctly without an onModelWaiting callback', async () => {
+		mocks.fetch
+			.mockResolvedValueOnce(make503('unloaded', 0))
+			.mockResolvedValueOnce(make202('no-cb-id'));
+
+		const p = submitJob('/tmp/audio.wav', 'http://host/webhook');
+		await vi.runAllTimersAsync();
+
+		await expect(p).resolves.toBe('no-cb-id');
+	});
+});
+
+// ── getModelStatus ────────────────────────────────────────────────────────────
+
+describe('getModelStatus', () => {
+	afterEach(() => vi.unstubAllEnvs()); // always drop env stubs, even when a test fails before its last line
+	it('returns parsed status when model is ready', async () => {
+		const readyStatus = {
+			state: 'ready',
+			loaded_at: '2026-05-09T00:00:00.000Z',
+			vram_used_mb: 4096,
+			vram_total_mb: 8192
+		};
+		mocks.fetch.mockResolvedValue({
+			ok: true,
+			json: () => Promise.resolve(readyStatus)
+		});
+
+		const status = await getModelStatus();
+		expect(status.state).toBe('ready');
+		expect(status.loaded_at).toBe('2026-05-09T00:00:00.000Z');
+		expect(status.vram_used_mb).toBe(4096);
+	});
+
+	it('returns parsed status when model is unloaded', async () => {
+		mocks.fetch.mockResolvedValue({
+			ok: true,
+			json: () => Promise.resolve({ state: 'unloaded' })
+		});
+
+		const status = await getModelStatus();
+		expect(status.state).toBe('unloaded');
+	});
+
+	it('returns parsed status when model is loading', async () => {
+		mocks.fetch.mockResolvedValue({
+			ok: true,
+			json: () => Promise.resolve({ state: 'loading' })
+		});
+
+		const status = await getModelStatus();
+		expect(status.state).toBe('loading');
+	});
+
+	it('returns parsed status when waiting_for_gpu with VRAM fields', async () => {
+		const waitingStatus = {
+			state: 'waiting_for_gpu',
+			vram_needed_mb: 3951,
+			vram_free_mb: 512,
+			retry_in_secs: 30
+		};
+		mocks.fetch.mockResolvedValue({
+			ok: true,
+			json: () => Promise.resolve(waitingStatus)
+		});
+
+		const status = await getModelStatus();
+		expect(status.state).toBe('waiting_for_gpu');
+		expect(status.vram_needed_mb).toBe(3951);
+		expect(status.vram_free_mb).toBe(512);
+	});
+
+	it('calls the correct WHISPER_URL endpoint', async () => {
+		vi.stubEnv('WHISPER_URL', 'http://gpu-box:9090'); // removed again by the suite-level afterEach
+		mocks.fetch.mockResolvedValue({
+			ok: true,
+			json: () => Promise.resolve({ state: 'ready' })
+		});
+
+		await getModelStatus();
+		expect(mocks.fetch).toHaveBeenCalledWith(
+			'http://gpu-box:9090/model/status',
+			expect.objectContaining({ signal: expect.anything() })
+		);
+	});
+
+	it('throws when the server returns a non-ok response', async () => {
+		mocks.fetch.mockResolvedValue({ ok: false, status: 503 });
+
+		await expect(getModelStatus()).rejects.toThrow('/model/status');
+	});
+});
+
 // ── streamJob SSE parsing ─────────────────────────────────────────────────────
 
 function makeSSEResponse(lines: string[]) {