feat: proxy POST /model/unload endpoint
All checks were successful
Build & Push Docker Image / build-and-push (push) Successful in 43s

- Add unloadModel() to whisper.ts: POSTs to /model/unload with 10s
  timeout, returns parsed JSON body, throws on non-ok response
- Create src/routes/api/model/unload/+server.ts: thin POST proxy,
  passes whisper's success body through, returns 502 if whisper is
  unreachable or responds with a non-ok status
- Add 3 unloadModel tests (success, WHISPER_URL config, error propagation)
  — 147/147 passing

Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
This commit is contained in:
Giancarmine Salucci
2026-05-09 15:48:47 +02:00
parent 04142b17a8
commit 53f874aec7
3 changed files with 61 additions and 1 deletion

View File

@@ -127,6 +127,17 @@ export async function submitJob(
throw new Error(`Whisper model did not become ready after ${maxAttempts} attempts`); throw new Error(`Whisper model did not become ready after ${maxAttempts} attempts`);
} }
/**
 * Ask the whisper server to unload its model from VRAM.
 *
 * Issues a POST to `/model/unload` on the configured whisper URL, with the
 * request aborted by a 10 000 ms timeout signal.
 *
 * @returns The JSON body of the whisper server's reply.
 * @throws Error when the reply status is non-ok. Rejections raised by the
 *   fetch call itself are not caught here and propagate to the caller.
 */
export async function unloadModel(): Promise<{ ok: boolean }> {
	// node-fetch is loaded lazily, on each call.
	const nodeFetch = (await import('node-fetch')).default;
	const endpoint = `${whisperUrl()}/model/unload`;
	const response = await nodeFetch(endpoint, {
		method: 'POST',
		signal: AbortSignal.timeout(10000)
	});

	if (response.ok) {
		return response.json() as Promise<{ ok: boolean }>;
	}
	throw new Error(`/model/unload returned ${response.status}`);
}
/** /**
* Cancel a queued or running job on the whisper server (best-effort). * Cancel a queued or running job on the whisper server (best-effort).
* Errors are silently ignored — local job status is already set to cancelled. * Errors are silently ignored — local job status is already set to cancelled.

View File

@@ -0,0 +1,13 @@
import { json } from '@sveltejs/kit';
import { unloadModel } from '$lib/server/whisper.js';
/**
 * POST handler that forwards a model-unload request to the whisper backend.
 *
 * On success, the value resolved by `unloadModel()` is returned as the JSON
 * response body. Anything thrown along the way is turned into a 502 response
 * shaped as `{ ok: false, error: <message> }`.
 */
export async function POST() {
	try {
		// Building the response stays inside the try so that any failure here
		// is reported as a 502 as well.
		return json(await unloadModel());
	} catch (cause: unknown) {
		let message: string;
		if (cause instanceof Error) {
			message = cause.message;
		} else {
			message = String(cause);
		}
		return json({ ok: false, error: message }, { status: 502 });
	}
}

View File

@@ -21,7 +21,7 @@ vi.mock('form-data', () => ({
vi.mock('fs', () => ({ createReadStream: vi.fn(() => 'STREAM_PLACEHOLDER') })); vi.mock('fs', () => ({ createReadStream: vi.fn(() => 'STREAM_PLACEHOLDER') }));
import { submitJob, streamJob, getModelStatus, cancelJob } from '$lib/server/whisper.js'; import { submitJob, streamJob, getModelStatus, cancelJob, unloadModel } from '$lib/server/whisper.js';
afterEach(() => vi.clearAllMocks()); afterEach(() => vi.clearAllMocks());
@@ -341,6 +341,42 @@ describe('submitJob — SSE-triggered retry', () => {
}); });
}); });
// ── unloadModel ───────────────────────────────────────────────────────────────
describe('unloadModel', () => {
	// Env stubs are restored in a hook rather than at the end of a test body,
	// so a failing assertion cannot leave WHISPER_URL stubbed for later tests.
	afterEach(() => {
		vi.unstubAllEnvs();
	});

	it('POSTs to /model/unload and returns parsed body', async () => {
		mocks.fetch.mockResolvedValue({
			ok: true,
			json: () => Promise.resolve({ ok: true })
		});

		const result = await unloadModel();

		expect(result).toEqual({ ok: true });
		expect(mocks.fetch).toHaveBeenCalledWith(
			expect.stringContaining('/model/unload'),
			expect.objectContaining({ method: 'POST' })
		);
	});

	it('uses the configured WHISPER_URL', async () => {
		vi.stubEnv('WHISPER_URL', 'http://gpu-box:9090');
		mocks.fetch.mockResolvedValue({
			ok: true,
			json: () => Promise.resolve({ ok: true })
		});

		await unloadModel();

		expect(mocks.fetch).toHaveBeenCalledWith(
			'http://gpu-box:9090/model/unload',
			expect.anything()
		);
	});

	it('throws when whisper returns a non-ok response', async () => {
		mocks.fetch.mockResolvedValue({ ok: false, status: 409 });

		// Pin the status code as well as the path: the status is the only
		// detail of the upstream failure that the error message carries.
		await expect(unloadModel()).rejects.toThrow('/model/unload returned 409');
	});
});
// ── cancelJob ───────────────────────────────────────────────────────────────── // ── cancelJob ─────────────────────────────────────────────────────────────────
describe('cancelJob', () => { describe('cancelJob', () => {