feat: proxy POST /model/unload endpoint
All checks were successful
Build & Push Docker Image / build-and-push (push) Successful in 43s
All checks were successful
Build & Push Docker Image / build-and-push (push) Successful in 43s
- Add unloadModel() to whisper.ts: POSTs to /model/unload with a 10s timeout, returns the parsed JSON body, and throws on a non-ok response.
- Create src/routes/api/model/unload/+server.ts: a thin POST proxy that passes whisper's response through and returns 502 if whisper is unreachable or responds with a non-ok status.
- Add 3 unloadModel tests (success, WHISPER_URL config, error propagation) — 147/147 passing.

Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
This commit is contained in:
@@ -127,6 +127,17 @@ export async function submitJob(
|
||||
throw new Error(`Whisper model did not become ready after ${maxAttempts} attempts`);
|
||||
}
|
||||
|
||||
/**
 * Ask the whisper server to unload its model from VRAM.
 *
 * Sends a body-less POST to `<whisper base URL>/model/unload` and aborts the
 * request if it has not completed within 10 seconds.
 *
 * @returns The JSON body sent back by the whisper server. The payload is
 *   typed as `{ ok: boolean }` but is not validated at runtime.
 * @throws Error when the server answers with a non-ok HTTP status; the
 *   message contains the status code. Failures raised by `fetch` itself
 *   (including the timeout abort) propagate unchanged.
 */
export async function unloadModel(): Promise<{ ok: boolean }> {
  // node-fetch is loaded lazily via dynamic import, as in the original.
  const nodeFetch = (await import('node-fetch')).default;

  const endpoint = `${whisperUrl()}/model/unload`;
  const response = await nodeFetch(endpoint, {
    method: 'POST',
    signal: AbortSignal.timeout(10000)
  });

  if (response.ok) {
    return response.json() as Promise<{ ok: boolean }>;
  }

  throw new Error(`/model/unload returned ${response.status}`);
}
|
||||
|
||||
/**
|
||||
* Cancel a queued or running job on the whisper server (best-effort).
|
||||
* Errors are silently ignored — local job status is already set to cancelled.
|
||||
|
||||
13
src/routes/api/model/unload/+server.ts
Normal file
13
src/routes/api/model/unload/+server.ts
Normal file
@@ -0,0 +1,13 @@
|
||||
import { json } from '@sveltejs/kit';
|
||||
import { unloadModel } from '$lib/server/whisper.js';
|
||||
|
||||
/**
 * POST handler that proxies to `/model/unload` on the whisper backend.
 *
 * On success the body returned by `unloadModel()` is relayed as JSON.
 * Any error thrown along the way is reported to the client as a 502
 * response of the form `{ ok: false, error: <message> }`.
 */
export async function POST() {
  try {
    return json(await unloadModel());
  } catch (failure: unknown) {
    // Non-Error throwables are stringified so the client always gets a text reason.
    let reason: string;
    if (failure instanceof Error) {
      reason = failure.message;
    } else {
      reason = String(failure);
    }
    return json({ ok: false, error: reason }, { status: 502 });
  }
}
|
||||
@@ -21,7 +21,7 @@ vi.mock('form-data', () => ({
|
||||
|
||||
// Replace the `fs` module with a stub whose `createReadStream` returns a fixed
// placeholder string — presumably so upload tests never open a real file.
vi.mock('fs', () => ({ createReadStream: vi.fn(() => 'STREAM_PLACEHOLDER') }));
|
||||
|
||||
import { submitJob, streamJob, getModelStatus, cancelJob } from '$lib/server/whisper.js';
|
||||
import { submitJob, streamJob, getModelStatus, cancelJob, unloadModel } from '$lib/server/whisper.js';
|
||||
|
||||
// Clear recorded calls/results on every mock after each test so that
// call-count and call-argument assertions do not leak between tests.
afterEach(() => vi.clearAllMocks());
|
||||
|
||||
@@ -341,6 +341,42 @@ describe('submitJob — SSE-triggered retry', () => {
|
||||
});
|
||||
});
|
||||
|
||||
// ── unloadModel ───────────────────────────────────────────────────────────────

describe('unloadModel', () => {
  // Restore stubbed env vars after every test in this suite. Doing this in a
  // hook (rather than as the last statement of a test) guarantees cleanup even
  // when an assertion fails, so a stubbed WHISPER_URL cannot leak into later tests.
  afterEach(() => {
    vi.unstubAllEnvs();
  });

  it('POSTs to /model/unload and returns parsed body', async () => {
    mocks.fetch.mockResolvedValue({
      ok: true,
      json: () => Promise.resolve({ ok: true })
    });

    const result = await unloadModel();

    expect(result).toEqual({ ok: true });
    expect(mocks.fetch).toHaveBeenCalledWith(
      expect.stringContaining('/model/unload'),
      expect.objectContaining({ method: 'POST' })
    );
  });

  it('uses the configured WHISPER_URL', async () => {
    vi.stubEnv('WHISPER_URL', 'http://gpu-box:9090');
    mocks.fetch.mockResolvedValue({
      ok: true,
      json: () => Promise.resolve({ ok: true })
    });

    await unloadModel();

    expect(mocks.fetch).toHaveBeenCalledWith(
      'http://gpu-box:9090/model/unload',
      expect.anything()
    );
  });

  it('throws when whisper returns a non-ok response', async () => {
    mocks.fetch.mockResolvedValue({ ok: false, status: 409 });

    // Pin the status code as well as the path, so an unrelated error that
    // merely mentions the URL cannot satisfy this assertion.
    await expect(unloadModel()).rejects.toThrow('/model/unload returned 409');
  });
});
|
||||
|
||||
// ── cancelJob ─────────────────────────────────────────────────────────────────
|
||||
|
||||
describe('cancelJob', () => {
|
||||
|
||||
Reference in New Issue
Block a user