diff --git a/src/lib/server/embeddings/embedding.service.test.ts b/src/lib/server/embeddings/embedding.service.test.ts
new file mode 100644
index 0000000..4e309b9
--- /dev/null
+++ b/src/lib/server/embeddings/embedding.service.test.ts
@@ -0,0 +1,415 @@
+/**
+ * Unit tests for the embedding provider abstraction and EmbeddingService
+ * storage logic.
+ *
+ * Tests use in-memory SQLite and mock providers — no real API calls are made.
+ */
+
+import { describe, it, expect, beforeEach, afterEach, vi } from 'vitest';
+import Database from 'better-sqlite3';
+import { drizzle } from 'drizzle-orm/better-sqlite3';
+import { migrate } from 'drizzle-orm/better-sqlite3/migrator';
+import { readFileSync } from 'node:fs';
+import { join } from 'node:path';
+import * as schema from '../db/schema.js';
+
+import { NoopEmbeddingProvider, EmbeddingError, type EmbeddingVector } from './provider.js';
+import { OpenAIEmbeddingProvider } from './openai.provider.js';
+import { EmbeddingService } from './embedding.service.js';
+import {
+  createProviderFromConfig,
+  defaultEmbeddingConfig,
+  EMBEDDING_CONFIG_KEY,
+  type EmbeddingConfig
+} from './factory.js';
+
+// ---------------------------------------------------------------------------
+// Test DB helpers
+// ---------------------------------------------------------------------------
+
+/**
+ * Create a fresh in-memory SQLite database with the drizzle migrations and
+ * the FTS schema (`fts.sql`) applied.
+ *
+ * @returns The drizzle wrapper (`db`) and the raw better-sqlite3 handle (`client`).
+ */
+function createTestDb() {
+  const client = new Database(':memory:');
+  client.pragma('foreign_keys = ON');
+
+  const db = drizzle(client, { schema });
+  const migrationsFolder = join(import.meta.dirname, '../db/migrations');
+  migrate(db, { migrationsFolder });
+
+  const ftsSql = readFileSync(join(import.meta.dirname, '../db/fts.sql'), 'utf-8');
+  client.exec(ftsSql);
+
+  return { db, client };
+}
+
+/** Drizzle database type produced by {@link createTestDb}. */
+type TestDb = ReturnType<typeof createTestDb>['db'];
+
+const now = new Date();
+
+/**
+ * Insert a document and a snippet (plus the shared test repository when it is
+ * not there yet) and return the id of the new snippet.
+ *
+ * @param db - Drizzle database to seed.
+ * @param client - Raw SQLite handle; not used by the helper itself, kept so
+ *   call sites can pass both handles from createTestDb().
+ * @param overrides - Column values that replace the snippet defaults.
+ * @returns The generated snippet id.
+ */
+function seedSnippet(
+  db: TestDb,
+  client: Database.Database,
+  overrides: Partial<typeof schema.snippets.$inferInsert> = {}
+): string {
+  const repoId = '/test/embed-repo';
+  // Ensure the repo exists. Only a primary-key conflict is tolerated; any
+  // other insert failure (e.g. schema drift) must fail the test loudly.
+  db.insert(schema.repositories)
+    .values({
+      id: repoId,
+      title: 'Embed Repo',
+      source: 'github',
+      sourceUrl: 'https://github.com/test/embed-repo',
+      createdAt: now,
+      updatedAt: now
+    })
+    .onConflictDoNothing()
+    .run();
+
+  const docId = crypto.randomUUID();
+  db.insert(schema.documents)
+    .values({
+      id: docId,
+      repositoryId: repoId,
+      filePath: 'README.md',
+      checksum: 'abc',
+      indexedAt: now
+    })
+    .run();
+
+  const snippetId = crypto.randomUUID();
+  db.insert(schema.snippets)
+    .values({
+      id: snippetId,
+      documentId: docId,
+      repositoryId: repoId,
+      type: 'info',
+      content: 'Hello embedding world',
+      title: 'Embed snippet',
+      createdAt: now,
+      ...overrides
+    })
+    .run();
+
+  return snippetId;
+}
+
+// ---------------------------------------------------------------------------
+// NoopEmbeddingProvider
+// ---------------------------------------------------------------------------
+
+describe('NoopEmbeddingProvider', () => {
+  it('returns an empty array for any input', async () => {
+    const provider = new NoopEmbeddingProvider();
+    const result = await provider.embed(['text1', 'text2']);
+    expect(result).toEqual([]);
+  });
+
+  it('isAvailable() returns false', async () => {
+    const provider = new NoopEmbeddingProvider();
+    expect(await provider.isAvailable()).toBe(false);
+  });
+
+  it('has the expected name and dimensions', () => {
+    const provider = new NoopEmbeddingProvider();
+    expect(provider.name).toBe('noop');
+    expect(provider.dimensions).toBe(0);
+    expect(provider.model).toBe('none');
+  });
+});
+
+// ---------------------------------------------------------------------------
+// OpenAIEmbeddingProvider (with fetch mocking)
+// ---------------------------------------------------------------------------
+
+describe('OpenAIEmbeddingProvider', () => {
+  /** Deterministic fake vector: element i is (i + index + 1) * 0.01. */
+  function makeFakeEmbedding(dim: number, index = 0): number[] {
+    return Array.from({ length: dim }, (_, i) => (i + index + 1) * 0.01);
+  }
+
+  /** Stub global fetch with a 200 response carrying the given embeddings. */
+  function mockFetchSuccess(embeddings: number[][]) {
+    const data = embeddings.map((emb, index) => ({ embedding: emb, index }));
+    vi.stubGlobal(
+      'fetch',
+      vi.fn().mockResolvedValue({
+        ok: true,
+        json: async () => ({ data })
+      })
+    );
+  }
+
+  /** Stub global fetch with a non-OK response of the given HTTP status. */
+  function mockFetchFailure(status: number) {
+    vi.stubGlobal(
+      'fetch',
+      vi.fn().mockResolvedValue({
+        ok: false,
+        status,
+        json: async () => ({ error: { message: 'Bad request' } })
+      })
+    );
+  }
+
+  beforeEach(() => {
+    vi.restoreAllMocks();
+  });
+
+  afterEach(() => {
+    // restoreAllMocks() does not undo vi.stubGlobal(); without this the fake
+    // `fetch` would stay installed for every test that runs afterwards.
+    vi.unstubAllGlobals();
+  });
+
+  it('embeds texts and returns Float32Array vectors', async () => {
+    const emb = makeFakeEmbedding(4);
+    mockFetchSuccess([emb]);
+
+    const provider = new OpenAIEmbeddingProvider({
+      baseUrl: 'https://api.openai.com/v1',
+      apiKey: 'test-key',
+      model: 'text-embedding-3-small'
+    });
+
+    const result = await provider.embed(['hello world']);
+    expect(result).toHaveLength(1);
+    expect(result[0].model).toBe('text-embedding-3-small');
+    expect(result[0].dimensions).toBe(4);
+    expect(result[0].values).toBeInstanceOf(Float32Array);
+    expect(result[0].values[0]).toBeCloseTo(0.01, 5);
+  });
+
+  it('batches large input into multiple fetch calls', async () => {
+    // Make fetch always succeed with 2 fake embeddings.
+    const fetchMock = vi.fn().mockResolvedValue({
+      ok: true,
+      json: async () => ({
+        data: [
+          { embedding: makeFakeEmbedding(2, 0), index: 0 },
+          { embedding: makeFakeEmbedding(2, 1), index: 1 }
+        ]
+      })
+    });
+    vi.stubGlobal('fetch', fetchMock);
+
+    const provider = new OpenAIEmbeddingProvider({
+      baseUrl: 'https://api.openai.com/v1',
+      apiKey: 'sk-test',
+      model: 'text-embedding-3-small',
+      maxBatchSize: 2
+    });
+
+    // 4 texts with maxBatchSize=2 → 2 fetch calls.
+    const result = await provider.embed(['a', 'b', 'c', 'd']);
+    expect(fetchMock).toHaveBeenCalledTimes(2);
+    expect(result).toHaveLength(4);
+  });
+
+  it('throws EmbeddingError on API failure', async () => {
+    mockFetchFailure(400);
+
+    const provider = new OpenAIEmbeddingProvider({
+      baseUrl: 'https://api.openai.com/v1',
+      apiKey: 'bad-key',
+      model: 'text-embedding-3-small'
+    });
+
+    await expect(provider.embed(['hello'])).rejects.toThrow(EmbeddingError);
+  });
+
+  it('includes dimensions in request body when configured', async () => {
+    const fetchMock = vi.fn().mockResolvedValue({
+      ok: true,
+      json: async () => ({ data: [{ embedding: [0.1, 0.2], index: 0 }] })
+    });
+    vi.stubGlobal('fetch', fetchMock);
+
+    const provider = new OpenAIEmbeddingProvider({
+      baseUrl: 'https://api.openai.com/v1',
+      apiKey: 'sk-test',
+      model: 'text-embedding-3-small',
+      dimensions: 512
+    });
+
+    await provider.embed(['test']);
+
+    const callBody = JSON.parse(fetchMock.mock.calls[0][1].body);
+    expect(callBody.dimensions).toBe(512);
+  });
+
+  it('omits dimensions field when not configured', async () => {
+    const fetchMock = vi.fn().mockResolvedValue({
+      ok: true,
+      json: async () => ({ data: [{ embedding: [0.1], index: 0 }] })
+    });
+    vi.stubGlobal('fetch', fetchMock);
+
+    const provider = new OpenAIEmbeddingProvider({
+      baseUrl: 'https://api.openai.com/v1',
+      apiKey: 'sk-test',
+      model: 'nomic-embed-text'
+    });
+
+    await provider.embed(['test']);
+
+    const callBody = JSON.parse(fetchMock.mock.calls[0][1].body);
+    expect(callBody.dimensions).toBeUndefined();
+  });
+});
+
+// ---------------------------------------------------------------------------
+// EmbeddingService — storage logic
+// ---------------------------------------------------------------------------
+
+describe('EmbeddingService', () => {
+  let client: Database.Database;
+  let db: TestDb;
+
+  beforeEach(() => {
+    ({ client, db } = createTestDb());
+  });
+
+  /**
+   * In-memory provider that returns one vector per input text; element i of
+   * every vector is i * 0.1.
+   */
+  function makeProvider(dim: number, modelName = 'test-model') {
+    return {
+      name: 'mock',
+      dimensions: dim,
+      model: modelName,
+      async embed(texts: string[]): Promise<EmbeddingVector[]> {
+        return texts.map(() => ({
+          values: new Float32Array(Array.from({ length: dim }, (_, i) => i * 0.1)),
+          dimensions: dim,
+          model: modelName
+        }));
+      },
+      async isAvailable() {
+        return true;
+      }
+    };
+  }
+
+  it('stores embeddings in snippet_embeddings table', async () => {
+    const snippetId = seedSnippet(db, client);
+    const provider = makeProvider(4);
+    const service = new EmbeddingService(client, provider);
+
+    await service.embedSnippets([snippetId]);
+
+    const rows = client
+      .prepare('SELECT * FROM snippet_embeddings WHERE snippet_id = ?')
+      .all(snippetId);
+    expect(rows).toHaveLength(1);
+
+    const row = rows[0] as { model: string; dimensions: number; embedding: Buffer };
+    expect(row.model).toBe('test-model');
+    expect(row.dimensions).toBe(4);
+    expect(row.embedding).toBeInstanceOf(Buffer);
+  });
+
+  it('stores embeddings as retrievable Float32Array blobs', async () => {
+    const snippetId = seedSnippet(db, client);
+    const provider = makeProvider(3);
+    const service = new EmbeddingService(client, provider);
+
+    await service.embedSnippets([snippetId]);
+
+    const retrieved = service.getEmbedding(snippetId);
+    expect(retrieved).toBeInstanceOf(Float32Array);
+    expect(retrieved!.length).toBe(3);
+    expect(retrieved![0]).toBeCloseTo(0.0, 5);
+    expect(retrieved![1]).toBeCloseTo(0.1, 5);
+    expect(retrieved![2]).toBeCloseTo(0.2, 5);
+  });
+
+  it('is idempotent — re-embedding replaces the existing row', async () => {
+    const snippetId = seedSnippet(db, client);
+    const provider = makeProvider(2);
+    const service = new EmbeddingService(client, provider);
+
+    await service.embedSnippets([snippetId]);
+    await service.embedSnippets([snippetId]);
+
+    const rows = client
+      .prepare('SELECT COUNT(*) as cnt FROM snippet_embeddings WHERE snippet_id = ?')
+      .get(snippetId) as { cnt: number };
+    expect(rows.cnt).toBe(1);
+  });
+
+  it('calls onProgress after each batch', async () => {
+    const ids: string[] = [];
+    for (let i = 0; i < 3; i++) {
+      ids.push(seedSnippet(db, client));
+    }
+
+    const provider = makeProvider(2);
+    const service = new EmbeddingService(client, provider);
+
+    const progress: Array<[number, number]> = [];
+    await service.embedSnippets(ids, (done, total) => {
+      progress.push([done, total]);
+    });
+
+    // With BATCH_SIZE=50 and 3 items, we expect exactly one progress call.
+    expect(progress).toEqual([[3, 3]]);
+  });
+
+  it('handles empty snippetIds gracefully', async () => {
+    const provider = makeProvider(4);
+    const service = new EmbeddingService(client, provider);
+    // Should not throw.
+    await expect(service.embedSnippets([])).resolves.toBeUndefined();
+  });
+
+  it('returns null from getEmbedding when no embedding exists', () => {
+    const provider = makeProvider(4);
+    const service = new EmbeddingService(client, provider);
+
+    const result = service.getEmbedding('nonexistent-id');
+    expect(result).toBeNull();
+  });
+
+  it('ignores snippet IDs that do not exist in the database', async () => {
+    const provider = makeProvider(4);
+    const service = new EmbeddingService(client, provider);
+
+    // Should complete without error.
+    await expect(service.embedSnippets(['ghost-id-1', 'ghost-id-2'])).resolves.toBeUndefined();
+
+    const rows = client.prepare('SELECT COUNT(*) as cnt FROM snippet_embeddings').get() as {
+      cnt: number;
+    };
+    expect(rows.cnt).toBe(0);
+  });
+});
+
+// ---------------------------------------------------------------------------
+// Factory
+// ---------------------------------------------------------------------------
+
+describe('createProviderFromConfig', () => {
+  it('returns NoopEmbeddingProvider for provider=none', () => {
+    const provider = createProviderFromConfig({ provider: 'none' });
+    expect(provider.name).toBe('noop');
+  });
+
+  it('returns OpenAIEmbeddingProvider for provider=openai', () => {
+    const provider = createProviderFromConfig({
+      provider: 'openai',
+      openai: {
+        baseUrl: 'https://api.openai.com/v1',
+        apiKey: 'sk-test',
+        model: 'text-embedding-3-small'
+      }
+    });
+    expect(provider.name).toBe('openai');
+  });
+
+  it('returns LocalEmbeddingProvider for provider=local', () => {
+    const provider = createProviderFromConfig({ provider: 'local' });
+    expect(provider.name).toBe('local');
+  });
+
+  it('throws when openai provider is selected without config', () => {
+    expect(() =>
+      createProviderFromConfig({ provider: 'openai' } as EmbeddingConfig)
+    ).toThrow();
+  });
+
+  it('defaultEmbeddingConfig returns provider=none', () => {
+    expect(defaultEmbeddingConfig().provider).toBe('none');
+  });
+
+  it('EMBEDDING_CONFIG_KEY is the expected settings key', () => {
+    expect(EMBEDDING_CONFIG_KEY).toBe('embedding_config');
+  });
+});
diff --git a/src/lib/server/embeddings/embedding.service.ts b/src/lib/server/embeddings/embedding.service.ts
new file mode 100644
index 0000000..e245dd6
--- /dev/null
+++ b/src/lib/server/embeddings/embedding.service.ts
@@ -0,0 +1,101 @@
+/**
+ * EmbeddingService — batches embedding requests and persists results to
+ * the snippet_embeddings table.
+ */
+
+import type Database from 'better-sqlite3';
+import { EmbeddingError, type EmbeddingProvider } from './provider.js';
+
+/** Columns read from `snippets` to build the text that is embedded. */
+interface SnippetRow {
+  id: string;
+  title: string | null;
+  breadcrumb: string | null;
+  content: string;
+}
+
+/** Number of snippets sent to the provider, and committed, per batch. */
+const BATCH_SIZE = 50;
+/** Maximum characters of joined title/breadcrumb/content embedded per snippet. */
+const TEXT_MAX_CHARS = 2048;
+/**
+ * Maximum number of ids bound to one `IN (...)` lookup. SQLite caps the number
+ * of bound parameters per statement, so large id lists are looked up in slices.
+ */
+const ID_LOOKUP_CHUNK_SIZE = 500;
+
+export class EmbeddingService {
+  constructor(
+    private readonly db: Database.Database,
+    private readonly provider: EmbeddingProvider
+  ) {}
+
+  /**
+   * Embed the given snippet IDs and store the results in snippet_embeddings.
+   *
+   * Only snippets that actually exist in the database are processed.
+   * Results are upserted (INSERT OR REPLACE) so re-embedding is idempotent.
+   * Each batch is written in its own transaction, so batches committed before
+   * a failure stay stored.
+   *
+   * @param snippetIds - Array of snippet UUIDs to embed. Duplicates are ignored.
+   * @param onProgress - Optional callback invoked after each batch with
+   *                     (completedCount, totalCount).
+   * @throws EmbeddingError when the provider returns a different number of
+   *         vectors than the number of texts it was given.
+   */
+  async embedSnippets(
+    snippetIds: string[],
+    onProgress?: (done: number, total: number) => void
+  ): Promise<void> {
+    if (snippetIds.length === 0) return;
+
+    const snippets = this.loadSnippets(snippetIds);
+    if (snippets.length === 0) return;
+
+    // One text per snippet: non-empty title / breadcrumb / content joined by
+    // newlines and truncated to TEXT_MAX_CHARS.
+    const texts = snippets.map((s) =>
+      [s.title, s.breadcrumb, s.content]
+        .filter(Boolean)
+        .join('\n')
+        .slice(0, TEXT_MAX_CHARS)
+    );
+
+    const insert = this.db.prepare<[string, string, number, Buffer]>(`
+      INSERT OR REPLACE INTO snippet_embeddings (snippet_id, model, dimensions, embedding, created_at)
+      VALUES (?, ?, ?, ?, unixepoch())
+    `);
+
+    for (let i = 0; i < snippets.length; i += BATCH_SIZE) {
+      const batchSnippets = snippets.slice(i, i + BATCH_SIZE);
+      const batchTexts = texts.slice(i, i + BATCH_SIZE);
+
+      const embeddings = await this.provider.embed(batchTexts);
+
+      // A provider that returns fewer vectors than inputs (e.g. the noop
+      // provider, which returns []) would otherwise surface as an opaque
+      // TypeError on `embeddings[j]` inside the transaction.
+      if (embeddings.length !== batchSnippets.length) {
+        throw new EmbeddingError(
+          `Embedding provider "${this.provider.name}" returned ${embeddings.length} vectors for ${batchSnippets.length} snippets.`
+        );
+      }
+
+      const insertMany = this.db.transaction(() => {
+        batchSnippets.forEach((snippet, j) => {
+          const { model, dimensions, values } = embeddings[j];
+          insert.run(
+            snippet.id,
+            model,
+            dimensions,
+            // Respect the view's offset/length: `values` may be a window onto
+            // a larger ArrayBuffer, in which case `values.buffer` alone would
+            // store unrelated bytes.
+            Buffer.from(values.buffer, values.byteOffset, values.byteLength)
+          );
+        });
+      });
+      insertMany();
+
+      onProgress?.(Math.min(i + BATCH_SIZE, snippets.length), snippets.length);
+    }
+  }
+
+  /**
+   * Retrieve a stored embedding for a snippet as a Float32Array.
+   * Returns null when no embedding has been stored for the given snippet.
+   *
+   * @throws Error when the stored blob is shorter than `dimensions` floats.
+   */
+  getEmbedding(snippetId: string): Float32Array | null {
+    const row = this.db
+      .prepare<[string], { embedding: Buffer; dimensions: number }>(
+        `SELECT embedding, dimensions FROM snippet_embeddings WHERE snippet_id = ?`
+      )
+      .get(snippetId);
+
+    if (!row) return null;
+
+    const byteLength = row.dimensions * Float32Array.BYTES_PER_ELEMENT;
+    if (row.embedding.byteLength < byteLength) {
+      throw new Error(
+        `Stored embedding for snippet "${snippetId}" has ${row.embedding.byteLength} bytes; expected at least ${byteLength}.`
+      );
+    }
+
+    // Copy into a fresh ArrayBuffer instead of viewing the Buffer in place:
+    // a Float32Array view requires a byteOffset that is a multiple of 4,
+    // which a Buffer does not guarantee.
+    const aligned = new Uint8Array(byteLength);
+    aligned.set(row.embedding.subarray(0, byteLength));
+    return new Float32Array(aligned.buffer);
+  }
+
+  /** Load the rows for the given ids, de-duplicated and in bounded slices. */
+  private loadSnippets(snippetIds: string[]): SnippetRow[] {
+    const uniqueIds = [...new Set(snippetIds)];
+    const rows: SnippetRow[] = [];
+
+    for (let i = 0; i < uniqueIds.length; i += ID_LOOKUP_CHUNK_SIZE) {
+      const ids = uniqueIds.slice(i, i + ID_LOOKUP_CHUNK_SIZE);
+      const placeholders = ids.map(() => '?').join(',');
+      rows.push(
+        ...this.db
+          .prepare<string[], SnippetRow>(
+            `SELECT id, title, breadcrumb, content FROM snippets WHERE id IN (${placeholders})`
+          )
+          .all(...ids)
+      );
+    }
+
+    return rows;
+  }
+}
diff --git a/src/lib/server/embeddings/factory.ts b/src/lib/server/embeddings/factory.ts
new file mode 100644
index 0000000..692206b
--- /dev/null
+++ b/src/lib/server/embeddings/factory.ts
@@ -0,0 +1,52 @@
+/**
+ * Factory — create an EmbeddingProvider from a persisted EmbeddingConfig.
+ */
+
+import type { EmbeddingProvider } from './provider.js';
+import { NoopEmbeddingProvider } from './provider.js';
+import { OpenAIEmbeddingProvider } from './openai.provider.js';
+import { LocalEmbeddingProvider } from './local.provider.js';
+
+/**
+ * Persisted embedding configuration.
+ * `openai` must be present when `provider` is "openai".
+ */
+export interface EmbeddingConfig {
+  provider: 'openai' | 'local' | 'none';
+  openai?: {
+    baseUrl: string;
+    apiKey: string;
+    model: string;
+    dimensions?: number;
+    maxBatchSize?: number;
+  };
+}
+
+/** The settings table key used to persist the embedding configuration. */
+export const EMBEDDING_CONFIG_KEY = 'embedding_config';
+
+/**
+ * Construct the appropriate EmbeddingProvider for the given config.
+ * Falls back to NoopEmbeddingProvider for unknown or missing providers.
+ *
+ * @param config - Embedding configuration, e.g. as read from the settings table.
+ * @returns The provider selected by `config.provider`.
+ * @throws Error when `provider` is "openai" but `config.openai` is missing.
+ */
+export function createProviderFromConfig(config: EmbeddingConfig): EmbeddingProvider {
+  switch (config.provider) {
+    case 'openai': {
+      if (!config.openai) {
+        throw new Error('OpenAI provider selected but no openai config provided.');
+      }
+      return new OpenAIEmbeddingProvider(config.openai);
+    }
+    case 'local': {
+      return new LocalEmbeddingProvider();
+    }
+    case 'none':
+    default:
+      return new NoopEmbeddingProvider();
+  }
+}
+
+/**
+ * Return a default (noop) config when nothing has been stored yet.
+ */
+export function defaultEmbeddingConfig(): EmbeddingConfig {
+  return { provider: 'none' };
+}
+
+export type { EmbeddingProvider };
diff --git a/src/lib/server/embeddings/local.provider.ts b/src/lib/server/embeddings/local.provider.ts
new file mode 100644
index 0000000..f27fed7
--- /dev/null
+++ b/src/lib/server/embeddings/local.provider.ts
@@ -0,0 +1,61 @@
+/**
+ * LocalEmbeddingProvider — uses @xenova/transformers (optional dependency).
+ *
+ * @xenova/transformers is NOT installed by default. This provider uses a
+ * dynamic import so the module is only required at runtime when the local
+ * provider is actually configured. If the package is absent, isAvailable()
+ * returns false and embed() throws a clear error.
+ */
+
+import { EmbeddingError, type EmbeddingProvider, type EmbeddingVector } from './provider.js';
+
+/** Feature-extraction pipeline as this provider uses it: text in, vector data out. */
+type FeatureExtractor = (
+  text: string,
+  options: Record<string, unknown>
+) => Promise<{ data: ArrayLike<number> }>;
+
+/** The part of the @xenova/transformers module surface this provider relies on. */
+interface TransformersModule {
+  pipeline(task: string, model: string): Promise<FeatureExtractor>;
+}
+
+export class LocalEmbeddingProvider implements EmbeddingProvider {
+  readonly name = 'local';
+  readonly model = 'Xenova/all-MiniLM-L6-v2';
+  readonly dimensions = 384;
+
+  // Cache the in-flight load rather than the loaded pipeline, so concurrent
+  // first calls to embed() share a single model load.
+  private pipelinePromise: Promise<FeatureExtractor> | null = null;
+
+  /**
+   * Embed each text with the local feature-extraction pipeline (mean pooling,
+   * normalized), one text at a time.
+   *
+   * @returns One vector per input text, in input order.
+   * @throws EmbeddingError when @xenova/transformers cannot be imported.
+   */
+  async embed(texts: string[]): Promise<EmbeddingVector[]> {
+    const extract = await this.loadPipeline();
+
+    const results: EmbeddingVector[] = [];
+    for (const text of texts) {
+      const output = await extract(text, {
+        pooling: 'mean',
+        normalize: true
+      });
+      const values = new Float32Array(output.data);
+      results.push({
+        values,
+        // Report the real vector length so the stored `dimensions` always
+        // matches the stored blob.
+        dimensions: values.length,
+        model: this.model
+      });
+    }
+    return results;
+  }
+
+  /** Report whether @xenova/transformers can be imported in this runtime. */
+  async isAvailable(): Promise<boolean> {
+    try {
+      await import('@xenova/transformers' as never);
+      return true;
+    } catch {
+      return false;
+    }
+  }
+
+  /** Return the shared pipeline, starting the load on first use. */
+  private loadPipeline(): Promise<FeatureExtractor> {
+    if (!this.pipelinePromise) {
+      this.pipelinePromise = this.createPipeline().catch((err: unknown) => {
+        // Do not cache a failed load; the next embed() call retries.
+        this.pipelinePromise = null;
+        throw err;
+      });
+    }
+    return this.pipelinePromise;
+  }
+
+  /** Import the optional dependency and build the feature-extraction pipeline. */
+  private async createPipeline(): Promise<FeatureExtractor> {
+    let transformers: TransformersModule;
+    try {
+      // Dynamic import — only succeeds when @xenova/transformers is installed.
+      // eslint-disable-next-line @typescript-eslint/no-explicit-any
+      transformers = (await import('@xenova/transformers' as any)) as TransformersModule;
+    } catch {
+      throw new EmbeddingError(
+        '@xenova/transformers is not installed. Install it to use the local embedding provider.'
+      );
+    }
+    return transformers.pipeline('feature-extraction', this.model);
+  }
+}
diff --git a/src/lib/server/embeddings/openai.provider.ts b/src/lib/server/embeddings/openai.provider.ts
new file mode 100644
index 0000000..aab6564
--- /dev/null
+++ b/src/lib/server/embeddings/openai.provider.ts
@@ -0,0 +1,104 @@
+/**
+ * OpenAIEmbeddingProvider — works with any OpenAI-compatible embeddings API
+ * including OpenAI, Azure OpenAI, and Ollama.
+ */
+
+import { EmbeddingError, type EmbeddingProvider, type EmbeddingVector } from './provider.js';
+
+export interface OpenAIProviderConfig {
+  /** e.g. "https://api.openai.com/v1" or "http://localhost:11434/v1" */
+  baseUrl: string;
+  apiKey: string;
+  /** e.g. "text-embedding-3-small", "nomic-embed-text" */
+  model: string;
+  /** Override dimensions for models that support it (e.g. text-embedding-3-small). */
+  dimensions?: number;
+  /** Maximum number of texts per API request. Default: 100. */
+  maxBatchSize?: number;
+}
+
+/**
+ * Split `arr` into consecutive slices of at most `size` elements.
+ *
+ * @throws RangeError when `size` is not a positive integer; a size of 0 would
+ *         otherwise never advance the loop.
+ */
+function chunk<T>(arr: T[], size: number): T[][] {
+  if (!Number.isInteger(size) || size < 1) {
+    throw new RangeError(`chunk size must be a positive integer (received ${size}).`);
+  }
+  const chunks: T[][] = [];
+  for (let i = 0; i < arr.length; i += size) {
+    chunks.push(arr.slice(i, i + size));
+  }
+  return chunks;
+}
+
+export class OpenAIEmbeddingProvider implements EmbeddingProvider {
+  readonly name = 'openai';
+
+  get model(): string {
+    return this.config.model;
+  }
+
+  /** Configured dimensions, or 1536 when none were configured. */
+  get dimensions(): number {
+    return this.config.dimensions ?? 1536;
+  }
+
+  constructor(private readonly config: OpenAIProviderConfig) {}
+
+  /**
+   * Embed `texts` through POST `{baseUrl}/embeddings`, at most `maxBatchSize`
+   * (default 100) texts per request, requests issued sequentially.
+   *
+   * @returns One vector per input text, in input order.
+   * @throws EmbeddingError on a non-OK HTTP response, on an invalid
+   *         `maxBatchSize`, or when a response does not contain exactly one
+   *         embedding per input.
+   */
+  async embed(texts: string[]): Promise<EmbeddingVector[]> {
+    const batchSize = this.config.maxBatchSize ?? 100;
+    if (!Number.isInteger(batchSize) || batchSize < 1) {
+      throw new EmbeddingError(
+        `maxBatchSize must be a positive integer (received ${batchSize}).`
+      );
+    }
+
+    // Tolerate a trailing slash in baseUrl so the path never becomes "//embeddings".
+    const endpoint = `${this.config.baseUrl.replace(/\/+$/, '')}/embeddings`;
+    const allEmbeddings: EmbeddingVector[] = [];
+
+    for (const batch of chunk(texts, batchSize)) {
+      const body: Record<string, unknown> = {
+        model: this.config.model,
+        input: batch
+      };
+      if (this.config.dimensions !== undefined) {
+        body.dimensions = this.config.dimensions;
+      }
+
+      const response = await fetch(endpoint, {
+        method: 'POST',
+        headers: {
+          Authorization: `Bearer ${this.config.apiKey}`,
+          'Content-Type': 'application/json'
+        },
+        body: JSON.stringify(body)
+      });
+
+      if (!response.ok) {
+        let detail = '';
+        try {
+          const errBody = (await response.json()) as { error?: { message?: string } } | null;
+          detail = errBody?.error?.message ?? '';
+        } catch {
+          // Error body was not JSON; report the status code alone.
+        }
+        throw new EmbeddingError(
+          `OpenAI API error ${response.status}${detail ? `: ${detail}` : ''}`
+        );
+      }
+
+      const payload = (await response.json()) as {
+        data?: Array<{ embedding: number[]; index: number }>;
+      } | null;
+      const items = Array.isArray(payload?.data) ? payload.data : [];
+
+      // Callers pair vectors with inputs by position, so a short or
+      // malformed response must not be passed through silently.
+      if (items.length !== batch.length) {
+        throw new EmbeddingError(
+          `OpenAI API returned ${items.length} embeddings for ${batch.length} inputs.`
+        );
+      }
+
+      // API returns results in order; sort by index just in case.
+      const sorted = [...items].sort((a, b) => a.index - b.index);
+      for (const item of sorted) {
+        allEmbeddings.push({
+          values: new Float32Array(item.embedding),
+          dimensions: item.embedding.length,
+          model: this.config.model
+        });
+      }
+    }
+
+    return allEmbeddings;
+  }
+
+  /**
+   * Probe the endpoint by embedding the single text "ping".
+   * Note that this issues a real embeddings request against the configured API.
+   */
+  async isAvailable(): Promise<boolean> {
+    try {
+      // Ping with a minimal single-item embed request.
+      await this.embed(['ping']);
+      return true;
+    } catch {
+      return false;
+    }
+  }
+}
diff --git a/src/lib/server/embeddings/provider.ts b/src/lib/server/embeddings/provider.ts
new file mode 100644
index 0000000..f26e278
--- /dev/null
+++ b/src/lib/server/embeddings/provider.ts
@@ -0,0 +1,47 @@
+/**
+ * EmbeddingProvider interface and NoopEmbeddingProvider.
+ *
+ * The Noop provider returns no embeddings (an empty array) and enables
+ * FTS5-only mode when no embedding backend is configured.
+ */
+
+/** A single embedding together with the model that produced it. */
+export interface EmbeddingVector {
+  values: Float32Array;
+  dimensions: number;
+  model: string;
+}
+
+/** Contract implemented by every embedding backend. */
+export interface EmbeddingProvider {
+  readonly name: string;
+  readonly dimensions: number;
+  readonly model: string;
+
+  /** Embed the given texts. */
+  embed(texts: string[]): Promise<EmbeddingVector[]>;
+  /** Report whether the backend can currently be used. */
+  isAvailable(): Promise<boolean>;
+}
+
+/** Error raised by embedding providers for backend or configuration failures. */
+export class EmbeddingError extends Error {
+  constructor(message: string) {
+    super(message);
+    this.name = 'EmbeddingError';
+  }
+}
+
+/**
+ * NoopEmbeddingProvider — always returns empty results.
+ * Used as the default when no provider is configured; the system
+ * falls back gracefully to FTS5-only search.
+ */
+export class NoopEmbeddingProvider implements EmbeddingProvider {
+  readonly name = 'noop';
+  readonly dimensions = 0;
+  readonly model = 'none';
+
+  /** Always resolves to an empty array, regardless of input. */
+  async embed(_texts: string[]): Promise<EmbeddingVector[]> {
+    return [];
+  }
+
+  /** Always resolves to false. */
+  async isAvailable(): Promise<boolean> {
+    return false;
+  }
+}
diff --git a/src/routes/api/v1/settings/embedding/+server.ts b/src/routes/api/v1/settings/embedding/+server.ts
new file mode 100644
index 0000000..22e4111
--- /dev/null
+++ b/src/routes/api/v1/settings/embedding/+server.ts
@@ -0,0 +1,147 @@
+/**
+ * GET /api/v1/settings/embedding — retrieve current embedding configuration
+ * PUT /api/v1/settings/embedding — update embedding configuration
+ */
+
+import { json } from '@sveltejs/kit';
+import type { RequestHandler } from './$types';
+import { getClient } from '$lib/server/db/client';
+import {
+  EMBEDDING_CONFIG_KEY,
+  createProviderFromConfig,
+  defaultEmbeddingConfig,
+  type EmbeddingConfig
+} from '$lib/server/embeddings/factory';
+import { handleServiceError, InvalidInputError } from '$lib/server/utils/validation';
+
+// ---------------------------------------------------------------------------
+// Helpers
+// ---------------------------------------------------------------------------
+
+/**
+ * Load the persisted embedding config from the settings table.
+ *
+ * Falls back to the default (noop) config when no row exists, when the stored
+ * value is not valid JSON, or when it no longer passes validateConfig().
+ */
+function readConfig(db: ReturnType<typeof getClient>): EmbeddingConfig {
+  const row = db
+    .prepare(`SELECT value FROM settings WHERE key = ?`)
+    .get(EMBEDDING_CONFIG_KEY) as { value: string } | undefined;
+
+  if (!row) return defaultEmbeddingConfig();
+
+  try {
+    // Validate instead of casting: the stored JSON is untyped data.
+    return validateConfig(JSON.parse(row.value));
+  } catch {
+    return defaultEmbeddingConfig();
+  }
+}
+
+/**
+ * Accept an optional positive integer. `undefined` and `null` mean "not set".
+ *
+ * @throws InvalidInputError for any other value that is not a positive integer.
+ */
+function optionalPositiveInt(value: unknown, field: string): number | undefined {
+  if (value === undefined || value === null) return undefined;
+  if (typeof value !== 'number' || !Number.isInteger(value) || value < 1) {
+    throw new InvalidInputError(`${field} must be a positive integer when provided`);
+  }
+  return value;
+}
+
+/**
+ * Validate an untrusted value and convert it to an EmbeddingConfig.
+ *
+ * @param body - Parsed JSON of unknown shape.
+ * @returns A config containing only the recognised fields.
+ * @throws InvalidInputError when the provider is unknown or, for "openai",
+ *         when a required field is missing or a numeric option is invalid.
+ */
+function validateConfig(body: unknown): EmbeddingConfig {
+  if (typeof body !== 'object' || body === null) {
+    throw new InvalidInputError('Request body must be a JSON object');
+  }
+
+  const obj = body as Record<string, unknown>;
+
+  const provider = obj.provider;
+  if (provider !== 'openai' && provider !== 'local' && provider !== 'none') {
+    throw new InvalidInputError(
+      `Invalid provider "${String(provider)}". Must be one of: openai, local, none.`
+    );
+  }
+
+  if (provider !== 'openai') {
+    return { provider: provider as 'local' | 'none' };
+  }
+
+  const openai = obj.openai;
+  if (typeof openai !== 'object' || openai === null) {
+    throw new InvalidInputError('openai config object is required when provider is "openai"');
+  }
+
+  const { baseUrl, apiKey, model, dimensions, maxBatchSize } = openai as Record<string, unknown>;
+  if (typeof baseUrl !== 'string' || !baseUrl) {
+    throw new InvalidInputError('openai.baseUrl must be a non-empty string');
+  }
+  if (typeof apiKey !== 'string' || !apiKey) {
+    throw new InvalidInputError('openai.apiKey must be a non-empty string');
+  }
+  if (typeof model !== 'string' || !model) {
+    throw new InvalidInputError('openai.model must be a non-empty string');
+  }
+
+  return {
+    provider: 'openai',
+    openai: {
+      baseUrl,
+      apiKey,
+      model,
+      // Both options must be positive integers: maxBatchSize is used as the
+      // batching step by the OpenAI provider, and a step of 0 would never
+      // terminate.
+      dimensions: optionalPositiveInt(dimensions, 'openai.dimensions'),
+      maxBatchSize: optionalPositiveInt(maxBatchSize, 'openai.maxBatchSize')
+    }
+  };
+}
+
+// ---------------------------------------------------------------------------
+// GET
+// ---------------------------------------------------------------------------
+
+export const GET: RequestHandler = () => {
+  try {
+    const db = getClient();
+    const config = readConfig(db);
+
+    // Strip the apiKey from the response for security.
+    const safeConfig = sanitizeForResponse(config);
+    return json(safeConfig);
+  } catch (err) {
+    return handleServiceError(err);
+  }
+};
+
+// ---------------------------------------------------------------------------
+// PUT
+// ---------------------------------------------------------------------------
+
+export const PUT: RequestHandler = async ({ request }) => {
+  try {
+    let body: unknown;
+    try {
+      body = await request.json();
+    } catch {
+      // Report a malformed body as invalid input rather than as a raw
+      // SyntaxError.
+      throw new InvalidInputError('Request body must be valid JSON');
+    }
+    const config = validateConfig(body);
+
+    // Verify provider connectivity before persisting (skip for noop).
+    if (config.provider !== 'none') {
+      const provider = createProviderFromConfig(config);
+      const available = await provider.isAvailable();
+      if (!available) {
+        throw new InvalidInputError(
+          `Could not connect to the "${config.provider}" embedding provider. Check your configuration.`
+        );
+      }
+    }
+
+    const db = getClient();
+    db.prepare(
+      `INSERT INTO settings (key, value, updated_at)
+       VALUES (?, ?, unixepoch())
+       ON CONFLICT (key) DO UPDATE SET value = excluded.value, updated_at = excluded.updated_at`
+    ).run(EMBEDDING_CONFIG_KEY, JSON.stringify(config));
+
+    const safeConfig = sanitizeForResponse(config);
+    return json(safeConfig);
+  } catch (err) {
+    return handleServiceError(err);
+  }
+};
+
+// ---------------------------------------------------------------------------
+// Sanitize — remove sensitive fields before returning to clients
+// ---------------------------------------------------------------------------
+
+/**
+ * Return a copy of the config without `openai.apiKey`.
+ * Configs for other providers are returned unchanged.
+ */
+function sanitizeForResponse(config: EmbeddingConfig): Omit<EmbeddingConfig, 'openai'> & {
+  openai?: Omit<NonNullable<EmbeddingConfig['openai']>, 'apiKey'>;
+} {
+  if (config.provider === 'openai' && config.openai) {
+    const { apiKey: _apiKey, ...rest } = config.openai;
+    return { ...config, openai: rest };
+  }
+  return config;
+}