feat(TRUEREF-0007): implement pluggable embedding generation and vector storage

Add EmbeddingProvider interface with OpenAI-compatible, local (optional
@xenova/transformers via dynamic import), and Noop (FTS5-only fallback)
implementations. EmbeddingService batches requests and persists Float32Array
blobs to snippet_embeddings. GET/PUT /api/v1/settings/embedding endpoints
read and write embedding config from the settings table.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
Giancarmine Salucci
2026-03-22 18:07:26 +01:00
parent 3d1bef5003
commit bf4caf5e3b
7 changed files with 927 additions and 0 deletions

View File

@@ -0,0 +1,415 @@
/**
* Unit tests for the embedding provider abstraction and EmbeddingService
* storage logic.
*
* Tests use in-memory SQLite and mock providers — no real API calls are made.
*/
import { describe, it, expect, beforeEach, vi } from 'vitest';
import Database from 'better-sqlite3';
import { drizzle } from 'drizzle-orm/better-sqlite3';
import { migrate } from 'drizzle-orm/better-sqlite3/migrator';
import { readFileSync } from 'node:fs';
import { join } from 'node:path';
import * as schema from '../db/schema.js';
import { NoopEmbeddingProvider, EmbeddingError, type EmbeddingVector } from './provider.js';
import { OpenAIEmbeddingProvider } from './openai.provider.js';
import { EmbeddingService } from './embedding.service.js';
import {
createProviderFromConfig,
defaultEmbeddingConfig,
EMBEDDING_CONFIG_KEY,
type EmbeddingConfig
} from './factory.js';
// ---------------------------------------------------------------------------
// Test DB helpers
// ---------------------------------------------------------------------------
/**
 * Build a fresh in-memory SQLite database with the project schema applied:
 * pragmas, drizzle migrations, and the FTS5 setup script. Every call yields
 * an isolated database so tests cannot leak state into each other.
 */
function createTestDb() {
  const sqlite = new Database(':memory:');
  sqlite.pragma('foreign_keys = ON');
  const orm = drizzle(sqlite, { schema });
  migrate(orm, { migrationsFolder: join(import.meta.dirname, '../db/migrations') });
  sqlite.exec(readFileSync(join(import.meta.dirname, '../db/fts.sql'), 'utf-8'));
  return { db: orm, client: sqlite };
}
// Single timestamp shared by all seeded rows.
const now = new Date();

/**
 * Seed one repository → document → snippet chain and return the snippet id.
 * The repository insert is best-effort: repeated calls reuse the same repo row.
 * NOTE(review): the `client` parameter is currently unused; kept for call-site
 * compatibility.
 */
function seedSnippet(
  db: ReturnType<typeof drizzle>,
  client: Database.Database,
  overrides: Partial<schema.NewSnippet> = {}
): string {
  const repoId = '/test/embed-repo';
  try {
    const repoRow = {
      id: repoId,
      title: 'Embed Repo',
      source: 'github',
      sourceUrl: 'https://github.com/test/embed-repo',
      createdAt: now,
      updatedAt: now
    };
    db.insert(schema.repositories).values(repoRow).run();
  } catch {
    // Repo was already seeded by an earlier call — safe to ignore.
  }

  const docId = crypto.randomUUID();
  db.insert(schema.documents)
    .values({ id: docId, repositoryId: repoId, filePath: 'README.md', checksum: 'abc', indexedAt: now })
    .run();

  const snippetId = crypto.randomUUID();
  const snippetRow = {
    id: snippetId,
    documentId: docId,
    repositoryId: repoId,
    type: 'info',
    content: 'Hello embedding world',
    title: 'Embed snippet',
    createdAt: now,
    ...overrides
  };
  db.insert(schema.snippets).values(snippetRow).run();
  return snippetId;
}
// ---------------------------------------------------------------------------
// NoopEmbeddingProvider
// ---------------------------------------------------------------------------
describe('NoopEmbeddingProvider', () => {
  // Fresh instance per assertion; the provider is stateless.
  const makeProvider = () => new NoopEmbeddingProvider();

  it('returns an empty array for any input', async () => {
    await expect(makeProvider().embed(['text1', 'text2'])).resolves.toEqual([]);
  });

  it('isAvailable() returns false', async () => {
    await expect(makeProvider().isAvailable()).resolves.toBe(false);
  });

  it('has the expected name and dimensions', () => {
    const provider = makeProvider();
    expect(provider.name).toBe('noop');
    expect(provider.dimensions).toBe(0);
    expect(provider.model).toBe('none');
  });
});
// ---------------------------------------------------------------------------
// OpenAIEmbeddingProvider (with fetch mocking)
// ---------------------------------------------------------------------------
// Exercises the OpenAI-compatible provider against a stubbed global fetch.
// vi.stubGlobal replaces fetch process-wide; vi.restoreAllMocks in beforeEach
// resets it between tests so stubs never bleed across cases.
describe('OpenAIEmbeddingProvider', () => {
  // Deterministic fake vector whose values depend on position and `index`,
  // so distinct batch items produce distinct embeddings.
  function makeFakeEmbedding(dim: number, index = 0): number[] {
    return Array.from({ length: dim }, (_, i) => (i + index + 1) * 0.01);
  }

  // Stub fetch with a successful /embeddings response for the given vectors.
  function mockFetchSuccess(embeddings: number[][]) {
    const data = embeddings.map((emb, index) => ({ embedding: emb, index }));
    vi.stubGlobal(
      'fetch',
      vi.fn().mockResolvedValue({
        ok: true,
        json: async () => ({ data })
      })
    );
  }

  // Stub fetch with a failing response carrying the given HTTP status.
  function mockFetchFailure(status: number) {
    vi.stubGlobal(
      'fetch',
      vi.fn().mockResolvedValue({
        ok: false,
        status,
        json: async () => ({ error: { message: 'Bad request' } })
      })
    );
  }

  beforeEach(() => {
    vi.restoreAllMocks();
  });

  it('embeds texts and returns Float32Array vectors', async () => {
    const emb = makeFakeEmbedding(4);
    mockFetchSuccess([emb]);
    const provider = new OpenAIEmbeddingProvider({
      baseUrl: 'https://api.openai.com/v1',
      apiKey: 'test-key',
      model: 'text-embedding-3-small'
    });
    const result = await provider.embed(['hello world']);
    expect(result).toHaveLength(1);
    expect(result[0].model).toBe('text-embedding-3-small');
    expect(result[0].dimensions).toBe(4);
    expect(result[0].values).toBeInstanceOf(Float32Array);
    // First fake value is (0 + 0 + 1) * 0.01 = 0.01.
    expect(result[0].values[0]).toBeCloseTo(0.01, 5);
  });

  it('batches large input into multiple fetch calls', async () => {
    // Make fetch always succeed with 2 fake embeddings.
    const fetchMock = vi.fn().mockResolvedValue({
      ok: true,
      json: async () => ({
        data: [
          { embedding: makeFakeEmbedding(2, 0), index: 0 },
          { embedding: makeFakeEmbedding(2, 1), index: 1 }
        ]
      })
    });
    vi.stubGlobal('fetch', fetchMock);
    const provider = new OpenAIEmbeddingProvider({
      baseUrl: 'https://api.openai.com/v1',
      apiKey: 'sk-test',
      model: 'text-embedding-3-small',
      maxBatchSize: 2
    });
    // 4 texts with maxBatchSize=2 → 2 fetch calls.
    const result = await provider.embed(['a', 'b', 'c', 'd']);
    expect(fetchMock).toHaveBeenCalledTimes(2);
    expect(result).toHaveLength(4);
  });

  it('throws EmbeddingError on API failure', async () => {
    mockFetchFailure(400);
    const provider = new OpenAIEmbeddingProvider({
      baseUrl: 'https://api.openai.com/v1',
      apiKey: 'bad-key',
      model: 'text-embedding-3-small'
    });
    await expect(provider.embed(['hello'])).rejects.toThrow(EmbeddingError);
  });

  it('includes dimensions in request body when configured', async () => {
    const fetchMock = vi.fn().mockResolvedValue({
      ok: true,
      json: async () => ({ data: [{ embedding: [0.1, 0.2], index: 0 }] })
    });
    vi.stubGlobal('fetch', fetchMock);
    const provider = new OpenAIEmbeddingProvider({
      baseUrl: 'https://api.openai.com/v1',
      apiKey: 'sk-test',
      model: 'text-embedding-3-small',
      dimensions: 512
    });
    await provider.embed(['test']);
    // Inspect the JSON body of the first (and only) fetch call.
    const callBody = JSON.parse(fetchMock.mock.calls[0][1].body);
    expect(callBody.dimensions).toBe(512);
  });

  it('omits dimensions field when not configured', async () => {
    const fetchMock = vi.fn().mockResolvedValue({
      ok: true,
      json: async () => ({ data: [{ embedding: [0.1], index: 0 }] })
    });
    vi.stubGlobal('fetch', fetchMock);
    const provider = new OpenAIEmbeddingProvider({
      baseUrl: 'https://api.openai.com/v1',
      apiKey: 'sk-test',
      model: 'nomic-embed-text'
    });
    await provider.embed(['test']);
    const callBody = JSON.parse(fetchMock.mock.calls[0][1].body);
    expect(callBody.dimensions).toBeUndefined();
  });
});
// ---------------------------------------------------------------------------
// EmbeddingService — storage logic
// ---------------------------------------------------------------------------
// Storage-level tests: a mock provider produces deterministic vectors and the
// service is checked for persistence, idempotency, progress reporting, and
// graceful handling of empty/unknown inputs. A fresh in-memory DB per test.
describe('EmbeddingService', () => {
  let client: Database.Database;
  let db: ReturnType<typeof drizzle<typeof schema>>;

  beforeEach(() => {
    ({ client, db } = createTestDb());
  });

  // Mock provider: every text maps to the same vector [0, 0.1, 0.2, ...] of
  // length `dim`, which makes value assertions below predictable.
  function makeProvider(dim: number, modelName = 'test-model') {
    return {
      name: 'mock',
      dimensions: dim,
      model: modelName,
      async embed(texts: string[]): Promise<EmbeddingVector[]> {
        return texts.map(() => ({
          values: new Float32Array(Array.from({ length: dim }, (_, i) => i * 0.1)),
          dimensions: dim,
          model: modelName
        }));
      },
      async isAvailable() {
        return true;
      }
    };
  }

  it('stores embeddings in snippet_embeddings table', async () => {
    const snippetId = seedSnippet(db, client);
    const provider = makeProvider(4);
    const service = new EmbeddingService(client, provider);
    await service.embedSnippets([snippetId]);
    const rows = client.prepare('SELECT * FROM snippet_embeddings WHERE snippet_id = ?').all(snippetId);
    expect(rows).toHaveLength(1);
    const row = rows[0] as { model: string; dimensions: number; embedding: Buffer };
    expect(row.model).toBe('test-model');
    expect(row.dimensions).toBe(4);
    expect(row.embedding).toBeInstanceOf(Buffer);
  });

  it('stores embeddings as retrievable Float32Array blobs', async () => {
    const snippetId = seedSnippet(db, client);
    const provider = makeProvider(3);
    const service = new EmbeddingService(client, provider);
    await service.embedSnippets([snippetId]);
    const retrieved = service.getEmbedding(snippetId);
    expect(retrieved).toBeInstanceOf(Float32Array);
    expect(retrieved!.length).toBe(3);
    // Values round-trip through the BLOB column: [0.0, 0.1, 0.2].
    expect(retrieved![0]).toBeCloseTo(0.0, 5);
    expect(retrieved![1]).toBeCloseTo(0.1, 5);
    expect(retrieved![2]).toBeCloseTo(0.2, 5);
  });

  it('is idempotent — re-embedding replaces the existing row', async () => {
    const snippetId = seedSnippet(db, client);
    const provider = makeProvider(2);
    const service = new EmbeddingService(client, provider);
    await service.embedSnippets([snippetId]);
    await service.embedSnippets([snippetId]);
    const rows = client
      .prepare('SELECT COUNT(*) as cnt FROM snippet_embeddings WHERE snippet_id = ?')
      .get(snippetId) as { cnt: number };
    expect(rows.cnt).toBe(1);
  });

  it('calls onProgress after each batch', async () => {
    const ids: string[] = [];
    for (let i = 0; i < 3; i++) {
      ids.push(seedSnippet(db, client));
    }
    const provider = makeProvider(2);
    const service = new EmbeddingService(client, provider);
    const progress: Array<[number, number]> = [];
    await service.embedSnippets(ids, (done, total) => {
      progress.push([done, total]);
    });
    // With BATCH_SIZE=50 and 3 items, we expect exactly one progress call.
    expect(progress.length).toBeGreaterThan(0);
    expect(progress[progress.length - 1][0]).toBe(3);
    expect(progress[progress.length - 1][1]).toBe(3);
  });

  it('handles empty snippetIds gracefully', async () => {
    const provider = makeProvider(4);
    const service = new EmbeddingService(client, provider);
    // Should not throw.
    await expect(service.embedSnippets([])).resolves.toBeUndefined();
  });

  it('returns null from getEmbedding when no embedding exists', () => {
    const provider = makeProvider(4);
    const service = new EmbeddingService(client, provider);
    const result = service.getEmbedding('nonexistent-id');
    expect(result).toBeNull();
  });

  it('ignores snippet IDs that do not exist in the database', async () => {
    const provider = makeProvider(4);
    const service = new EmbeddingService(client, provider);
    // Should complete without error.
    await expect(service.embedSnippets(['ghost-id-1', 'ghost-id-2'])).resolves.toBeUndefined();
    const rows = client.prepare('SELECT COUNT(*) as cnt FROM snippet_embeddings').get() as {
      cnt: number;
    };
    expect(rows.cnt).toBe(0);
  });
});
// ---------------------------------------------------------------------------
// Factory
// ---------------------------------------------------------------------------
describe('createProviderFromConfig', () => {
  it('returns NoopEmbeddingProvider for provider=none', () => {
    expect(createProviderFromConfig({ provider: 'none' }).name).toBe('noop');
  });

  it('returns OpenAIEmbeddingProvider for provider=openai', () => {
    const config: EmbeddingConfig = {
      provider: 'openai',
      openai: {
        baseUrl: 'https://api.openai.com/v1',
        apiKey: 'sk-test',
        model: 'text-embedding-3-small'
      }
    };
    expect(createProviderFromConfig(config).name).toBe('openai');
  });

  it('returns LocalEmbeddingProvider for provider=local', () => {
    expect(createProviderFromConfig({ provider: 'local' }).name).toBe('local');
  });

  it('throws when openai provider is selected without config', () => {
    const broken = { provider: 'openai' } as EmbeddingConfig;
    expect(() => createProviderFromConfig(broken)).toThrow();
  });

  it('defaultEmbeddingConfig returns provider=none', () => {
    expect(defaultEmbeddingConfig().provider).toBe('none');
  });

  it('EMBEDDING_CONFIG_KEY is the expected settings key', () => {
    expect(EMBEDDING_CONFIG_KEY).toBe('embedding_config');
  });
});

View File

@@ -0,0 +1,101 @@
/**
* EmbeddingService — batches embedding requests and persists results to
* the snippet_embeddings table.
*/
import type Database from 'better-sqlite3';
import type { EmbeddingProvider } from './provider.js';
/** Columns selected from `snippets` to build the embedding input text. */
interface SnippetRow {
  id: string;
  title: string | null;
  breadcrumb: string | null;
  content: string;
}

/** Number of snippets sent to the provider per embed() call. */
const BATCH_SIZE = 50;
/** Hard cap on characters submitted per snippet. */
const TEXT_MAX_CHARS = 2048;

export class EmbeddingService {
  constructor(
    private readonly db: Database.Database,
    private readonly provider: EmbeddingProvider
  ) {}

  /**
   * Embed the given snippet IDs and store the results in snippet_embeddings.
   *
   * Only snippets that actually exist in the database are processed.
   * Results are upserted (INSERT OR REPLACE) so re-embedding is idempotent.
   *
   * @param snippetIds - Array of snippet UUIDs to embed.
   * @param onProgress - Optional callback invoked after each batch with
   *                     (completedCount, totalCount).
   * @throws Error when the provider returns a vector count that does not
   *         match the batch size (e.g. a noop/misconfigured provider).
   */
  async embedSnippets(
    snippetIds: string[],
    onProgress?: (done: number, total: number) => void
  ): Promise<void> {
    if (snippetIds.length === 0) return;
    const placeholders = snippetIds.map(() => '?').join(',');
    const snippets = this.db
      .prepare<string[], SnippetRow>(
        `SELECT id, title, breadcrumb, content FROM snippets WHERE id IN (${placeholders})`
      )
      .all(...snippetIds);
    if (snippets.length === 0) return;

    // Embedding input: title + breadcrumb + content (nulls dropped), truncated.
    const texts = snippets.map((s) =>
      [s.title, s.breadcrumb, s.content]
        .filter(Boolean)
        .join('\n')
        .slice(0, TEXT_MAX_CHARS)
    );

    const insert = this.db.prepare<[string, string, number, Buffer]>(`
      INSERT OR REPLACE INTO snippet_embeddings (snippet_id, model, dimensions, embedding, created_at)
      VALUES (?, ?, ?, ?, unixepoch())
    `);

    for (let i = 0; i < snippets.length; i += BATCH_SIZE) {
      const batchSnippets = snippets.slice(i, i + BATCH_SIZE);
      const batchTexts = texts.slice(i, i + BATCH_SIZE);
      const embeddings = await this.provider.embed(batchTexts);

      // Guard: a provider that returns fewer vectors than requested (the noop
      // provider returns []) would previously crash with an opaque TypeError
      // on `embedding.model`; fail loudly with a clear message instead.
      if (embeddings.length !== batchSnippets.length) {
        throw new Error(
          `Embedding provider "${this.provider.name}" returned ${embeddings.length} vectors for ${batchSnippets.length} texts`
        );
      }

      // One transaction per batch keeps the upserts atomic.
      const insertMany = this.db.transaction(() => {
        for (let j = 0; j < batchSnippets.length; j++) {
          const snippet = batchSnippets[j];
          const embedding = embeddings[j];
          // BUGFIX: Buffer.from(values.buffer) would wrap the ENTIRE underlying
          // ArrayBuffer. If the Float32Array is a view (nonzero byteOffset or a
          // subarray), the stored blob would contain wrong bytes and length.
          // Slice to exactly the view's bytes.
          insert.run(
            snippet.id,
            embedding.model,
            embedding.dimensions,
            Buffer.from(embedding.values.buffer, embedding.values.byteOffset, embedding.values.byteLength)
          );
        }
      });
      insertMany();
      onProgress?.(Math.min(i + BATCH_SIZE, snippets.length), snippets.length);
    }
  }

  /**
   * Retrieve a stored embedding for a snippet as a Float32Array.
   * Returns null when no embedding has been stored for the given snippet.
   */
  getEmbedding(snippetId: string): Float32Array | null {
    const row = this.db
      .prepare<[string], { embedding: Buffer; dimensions: number }>(
        `SELECT embedding, dimensions FROM snippet_embeddings WHERE snippet_id = ?`
      )
      .get(snippetId);
    if (!row) return null;
    // The Buffer may be a view into a shared pool, so honor its byteOffset.
    return new Float32Array(row.embedding.buffer, row.embedding.byteOffset, row.dimensions);
  }
}

View File

@@ -0,0 +1,52 @@
/**
* Factory — create an EmbeddingProvider from a persisted EmbeddingConfig.
*/
import type { EmbeddingProvider } from './provider.js';
import { NoopEmbeddingProvider } from './provider.js';
import { OpenAIEmbeddingProvider } from './openai.provider.js';
import { LocalEmbeddingProvider } from './local.provider.js';
/**
 * Persisted embedding configuration. `provider` selects the backend; the
 * `openai` sub-config is required only when provider === 'openai'.
 */
export interface EmbeddingConfig {
  provider: 'openai' | 'local' | 'none';
  openai?: {
    /** OpenAI-compatible API root, e.g. "https://api.openai.com/v1". */
    baseUrl: string;
    /** Bearer token for the Authorization header. */
    apiKey: string;
    /** Embedding model name, e.g. "text-embedding-3-small". */
    model: string;
    /** Optional dimension override for models that support it. */
    dimensions?: number;
    /** Optional cap on the number of texts per API request. */
    maxBatchSize?: number;
  };
}

/** The settings table key used to persist the embedding configuration. */
export const EMBEDDING_CONFIG_KEY = 'embedding_config';
/**
 * Construct the appropriate EmbeddingProvider for the given config.
 * Falls back to NoopEmbeddingProvider for unknown or missing providers.
 */
export function createProviderFromConfig(config: EmbeddingConfig): EmbeddingProvider {
  if (config.provider === 'openai') {
    const openaiConfig = config.openai;
    if (!openaiConfig) {
      throw new Error('OpenAI provider selected but no openai config provided.');
    }
    return new OpenAIEmbeddingProvider(openaiConfig);
  }
  if (config.provider === 'local') {
    return new LocalEmbeddingProvider();
  }
  // 'none' and anything unrecognized degrade to the FTS5-only noop provider.
  return new NoopEmbeddingProvider();
}
/**
 * Return a default (noop) config when nothing has been stored yet.
 */
export function defaultEmbeddingConfig(): EmbeddingConfig {
  const fallback: EmbeddingConfig = { provider: 'none' };
  return fallback;
}

export type { EmbeddingProvider };

View File

@@ -0,0 +1,61 @@
/**
* LocalEmbeddingProvider — uses @xenova/transformers (optional dependency).
*
* @xenova/transformers is NOT installed by default. This provider uses a
* dynamic import so the module is only required at runtime when the local
* provider is actually configured. If the package is absent, isAvailable()
* returns false and embed() throws a clear error.
*/
import { EmbeddingError, type EmbeddingProvider, type EmbeddingVector } from './provider.js';
/** Minimal call signature of a transformers feature-extraction pipeline. */
type FeatureExtractionPipeline = (
  text: string,
  options: Record<string, unknown>
) => Promise<{ data: ArrayLike<number> }>;

export class LocalEmbeddingProvider implements EmbeddingProvider {
  readonly name = 'local';
  readonly model = 'Xenova/all-MiniLM-L6-v2';
  readonly dimensions = 384;

  // Cached as a promise so concurrent embed() calls share ONE model load.
  // (Previously each concurrent call could trigger its own dynamic import
  // and pipeline construction before `this.pipeline` was assigned.)
  private pipelinePromise: Promise<FeatureExtractionPipeline> | null = null;

  /** Lazily import @xenova/transformers and build the pipeline, once. */
  private loadPipeline(): Promise<FeatureExtractionPipeline> {
    if (!this.pipelinePromise) {
      this.pipelinePromise = (async () => {
        let transformers: { pipeline: (task: string, model: string) => Promise<unknown> };
        try {
          // Dynamic import — only succeeds when @xenova/transformers is installed.
          // eslint-disable-next-line @typescript-eslint/no-explicit-any
          transformers = (await import('@xenova/transformers' as any)) as {
            pipeline: (task: string, model: string) => Promise<unknown>;
          };
        } catch {
          throw new EmbeddingError(
            '@xenova/transformers is not installed. Install it to use the local embedding provider.'
          );
        }
        return (await transformers.pipeline(
          'feature-extraction',
          this.model
        )) as FeatureExtractionPipeline;
      })();
      // Reset on failure so a later call retries (matches the previous
      // behavior of re-attempting the import on every embed()).
      this.pipelinePromise.catch(() => {
        this.pipelinePromise = null;
      });
    }
    return this.pipelinePromise;
  }

  /**
   * Embed each text with mean pooling + normalization.
   * @throws EmbeddingError when @xenova/transformers is not installed.
   */
  async embed(texts: string[]): Promise<EmbeddingVector[]> {
    const pipeline = await this.loadPipeline();
    const results: EmbeddingVector[] = [];
    for (const text of texts) {
      const output = await pipeline(text, {
        pooling: 'mean',
        normalize: true
      });
      results.push({
        values: new Float32Array(output.data),
        dimensions: this.dimensions,
        model: this.model
      });
    }
    return results;
  }

  /** True iff the optional @xenova/transformers package can be imported. */
  async isAvailable(): Promise<boolean> {
    try {
      await import('@xenova/transformers' as never);
      return true;
    } catch {
      return false;
    }
  }
}

View File

@@ -0,0 +1,104 @@
/**
* OpenAIEmbeddingProvider — works with any OpenAI-compatible embeddings API
* including OpenAI, Azure OpenAI, and Ollama.
*/
import { EmbeddingError, type EmbeddingProvider, type EmbeddingVector } from './provider.js';
/** Configuration for any OpenAI-compatible /embeddings endpoint. */
export interface OpenAIProviderConfig {
  /** e.g. "https://api.openai.com/v1" or "http://localhost:11434/v1" */
  baseUrl: string;
  /** Bearer token sent in the Authorization header. */
  apiKey: string;
  /** e.g. "text-embedding-3-small", "nomic-embed-text" */
  model: string;
  /** Override dimensions for models that support it (e.g. text-embedding-3-small). */
  dimensions?: number;
  /** Maximum number of texts per API request. Default: 100. */
  maxBatchSize?: number;
}
/** Split an array into consecutive slices of at most `size` elements. */
function chunk<T>(arr: T[], size: number): T[][] {
  const result: T[][] = [];
  let start = 0;
  while (start < arr.length) {
    result.push(arr.slice(start, start + size));
    start += size;
  }
  return result;
}

export class OpenAIEmbeddingProvider implements EmbeddingProvider {
  readonly name = 'openai';

  constructor(private readonly config: OpenAIProviderConfig) {}

  get model(): string {
    return this.config.model;
  }

  get dimensions(): number {
    return this.config.dimensions ?? 1536;
  }

  /** Embed all texts, splitting the input into provider-sized batches. */
  async embed(texts: string[]): Promise<EmbeddingVector[]> {
    const vectors: EmbeddingVector[] = [];
    for (const batch of chunk(texts, this.config.maxBatchSize ?? 100)) {
      vectors.push(...(await this.embedBatch(batch)));
    }
    return vectors;
  }

  /** POST one batch to the /embeddings endpoint and convert the response. */
  private async embedBatch(batch: string[]): Promise<EmbeddingVector[]> {
    const payload: Record<string, unknown> = {
      model: this.config.model,
      input: batch
    };
    if (this.config.dimensions !== undefined) {
      payload.dimensions = this.config.dimensions;
    }
    const response = await fetch(`${this.config.baseUrl}/embeddings`, {
      method: 'POST',
      headers: {
        Authorization: `Bearer ${this.config.apiKey}`,
        'Content-Type': 'application/json'
      },
      body: JSON.stringify(payload)
    });
    if (!response.ok) {
      let detail = '';
      try {
        const errBody = await response.json();
        detail = errBody?.error?.message ?? '';
      } catch {
        // ignore parse failure
      }
      throw new EmbeddingError(
        `OpenAI API error ${response.status}${detail ? `: ${detail}` : ''}`
      );
    }
    const data = (await response.json()) as {
      data: Array<{ embedding: number[]; index: number }>;
    };
    // API returns results in order; sort by index just in case.
    return [...data.data]
      .sort((a, b) => a.index - b.index)
      .map((item) => ({
        values: new Float32Array(item.embedding),
        dimensions: item.embedding.length,
        model: this.config.model
      }));
  }

  /** Availability probe: a minimal single-item embed request must succeed. */
  async isAvailable(): Promise<boolean> {
    try {
      // Ping with a minimal single-item embed request.
      await this.embed(['ping']);
      return true;
    } catch {
      return false;
    }
  }
}

View File

@@ -0,0 +1,47 @@
/**
* EmbeddingProvider interface and NoopEmbeddingProvider.
*
* The Noop provider returns null embeddings and enables FTS5-only mode
* when no embedding backend is configured.
*/
/** A single embedding result: the raw vector plus provenance metadata. */
export interface EmbeddingVector {
  /** The embedding values; length equals `dimensions`. */
  values: Float32Array;
  dimensions: number;
  /** Identifier of the model that produced this vector. */
  model: string;
}

/** Contract implemented by all embedding backends (openai, local, noop). */
export interface EmbeddingProvider {
  readonly name: string;
  readonly dimensions: number;
  readonly model: string;
  /** Embed the given texts. The noop provider returns an empty array. */
  embed(texts: string[]): Promise<EmbeddingVector[]>;
  /** Whether the backend is reachable/usable right now. */
  isAvailable(): Promise<boolean>;
}
/** Error type thrown by embedding providers on API or setup failures. */
export class EmbeddingError extends Error {
  // Distinguishes embedding failures from generic Errors in logs/catch blocks.
  name = 'EmbeddingError';
}
/**
 * NoopEmbeddingProvider — always returns empty results.
 * Used as the default when no provider is configured; the system
 * falls back gracefully to FTS5-only search.
 */
export class NoopEmbeddingProvider implements EmbeddingProvider {
  readonly name = 'noop';
  readonly dimensions = 0;
  readonly model = 'none';

  embed(_texts: string[]): Promise<EmbeddingVector[]> {
    // No backend: produce no vectors regardless of input.
    return Promise.resolve([]);
  }

  isAvailable(): Promise<boolean> {
    return Promise.resolve(false);
  }
}

View File

@@ -0,0 +1,147 @@
/**
* GET /api/v1/settings/embedding — retrieve current embedding configuration
* PUT /api/v1/settings/embedding — update embedding configuration
*/
import { json } from '@sveltejs/kit';
import type { RequestHandler } from './$types';
import { getClient } from '$lib/server/db/client';
import {
EMBEDDING_CONFIG_KEY,
createProviderFromConfig,
defaultEmbeddingConfig,
type EmbeddingConfig
} from '$lib/server/embeddings/factory';
import { handleServiceError, InvalidInputError } from '$lib/server/utils/validation';
// ---------------------------------------------------------------------------
// Helpers
// ---------------------------------------------------------------------------
/**
 * Load the persisted embedding config from the settings table, falling back
 * to the default (noop) config when the row is missing or unparseable.
 */
function readConfig(db: ReturnType<typeof getClient>): EmbeddingConfig {
  const row = db
    .prepare(`SELECT value FROM settings WHERE key = ?`)
    .get(EMBEDDING_CONFIG_KEY) as { value: string } | undefined;
  if (row === undefined) {
    return defaultEmbeddingConfig();
  }
  try {
    return JSON.parse(row.value) as EmbeddingConfig;
  } catch {
    // Corrupt JSON in settings — treat as unset.
    return defaultEmbeddingConfig();
  }
}
/**
 * Validate a raw request body as an EmbeddingConfig.
 *
 * @param body - Parsed JSON request body (unknown shape).
 * @returns A well-formed EmbeddingConfig.
 * @throws InvalidInputError on any shape violation.
 */
function validateConfig(body: unknown): EmbeddingConfig {
  if (typeof body !== 'object' || body === null) {
    throw new InvalidInputError('Request body must be a JSON object');
  }
  const obj = body as Record<string, unknown>;
  const provider = obj.provider;
  if (provider !== 'openai' && provider !== 'local' && provider !== 'none') {
    throw new InvalidInputError(
      `Invalid provider "${String(provider)}". Must be one of: openai, local, none.`
    );
  }
  if (provider !== 'openai') {
    // 'local' and 'none' carry no extra configuration.
    return { provider };
  }
  const openai = obj.openai as Record<string, unknown> | undefined;
  if (!openai || typeof openai !== 'object') {
    throw new InvalidInputError('openai config object is required when provider is "openai"');
  }
  if (typeof openai.baseUrl !== 'string' || !openai.baseUrl) {
    throw new InvalidInputError('openai.baseUrl must be a non-empty string');
  }
  if (typeof openai.apiKey !== 'string' || !openai.apiKey) {
    throw new InvalidInputError('openai.apiKey must be a non-empty string');
  }
  if (typeof openai.model !== 'string' || !openai.model) {
    throw new InvalidInputError('openai.model must be a non-empty string');
  }
  // FIX: the previous version silently dropped wrong-typed optional fields and
  // accepted NaN/negative/non-integer numbers, which break the provider later.
  // Reject invalid values with a clear 400 instead. (typeof narrowing makes
  // the old `as string`/`as number` casts redundant.)
  const dimensions = readOptionalPositiveInt(openai.dimensions, 'openai.dimensions');
  const maxBatchSize = readOptionalPositiveInt(openai.maxBatchSize, 'openai.maxBatchSize');
  return {
    provider: 'openai',
    openai: {
      baseUrl: openai.baseUrl,
      apiKey: openai.apiKey,
      model: openai.model,
      dimensions,
      maxBatchSize
    }
  };
}

/** Return undefined when absent, the value when a positive integer, else throw. */
function readOptionalPositiveInt(value: unknown, field: string): number | undefined {
  if (value === undefined) return undefined;
  if (typeof value !== 'number' || !Number.isInteger(value) || value <= 0) {
    throw new InvalidInputError(`${field} must be a positive integer when provided`);
  }
  return value;
}
// ---------------------------------------------------------------------------
// GET
// ---------------------------------------------------------------------------
export const GET: RequestHandler = () => {
  try {
    const config = readConfig(getClient());
    // Strip the apiKey from the response for security.
    return json(sanitizeForResponse(config));
  } catch (err) {
    return handleServiceError(err);
  }
};
// ---------------------------------------------------------------------------
// PUT
// ---------------------------------------------------------------------------
export const PUT: RequestHandler = async ({ request }) => {
  try {
    const config = validateConfig(await request.json());

    // Verify provider connectivity before persisting (skip for noop).
    if (config.provider !== 'none') {
      const provider = createProviderFromConfig(config);
      if (!(await provider.isAvailable())) {
        throw new InvalidInputError(
          `Could not connect to the "${config.provider}" embedding provider. Check your configuration.`
        );
      }
    }

    // Upsert the full config (including apiKey) into the settings table.
    getClient()
      .prepare(
        `INSERT INTO settings (key, value, updated_at)
        VALUES (?, ?, unixepoch())
        ON CONFLICT (key) DO UPDATE SET value = excluded.value, updated_at = excluded.updated_at`
      )
      .run(EMBEDDING_CONFIG_KEY, JSON.stringify(config));

    return json(sanitizeForResponse(config));
  } catch (err) {
    return handleServiceError(err);
  }
};
// ---------------------------------------------------------------------------
// Sanitize — remove sensitive fields before returning to clients
// ---------------------------------------------------------------------------
/**
 * Strip sensitive fields (the OpenAI apiKey) from a config before it is
 * serialized into an HTTP response. Does not mutate the input.
 */
function sanitizeForResponse(config: EmbeddingConfig): Omit<EmbeddingConfig, 'openai'> & {
  openai?: Omit<NonNullable<EmbeddingConfig['openai']>, 'apiKey'>;
} {
  if (config.provider !== 'openai' || !config.openai) {
    return config;
  }
  const { apiKey: _apiKey, ...withoutKey } = config.openai;
  return { ...config, openai: withoutKey };
}