diff --git a/src/hooks.server.ts b/src/hooks.server.ts index 56fb88d..d66593b 100644 --- a/src/hooks.server.ts +++ b/src/hooks.server.ts @@ -9,13 +9,13 @@ import { initializeDatabase } from '$lib/server/db/index.js'; import { getClient } from '$lib/server/db/client.js'; import { initializePipeline } from '$lib/server/pipeline/startup.js'; -import { - EMBEDDING_CONFIG_KEY, - createProviderFromConfig, - defaultEmbeddingConfig -} from '$lib/server/embeddings/factory.js'; +import { createProviderFromProfile } from '$lib/server/embeddings/registry.js'; import { EmbeddingService } from '$lib/server/embeddings/embedding.service.js'; -import type { EmbeddingConfig } from '$lib/server/embeddings/factory.js'; +import { + EmbeddingProfileEntity, + type EmbeddingProfileEntityProps +} from '$lib/server/models/embedding-profile.js'; +import { EmbeddingProfileMapper } from '$lib/server/mappers/embedding-profile.mapper.js'; import type { Handle } from '@sveltejs/kit'; // --------------------------------------------------------------------------- @@ -26,37 +26,20 @@ try { initializeDatabase(); const db = getClient(); - - // Load persisted embedding configuration (if any). - const configRow = db - .prepare<[string], { value: string }>(`SELECT value FROM settings WHERE key = ?`) - .get(EMBEDDING_CONFIG_KEY); + const activeProfileRow = db + .prepare<[], EmbeddingProfileEntityProps>( + 'SELECT * FROM embedding_profiles WHERE is_default = 1 AND enabled = 1 LIMIT 1' + ) + .get(); let embeddingService: EmbeddingService | null = null; - if (configRow) { - try { - const config: EmbeddingConfig = - typeof configRow.value === 'string' - ? JSON.parse(configRow.value) - : (configRow.value as EmbeddingConfig); - - if (config.provider !== 'none') { - const provider = createProviderFromConfig(config); - embeddingService = new EmbeddingService(db, provider); - } - } catch (err) { - console.warn( - `[hooks.server] Could not load embedding config: ${err instanceof Error ? err.message : String(err)}` - ); - } - } else { - // Use the default (noop) config so the pipeline is still wired up. - const config = defaultEmbeddingConfig(); - if (config.provider !== 'none') { - const provider = createProviderFromConfig(config); - embeddingService = new EmbeddingService(db, provider); - } + if (activeProfileRow) { + const activeProfile = EmbeddingProfileMapper.fromEntity( + new EmbeddingProfileEntity(activeProfileRow) + ); + const provider = createProviderFromProfile(activeProfile); + embeddingService = new EmbeddingService(db, provider, activeProfile.id); } initializePipeline(db, embeddingService); diff --git a/src/lib/components/RepositoryCard.svelte b/src/lib/components/RepositoryCard.svelte index dcc959c..8cbdb84 100644 --- a/src/lib/components/RepositoryCard.svelte +++ b/src/lib/components/RepositoryCard.svelte @@ -1,13 +1,25 @@
@@ -67,6 +92,12 @@ {/if}
+
+ {embeddingCount.toLocaleString()} embeddings + · + Indexed: {indexedVersionsLabel} +
+ {#if repo.state === 'error'}

Indexing failed. Check jobs for details.

{/if} diff --git a/src/lib/components/RepositoryCard.svelte.test.ts b/src/lib/components/RepositoryCard.svelte.test.ts index ee2be10..34a5511 100644 --- a/src/lib/components/RepositoryCard.svelte.test.ts +++ b/src/lib/components/RepositoryCard.svelte.test.ts @@ -12,6 +12,8 @@ describe('RepositoryCard.svelte', () => { description: 'A JavaScript library for building user interfaces', state: 'indexed', totalSnippets: 1234, + embeddingCount: 1200, + indexedVersions: ['main', 'v18.3.0'], trustScore: 9.7, stars: 230000, lastIndexedAt: null @@ -23,5 +25,8 @@ describe('RepositoryCard.svelte', () => { await expect .element(page.getByRole('link', { name: 'Details' })) .toHaveAttribute('href', '/repos/%2Ffacebook%2Freact'); + + await expect.element(page.getByText('1,200 embeddings')).toBeInTheDocument(); + await expect.element(page.getByText('Indexed: main, v18.3.0')).toBeInTheDocument(); }); }); \ No newline at end of file diff --git a/src/lib/dtos/embedding-settings.ts b/src/lib/dtos/embedding-settings.ts new file mode 100644 index 0000000..f35c85f --- /dev/null +++ b/src/lib/dtos/embedding-settings.ts @@ -0,0 +1,41 @@ +import type { EmbeddingProviderKind } from '$lib/types'; + +export interface EmbeddingProfileConfigEntryDto { + key: string; + value: string; + redacted: boolean; +} + +export interface EmbeddingProfileDto { + id: string; + providerKind: string; + title: string; + enabled: boolean; + isDefault: boolean; + model: string; + dimensions: number; + config: Record; + configEntries: EmbeddingProfileConfigEntryDto[]; + createdAt: number; + updatedAt: number; +} + +export interface EmbeddingSettingsDto { + profiles: EmbeddingProfileDto[]; + activeProfileId: string | null; + activeProfile: EmbeddingProfileDto | null; +} + +export interface EmbeddingProfileUpsertDto { + id: string; + providerKind: EmbeddingProviderKind; + title: string; + model: string; + dimensions: number; + config: Record; +} + +export interface EmbeddingSettingsUpdateDto { + activeProfileId: string | null; + profile?: EmbeddingProfileUpsertDto; +} \ No newline at end of file diff --git a/src/lib/server/embeddings/embedding.service.test.ts b/src/lib/server/embeddings/embedding.service.test.ts index 7100789..4d49b7c 100644 --- a/src/lib/server/embeddings/embedding.service.test.ts +++ b/src/lib/server/embeddings/embedding.service.test.ts @@ -408,6 +408,36 @@ describe('EmbeddingService', () => { expect(embedding![2]).toBeCloseTo(0.2, 5); }); + it('stores embeddings under the configured profile ID', async () => { + client + .prepare( + `INSERT INTO embedding_profiles + (id, provider_kind, title, enabled, is_default, model, dimensions, config, created_at, updated_at) + VALUES (?, ?, ?, ?, ?, ?, ?, ?, unixepoch(), unixepoch())` + ) + .run( + 'openai-custom', + 'openai-compatible', + 'OpenAI Custom', + 1, + 0, + 'test-model', + 4, + '{}' + ); + + const snippetId = seedSnippet(db, client); + const provider = makeProvider(4, 'test-model'); + const service = new EmbeddingService(client, provider, 'openai-custom'); + + await service.embedSnippets([snippetId]); + + const row = client + .prepare('SELECT profile_id FROM snippet_embeddings WHERE snippet_id = ?') + .get(snippetId) as { profile_id: string }; + expect(row.profile_id).toBe('openai-custom'); + }); + it('is idempotent — re-embedding replaces the existing row', async () => { const snippetId = seedSnippet(db, client); const provider = makeProvider(2); @@ -469,6 +499,19 @@ describe('EmbeddingService', () => { }; expect(rows.cnt).toBe(0); }); + + it('finds snippets missing embeddings for the active profile', async () => { + const firstSnippetId = seedSnippet(db, client); + const secondSnippetId = seedSnippet(db, client, { content: 'Second snippet content' }); + const provider = makeProvider(4); + const service = new EmbeddingService(client, provider, 'local-default'); + + await service.embedSnippets([firstSnippetId]); + + expect(service.findSnippetIdsMissingEmbeddings('/test/embed-repo', null)).toEqual([ + secondSnippetId + ]); + }); }); // --------------------------------------------------------------------------- diff --git a/src/lib/server/embeddings/embedding.service.ts b/src/lib/server/embeddings/embedding.service.ts index 8a60589..1dc18d5 100644 --- a/src/lib/server/embeddings/embedding.service.ts +++ b/src/lib/server/embeddings/embedding.service.ts @@ -23,6 +23,42 @@ export class EmbeddingService { private readonly profileId: string = 'local-default' ) {} + findSnippetIdsMissingEmbeddings(repositoryId: string, versionId: string | null): string[] { + if (versionId) { + const rows = this.db + .prepare<[string, string, string], { id: string }>( + `SELECT snippets.id + FROM snippets + LEFT JOIN snippet_embeddings + ON snippet_embeddings.snippet_id = snippets.id + AND snippet_embeddings.profile_id = ? + WHERE snippets.repository_id = ? + AND snippets.version_id = ? + AND snippet_embeddings.snippet_id IS NULL + ORDER BY snippets.id` + ) + .all(this.profileId, repositoryId, versionId); + + return rows.map((row) => row.id); + } + + const rows = this.db + .prepare<[string, string], { id: string }>( + `SELECT snippets.id + FROM snippets + LEFT JOIN snippet_embeddings + ON snippet_embeddings.snippet_id = snippets.id + AND snippet_embeddings.profile_id = ? + WHERE snippets.repository_id = ? + AND snippets.version_id IS NULL + AND snippet_embeddings.snippet_id IS NULL + ORDER BY snippets.id` + ) + .all(this.profileId, repositoryId); + + return rows.map((row) => row.id); + } + /** * Embed the given snippet IDs and store the results in snippet_embeddings. * diff --git a/src/lib/server/embeddings/local.provider.ts b/src/lib/server/embeddings/local.provider.ts index af638e2..eac0618 100644 --- a/src/lib/server/embeddings/local.provider.ts +++ b/src/lib/server/embeddings/local.provider.ts @@ -1,10 +1,10 @@ /** - * LocalEmbeddingProvider — uses @xenova/transformers (optional dependency). + * LocalEmbeddingProvider — uses @xenova/transformers via dynamic import. * - * @xenova/transformers is NOT installed by default. This provider uses a - * dynamic import so the module is only required at runtime when the local - * provider is actually configured. If the package is absent, isAvailable() - * returns false and embed() throws a clear error. + * The dynamic import keeps server startup cheap and defers loading the model + * runtime until the local provider is actually used. If the package is absent + * or cannot be resolved, isAvailable() returns false and embed() throws a + * clear error. */ import { EmbeddingError, type EmbeddingProvider, type EmbeddingVector } from './provider.js'; diff --git a/src/lib/server/embeddings/registry.ts b/src/lib/server/embeddings/registry.ts index d9b9bef..e5c699e 100644 --- a/src/lib/server/embeddings/registry.ts +++ b/src/lib/server/embeddings/registry.ts @@ -44,11 +44,12 @@ export function createProviderFromProfile(profile: EmbeddingProfile): EmbeddingP */ export function getDefaultLocalProfile(): Pick< EmbeddingProfile, - 'id' | 'providerKind' | 'model' | 'dimensions' + 'id' | 'providerKind' | 'title' | 'model' | 'dimensions' > { return { id: 'local-default', providerKind: 'local-transformers', + title: 'Local (Xenova/all-MiniLM-L6-v2)', model: 'Xenova/all-MiniLM-L6-v2', dimensions: 384 }; diff --git a/src/lib/server/mappers/embedding-profile.mapper.ts b/src/lib/server/mappers/embedding-profile.mapper.ts new file mode 100644 index 0000000..8126083 --- /dev/null +++ b/src/lib/server/mappers/embedding-profile.mapper.ts @@ -0,0 +1,38 @@ +import { + EmbeddingProfile, + EmbeddingProfileEntity +} from '$lib/server/models/embedding-profile.js'; + +function parseConfig(config: Record | string | null): Record { + if (!config) { + return {}; + } + + if (typeof config === 'string') { + try { + const parsed = JSON.parse(config); + return parsed && typeof parsed === 'object' ? (parsed as Record) : {}; + } catch { + return {}; + } + } + + return config; +} + +export class EmbeddingProfileMapper { + static fromEntity(entity: EmbeddingProfileEntity): EmbeddingProfile { + return new EmbeddingProfile({ + id: entity.id, + providerKind: entity.provider_kind, + title: entity.title, + enabled: Boolean(entity.enabled), + isDefault: Boolean(entity.is_default), + model: entity.model, + dimensions: entity.dimensions, + config: parseConfig(entity.config), + createdAt: entity.created_at, + updatedAt: entity.updated_at + }); + } +} \ No newline at end of file diff --git a/src/lib/server/mappers/embedding-settings.dto.mapper.ts b/src/lib/server/mappers/embedding-settings.dto.mapper.ts new file mode 100644 index 0000000..e1f12a8 --- /dev/null +++ b/src/lib/server/mappers/embedding-settings.dto.mapper.ts @@ -0,0 +1,71 @@ +import type { + EmbeddingProfileConfigEntryDto, + EmbeddingProfileDto, + EmbeddingSettingsDto +} from '$lib/dtos/embedding-settings.js'; +import type { EmbeddingProfile } from '$lib/server/models/embedding-profile.js'; +import { EmbeddingSettings } from '$lib/server/models/embedding-settings.js'; + +const REDACTED_VALUE = '[redacted]'; +const SENSITIVE_CONFIG_KEY = /(api[-_]?key|token|secret|password|authorization)/i; + +function formatConfigValue(value: unknown): string { + if (value === null || value === undefined) return 'null'; + if (typeof value === 'string') return value; + if (typeof value === 'number' || typeof value === 'boolean') return String(value); + return JSON.stringify(value); +} + +function sanitizeConfig(config: Record): { + visibleConfig: Record; + configEntries: EmbeddingProfileConfigEntryDto[]; +} { + const visibleConfig: Record = {}; + const configEntries = Object.entries(config) + .sort(([left], [right]) => left.localeCompare(right)) + .map(([key, value]) => { + const redacted = SENSITIVE_CONFIG_KEY.test(key); + if (!redacted) { + visibleConfig[key] = value; + } + + return { + key, + value: redacted ? REDACTED_VALUE : formatConfigValue(value), + redacted + }; + }); + + return { visibleConfig, configEntries }; +} + +function toProfileDto(profile: EmbeddingProfile): EmbeddingProfileDto { + const { visibleConfig, configEntries } = sanitizeConfig(profile.config); + + return { + id: profile.id, + providerKind: profile.providerKind, + title: profile.title, + enabled: profile.enabled, + isDefault: profile.isDefault, + model: profile.model, + dimensions: profile.dimensions, + config: visibleConfig, + configEntries, + createdAt: profile.createdAt, + updatedAt: profile.updatedAt + }; +} + +export class EmbeddingSettingsDtoMapper { + static toDto(settings: EmbeddingSettings): EmbeddingSettingsDto { + const profiles = settings.profiles.map(toProfileDto); + const activeProfile = settings.activeProfile ? toProfileDto(settings.activeProfile) : null; + + return { + profiles, + activeProfileId: settings.activeProfileId, + activeProfile + }; + } +} \ No newline at end of file diff --git a/src/lib/server/models/embedding-profile.ts b/src/lib/server/models/embedding-profile.ts new file mode 100644 index 0000000..47c7a65 --- /dev/null +++ b/src/lib/server/models/embedding-profile.ts @@ -0,0 +1,77 @@ +export interface EmbeddingProfileEntityProps { + id: string; + provider_kind: string; + title: string; + enabled: boolean | number; + is_default: boolean | number; + model: string; + dimensions: number; + config: Record | string | null; + created_at: number; + updated_at: number; +} + +export class EmbeddingProfileEntity { + id: string; + provider_kind: string; + title: string; + enabled: boolean | number; + is_default: boolean | number; + model: string; + dimensions: number; + config: Record | string | null; + created_at: number; + updated_at: number; + + constructor(props: EmbeddingProfileEntityProps) { + this.id = props.id; + this.provider_kind = props.provider_kind; + this.title = props.title; + this.enabled = props.enabled; + this.is_default = props.is_default; + this.model = props.model; + this.dimensions = props.dimensions; + this.config = props.config; + this.created_at = props.created_at; + this.updated_at = props.updated_at; + } +} + +export interface EmbeddingProfileProps { + id: string; + providerKind: string; + title: string; + enabled: boolean; + isDefault: boolean; + model: string; + dimensions: number; + config: Record; + createdAt: number; + updatedAt: number; +} + +export class EmbeddingProfile { + id: string; + providerKind: string; + title: string; + enabled: boolean; + isDefault: boolean; + model: string; + dimensions: number; + config: Record; + createdAt: number; + updatedAt: number; + + constructor(props: EmbeddingProfileProps) { + this.id = props.id; + this.providerKind = props.providerKind; + this.title = props.title; + this.enabled = props.enabled; + this.isDefault = props.isDefault; + this.model = props.model; + this.dimensions = props.dimensions; + this.config = props.config; + this.createdAt = props.createdAt; + this.updatedAt = props.updatedAt; + } +} \ No newline at end of file diff --git a/src/lib/server/models/embedding-settings.ts b/src/lib/server/models/embedding-settings.ts new file mode 100644 index 0000000..9fe86be --- /dev/null +++ b/src/lib/server/models/embedding-settings.ts @@ -0,0 +1,20 @@ +import type { EmbeddingProfile } from './embedding-profile.js'; + +export interface EmbeddingSettingsProps { + profiles: EmbeddingProfile[]; + activeProfile: EmbeddingProfile | null; +} + +export class EmbeddingSettings { + profiles: EmbeddingProfile[]; + activeProfile: EmbeddingProfile | null; + + constructor(props: EmbeddingSettingsProps) { + this.profiles = props.profiles; + this.activeProfile = props.activeProfile; + } + + get activeProfileId(): string | null { + return this.activeProfile?.id ?? null; + } +} \ No newline at end of file diff --git a/src/lib/server/pipeline/indexing.pipeline.test.ts b/src/lib/server/pipeline/indexing.pipeline.test.ts index c5faa54..5d3405c 100644 --- a/src/lib/server/pipeline/indexing.pipeline.test.ts +++ b/src/lib/server/pipeline/indexing.pipeline.test.ts @@ -12,6 +12,7 @@ import { join } from 'node:path'; import { JobQueue } from './job-queue.js'; import { IndexingPipeline } from './indexing.pipeline.js'; import { recoverStaleJobs } from './startup.js'; +import { EmbeddingService } from '$lib/server/embeddings/embedding.service.js'; // --------------------------------------------------------------------------- // Test DB factory @@ -22,15 +23,21 @@ function createTestDb(): Database.Database { client.pragma('foreign_keys = ON'); const migrationsFolder = join(import.meta.dirname, '../db/migrations'); - const migrationSql = readFileSync(join(migrationsFolder, '0000_large_master_chief.sql'), 'utf-8'); + for (const migrationFile of [ + '0000_large_master_chief.sql', + '0001_quick_nighthawk.sql', + '0002_silky_stellaris.sql' + ]) { + const migrationSql = readFileSync(join(migrationsFolder, migrationFile), 'utf-8'); - const statements = migrationSql - .split('--> statement-breakpoint') - .map((s) => s.trim()) - .filter(Boolean); + const statements = migrationSql + .split('--> statement-breakpoint') + .map((s) => s.trim()) + .filter(Boolean); - for (const stmt of statements) { - client.exec(stmt); + for (const stmt of statements) { + client.exec(stmt); + } } return client; @@ -238,7 +245,8 @@ describe('IndexingPipeline', () => { crawlResult: { files: Array<{ path: string; content: string; sha: string; language: string }>; totalFiles: number; - } = { files: [], totalFiles: 0 } + } = { files: [], totalFiles: 0 }, + embeddingService: EmbeddingService | null = null ) { const mockGithubCrawl = vi.fn().mockResolvedValue({ ...crawlResult, @@ -256,7 +264,12 @@ describe('IndexingPipeline', () => { }) }; - return new IndexingPipeline(db, mockGithubCrawl as never, mockLocalCrawler as never, null); + return new IndexingPipeline( + db, + mockGithubCrawl as never, + mockLocalCrawler as never, + embeddingService + ); } function makeJob(repositoryId = '/test/repo') { @@ -388,6 +401,64 @@ describe('IndexingPipeline', () => { expect(secondSnippetIds).toEqual(firstSnippetIds); }); + it('re-index backfills missing embeddings for unchanged snippets', async () => { + const provider = { + name: 'test-provider', + model: 'test-model', + dimensions: 3, + embed: vi.fn(async (texts: string[]) => + texts.map(() => ({ + values: new Float32Array([0.1, 0.2, 0.3]), + dimensions: 3, + model: 'test-model' + })) + ), + isAvailable: vi.fn(async () => true) + }; + const embeddingService = new EmbeddingService(db, provider, 'local-default'); + const files = [ + { + path: 'README.md', + content: '# Hello\n\nThis is documentation.', + sha: 'sha-readme', + language: 'markdown' + } + ]; + + const pipeline = makePipeline({ files, totalFiles: 1 }, embeddingService); + const job1 = makeJob(); + await pipeline.run(job1 as never); + + const firstSnippetIds = (db.prepare(`SELECT id FROM snippets ORDER BY id`).all() as { id: string }[]) + .map((row) => row.id); + expect(firstSnippetIds.length).toBeGreaterThan(0); + + const firstEmbeddingCount = ( + db.prepare(`SELECT COUNT(*) as n FROM snippet_embeddings WHERE profile_id = 'local-default'`).get() as { + n: number; + } + ).n; + expect(firstEmbeddingCount).toBe(firstSnippetIds.length); + + db.prepare(`DELETE FROM snippet_embeddings WHERE profile_id = 'local-default'`).run(); + + const job2Id = insertJob(db, { repository_id: '/test/repo', status: 'queued' }); + const job2 = db.prepare(`SELECT * FROM indexing_jobs WHERE id = ?`).get(job2Id) as never; + await pipeline.run(job2); + + const secondSnippetIds = (db.prepare(`SELECT id FROM snippets ORDER BY id`).all() as { + id: string; + }[]).map((row) => row.id); + const secondEmbeddingCount = ( + db.prepare(`SELECT COUNT(*) as n FROM snippet_embeddings WHERE profile_id = 'local-default'`).get() as { + n: number; + } + ).n; + + expect(secondSnippetIds).toEqual(firstSnippetIds); + expect(secondEmbeddingCount).toBe(firstSnippetIds.length); + }); + it('replaces snippets atomically when a file changes', async () => { const pipeline1 = makePipeline({ files: [ diff --git a/src/lib/server/pipeline/indexing.pipeline.ts b/src/lib/server/pipeline/indexing.pipeline.ts index 0bbc1ed..8e8c677 100644 --- a/src/lib/server/pipeline/indexing.pipeline.ts +++ b/src/lib/server/pipeline/indexing.pipeline.ts @@ -187,20 +187,28 @@ export class IndexingPipeline { this.replaceSnippets(repo.id, changedDocIds, newDocuments, newSnippets); // ---- Stage 4: Embeddings (if provider is configured) ---------------- - if (this.embeddingService && newSnippets.length > 0) { - const snippetIds = newSnippets.map((s) => s.id!); + if (this.embeddingService) { + const snippetIds = this.embeddingService.findSnippetIdsMissingEmbeddings( + repo.id, + normJob.versionId + ); + + if (snippetIds.length === 0) { + // No missing embeddings for the active profile; parsing progress is final. + } else { const embeddingsTotal = snippetIds.length; - await this.embeddingService.embedSnippets(snippetIds, (done) => { - const progress = calculateProgress( - processedFiles, - totalFiles, - done, - embeddingsTotal, - true - ); - this.updateJob(job.id, { progress }); - }); + await this.embeddingService.embedSnippets(snippetIds, (done) => { + const progress = calculateProgress( + processedFiles, + totalFiles, + done, + embeddingsTotal, + true + ); + this.updateJob(job.id, { progress }); + }); + } } // ---- Stage 5: Update repository stats -------------------------------- diff --git a/src/lib/server/services/embedding-settings.service.ts b/src/lib/server/services/embedding-settings.service.ts new file mode 100644 index 0000000..e0ffb10 --- /dev/null +++ b/src/lib/server/services/embedding-settings.service.ts @@ -0,0 +1,131 @@ +import type Database from 'better-sqlite3'; +import type { EmbeddingSettingsUpdateDto } from '$lib/dtos/embedding-settings.js'; +import { createProviderFromProfile, getDefaultLocalProfile } from '$lib/server/embeddings/registry.js'; +import { EmbeddingProfileMapper } from '$lib/server/mappers/embedding-profile.mapper.js'; +import { EmbeddingProfile, EmbeddingProfileEntity } from '$lib/server/models/embedding-profile.js'; +import { EmbeddingSettings } from '$lib/server/models/embedding-settings.js'; +import { InvalidInputError } from '$lib/server/utils/validation.js'; + +export class EmbeddingSettingsService { + constructor(private readonly db: Database.Database) {} + + getSettings(): EmbeddingSettings { + const profiles = this.loadProfiles(); + const activeProfile = profiles.find((profile) => profile.isDefault && profile.enabled) ?? null; + + return new EmbeddingSettings({ profiles, activeProfile }); + } + + async updateSettings(input: EmbeddingSettingsUpdateDto): Promise { + const now = Math.floor(Date.now() / 1000); + + this.db.prepare('UPDATE embedding_profiles SET is_default = 0, updated_at = ?').run(now); + + if (input.activeProfileId === null) { + return this.getSettings(); + } + + const profile = + input.activeProfileId === 'local-default' + ? this.buildDefaultLocalProfile(now) + : this.buildCustomProfile(input, now); + + const available = await createProviderFromProfile(profile).isAvailable(); + if (!available) { + throw new InvalidInputError( + `Could not connect to the "${profile.providerKind}" provider. Check your configuration.` + ); + } + + this.persistProfile(profile); + return this.getSettings(); + } + + private loadProfiles(): EmbeddingProfile[] { + return this.db + .prepare('SELECT * FROM embedding_profiles ORDER BY is_default DESC, created_at ASC') + .all() + .map((row) => EmbeddingProfileMapper.fromEntity(new EmbeddingProfileEntity(row as never))); + } + + private buildDefaultLocalProfile(now: number): EmbeddingProfile { + const defaultLocal = getDefaultLocalProfile(); + + return new EmbeddingProfile({ + id: defaultLocal.id, + providerKind: defaultLocal.providerKind, + title: defaultLocal.title, + enabled: true, + isDefault: true, + model: defaultLocal.model, + dimensions: defaultLocal.dimensions, + config: {}, + createdAt: this.getCreatedAt(defaultLocal.id, now), + updatedAt: now + }); + } + + private buildCustomProfile(input: EmbeddingSettingsUpdateDto, now: number): EmbeddingProfile { + const candidate = input.profile; + if (!candidate) { + throw new InvalidInputError('profile is required for custom embedding providers'); + } + if (candidate.id !== input.activeProfileId) { + throw new InvalidInputError('activeProfileId must match profile.id'); + } + if (!candidate.title || !candidate.model) { + throw new InvalidInputError('profile title and model are required'); + } + + return new EmbeddingProfile({ + id: candidate.id, + providerKind: candidate.providerKind, + title: candidate.title, + enabled: true, + isDefault: true, + model: candidate.model, + dimensions: candidate.dimensions, + config: candidate.config, + createdAt: this.getCreatedAt(candidate.id, now), + updatedAt: now + }); + } + + private getCreatedAt(id: string, fallback: number): number { + return ( + this.db + .prepare<[string], { created_at: number }>('SELECT created_at FROM embedding_profiles WHERE id = ?') + .get(id)?.created_at ?? fallback + ); + } + + private persistProfile(profile: EmbeddingProfile): void { + this.db + .prepare( + `INSERT INTO embedding_profiles + (id, provider_kind, title, enabled, is_default, model, dimensions, config, created_at, updated_at) + VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?) + ON CONFLICT(id) DO UPDATE SET + provider_kind = excluded.provider_kind, + title = excluded.title, + enabled = excluded.enabled, + is_default = excluded.is_default, + model = excluded.model, + dimensions = excluded.dimensions, + config = excluded.config, + updated_at = excluded.updated_at` + ) + .run( + profile.id, + profile.providerKind, + profile.title, + profile.enabled ? 1 : 0, + profile.isDefault ? 1 : 0, + profile.model, + profile.dimensions, + JSON.stringify(profile.config), + profile.createdAt, + profile.updatedAt + ); + } +} \ No newline at end of file diff --git a/src/lib/server/services/repository.service.test.ts b/src/lib/server/services/repository.service.test.ts index bbf14f3..e5e4aa8 100644 --- a/src/lib/server/services/repository.service.test.ts +++ b/src/lib/server/services/repository.service.test.ts @@ -27,16 +27,20 @@ function createTestDb(): Database.Database { client.pragma('foreign_keys = ON'); const migrationsFolder = join(import.meta.dirname, '../db/migrations'); - const migrationSql = readFileSync(join(migrationsFolder, '0000_large_master_chief.sql'), 'utf-8'); - // Drizzle migration files use `--> statement-breakpoint` as separator. - const statements = migrationSql - .split('--> statement-breakpoint') - .map((s) => s.trim()) - .filter(Boolean); + for (const migration of [ + '0000_large_master_chief.sql', + '0001_quick_nighthawk.sql', + '0002_silky_stellaris.sql' + ]) { + const statements = readFileSync(join(migrationsFolder, migration), 'utf-8') + .split('--> statement-breakpoint') + .map((statement) => statement.trim()) + .filter(Boolean); - for (const stmt of statements) { - client.exec(stmt); + for (const statement of statements) { + client.exec(statement); + } } return client; @@ -408,6 +412,83 @@ describe('RepositoryService.getVersions()', () => { }); }); +// --------------------------------------------------------------------------- +// getIndexSummary() +// --------------------------------------------------------------------------- + +describe('RepositoryService.getIndexSummary()', () => { + let client: Database.Database; + let service: RepositoryService; + + beforeEach(() => { + client = createTestDb(); + service = makeService(client); + service.add({ source: 'github', sourceUrl: 'https://github.com/facebook/react', branch: 'main' }); + }); + + it('returns embedding counts and indexed version labels', () => { + const now = Math.floor(Date.now() / 1000); + const docId = crypto.randomUUID(); + const versionDocId = crypto.randomUUID(); + const snippetId = crypto.randomUUID(); + const versionSnippetId = crypto.randomUUID(); + + client + .prepare( + `INSERT INTO repository_versions (id, repository_id, tag, state, created_at) + VALUES (?, '/facebook/react', ?, 'indexed', ?)` + ) + .run('/facebook/react/v18.3.0', 'v18.3.0', now); + + client + .prepare( + `INSERT INTO documents (id, repository_id, version_id, file_path, checksum, indexed_at) + VALUES (?, '/facebook/react', NULL, 'README.md', 'base', ?)` + ) + .run(docId, now); + + client + .prepare( + `INSERT INTO documents (id, repository_id, version_id, file_path, checksum, indexed_at) + VALUES (?, '/facebook/react', ?, 'README.md', 'version', ?)` + ) + .run(versionDocId, '/facebook/react/v18.3.0', now); + + client + .prepare( + `INSERT INTO snippets (id, document_id, repository_id, version_id, type, content, created_at) + VALUES (?, ?, '/facebook/react', NULL, 'info', 'base snippet', ?)` + ) + .run(snippetId, docId, now); + + client + .prepare( + `INSERT INTO snippets (id, document_id, repository_id, version_id, type, content, created_at) + VALUES (?, ?, '/facebook/react', ?, 'info', 'version snippet', ?)` + ) + .run(versionSnippetId, versionDocId, '/facebook/react/v18.3.0', now); + + client + .prepare( + `INSERT INTO snippet_embeddings (snippet_id, profile_id, model, dimensions, embedding, created_at) + VALUES (?, 'local-default', 'Xenova/all-MiniLM-L6-v2', 2, ?, ?)` + ) + .run(snippetId, Buffer.from(Float32Array.from([1, 0]).buffer), now); + + client + .prepare( + `INSERT INTO snippet_embeddings (snippet_id, profile_id, model, dimensions, embedding, created_at) + VALUES (?, 'local-default', 'Xenova/all-MiniLM-L6-v2', 2, ?, ?)` + ) + .run(versionSnippetId, Buffer.from(Float32Array.from([0, 1]).buffer), now); + + expect(service.getIndexSummary('/facebook/react')).toEqual({ + embeddingCount: 2, + indexedVersions: ['main', 'v18.3.0'] + }); + }); +}); + // --------------------------------------------------------------------------- // createIndexingJob() // --------------------------------------------------------------------------- diff --git a/src/lib/server/services/repository.service.ts b/src/lib/server/services/repository.service.ts index 4b20fc2..b8e5e16 100644 --- a/src/lib/server/services/repository.service.ts +++ b/src/lib/server/services/repository.service.ts @@ -39,6 +39,11 @@ export interface RepositoryStats { lastIndexedAt: Date | null; } +export interface RepositoryIndexSummary { + embeddingCount: number; + indexedVersions: string[]; +} + export class RepositoryService { constructor(private readonly db: Database.Database) {} @@ -266,6 +271,49 @@ export class RepositoryService { return rows.map((r) => r.tag); } + getIndexSummary(repositoryId: string): RepositoryIndexSummary { + const repository = this.get(repositoryId); + if (!repository) throw new NotFoundError(`Repository ${repositoryId} not found`); + + const embeddingRow = this.db + .prepare( + `SELECT COUNT(*) AS count + FROM snippet_embeddings se + INNER JOIN snippets s ON s.id = se.snippet_id + WHERE s.repository_id = ?` + ) + .get(repositoryId) as { count: number }; + + const versionRows = this.db + .prepare( + `SELECT tag FROM repository_versions + WHERE repository_id = ? AND state = 'indexed' + ORDER BY created_at DESC` + ) + .all(repositoryId) as { tag: string }[]; + + const hasDefaultBranchIndex = Boolean( + this.db + .prepare( + `SELECT 1 AS found + FROM documents + WHERE repository_id = ? AND version_id IS NULL + LIMIT 1` + ) + .get(repositoryId) + ); + + const indexedVersions = [ + ...(hasDefaultBranchIndex ? [repository.branch ?? 'default branch'] : []), + ...versionRows.map((row) => row.tag) + ]; + + return { + embeddingCount: embeddingRow.count, + indexedVersions: Array.from(new Set(indexedVersions)) + }; + } + /** * Create an indexing job for a repository. * If a job is already running, returns the existing job. diff --git a/src/lib/types.ts b/src/lib/types.ts index 37da65c..236bd01 100644 --- a/src/lib/types.ts +++ b/src/lib/types.ts @@ -32,6 +32,7 @@ export type RepositoryState = 'pending' | 'indexing' | 'indexed' | 'error'; export type SnippetType = 'code' | 'info'; export type JobStatus = 'queued' | 'running' | 'done' | 'failed'; export type VersionState = 'pending' | 'indexing' | 'indexed' | 'error'; +export type EmbeddingProviderKind = 'local-transformers' | 'openai-compatible'; // --------------------------------------------------------------------------- // API / service layer types diff --git a/src/routes/api/v1/api-contract.integration.test.ts b/src/routes/api/v1/api-contract.integration.test.ts index 58c7e09..5f6bce0 100644 --- a/src/routes/api/v1/api-contract.integration.test.ts +++ b/src/routes/api/v1/api-contract.integration.test.ts @@ -34,6 +34,7 @@ vi.mock('$lib/server/embeddings/registry.js', () => ({ })); import { POST as postLibraries } from './libs/+server.js'; +import { GET as getLibraries } from './libs/+server.js'; import { GET as getLibrary } from './libs/[id]/+server.js'; import { GET as getJobs } from './jobs/+server.js'; import { GET as getJob } from './jobs/[id]/+server.js'; @@ -186,6 +187,16 @@ function seedSnippet( return snippetId; } +function seedEmbedding(client: Database.Database, snippetId: string, values: number[]): void { + client + .prepare( + `INSERT INTO snippet_embeddings + (snippet_id, profile_id, model, dimensions, embedding, created_at) + VALUES (?, 'local-default', 'Xenova/all-MiniLM-L6-v2', ?, ?, ?)` + ) + .run(snippetId, values.length, Buffer.from(Float32Array.from(values).buffer), NOW_S); +} + function seedRules(client: Database.Database, repositoryId: string, rules: string[]) { client .prepare( @@ -249,6 +260,36 @@ describe('API contract integration', () => { expect(body).not.toHaveProperty('total_snippets'); }); + it('GET /api/v1/libs includes embedding counts and indexed versions per repository', async () => { + const repositoryId = seedRepo(db); + const versionId = seedVersion(db, repositoryId, 'v18.3.0'); + const baseDocId = seedDocument(db, repositoryId); + const versionDocId = seedDocument(db, repositoryId, versionId); + const baseSnippetId = seedSnippet(db, { + documentId: baseDocId, + repositoryId, + content: 'Base branch snippet' + }); + const versionSnippetId = seedSnippet(db, { + documentId: versionDocId, + repositoryId, + versionId, + content: 'Versioned snippet' + }); + seedEmbedding(db, baseSnippetId, [1, 0]); + seedEmbedding(db, versionSnippetId, [0, 1]); + + const response = await getLibraries({ + url: new URL('http://test/api/v1/libs') + } as never); + + expect(response.status).toBe(200); + const body = await response.json(); + expect(body.libraries).toHaveLength(1); + expect(body.libraries[0].embeddingCount).toBe(2); + expect(body.libraries[0].indexedVersions).toEqual(['main', 'v18.3.0']); + }); + it('GET /api/v1/jobs and /api/v1/jobs/:id return job DTOs in camelCase', async () => { const repoService = new RepositoryService(db); repoService.add({ source: 'github', sourceUrl: 'https://github.com/facebook/react' }); diff --git a/src/routes/api/v1/context/+server.ts b/src/routes/api/v1/context/+server.ts index 784182c..1f26375 100644 --- a/src/routes/api/v1/context/+server.ts +++ b/src/routes/api/v1/context/+server.ts @@ -17,7 +17,11 @@ import { dtoJsonResponse } from '$lib/server/api/dto-response'; import { SearchService } from '$lib/server/search/search.service'; import { HybridSearchService } from '$lib/server/search/hybrid.search.service'; import { createProviderFromProfile } from '$lib/server/embeddings/registry'; -import type { EmbeddingProfile } from '$lib/server/db/schema'; +import { + EmbeddingProfileEntity, + type EmbeddingProfileEntityProps +} from '$lib/server/models/embedding-profile'; +import { EmbeddingProfileMapper } from '$lib/server/mappers/embedding-profile.mapper'; import { parseLibraryId } from '$lib/server/api/library-id'; import { selectSnippetsWithinBudget, DEFAULT_TOKEN_BUDGET } from '$lib/server/api/token-budget'; import { formatContextJson, formatContextTxt, CORS_HEADERS } from '$lib/server/api/formatters'; @@ -32,16 +36,18 @@ function getServices(db: ReturnType) { // Load the active embedding profile from the database const profileRow = db - .prepare< - [], - EmbeddingProfile - >('SELECT * FROM embedding_profiles WHERE is_default = 1 AND enabled = 1 LIMIT 1') + .prepare<[], EmbeddingProfileEntityProps>( + 'SELECT * FROM embedding_profiles WHERE is_default = 1 AND enabled = 1 LIMIT 1' + ) .get(); - const provider = profileRow ? createProviderFromProfile(profileRow) : null; + const profile = profileRow + ? EmbeddingProfileMapper.fromEntity(new EmbeddingProfileEntity(profileRow)) + : null; + const provider = profile ? createProviderFromProfile(profile) : null; const hybridService = new HybridSearchService(db, searchService, provider); - return { db, searchService, hybridService, profileId: profileRow?.id }; + return { db, searchService, hybridService, profileId: profile?.id }; } interface RawRepoConfig { diff --git a/src/routes/api/v1/libs/+server.ts b/src/routes/api/v1/libs/+server.ts index 8e31eee..c3db516 100644 --- a/src/routes/api/v1/libs/+server.ts +++ b/src/routes/api/v1/libs/+server.ts @@ -32,7 +32,8 @@ export const GET: RequestHandler = ({ url }) => { const enriched = libraries.map((repo) => ({ ...RepositoryMapper.toDto(repo), - versions: service.getVersions(repo.id) + versions: service.getVersions(repo.id), + ...service.getIndexSummary(repo.id) })); return json({ libraries: enriched, total, limit, offset }); diff --git a/src/routes/api/v1/libs/[id]/+server.ts b/src/routes/api/v1/libs/[id]/+server.ts index d40f0db..e13c68c 100644 --- a/src/routes/api/v1/libs/[id]/+server.ts +++ b/src/routes/api/v1/libs/[id]/+server.ts @@ -23,7 +23,7 @@ export const GET: RequestHandler = ({ params }) => { return json({ error: 'Repository not found', code: 'NOT_FOUND' }, { status: 404 }); } const versions = service.getVersions(id); - return json({ ...RepositoryMapper.toDto(repo), versions }); + return json({ ...RepositoryMapper.toDto(repo), versions, ...service.getIndexSummary(id) }); } catch (err) { return handleServiceError(err); } diff --git a/src/routes/api/v1/settings/embedding/+server.ts b/src/routes/api/v1/settings/embedding/+server.ts index fa0b5af..1374ca9 100644 --- a/src/routes/api/v1/settings/embedding/+server.ts +++ b/src/routes/api/v1/settings/embedding/+server.ts @@ -1,30 +1,25 @@ /** - * GET /api/v1/settings/embedding — retrieve all embedding profiles - * POST /api/v1/settings/embedding — create or update an embedding profile - * PUT /api/v1/settings/embedding — alias for POST (backward compat) + * GET /api/v1/settings/embedding — retrieve embedding settings + * POST /api/v1/settings/embedding — update active embedding settings + * PUT /api/v1/settings/embedding — alias for POST */ import { json } from '@sveltejs/kit'; import type { RequestHandler } from './$types'; +import type { EmbeddingSettingsUpdateDto } from '$lib/dtos/embedding-settings.js'; import { getClient } from '$lib/server/db/client'; -import { createProviderFromProfile } from '$lib/server/embeddings/registry'; -import type { EmbeddingProfile, NewEmbeddingProfile } from '$lib/server/db/schema'; +import { EmbeddingSettingsDtoMapper } from '$lib/server/mappers/embedding-settings.dto.mapper.js'; +import { EmbeddingSettingsService } from '$lib/server/services/embedding-settings.service.js'; import { handleServiceError, InvalidInputError } from '$lib/server/utils/validation'; // --------------------------------------------------------------------------- -// GET — Return all profiles +// GET — Return embedding settings // --------------------------------------------------------------------------- export const GET: RequestHandler = () => { try { - const db = getClient(); - const profiles = db - .prepare('SELECT * FROM embedding_profiles ORDER BY is_default DESC, created_at ASC') - .all() as EmbeddingProfile[]; - - // Sanitize: remove sensitive config fields like apiKey - const safeProfiles = profiles.map(sanitizeProfile); - return json({ profiles: safeProfiles }); + const service = new EmbeddingSettingsService(getClient()); + return json(EmbeddingSettingsDtoMapper.toDto(service.getSettings())); } catch (err) { return handleServiceError(err); } @@ -34,116 +29,23 @@ export const GET: RequestHandler = () => { // POST/PUT — Create or update a profile // --------------------------------------------------------------------------- -async function upsertProfile(body: unknown) { +async function upsertSettings(body: unknown) { if (typeof body !== 'object' || body === null) { throw new InvalidInputError('Request body must be a JSON object'); } - const obj = body as Record; - - // Required fields - if (typeof obj.id !== 'string' || !obj.id) { - throw new InvalidInputError('id is required'); - } - if (typeof obj.providerKind !== 'string' || !obj.providerKind) { - throw new InvalidInputError('providerKind is required'); - } - if (typeof obj.title !== 'string' || !obj.title) { - throw new InvalidInputError('title is required'); - } - if (typeof obj.model !== 'string' || !obj.model) { - throw new InvalidInputError('model is required'); - } - if (typeof obj.dimensions !== 'number') { - throw new InvalidInputError('dimensions must be a number'); - } - - const profile: NewEmbeddingProfile = { - id: obj.id, - providerKind: obj.providerKind, - title: obj.title, - enabled: typeof obj.enabled === 'boolean' ? obj.enabled : true, - isDefault: typeof obj.isDefault === 'boolean' ? obj.isDefault : false, - model: obj.model, - dimensions: obj.dimensions, - config: (obj.config as Record) ?? {}, - createdAt: Date.now(), - updatedAt: Date.now() - }; - - // Validate provider availability before persisting - const provider = createProviderFromProfile(profile as EmbeddingProfile); - const available = await provider.isAvailable(); - if (!available) { - throw new InvalidInputError( - `Could not connect to the "${profile.providerKind}" provider. Check your configuration.` - ); - } - - const db = getClient(); - - // If setting as default, clear other defaults first - if (profile.isDefault) { - db.prepare('UPDATE embedding_profiles SET is_default = 0').run(); - } - - // Upsert the profile - db.prepare( - `INSERT INTO embedding_profiles - (id, provider_kind, title, enabled, is_default, model, dimensions, config, created_at, updated_at) - VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?) - ON CONFLICT(id) DO UPDATE SET - provider_kind = excluded.provider_kind, - title = excluded.title, - enabled = excluded.enabled, - is_default = excluded.is_default, - model = excluded.model, - dimensions = excluded.dimensions, - config = excluded.config, - updated_at = excluded.updated_at` - ).run( - profile.id, - profile.providerKind, - profile.title, - profile.enabled ? 1 : 0, - profile.isDefault ? 1 : 0, - profile.model, - profile.dimensions, - JSON.stringify(profile.config), - profile.createdAt, - profile.updatedAt - ); - - const inserted = db - .prepare('SELECT * FROM embedding_profiles WHERE id = ?') - .get(profile.id) as EmbeddingProfile; - - return sanitizeProfile(inserted); + const service = new EmbeddingSettingsService(getClient()); + const settings = await service.updateSettings(body as EmbeddingSettingsUpdateDto); + return EmbeddingSettingsDtoMapper.toDto(settings); } export const POST: RequestHandler = async ({ request }) => { try { const body = await request.json(); - const profile = await upsertProfile(body); - return json(profile); + return json(await upsertSettings(body)); } catch (err) { return handleServiceError(err); } }; -// Backward compat alias export const PUT: RequestHandler = POST; - -// --------------------------------------------------------------------------- -// Sanitize — remove sensitive config fields before returning to clients -// --------------------------------------------------------------------------- - -function sanitizeProfile(profile: EmbeddingProfile): EmbeddingProfile { - const config = profile.config as Record; - if (config && config.apiKey) { - const rest = { ...config }; - delete rest.apiKey; - return { ...profile, config: rest }; - } - return profile; -} diff --git a/src/routes/api/v1/settings/embedding/server.test.ts b/src/routes/api/v1/settings/embedding/server.test.ts new file mode 100644 index 0000000..100f336 --- /dev/null +++ b/src/routes/api/v1/settings/embedding/server.test.ts @@ -0,0 +1,183 @@ +import { beforeEach, describe, expect, it, vi } from 'vitest'; +import Database from 'better-sqlite3'; +import { readFileSync } from 'node:fs'; +import { join } from 'node:path'; + +let db: Database.Database; + +vi.mock('$lib/server/db/client', () => ({ + getClient: () => db +})); + +vi.mock('$lib/server/db/client.js', () => ({ + getClient: () => db +})); + +vi.mock('$lib/server/embeddings/registry', () => ({ + createProviderFromProfile: () => ({ + isAvailable: async () => true + }) +})); + +vi.mock('$lib/server/embeddings/registry.js', () => ({ + createProviderFromProfile: () => ({ + isAvailable: async () => true + }) +})); + +vi.mock('$lib/server/embeddings/local.provider', () => ({ + LocalEmbeddingProvider: class { + readonly model = 'Xenova/all-MiniLM-L6-v2'; + readonly dimensions = 384; + + async isAvailable() { + return true; + } + } +})); + +vi.mock('$lib/server/embeddings/local.provider.js', () => ({ + LocalEmbeddingProvider: class { + readonly model = 'Xenova/all-MiniLM-L6-v2'; + readonly dimensions = 384; + + async isAvailable() { + return true; + } + } +})); + +import { GET as getEmbeddingSettings, PUT as putEmbeddingSettings } from './+server.js'; +import { GET as getEmbeddingTest } from './test/+server.js'; + +function createTestDb(): Database.Database { + const client = new Database(':memory:'); + client.pragma('foreign_keys = ON'); + + const migrationsFolder = join(import.meta.dirname, '../../../../../lib/server/db/migrations'); + const ftsFile = join(import.meta.dirname, '../../../../../lib/server/db/fts.sql'); + + for (const migration of [ + '0000_large_master_chief.sql', + '0001_quick_nighthawk.sql', + '0002_silky_stellaris.sql' + ]) { + const statements = readFileSync(join(migrationsFolder, migration), 'utf-8') + .split('--> statement-breakpoint') + .map((statement) => statement.trim()) + .filter(Boolean); + + for (const statement of statements) { + client.exec(statement); + } + } + + client.exec(readFileSync(ftsFile, 'utf-8')); + + return client; +} + +describe('embedding settings routes', () => { + beforeEach(() => { + db = createTestDb(); + }); + + it('GET /api/v1/settings/embedding returns profile-based settings for the seeded default profile', async () => { + const response = await getEmbeddingSettings({} as never); + expect(response.status).toBe(200); + + const body = await response.json(); + expect(body.activeProfileId).toBe('local-default'); + expect(body.activeProfile).toMatchObject({ + id: 'local-default', + providerKind: 'local-transformers', + title: 'Local (Xenova/all-MiniLM-L6-v2)' + }); + expect(body.profiles).toHaveLength(1); + expect(body.profiles[0].providerKind).toBe('local-transformers'); + expect(body.profiles[0].isDefault).toBe(true); + }); + + it('PUT /api/v1/settings/embedding persists a clean profile-based OpenAI payload', async () => { + const response = await putEmbeddingSettings({ + request: new Request('http://test/api/v1/settings/embedding', { + method: 'PUT', + headers: { 'content-type': 'application/json' }, + body: JSON.stringify({ + activeProfileId: 'openai-default', + profile: { + id: 'openai-default', + providerKind: 'openai-compatible', + title: 'OpenAI-compatible', + model: 'text-embedding-3-small', + dimensions: 1536, + config: { + baseUrl: 'https://api.openai.com/v1', + apiKey: 'sk-test', + model: 'text-embedding-3-small' + } + } + }) + }) + } as never); + + expect(response.status).toBe(200); + const body = await response.json(); + expect(body.activeProfileId).toBe('openai-default'); + expect(body.activeProfile).toMatchObject({ + id: 'openai-default', + providerKind: 'openai-compatible' + }); + expect(body.activeProfile.config).toEqual({ + baseUrl: 'https://api.openai.com/v1', + model: 'text-embedding-3-small' + }); + expect(body.activeProfile.configEntries).toEqual( + expect.arrayContaining([ + expect.objectContaining({ key: 'apiKey', value: '[redacted]', redacted: true }) + ]) + ); + expect(body.profiles).toEqual( + expect.arrayContaining([ + expect.objectContaining({ + id: 'openai-default', + providerKind: 'openai-compatible', + model: 'text-embedding-3-small', + dimensions: 1536, + isDefault: true + }) + ]) + ); + + const activeProfile = db + .prepare( + 'SELECT id, provider_kind, is_default, enabled, model, dimensions FROM embedding_profiles WHERE is_default = 1 LIMIT 1' + ) + .get() as Record; + + expect(activeProfile).toMatchObject({ + id: 'openai-default', + provider_kind: 'openai-compatible', + is_default: 1, + enabled: 1, + model: 'text-embedding-3-small', + dimensions: 1536 + }); + }); + + it('GET /api/v1/settings/embedding/test checks local-provider availability directly', async () => { + const response = await getEmbeddingTest({} as never); + expect(response.status).toBe(200); + + const body = await response.json(); + expect(body).toEqual({ + available: true, + profile: { + id: 'local-default', + providerKind: 'local-transformers', + model: 'Xenova/all-MiniLM-L6-v2', + dimensions: 384 + } + }); + }); +}); \ No newline at end of file diff --git a/src/routes/api/v1/settings/embedding/test/+server.ts b/src/routes/api/v1/settings/embedding/test/+server.ts index 9f25e5d..a306883 100644 --- a/src/routes/api/v1/settings/embedding/test/+server.ts +++ b/src/routes/api/v1/settings/embedding/test/+server.ts @@ -7,35 +7,24 @@ import { json } from '@sveltejs/kit'; import type { RequestHandler } from './$types'; -import { getClient } from '$lib/server/db/client'; +import { LocalEmbeddingProvider } from '$lib/server/embeddings/local.provider'; import { createProviderFromProfile } from '$lib/server/embeddings/registry'; -import type { EmbeddingProfile } from '$lib/server/db/schema'; +import { EmbeddingProfileEntity } from '$lib/server/models/embedding-profile'; +import { EmbeddingProfileMapper } from '$lib/server/mappers/embedding-profile.mapper'; import { handleServiceError } from '$lib/server/utils/validation'; export const GET: RequestHandler = async () => { try { - const db = getClient(); - const profile = db - .prepare< - [], - EmbeddingProfile - >('SELECT * FROM embedding_profiles WHERE is_default = 1 AND enabled = 1 LIMIT 1') - .get(); - - if (!profile) { - return json({ available: false, error: 'No active embedding profile configured' }); - } - - const provider = createProviderFromProfile(profile); + const provider = new LocalEmbeddingProvider(); const available = await provider.isAvailable(); return json({ available, profile: { - id: profile.id, - providerKind: profile.providerKind, - model: profile.model, - dimensions: profile.dimensions + id: 'local-default', + providerKind: 'local-transformers', + model: provider.model, + dimensions: provider.dimensions } }); } catch (err) { @@ -46,19 +35,43 @@ export const GET: RequestHandler = async () => { export const POST: RequestHandler = async ({ request }) => { try { const body = await request.json(); - const config = validateConfig(body); - - if (config.provider === 'none') { - throw new InvalidInputError('Cannot test the "none" provider — no backend is configured.'); + if (typeof body !== 'object' || body === null) { + throw new Error('Request body must be a JSON object'); } - const provider = createProviderFromConfig(config); + const candidate = body as Record; + if (candidate.providerKind !== 'openai-compatible') { + throw new Error('Only openai-compatible providers can be tested via this endpoint'); + } + if (typeof candidate.model !== 'string' || typeof candidate.dimensions !== 'number') { + throw new Error('model and dimensions are required'); + } + + const provider = createProviderFromProfile( + EmbeddingProfileMapper.fromEntity( + new EmbeddingProfileEntity({ + id: typeof candidate.id === 'string' ? candidate.id : 'test-openai-profile', + provider_kind: 'openai-compatible', + title: typeof candidate.title === 'string' ? candidate.title : 'Test Provider', + enabled: true, + is_default: false, + model: candidate.model, + dimensions: candidate.dimensions, + config: + typeof candidate.config === 'object' && candidate.config !== null + ? (candidate.config as Record) + : {}, + created_at: Date.now(), + updated_at: Date.now() + }) + ) + ); const available = await provider.isAvailable(); if (!available) { return new Response( JSON.stringify({ - error: `Provider "${config.provider}" is not available. Check your configuration.` + error: 'Provider is not available. Check your configuration.' }), { status: 400, headers: { 'Content-Type': 'application/json' } } ); diff --git a/src/routes/repos/[id]/+page.svelte b/src/routes/repos/[id]/+page.svelte index 39c4193..056c3ca 100644 --- a/src/routes/repos/[id]/+page.svelte +++ b/src/routes/repos/[id]/+page.svelte @@ -2,22 +2,24 @@ import { goto } from '$app/navigation'; import { resolve as resolveRoute } from '$app/paths'; import type { PageData } from './$types'; - import type { Repository, RepositoryVersion, IndexingJob } from '$lib/types'; + import type { Repository, IndexingJob } from '$lib/types'; import ConfirmDialog from '$lib/components/ConfirmDialog.svelte'; import IndexingProgress from '$lib/components/IndexingProgress.svelte'; import StatBadge from '$lib/components/StatBadge.svelte'; let { data }: { data: PageData } = $props(); - // Initialized empty; $effect syncs from data prop on every navigation/reload. - let repo = $state( - {} as Repository & { versions?: RepositoryVersion[] } + let repoOverride = $state< + (Repository & { indexedVersions?: string[]; embeddingCount?: number }) | null + >(null); + const repo = $derived( + repoOverride ?? + ((data.repo ?? {}) as Repository & { + indexedVersions?: string[]; + embeddingCount?: number; + }) ); - let recentJobs = $state([]); - $effect(() => { - if (data.repo) repo = data.repo; - recentJobs = data.recentJobs ?? []; - }); + const recentJobs = $derived((data.recentJobs ?? []) as IndexingJob[]); let showDeleteConfirm = $state(false); let activeJobId = $state(null); let errorMessage = $state(null); @@ -41,7 +43,7 @@ try { const res = await fetch(`/api/v1/libs/${encodeURIComponent(repo.id)}`); if (res.ok) { - repo = await res.json(); + repoOverride = await res.json(); } } catch { // ignore @@ -92,7 +94,8 @@ return new Date(ts as string).toLocaleString(); } - const versions = $derived(repo.versions ?? []); + const indexedVersions = $derived(repo.indexedVersions ?? []); + const embeddingCount = $derived(repo.embeddingCount ?? 0); const totalSnippets = $derived(repo.totalSnippets ?? 0); const totalTokens = $derived(repo.totalTokens ?? 0); const trustScore = $derived(repo.trustScore ?? 0); @@ -180,6 +183,7 @@
+ {#if repo.stars != null} @@ -210,31 +214,17 @@
- -{#if versions.length > 0} + +{#if indexedVersions.length > 0}

Indexed Versions

-
- {#each versions as version (version.id)} -
-
- {version.tag} - {#if version.title} - {version.title} - {/if} -
-
- - {stateLabels[version.state] ?? version.state} - - {#if version.indexedAt} - {formatDate(version.indexedAt)} - {/if} -
-
+
+ {#each indexedVersions as versionTag (versionTag)} + + {versionTag} + {/each}
diff --git a/src/routes/repos/[id]/page.server.test.ts b/src/routes/repos/[id]/page.server.test.ts index db1db3e..c25a1c9 100644 --- a/src/routes/repos/[id]/page.server.test.ts +++ b/src/routes/repos/[id]/page.server.test.ts @@ -8,7 +8,11 @@ describe('/repos/[id] page server load', () => { .mockResolvedValueOnce({ ok: true, status: 200, - json: async () => ({ id: '/facebook/react', title: 'React' }) + json: async () => ({ + id: '/facebook/react', + title: 'React', + indexedVersions: ['main', 'v18.3.0'] + }) }) .mockResolvedValueOnce({ ok: true, @@ -27,7 +31,11 @@ describe('/repos/[id] page server load', () => { '/api/v1/jobs?repositoryId=%2Ffacebook%2Freact&limit=5' ); expect(result).toEqual({ - repo: { id: '/facebook/react', title: 'React' }, + repo: { + id: '/facebook/react', + title: 'React', + indexedVersions: ['main', 'v18.3.0'] + }, recentJobs: [{ id: 'job-1', repositoryId: '/facebook/react' }] }); }); diff --git a/src/routes/settings/+page.server.ts b/src/routes/settings/+page.server.ts new file mode 100644 index 0000000..9b1f617 --- /dev/null +++ b/src/routes/settings/+page.server.ts @@ -0,0 +1,22 @@ +import type { PageServerLoad } from './$types'; +import { getClient } from '$lib/server/db/client.js'; +import { LocalEmbeddingProvider } from '$lib/server/embeddings/local.provider.js'; +import { EmbeddingSettingsDtoMapper } from '$lib/server/mappers/embedding-settings.dto.mapper.js'; +import { EmbeddingSettingsService } from '$lib/server/services/embedding-settings.service.js'; + +export const load: PageServerLoad = async () => { + const service = new EmbeddingSettingsService(getClient()); + const settings = EmbeddingSettingsDtoMapper.toDto(service.getSettings()); + + let localProviderAvailable = false; + try { + localProviderAvailable = await new LocalEmbeddingProvider().isAvailable(); + } catch { + localProviderAvailable = false; + } + + return { + settings, + localProviderAvailable + }; +}; \ No newline at end of file diff --git a/src/routes/settings/+page.svelte b/src/routes/settings/+page.svelte index 59a6836..9790bfb 100644 --- a/src/routes/settings/+page.svelte +++ b/src/routes/settings/+page.svelte @@ -1,5 +1,12 @@ @@ -175,17 +237,109 @@

Configure TrueRef embedding and indexing options

- +
+
+

Current Active Profile

+

+ This is the profile used for semantic indexing and retrieval right now. +

+ + {#if activeProfile} +
+
+
+

{activeProfile.title}

+

Profile ID: {activeProfile.id}

+
+ +
+
+
Provider
+
{activeProfile.providerKind}
+
Model
+
{activeProfile.model}
+
Dimensions
+
{activeProfile.dimensions}
+
+ +
+
Enabled
+
{activeProfile.enabled ? 'Yes' : 'No'}
+
Default
+
{activeProfile.isDefault ? 'Yes' : 'No'}
+
Updated
+
{formatTimestamp(activeProfile.updatedAt)}
+
+
+
+ +
+

Provider configuration

+

+ These are the provider-specific settings currently saved for the active profile. +

+ + {#if activeConfigEntries.length > 0} +
    + {#each activeConfigEntries as entry (entry.key)} +
  • + {entry.key} + {entry.value} +
  • + {/each} +
+ {:else} +

+ No provider-specific configuration is stored for this profile. +

+

+ For OpenAI-compatible profiles, edit the + settings in the Embedding Provider form + below. The built-in Local Model profile + does not currently expose extra configurable fields. +

+ {/if} +
+
+ {:else} +
+ Embeddings are currently disabled. Keyword search remains available, but no embedding profile is active. +
+ {/if} +
+ +
+

Profile Inventory

+

Profiles stored in the database and available for activation.

+
+ + +
+
+ {#each currentSettings.profiles as profile (profile.id)} +
+
+
+

{profile.title}

+

{profile.id}

+
+ {#if profile.id === currentSettings.activeProfileId} + Active + {/if} +
+
+ {/each} +
+
+
+

Embedding Provider

Embeddings enable semantic search. Without them, only keyword search (FTS5) is used.

- {#if loading} -

Loading current configuration…

- {:else} -
+
{#each ['none', 'openai', 'local'] as p (p)} @@ -314,9 +468,7 @@

Local ONNX model via @xenova/transformers

Model: Xenova/all-MiniLM-L6-v2 · 384 dimensions

- {#if localAvailable === null} -

Checking availability…

- {:else if localAvailable} + {#if getInitialLocalProviderAvailability()}

@xenova/transformers is installed and ready.

{:else}

@@ -381,8 +533,7 @@ {saving ? 'Saving…' : 'Save Settings'}

- - {/if} +
diff --git a/src/routes/settings/page.server.test.ts b/src/routes/settings/page.server.test.ts new file mode 100644 index 0000000..3795eb4 --- /dev/null +++ b/src/routes/settings/page.server.test.ts @@ -0,0 +1,103 @@ +import { beforeEach, describe, expect, it, vi } from 'vitest'; +import Database from 'better-sqlite3'; +import { readFileSync } from 'node:fs'; +import { join } from 'node:path'; + +let db: Database.Database; + +vi.mock('$lib/server/db/client.js', () => ({ + getClient: () => db +})); + +vi.mock('$lib/server/embeddings/local.provider.js', () => ({ + LocalEmbeddingProvider: class { + async isAvailable() { + return true; + } + } +})); + +import { load } from './+page.server.js'; + +function createTestDb(): Database.Database { + const client = new Database(':memory:'); + client.pragma('foreign_keys = ON'); + + const migrationsFolder = join(import.meta.dirname, '../../lib/server/db/migrations'); + const ftsFile = join(import.meta.dirname, '../../lib/server/db/fts.sql'); + + for (const migration of [ + '0000_large_master_chief.sql', + '0001_quick_nighthawk.sql', + '0002_silky_stellaris.sql' + ]) { + const statements = readFileSync(join(migrationsFolder, migration), 'utf-8') + .split('--> statement-breakpoint') + .map((statement) => statement.trim()) + .filter(Boolean); + + for (const statement of statements) { + client.exec(statement); + } + } + + client.exec(readFileSync(ftsFile, 'utf-8')); + return client; +} + +describe('/settings page server load', () => { + beforeEach(() => { + db = createTestDb(); + }); + + it('returns the active profile and local provider availability', async () => { + db.prepare( + `INSERT INTO embedding_profiles + (id, provider_kind, title, enabled, is_default, model, dimensions, config, created_at, updated_at) + VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?)` + ).run( + 'openai-default', + 'openai-compatible', + 'OpenAI-compatible', + 1, + 1, + 'text-embedding-3-small', + 1536, + JSON.stringify({ + baseUrl: 'https://api.openai.com/v1', + apiKey: 'sk-test', + model: 'text-embedding-3-small' + }), + 1710000000, + 1710000000 + ); + db.prepare('UPDATE embedding_profiles SET is_default = 0 WHERE id = ?').run('local-default'); + + const result = (await load({} as never)) as { + localProviderAvailable: boolean; + settings: { + activeProfileId: string | null; + activeProfile: { + config: Record; + configEntries: Array<{ key: string; value: string; redacted: boolean }>; + } | null; + }; + }; + + expect(result.localProviderAvailable).toBe(true); + expect(result.settings.activeProfileId).toBe('openai-default'); + expect(result.settings.activeProfile).toMatchObject({ + id: 'openai-default', + providerKind: 'openai-compatible' + }); + expect(result.settings.activeProfile?.config).toEqual({ + baseUrl: 'https://api.openai.com/v1', + model: 'text-embedding-3-small' + }); + expect(result.settings.activeProfile?.configEntries).toEqual( + expect.arrayContaining([ + expect.objectContaining({ key: 'apiKey', value: '[redacted]', redacted: true }) + ]) + ); + }); +}); \ No newline at end of file