feat(EMBEDDINGS-0001): enable local embedder by default and overhaul settings page

- Wire local embedding provider as the default on startup when no profile is configured
- Refactor embedding settings into dedicated service, DTOs, mappers and models
- Rebuild settings page with profile management UI and live test feedback
- Expose index summary (indexed versions + embedding count) on repo endpoints
- Harden indexing pipeline and context search with additional test coverage

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
Giancarmine Salucci
2026-03-28 09:28:01 +01:00
parent d1381f7fc0
commit 781d224adc
30 changed files with 1419 additions and 313 deletions

View File

@@ -12,6 +12,7 @@ import { join } from 'node:path';
import { JobQueue } from './job-queue.js';
import { IndexingPipeline } from './indexing.pipeline.js';
import { recoverStaleJobs } from './startup.js';
import { EmbeddingService } from '$lib/server/embeddings/embedding.service.js';
// ---------------------------------------------------------------------------
// Test DB factory
@@ -22,15 +23,21 @@ function createTestDb(): Database.Database {
client.pragma('foreign_keys = ON');
const migrationsFolder = join(import.meta.dirname, '../db/migrations');
const migrationSql = readFileSync(join(migrationsFolder, '0000_large_master_chief.sql'), 'utf-8');
for (const migrationFile of [
'0000_large_master_chief.sql',
'0001_quick_nighthawk.sql',
'0002_silky_stellaris.sql'
]) {
const migrationSql = readFileSync(join(migrationsFolder, migrationFile), 'utf-8');
const statements = migrationSql
.split('--> statement-breakpoint')
.map((s) => s.trim())
.filter(Boolean);
const statements = migrationSql
.split('--> statement-breakpoint')
.map((s) => s.trim())
.filter(Boolean);
for (const stmt of statements) {
client.exec(stmt);
for (const stmt of statements) {
client.exec(stmt);
}
}
return client;
@@ -238,7 +245,8 @@ describe('IndexingPipeline', () => {
crawlResult: {
files: Array<{ path: string; content: string; sha: string; language: string }>;
totalFiles: number;
} = { files: [], totalFiles: 0 }
} = { files: [], totalFiles: 0 },
embeddingService: EmbeddingService | null = null
) {
const mockGithubCrawl = vi.fn().mockResolvedValue({
...crawlResult,
@@ -256,7 +264,12 @@ describe('IndexingPipeline', () => {
})
};
return new IndexingPipeline(db, mockGithubCrawl as never, mockLocalCrawler as never, null);
return new IndexingPipeline(
db,
mockGithubCrawl as never,
mockLocalCrawler as never,
embeddingService
);
}
function makeJob(repositoryId = '/test/repo') {
@@ -388,6 +401,64 @@ describe('IndexingPipeline', () => {
expect(secondSnippetIds).toEqual(firstSnippetIds);
});
it('re-index backfills missing embeddings for unchanged snippets', async () => {
  // Scenario: index once with an embedding service, wipe the stored embeddings for
  // the profile, then index the same content again. Snippet ids must stay the same
  // and every snippet must have an embedding again.

  /** Returns all snippet ids currently stored, ordered by id. */
  const listSnippetIds = (): string[] => {
    const rows = db.prepare(`SELECT id FROM snippets ORDER BY id`).all() as { id: string }[];
    return rows.map((row) => row.id);
  };

  /** Returns how many embeddings are stored for the 'local-default' profile. */
  const countProfileEmbeddings = (): number => {
    const row = db
      .prepare(`SELECT COUNT(*) as n FROM snippet_embeddings WHERE profile_id = 'local-default'`)
      .get() as { n: number };
    return row.n;
  };

  // Stub provider: answers every input text with a fresh, fixed 3-dimensional vector.
  const stubVector = () => ({
    values: new Float32Array([0.1, 0.2, 0.3]),
    dimensions: 3,
    model: 'test-model'
  });
  const stubProvider = {
    name: 'test-provider',
    model: 'test-model',
    dimensions: 3,
    embed: vi.fn(async (texts: string[]) => texts.map(() => stubVector())),
    isAvailable: vi.fn(async () => true)
  };
  const embeddingService = new EmbeddingService(db, stubProvider, 'local-default');

  const readme = {
    path: 'README.md',
    content: '# Hello\n\nThis is documentation.',
    sha: 'sha-readme',
    language: 'markdown'
  };
  const pipeline = makePipeline({ files: [readme], totalFiles: 1 }, embeddingService);

  // First run: snippets are created and each one gets an embedding.
  const initialJob = makeJob();
  await pipeline.run(initialJob as never);

  const idsAfterFirstRun = listSnippetIds();
  expect(idsAfterFirstRun.length).toBeGreaterThan(0);

  const embeddingsAfterFirstRun = countProfileEmbeddings();
  expect(embeddingsAfterFirstRun).toBe(idsAfterFirstRun.length);

  // Remove the embeddings for this profile while leaving the snippets in place.
  db.prepare(`DELETE FROM snippet_embeddings WHERE profile_id = 'local-default'`).run();

  // Second run over the same crawl result, driven by a freshly queued job row.
  const queuedJobId = insertJob(db, { repository_id: '/test/repo', status: 'queued' });
  const queuedJob = db
    .prepare(`SELECT * FROM indexing_jobs WHERE id = ?`)
    .get(queuedJobId) as never;
  await pipeline.run(queuedJob);

  const idsAfterSecondRun = listSnippetIds();
  const embeddingsAfterSecondRun = countProfileEmbeddings();

  // Same snippet ids as before, and one embedding per snippet again.
  expect(idsAfterSecondRun).toEqual(idsAfterFirstRun);
  expect(embeddingsAfterSecondRun).toBe(idsAfterFirstRun.length);
});
it('replaces snippets atomically when a file changes', async () => {
const pipeline1 = makePipeline({
files: [