feat(EMBEDDINGS-0001): enable local embedder by default and overhaul settings page
- Wire the local embedding provider as the default on startup when no profile is configured
- Refactor embedding settings into a dedicated service, DTOs, mappers, and models
- Rebuild the settings page with a profile management UI and live test feedback
- Expose an index summary (indexed versions + embedding count) on the repo endpoints
- Harden the indexing pipeline and context search with additional test coverage

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -12,6 +12,7 @@ import { join } from 'node:path';
|
||||
import { JobQueue } from './job-queue.js';
|
||||
import { IndexingPipeline } from './indexing.pipeline.js';
|
||||
import { recoverStaleJobs } from './startup.js';
|
||||
import { EmbeddingService } from '$lib/server/embeddings/embedding.service.js';
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Test DB factory
|
||||
@@ -22,15 +23,21 @@ function createTestDb(): Database.Database {
|
||||
client.pragma('foreign_keys = ON');
|
||||
|
||||
const migrationsFolder = join(import.meta.dirname, '../db/migrations');
|
||||
const migrationSql = readFileSync(join(migrationsFolder, '0000_large_master_chief.sql'), 'utf-8');
|
||||
for (const migrationFile of [
|
||||
'0000_large_master_chief.sql',
|
||||
'0001_quick_nighthawk.sql',
|
||||
'0002_silky_stellaris.sql'
|
||||
]) {
|
||||
const migrationSql = readFileSync(join(migrationsFolder, migrationFile), 'utf-8');
|
||||
|
||||
const statements = migrationSql
|
||||
.split('--> statement-breakpoint')
|
||||
.map((s) => s.trim())
|
||||
.filter(Boolean);
|
||||
const statements = migrationSql
|
||||
.split('--> statement-breakpoint')
|
||||
.map((s) => s.trim())
|
||||
.filter(Boolean);
|
||||
|
||||
for (const stmt of statements) {
|
||||
client.exec(stmt);
|
||||
for (const stmt of statements) {
|
||||
client.exec(stmt);
|
||||
}
|
||||
}
|
||||
|
||||
return client;
|
||||
@@ -238,7 +245,8 @@ describe('IndexingPipeline', () => {
|
||||
crawlResult: {
|
||||
files: Array<{ path: string; content: string; sha: string; language: string }>;
|
||||
totalFiles: number;
|
||||
} = { files: [], totalFiles: 0 }
|
||||
} = { files: [], totalFiles: 0 },
|
||||
embeddingService: EmbeddingService | null = null
|
||||
) {
|
||||
const mockGithubCrawl = vi.fn().mockResolvedValue({
|
||||
...crawlResult,
|
||||
@@ -256,7 +264,12 @@ describe('IndexingPipeline', () => {
|
||||
})
|
||||
};
|
||||
|
||||
return new IndexingPipeline(db, mockGithubCrawl as never, mockLocalCrawler as never, null);
|
||||
return new IndexingPipeline(
|
||||
db,
|
||||
mockGithubCrawl as never,
|
||||
mockLocalCrawler as never,
|
||||
embeddingService
|
||||
);
|
||||
}
|
||||
|
||||
function makeJob(repositoryId = '/test/repo') {
|
||||
@@ -388,6 +401,64 @@ describe('IndexingPipeline', () => {
|
||||
expect(secondSnippetIds).toEqual(firstSnippetIds);
|
||||
});
|
||||
|
||||
it('re-index backfills missing embeddings for unchanged snippets', async () => {
|
||||
const provider = {
|
||||
name: 'test-provider',
|
||||
model: 'test-model',
|
||||
dimensions: 3,
|
||||
embed: vi.fn(async (texts: string[]) =>
|
||||
texts.map(() => ({
|
||||
values: new Float32Array([0.1, 0.2, 0.3]),
|
||||
dimensions: 3,
|
||||
model: 'test-model'
|
||||
}))
|
||||
),
|
||||
isAvailable: vi.fn(async () => true)
|
||||
};
|
||||
const embeddingService = new EmbeddingService(db, provider, 'local-default');
|
||||
const files = [
|
||||
{
|
||||
path: 'README.md',
|
||||
content: '# Hello\n\nThis is documentation.',
|
||||
sha: 'sha-readme',
|
||||
language: 'markdown'
|
||||
}
|
||||
];
|
||||
|
||||
const pipeline = makePipeline({ files, totalFiles: 1 }, embeddingService);
|
||||
const job1 = makeJob();
|
||||
await pipeline.run(job1 as never);
|
||||
|
||||
const firstSnippetIds = (db.prepare(`SELECT id FROM snippets ORDER BY id`).all() as { id: string }[])
|
||||
.map((row) => row.id);
|
||||
expect(firstSnippetIds.length).toBeGreaterThan(0);
|
||||
|
||||
const firstEmbeddingCount = (
|
||||
db.prepare(`SELECT COUNT(*) as n FROM snippet_embeddings WHERE profile_id = 'local-default'`).get() as {
|
||||
n: number;
|
||||
}
|
||||
).n;
|
||||
expect(firstEmbeddingCount).toBe(firstSnippetIds.length);
|
||||
|
||||
db.prepare(`DELETE FROM snippet_embeddings WHERE profile_id = 'local-default'`).run();
|
||||
|
||||
const job2Id = insertJob(db, { repository_id: '/test/repo', status: 'queued' });
|
||||
const job2 = db.prepare(`SELECT * FROM indexing_jobs WHERE id = ?`).get(job2Id) as never;
|
||||
await pipeline.run(job2);
|
||||
|
||||
const secondSnippetIds = (db.prepare(`SELECT id FROM snippets ORDER BY id`).all() as {
|
||||
id: string;
|
||||
}[]).map((row) => row.id);
|
||||
const secondEmbeddingCount = (
|
||||
db.prepare(`SELECT COUNT(*) as n FROM snippet_embeddings WHERE profile_id = 'local-default'`).get() as {
|
||||
n: number;
|
||||
}
|
||||
).n;
|
||||
|
||||
expect(secondSnippetIds).toEqual(firstSnippetIds);
|
||||
expect(secondEmbeddingCount).toBe(firstSnippetIds.length);
|
||||
});
|
||||
|
||||
it('replaces snippets atomically when a file changes', async () => {
|
||||
const pipeline1 = makePipeline({
|
||||
files: [
|
||||
|
||||
@@ -187,20 +187,28 @@ export class IndexingPipeline {
|
||||
this.replaceSnippets(repo.id, changedDocIds, newDocuments, newSnippets);
|
||||
|
||||
// ---- Stage 4: Embeddings (if provider is configured) ----------------
|
||||
if (this.embeddingService && newSnippets.length > 0) {
|
||||
const snippetIds = newSnippets.map((s) => s.id!);
|
||||
if (this.embeddingService) {
|
||||
const snippetIds = this.embeddingService.findSnippetIdsMissingEmbeddings(
|
||||
repo.id,
|
||||
normJob.versionId
|
||||
);
|
||||
|
||||
if (snippetIds.length === 0) {
|
||||
// No missing embeddings for the active profile; parsing progress is final.
|
||||
} else {
|
||||
const embeddingsTotal = snippetIds.length;
|
||||
|
||||
await this.embeddingService.embedSnippets(snippetIds, (done) => {
|
||||
const progress = calculateProgress(
|
||||
processedFiles,
|
||||
totalFiles,
|
||||
done,
|
||||
embeddingsTotal,
|
||||
true
|
||||
);
|
||||
this.updateJob(job.id, { progress });
|
||||
});
|
||||
await this.embeddingService.embedSnippets(snippetIds, (done) => {
|
||||
const progress = calculateProgress(
|
||||
processedFiles,
|
||||
totalFiles,
|
||||
done,
|
||||
embeddingsTotal,
|
||||
true
|
||||
);
|
||||
this.updateJob(job.id, { progress });
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
// ---- Stage 5: Update repository stats --------------------------------
|
||||
|
||||
Reference in New Issue
Block a user