feat(EMBEDDINGS-0001): enable local embedder by default and overhaul settings page
- Wire the local embedding provider as the default on startup when no profile is configured
- Refactor embedding settings into a dedicated service, DTOs, mappers, and models
- Rebuild the settings page with a profile management UI and live test feedback
- Expose an index summary (indexed versions + embedding count) on repo endpoints
- Harden the indexing pipeline and context search with additional test coverage

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -408,6 +408,36 @@ describe('EmbeddingService', () => {
|
||||
expect(embedding![2]).toBeCloseTo(0.2, 5);
|
||||
});
|
||||
|
||||
it('stores embeddings under the configured profile ID', async () => {
|
||||
client
|
||||
.prepare(
|
||||
`INSERT INTO embedding_profiles
|
||||
(id, provider_kind, title, enabled, is_default, model, dimensions, config, created_at, updated_at)
|
||||
VALUES (?, ?, ?, ?, ?, ?, ?, ?, unixepoch(), unixepoch())`
|
||||
)
|
||||
.run(
|
||||
'openai-custom',
|
||||
'openai-compatible',
|
||||
'OpenAI Custom',
|
||||
1,
|
||||
0,
|
||||
'test-model',
|
||||
4,
|
||||
'{}'
|
||||
);
|
||||
|
||||
const snippetId = seedSnippet(db, client);
|
||||
const provider = makeProvider(4, 'test-model');
|
||||
const service = new EmbeddingService(client, provider, 'openai-custom');
|
||||
|
||||
await service.embedSnippets([snippetId]);
|
||||
|
||||
const row = client
|
||||
.prepare('SELECT profile_id FROM snippet_embeddings WHERE snippet_id = ?')
|
||||
.get(snippetId) as { profile_id: string };
|
||||
expect(row.profile_id).toBe('openai-custom');
|
||||
});
|
||||
|
||||
it('is idempotent — re-embedding replaces the existing row', async () => {
|
||||
const snippetId = seedSnippet(db, client);
|
||||
const provider = makeProvider(2);
|
||||
@@ -469,6 +499,19 @@ describe('EmbeddingService', () => {
|
||||
};
|
||||
expect(rows.cnt).toBe(0);
|
||||
});
|
||||
|
||||
it('finds snippets missing embeddings for the active profile', async () => {
|
||||
const firstSnippetId = seedSnippet(db, client);
|
||||
const secondSnippetId = seedSnippet(db, client, { content: 'Second snippet content' });
|
||||
const provider = makeProvider(4);
|
||||
const service = new EmbeddingService(client, provider, 'local-default');
|
||||
|
||||
await service.embedSnippets([firstSnippetId]);
|
||||
|
||||
expect(service.findSnippetIdsMissingEmbeddings('/test/embed-repo', null)).toEqual([
|
||||
secondSnippetId
|
||||
]);
|
||||
});
|
||||
});
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
@@ -23,6 +23,42 @@ export class EmbeddingService {
|
||||
private readonly profileId: string = 'local-default'
|
||||
) {}
|
||||
|
||||
/**
 * List the IDs of snippets in a repository that have no embedding stored
 * for this service's profile.
 *
 * Snippets are LEFT JOINed to snippet_embeddings restricted to
 * `this.profileId`; rows where the join found nothing are the ones still
 * missing an embedding.
 *
 * @param repositoryId - Repository whose snippets are inspected.
 * @param versionId - Version to restrict to. A falsy value (null or '')
 *   selects snippets whose version_id is NULL.
 * @returns The matching snippet IDs, ordered by ID.
 */
findSnippetIdsMissingEmbeddings(repositoryId: string, versionId: string | null): string[] {
	// The versioned and unversioned lookups differ only in this predicate and
	// its bind value, so the statement is built once instead of being
	// duplicated. The predicate is a fixed literal, never caller input.
	const versionPredicate = versionId
		? 'snippets.version_id = ?'
		: 'snippets.version_id IS NULL';
	const params: string[] = versionId
		? [this.profileId, repositoryId, versionId]
		: [this.profileId, repositoryId];

	const rows = this.db
		.prepare<string[], { id: string }>(
			`SELECT snippets.id
			 FROM snippets
			 LEFT JOIN snippet_embeddings
			   ON snippet_embeddings.snippet_id = snippets.id
			  AND snippet_embeddings.profile_id = ?
			 WHERE snippets.repository_id = ?
			   AND ${versionPredicate}
			   AND snippet_embeddings.snippet_id IS NULL
			 ORDER BY snippets.id`
		)
		.all(...params);

	return rows.map((row) => row.id);
}
|
||||
|
||||
/**
|
||||
* Embed the given snippet IDs and store the results in snippet_embeddings.
|
||||
*
|
||||
|
||||
@@ -1,10 +1,10 @@
|
||||
/**
|
||||
* LocalEmbeddingProvider — uses @xenova/transformers (optional dependency).
|
||||
* LocalEmbeddingProvider — uses @xenova/transformers via dynamic import.
|
||||
*
|
||||
* @xenova/transformers is NOT installed by default. This provider uses a
|
||||
* dynamic import so the module is only required at runtime when the local
|
||||
* provider is actually configured. If the package is absent, isAvailable()
|
||||
* returns false and embed() throws a clear error.
|
||||
* The dynamic import keeps server startup cheap and defers loading the model
|
||||
* runtime until the local provider is actually used. If the package is absent
|
||||
* or cannot be resolved, isAvailable() returns false and embed() throws a
|
||||
* clear error.
|
||||
*/
|
||||
|
||||
import { EmbeddingError, type EmbeddingProvider, type EmbeddingVector } from './provider.js';
|
||||
|
||||
@@ -44,11 +44,12 @@ export function createProviderFromProfile(profile: EmbeddingProfile): EmbeddingP
|
||||
*/
|
||||
export function getDefaultLocalProfile(): Pick<
|
||||
EmbeddingProfile,
|
||||
'id' | 'providerKind' | 'model' | 'dimensions'
|
||||
'id' | 'providerKind' | 'title' | 'model' | 'dimensions'
|
||||
> {
|
||||
return {
|
||||
id: 'local-default',
|
||||
providerKind: 'local-transformers',
|
||||
title: 'Local (Xenova/all-MiniLM-L6-v2)',
|
||||
model: 'Xenova/all-MiniLM-L6-v2',
|
||||
dimensions: 384
|
||||
};
|
||||
|
||||
38
src/lib/server/mappers/embedding-profile.mapper.ts
Normal file
38
src/lib/server/mappers/embedding-profile.mapper.ts
Normal file
@@ -0,0 +1,38 @@
|
||||
import {
|
||||
EmbeddingProfile,
|
||||
EmbeddingProfileEntity
|
||||
} from '$lib/server/models/embedding-profile.js';
|
||||
|
||||
/**
 * Normalise a stored profile config into a plain key/value object.
 *
 * Accepts either an already-parsed object or its JSON text. Anything that
 * does not yield a non-array object (null, empty string, malformed JSON,
 * JSON primitives, JSON arrays) results in an empty config rather than an
 * error.
 *
 * @param config - Parsed config object, JSON text, or null.
 * @returns The config object, or `{}` when none can be derived.
 */
function parseConfig(config: Record<string, unknown> | string | null): Record<string, unknown> {
	if (!config) {
		return {};
	}

	let candidate: unknown = config;
	if (typeof config === 'string') {
		try {
			candidate = JSON.parse(config);
		} catch {
			// Malformed JSON is treated as "no config".
			return {};
		}
	}

	// `typeof [] === 'object'`, so arrays must be excluded explicitly;
	// otherwise their indices would later surface as config keys.
	if (candidate === null || typeof candidate !== 'object' || Array.isArray(candidate)) {
		return {};
	}

	return candidate as Record<string, unknown>;
}
|
||||
|
||||
/**
 * Converts embedding-profile entities (snake_case fields) into
 * EmbeddingProfile domain models (camelCase fields).
 */
export class EmbeddingProfileMapper {
	/**
	 * Build an EmbeddingProfile from an entity.
	 *
	 * The `enabled` / `is_default` flags are coerced to booleans and the
	 * config is normalised through parseConfig; every other field is copied
	 * under its camelCase name.
	 *
	 * @param entity - Entity to convert.
	 * @returns The equivalent domain model.
	 */
	static fromEntity(entity: EmbeddingProfileEntity): EmbeddingProfile {
		const {
			id,
			provider_kind: providerKind,
			title,
			enabled,
			is_default: isDefault,
			model,
			dimensions,
			config,
			created_at: createdAt,
			updated_at: updatedAt
		} = entity;

		return new EmbeddingProfile({
			id,
			providerKind,
			title,
			enabled: Boolean(enabled),
			isDefault: Boolean(isDefault),
			model,
			dimensions,
			config: parseConfig(config),
			createdAt,
			updatedAt
		});
	}
}
|
||||
71
src/lib/server/mappers/embedding-settings.dto.mapper.ts
Normal file
71
src/lib/server/mappers/embedding-settings.dto.mapper.ts
Normal file
@@ -0,0 +1,71 @@
|
||||
import type {
|
||||
EmbeddingProfileConfigEntryDto,
|
||||
EmbeddingProfileDto,
|
||||
EmbeddingSettingsDto
|
||||
} from '$lib/dtos/embedding-settings.js';
|
||||
import type { EmbeddingProfile } from '$lib/server/models/embedding-profile.js';
|
||||
import { EmbeddingSettings } from '$lib/server/models/embedding-settings.js';
|
||||
|
||||
/** Placeholder emitted in place of the value of a sensitive config entry. */
const REDACTED_VALUE = '[redacted]';

/**
 * Case-insensitive pattern matched against config keys. A key containing
 * any of these fragments (api key with optional `-`/`_`, token, secret,
 * password, authorization) is treated as sensitive.
 */
const SENSITIVE_CONFIG_KEY = /(api[-_]?key|token|secret|password|authorization)/i;
|
||||
|
||||
/**
 * Render a config value as display text.
 *
 * null and undefined become the text 'null', strings are returned as-is,
 * numbers and booleans use their String() form, and everything else is
 * serialised with JSON.stringify.
 *
 * @param value - Config value to render.
 * @returns The textual form of the value.
 */
function formatConfigValue(value: unknown): string {
	if (value == null) {
		return 'null';
	}

	switch (typeof value) {
		case 'string':
			return value;
		case 'number':
		case 'boolean':
			return String(value);
		default:
			return JSON.stringify(value);
	}
}
|
||||
|
||||
/**
 * Split a profile config into the parts that may be shown to clients.
 *
 * Keys are processed in localeCompare order. A key matching
 * SENSITIVE_CONFIG_KEY is omitted from `visibleConfig` and listed in
 * `configEntries` with REDACTED_VALUE; any other key is copied to
 * `visibleConfig` and listed with its formatted value.
 *
 * @param config - Full profile config.
 * @returns `visibleConfig` (non-sensitive key/value pairs) and
 *   `configEntries` (one entry per key, sensitive ones redacted).
 */
function sanitizeConfig(config: Record<string, unknown>): {
	visibleConfig: Record<string, unknown>;
	configEntries: EmbeddingProfileConfigEntryDto[];
} {
	const visibleConfig: Record<string, unknown> = {};
	const configEntries: EmbeddingProfileConfigEntryDto[] = [];

	const orderedKeys = Object.keys(config).sort((a, b) => a.localeCompare(b));

	for (const key of orderedKeys) {
		const redacted = SENSITIVE_CONFIG_KEY.test(key);

		if (redacted) {
			configEntries.push({ key, value: REDACTED_VALUE, redacted });
			continue;
		}

		const value = config[key];
		visibleConfig[key] = value;
		configEntries.push({ key, value: formatConfigValue(value), redacted });
	}

	return { visibleConfig, configEntries };
}
|
||||
|
||||
/**
 * Convert a profile model into its client-facing DTO.
 *
 * All scalar fields are copied unchanged; the config is replaced by its
 * sanitised form so that sensitive entries are not exposed.
 *
 * @param profile - Profile to convert.
 * @returns DTO carrying the visible config plus the per-key entry list.
 */
function toProfileDto(profile: EmbeddingProfile): EmbeddingProfileDto {
	const sanitized = sanitizeConfig(profile.config);

	const dto: EmbeddingProfileDto = {
		id: profile.id,
		providerKind: profile.providerKind,
		title: profile.title,
		enabled: profile.enabled,
		isDefault: profile.isDefault,
		model: profile.model,
		dimensions: profile.dimensions,
		config: sanitized.visibleConfig,
		configEntries: sanitized.configEntries,
		createdAt: profile.createdAt,
		updatedAt: profile.updatedAt
	};

	return dto;
}
|
||||
|
||||
/**
 * Maps the EmbeddingSettings model onto the DTO returned to clients.
 */
export class EmbeddingSettingsDtoMapper {
	/**
	 * Convert settings into a DTO.
	 *
	 * @param settings - Settings model to convert.
	 * @returns DTO with every profile converted, the active profile ID, and
	 *   the converted active profile (null when there is none).
	 */
	static toDto(settings: EmbeddingSettings): EmbeddingSettingsDto {
		const { profiles, activeProfile, activeProfileId } = settings;

		return {
			profiles: profiles.map((profile) => toProfileDto(profile)),
			activeProfileId,
			activeProfile: activeProfile ? toProfileDto(activeProfile) : null
		};
	}
}
|
||||
77
src/lib/server/models/embedding-profile.ts
Normal file
77
src/lib/server/models/embedding-profile.ts
Normal file
@@ -0,0 +1,77 @@
|
||||
/**
 * Constructor input for EmbeddingProfileEntity. Field names are snake_case.
 */
export interface EmbeddingProfileEntityProps {
	id: string;
	provider_kind: string;
	title: string;
	/** Either a boolean or a numeric flag. */
	enabled: boolean | number;
	/** Either a boolean or a numeric flag. */
	is_default: boolean | number;
	model: string;
	dimensions: number;
	/** Parsed object, text form, or null when absent. */
	config: Record<string, unknown> | string | null;
	created_at: number;
	updated_at: number;
}

/**
 * Embedding profile in its snake_case entity form. The constructor copies
 * exactly the declared fields from `props`; nothing is converted.
 */
export class EmbeddingProfileEntity implements EmbeddingProfileEntityProps {
	id: string;
	provider_kind: string;
	title: string;
	enabled: boolean | number;
	is_default: boolean | number;
	model: string;
	dimensions: number;
	config: Record<string, unknown> | string | null;
	created_at: number;
	updated_at: number;

	/**
	 * @param props - Values for every entity field.
	 */
	constructor(props: EmbeddingProfileEntityProps) {
		const {
			id,
			provider_kind,
			title,
			enabled,
			is_default,
			model,
			dimensions,
			config,
			created_at,
			updated_at
		} = props;

		this.id = id;
		this.provider_kind = provider_kind;
		this.title = title;
		this.enabled = enabled;
		this.is_default = is_default;
		this.model = model;
		this.dimensions = dimensions;
		this.config = config;
		this.created_at = created_at;
		this.updated_at = updated_at;
	}
}
|
||||
|
||||
/**
 * Constructor input for EmbeddingProfile. Field names are camelCase and the
 * flags are plain booleans.
 */
export interface EmbeddingProfileProps {
	id: string;
	providerKind: string;
	title: string;
	enabled: boolean;
	isDefault: boolean;
	model: string;
	dimensions: number;
	config: Record<string, unknown>;
	createdAt: number;
	updatedAt: number;
}

/**
 * Embedding profile domain model. The constructor copies exactly the
 * declared fields from `props`; the config object is stored by reference.
 */
export class EmbeddingProfile implements EmbeddingProfileProps {
	id: string;
	providerKind: string;
	title: string;
	enabled: boolean;
	isDefault: boolean;
	model: string;
	dimensions: number;
	config: Record<string, unknown>;
	createdAt: number;
	updatedAt: number;

	/**
	 * @param props - Values for every profile field.
	 */
	constructor(props: EmbeddingProfileProps) {
		const {
			id,
			providerKind,
			title,
			enabled,
			isDefault,
			model,
			dimensions,
			config,
			createdAt,
			updatedAt
		} = props;

		this.id = id;
		this.providerKind = providerKind;
		this.title = title;
		this.enabled = enabled;
		this.isDefault = isDefault;
		this.model = model;
		this.dimensions = dimensions;
		this.config = config;
		this.createdAt = createdAt;
		this.updatedAt = updatedAt;
	}
}
|
||||
20
src/lib/server/models/embedding-settings.ts
Normal file
20
src/lib/server/models/embedding-settings.ts
Normal file
@@ -0,0 +1,20 @@
|
||||
import type { EmbeddingProfile } from './embedding-profile.js';
|
||||
|
||||
/**
 * Constructor input for EmbeddingSettings.
 */
export interface EmbeddingSettingsProps {
	/** Every known profile. */
	profiles: EmbeddingProfile[];
	/** The profile currently in use, or null when none is active. */
	activeProfile: EmbeddingProfile | null;
}

/**
 * Snapshot of the embedding configuration: the list of profiles plus the
 * one that is active (if any).
 */
export class EmbeddingSettings implements EmbeddingSettingsProps {
	profiles: EmbeddingProfile[];
	activeProfile: EmbeddingProfile | null;

	/**
	 * @param props - Profiles and the active profile to hold.
	 */
	constructor(props: EmbeddingSettingsProps) {
		const { profiles, activeProfile } = props;
		this.profiles = profiles;
		this.activeProfile = activeProfile;
	}

	/** ID of the active profile, or null when there is no active profile. */
	get activeProfileId(): string | null {
		const active = this.activeProfile;
		return active?.id ?? null;
	}
}
|
||||
@@ -12,6 +12,7 @@ import { join } from 'node:path';
|
||||
import { JobQueue } from './job-queue.js';
|
||||
import { IndexingPipeline } from './indexing.pipeline.js';
|
||||
import { recoverStaleJobs } from './startup.js';
|
||||
import { EmbeddingService } from '$lib/server/embeddings/embedding.service.js';
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Test DB factory
|
||||
@@ -22,15 +23,21 @@ function createTestDb(): Database.Database {
|
||||
client.pragma('foreign_keys = ON');
|
||||
|
||||
const migrationsFolder = join(import.meta.dirname, '../db/migrations');
|
||||
const migrationSql = readFileSync(join(migrationsFolder, '0000_large_master_chief.sql'), 'utf-8');
|
||||
for (const migrationFile of [
|
||||
'0000_large_master_chief.sql',
|
||||
'0001_quick_nighthawk.sql',
|
||||
'0002_silky_stellaris.sql'
|
||||
]) {
|
||||
const migrationSql = readFileSync(join(migrationsFolder, migrationFile), 'utf-8');
|
||||
|
||||
const statements = migrationSql
|
||||
.split('--> statement-breakpoint')
|
||||
.map((s) => s.trim())
|
||||
.filter(Boolean);
|
||||
const statements = migrationSql
|
||||
.split('--> statement-breakpoint')
|
||||
.map((s) => s.trim())
|
||||
.filter(Boolean);
|
||||
|
||||
for (const stmt of statements) {
|
||||
client.exec(stmt);
|
||||
for (const stmt of statements) {
|
||||
client.exec(stmt);
|
||||
}
|
||||
}
|
||||
|
||||
return client;
|
||||
@@ -238,7 +245,8 @@ describe('IndexingPipeline', () => {
|
||||
crawlResult: {
|
||||
files: Array<{ path: string; content: string; sha: string; language: string }>;
|
||||
totalFiles: number;
|
||||
} = { files: [], totalFiles: 0 }
|
||||
} = { files: [], totalFiles: 0 },
|
||||
embeddingService: EmbeddingService | null = null
|
||||
) {
|
||||
const mockGithubCrawl = vi.fn().mockResolvedValue({
|
||||
...crawlResult,
|
||||
@@ -256,7 +264,12 @@ describe('IndexingPipeline', () => {
|
||||
})
|
||||
};
|
||||
|
||||
return new IndexingPipeline(db, mockGithubCrawl as never, mockLocalCrawler as never, null);
|
||||
return new IndexingPipeline(
|
||||
db,
|
||||
mockGithubCrawl as never,
|
||||
mockLocalCrawler as never,
|
||||
embeddingService
|
||||
);
|
||||
}
|
||||
|
||||
function makeJob(repositoryId = '/test/repo') {
|
||||
@@ -388,6 +401,64 @@ describe('IndexingPipeline', () => {
|
||||
expect(secondSnippetIds).toEqual(firstSnippetIds);
|
||||
});
|
||||
|
||||
it('re-index backfills missing embeddings for unchanged snippets', async () => {
|
||||
const provider = {
|
||||
name: 'test-provider',
|
||||
model: 'test-model',
|
||||
dimensions: 3,
|
||||
embed: vi.fn(async (texts: string[]) =>
|
||||
texts.map(() => ({
|
||||
values: new Float32Array([0.1, 0.2, 0.3]),
|
||||
dimensions: 3,
|
||||
model: 'test-model'
|
||||
}))
|
||||
),
|
||||
isAvailable: vi.fn(async () => true)
|
||||
};
|
||||
const embeddingService = new EmbeddingService(db, provider, 'local-default');
|
||||
const files = [
|
||||
{
|
||||
path: 'README.md',
|
||||
content: '# Hello\n\nThis is documentation.',
|
||||
sha: 'sha-readme',
|
||||
language: 'markdown'
|
||||
}
|
||||
];
|
||||
|
||||
const pipeline = makePipeline({ files, totalFiles: 1 }, embeddingService);
|
||||
const job1 = makeJob();
|
||||
await pipeline.run(job1 as never);
|
||||
|
||||
const firstSnippetIds = (db.prepare(`SELECT id FROM snippets ORDER BY id`).all() as { id: string }[])
|
||||
.map((row) => row.id);
|
||||
expect(firstSnippetIds.length).toBeGreaterThan(0);
|
||||
|
||||
const firstEmbeddingCount = (
|
||||
db.prepare(`SELECT COUNT(*) as n FROM snippet_embeddings WHERE profile_id = 'local-default'`).get() as {
|
||||
n: number;
|
||||
}
|
||||
).n;
|
||||
expect(firstEmbeddingCount).toBe(firstSnippetIds.length);
|
||||
|
||||
db.prepare(`DELETE FROM snippet_embeddings WHERE profile_id = 'local-default'`).run();
|
||||
|
||||
const job2Id = insertJob(db, { repository_id: '/test/repo', status: 'queued' });
|
||||
const job2 = db.prepare(`SELECT * FROM indexing_jobs WHERE id = ?`).get(job2Id) as never;
|
||||
await pipeline.run(job2);
|
||||
|
||||
const secondSnippetIds = (db.prepare(`SELECT id FROM snippets ORDER BY id`).all() as {
|
||||
id: string;
|
||||
}[]).map((row) => row.id);
|
||||
const secondEmbeddingCount = (
|
||||
db.prepare(`SELECT COUNT(*) as n FROM snippet_embeddings WHERE profile_id = 'local-default'`).get() as {
|
||||
n: number;
|
||||
}
|
||||
).n;
|
||||
|
||||
expect(secondSnippetIds).toEqual(firstSnippetIds);
|
||||
expect(secondEmbeddingCount).toBe(firstSnippetIds.length);
|
||||
});
|
||||
|
||||
it('replaces snippets atomically when a file changes', async () => {
|
||||
const pipeline1 = makePipeline({
|
||||
files: [
|
||||
|
||||
@@ -187,20 +187,28 @@ export class IndexingPipeline {
|
||||
this.replaceSnippets(repo.id, changedDocIds, newDocuments, newSnippets);
|
||||
|
||||
// ---- Stage 4: Embeddings (if provider is configured) ----------------
|
||||
if (this.embeddingService && newSnippets.length > 0) {
|
||||
const snippetIds = newSnippets.map((s) => s.id!);
|
||||
if (this.embeddingService) {
|
||||
const snippetIds = this.embeddingService.findSnippetIdsMissingEmbeddings(
|
||||
repo.id,
|
||||
normJob.versionId
|
||||
);
|
||||
|
||||
if (snippetIds.length === 0) {
|
||||
// No missing embeddings for the active profile; parsing progress is final.
|
||||
} else {
|
||||
const embeddingsTotal = snippetIds.length;
|
||||
|
||||
await this.embeddingService.embedSnippets(snippetIds, (done) => {
|
||||
const progress = calculateProgress(
|
||||
processedFiles,
|
||||
totalFiles,
|
||||
done,
|
||||
embeddingsTotal,
|
||||
true
|
||||
);
|
||||
this.updateJob(job.id, { progress });
|
||||
});
|
||||
await this.embeddingService.embedSnippets(snippetIds, (done) => {
|
||||
const progress = calculateProgress(
|
||||
processedFiles,
|
||||
totalFiles,
|
||||
done,
|
||||
embeddingsTotal,
|
||||
true
|
||||
);
|
||||
this.updateJob(job.id, { progress });
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
// ---- Stage 5: Update repository stats --------------------------------
|
||||
|
||||
131
src/lib/server/services/embedding-settings.service.ts
Normal file
131
src/lib/server/services/embedding-settings.service.ts
Normal file
@@ -0,0 +1,131 @@
|
||||
import type Database from 'better-sqlite3';
|
||||
import type { EmbeddingSettingsUpdateDto } from '$lib/dtos/embedding-settings.js';
|
||||
import { createProviderFromProfile, getDefaultLocalProfile } from '$lib/server/embeddings/registry.js';
|
||||
import { EmbeddingProfileMapper } from '$lib/server/mappers/embedding-profile.mapper.js';
|
||||
import { EmbeddingProfile, EmbeddingProfileEntity } from '$lib/server/models/embedding-profile.js';
|
||||
import { EmbeddingSettings } from '$lib/server/models/embedding-settings.js';
|
||||
import { InvalidInputError } from '$lib/server/utils/validation.js';
|
||||
|
||||
/**
 * Reads and updates the embedding profiles stored in the
 * `embedding_profiles` table.
 */
export class EmbeddingSettingsService {
	constructor(private readonly db: Database.Database) {}

	/**
	 * Load every profile and determine the active one.
	 *
	 * The active profile is the first loaded profile that is both flagged as
	 * default and enabled; null when no profile qualifies.
	 */
	getSettings(): EmbeddingSettings {
		const profiles = this.loadProfiles();
		const activeProfile = profiles.find((profile) => profile.isDefault && profile.enabled) ?? null;

		return new EmbeddingSettings({ profiles, activeProfile });
	}

	/**
	 * Change which profile is active.
	 *
	 * - `activeProfileId === null` clears the default flag on every profile.
	 * - `'local-default'` activates the built-in local profile.
	 * - Any other ID activates (creating or updating) the custom profile
	 *   supplied in `input.profile`.
	 *
	 * The candidate profile is validated and its provider probed BEFORE the
	 * table is modified, and the "clear defaults + upsert" pair runs in one
	 * transaction. A rejected update therefore leaves the previously active
	 * profile untouched.
	 *
	 * @param input - Requested active profile ID and optional custom profile.
	 * @returns The settings as stored after the update.
	 * @throws InvalidInputError when the custom profile is missing or
	 *   inconsistent, or when the provider reports it is not available.
	 */
	async updateSettings(input: EmbeddingSettingsUpdateDto): Promise<EmbeddingSettings> {
		const now = Math.floor(Date.now() / 1000);

		if (input.activeProfileId === null) {
			this.clearDefaultProfile(now);
			return this.getSettings();
		}

		const profile =
			input.activeProfileId === 'local-default'
				? this.buildDefaultLocalProfile(now)
				: this.buildCustomProfile(input, now);

		const available = await createProviderFromProfile(profile).isAvailable();
		if (!available) {
			throw new InvalidInputError(
				`Could not connect to the "${profile.providerKind}" provider. Check your configuration.`
			);
		}

		// Atomic switch: either the new profile becomes the only default, or
		// nothing changes.
		this.db.transaction(() => {
			this.clearDefaultProfile(now);
			this.persistProfile(profile);
		})();

		return this.getSettings();
	}

	/** Remove the default flag from every profile and stamp `updated_at`. */
	private clearDefaultProfile(now: number): void {
		this.db.prepare('UPDATE embedding_profiles SET is_default = 0, updated_at = ?').run(now);
	}

	/** Read all profile rows (default first, then oldest first) as models. */
	private loadProfiles(): EmbeddingProfile[] {
		// Row shape is whatever the entity constructor accepts.
		type EntityRow = ConstructorParameters<typeof EmbeddingProfileEntity>[0];

		return this.db
			.prepare<[], EntityRow>(
				'SELECT * FROM embedding_profiles ORDER BY is_default DESC, created_at ASC'
			)
			.all()
			.map((row) => EmbeddingProfileMapper.fromEntity(new EmbeddingProfileEntity(row)));
	}

	/**
	 * Build the built-in local profile, enabled and flagged as default, with
	 * an empty config. An existing row's `created_at` is preserved.
	 */
	private buildDefaultLocalProfile(now: number): EmbeddingProfile {
		const defaultLocal = getDefaultLocalProfile();

		return new EmbeddingProfile({
			id: defaultLocal.id,
			providerKind: defaultLocal.providerKind,
			title: defaultLocal.title,
			enabled: true,
			isDefault: true,
			model: defaultLocal.model,
			dimensions: defaultLocal.dimensions,
			config: {},
			createdAt: this.getCreatedAt(defaultLocal.id, now),
			updatedAt: now
		});
	}

	/**
	 * Validate `input.profile` and build it as the enabled default profile.
	 *
	 * @throws InvalidInputError when the profile is absent, its ID differs
	 *   from `activeProfileId`, or its title or model is empty.
	 */
	private buildCustomProfile(input: EmbeddingSettingsUpdateDto, now: number): EmbeddingProfile {
		const candidate = input.profile;
		if (!candidate) {
			throw new InvalidInputError('profile is required for custom embedding providers');
		}
		if (candidate.id !== input.activeProfileId) {
			throw new InvalidInputError('activeProfileId must match profile.id');
		}
		if (!candidate.title || !candidate.model) {
			throw new InvalidInputError('profile title and model are required');
		}

		return new EmbeddingProfile({
			id: candidate.id,
			providerKind: candidate.providerKind,
			title: candidate.title,
			enabled: true,
			isDefault: true,
			model: candidate.model,
			dimensions: candidate.dimensions,
			// A request body may omit config; persist an empty object rather
			// than letting JSON.stringify(undefined) reach the INSERT.
			config: candidate.config ?? {},
			createdAt: this.getCreatedAt(candidate.id, now),
			updatedAt: now
		});
	}

	/** `created_at` of the stored profile with this ID, or `fallback` if none. */
	private getCreatedAt(id: string, fallback: number): number {
		const row = this.db
			.prepare<[string], { created_at: number }>(
				'SELECT created_at FROM embedding_profiles WHERE id = ?'
			)
			.get(id);

		return row?.created_at ?? fallback;
	}

	/**
	 * Insert the profile, or update the existing row with the same ID.
	 * On conflict every column except `id` and `created_at` is overwritten.
	 */
	private persistProfile(profile: EmbeddingProfile): void {
		this.db
			.prepare(
				`INSERT INTO embedding_profiles
				   (id, provider_kind, title, enabled, is_default, model, dimensions, config, created_at, updated_at)
				 VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
				 ON CONFLICT(id) DO UPDATE SET
				   provider_kind = excluded.provider_kind,
				   title = excluded.title,
				   enabled = excluded.enabled,
				   is_default = excluded.is_default,
				   model = excluded.model,
				   dimensions = excluded.dimensions,
				   config = excluded.config,
				   updated_at = excluded.updated_at`
			)
			.run(
				profile.id,
				profile.providerKind,
				profile.title,
				profile.enabled ? 1 : 0,
				profile.isDefault ? 1 : 0,
				profile.model,
				profile.dimensions,
				JSON.stringify(profile.config),
				profile.createdAt,
				profile.updatedAt
			);
	}
}
|
||||
@@ -27,16 +27,20 @@ function createTestDb(): Database.Database {
|
||||
client.pragma('foreign_keys = ON');
|
||||
|
||||
const migrationsFolder = join(import.meta.dirname, '../db/migrations');
|
||||
const migrationSql = readFileSync(join(migrationsFolder, '0000_large_master_chief.sql'), 'utf-8');
|
||||
|
||||
// Drizzle migration files use `--> statement-breakpoint` as separator.
|
||||
const statements = migrationSql
|
||||
.split('--> statement-breakpoint')
|
||||
.map((s) => s.trim())
|
||||
.filter(Boolean);
|
||||
for (const migration of [
|
||||
'0000_large_master_chief.sql',
|
||||
'0001_quick_nighthawk.sql',
|
||||
'0002_silky_stellaris.sql'
|
||||
]) {
|
||||
const statements = readFileSync(join(migrationsFolder, migration), 'utf-8')
|
||||
.split('--> statement-breakpoint')
|
||||
.map((statement) => statement.trim())
|
||||
.filter(Boolean);
|
||||
|
||||
for (const stmt of statements) {
|
||||
client.exec(stmt);
|
||||
for (const statement of statements) {
|
||||
client.exec(statement);
|
||||
}
|
||||
}
|
||||
|
||||
return client;
|
||||
@@ -408,6 +412,83 @@ describe('RepositoryService.getVersions()', () => {
|
||||
});
|
||||
});
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// getIndexSummary()
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
describe('RepositoryService.getIndexSummary()', () => {
|
||||
let client: Database.Database;
|
||||
let service: RepositoryService;
|
||||
|
||||
beforeEach(() => {
|
||||
client = createTestDb();
|
||||
service = makeService(client);
|
||||
service.add({ source: 'github', sourceUrl: 'https://github.com/facebook/react', branch: 'main' });
|
||||
});
|
||||
|
||||
it('returns embedding counts and indexed version labels', () => {
|
||||
const now = Math.floor(Date.now() / 1000);
|
||||
const docId = crypto.randomUUID();
|
||||
const versionDocId = crypto.randomUUID();
|
||||
const snippetId = crypto.randomUUID();
|
||||
const versionSnippetId = crypto.randomUUID();
|
||||
|
||||
client
|
||||
.prepare(
|
||||
`INSERT INTO repository_versions (id, repository_id, tag, state, created_at)
|
||||
VALUES (?, '/facebook/react', ?, 'indexed', ?)`
|
||||
)
|
||||
.run('/facebook/react/v18.3.0', 'v18.3.0', now);
|
||||
|
||||
client
|
||||
.prepare(
|
||||
`INSERT INTO documents (id, repository_id, version_id, file_path, checksum, indexed_at)
|
||||
VALUES (?, '/facebook/react', NULL, 'README.md', 'base', ?)`
|
||||
)
|
||||
.run(docId, now);
|
||||
|
||||
client
|
||||
.prepare(
|
||||
`INSERT INTO documents (id, repository_id, version_id, file_path, checksum, indexed_at)
|
||||
VALUES (?, '/facebook/react', ?, 'README.md', 'version', ?)`
|
||||
)
|
||||
.run(versionDocId, '/facebook/react/v18.3.0', now);
|
||||
|
||||
client
|
||||
.prepare(
|
||||
`INSERT INTO snippets (id, document_id, repository_id, version_id, type, content, created_at)
|
||||
VALUES (?, ?, '/facebook/react', NULL, 'info', 'base snippet', ?)`
|
||||
)
|
||||
.run(snippetId, docId, now);
|
||||
|
||||
client
|
||||
.prepare(
|
||||
`INSERT INTO snippets (id, document_id, repository_id, version_id, type, content, created_at)
|
||||
VALUES (?, ?, '/facebook/react', ?, 'info', 'version snippet', ?)`
|
||||
)
|
||||
.run(versionSnippetId, versionDocId, '/facebook/react/v18.3.0', now);
|
||||
|
||||
client
|
||||
.prepare(
|
||||
`INSERT INTO snippet_embeddings (snippet_id, profile_id, model, dimensions, embedding, created_at)
|
||||
VALUES (?, 'local-default', 'Xenova/all-MiniLM-L6-v2', 2, ?, ?)`
|
||||
)
|
||||
.run(snippetId, Buffer.from(Float32Array.from([1, 0]).buffer), now);
|
||||
|
||||
client
|
||||
.prepare(
|
||||
`INSERT INTO snippet_embeddings (snippet_id, profile_id, model, dimensions, embedding, created_at)
|
||||
VALUES (?, 'local-default', 'Xenova/all-MiniLM-L6-v2', 2, ?, ?)`
|
||||
)
|
||||
.run(versionSnippetId, Buffer.from(Float32Array.from([0, 1]).buffer), now);
|
||||
|
||||
expect(service.getIndexSummary('/facebook/react')).toEqual({
|
||||
embeddingCount: 2,
|
||||
indexedVersions: ['main', 'v18.3.0']
|
||||
});
|
||||
});
|
||||
});
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// createIndexingJob()
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
@@ -39,6 +39,11 @@ export interface RepositoryStats {
|
||||
lastIndexedAt: Date | null;
|
||||
}
|
||||
|
||||
/**
 * Overview of what has been indexed for a repository.
 */
export interface RepositoryIndexSummary {
	/** Number of embedding rows counted for the repository. */
	embeddingCount: number;
	/** Labels of the indexed versions. */
	indexedVersions: Array<string>;
}
|
||||
|
||||
export class RepositoryService {
|
||||
constructor(private readonly db: Database.Database) {}
|
||||
|
||||
@@ -266,6 +271,49 @@ export class RepositoryService {
|
||||
return rows.map((r) => r.tag);
|
||||
}
|
||||
|
||||
/**
 * Summarise the indexed state of a repository.
 *
 * `embeddingCount` is the number of snippet_embeddings rows joined to the
 * repository's snippets. `indexedVersions` lists, without duplicates, the
 * repository branch (or 'default branch' when it has none) if any document
 * exists with a NULL version_id, followed by the tags of versions whose
 * state is 'indexed', newest first.
 *
 * @param repositoryId - Repository to summarise.
 * @returns Embedding count and indexed version labels.
 * @throws NotFoundError when the repository does not exist.
 */
getIndexSummary(repositoryId: string): RepositoryIndexSummary {
	const repository = this.get(repositoryId);
	if (!repository) {
		throw new NotFoundError(`Repository ${repositoryId} not found`);
	}

	const { count: embeddingCount } = this.db
		.prepare(
			`SELECT COUNT(*) AS count
			 FROM snippet_embeddings se
			 INNER JOIN snippets s ON s.id = se.snippet_id
			 WHERE s.repository_id = ?`
		)
		.get(repositoryId) as { count: number };

	const indexedTags = (
		this.db
			.prepare(
				`SELECT tag FROM repository_versions
				 WHERE repository_id = ? AND state = 'indexed'
				 ORDER BY created_at DESC`
			)
			.all(repositoryId) as { tag: string }[]
	).map((row) => row.tag);

	// Any document without a version means the default branch is indexed.
	const defaultBranchDocument = this.db
		.prepare(
			`SELECT 1 AS found
			 FROM documents
			 WHERE repository_id = ? AND version_id IS NULL
			 LIMIT 1`
		)
		.get(repositoryId);

	// A Set keeps first-insertion order, so the branch label stays first and
	// a tag equal to the branch name is not repeated.
	const labels = new Set<string>();
	if (defaultBranchDocument) {
		labels.add(repository.branch ?? 'default branch');
	}
	for (const tag of indexedTags) {
		labels.add(tag);
	}

	return {
		embeddingCount,
		indexedVersions: Array.from(labels)
	};
}
|
||||
|
||||
/**
|
||||
* Create an indexing job for a repository.
|
||||
* If a job is already running, returns the existing job.
|
||||
|
||||
Reference in New Issue
Block a user