TRUEREF-0023 rewrite indexing pipeline - parallel reads - serialized writes

This commit is contained in:
Giancarmine Salucci
2026-04-02 09:49:38 +02:00
parent 9525c58e9a
commit f86be4106b
68 changed files with 5042 additions and 3131 deletions

View File

@@ -33,9 +33,10 @@ try {
try {
const db = getClient();
const activeProfileRow = db
.prepare<[], EmbeddingProfileEntityProps>(
'SELECT * FROM embedding_profiles WHERE is_default = 1 AND enabled = 1 LIMIT 1'
)
.prepare<
[],
EmbeddingProfileEntityProps
>('SELECT * FROM embedding_profiles WHERE is_default = 1 AND enabled = 1 LIMIT 1')
.get();
let embeddingService: EmbeddingService | null = null;
@@ -55,9 +56,10 @@ try {
let concurrency = 2; // default
if (dbPath) {
const concurrencyRow = db
.prepare<[], { value: string }>(
"SELECT value FROM settings WHERE key = 'indexing.concurrency' LIMIT 1"
)
.prepare<
[],
{ value: string }
>("SELECT value FROM settings WHERE key = 'indexing.concurrency' LIMIT 1")
.get();
if (concurrencyRow) {
try {

View File

@@ -16,21 +16,29 @@
es.addEventListener('job-done', () => {
void fetch(`/api/v1/jobs/${jobId}`)
.then(r => r.json())
.then(d => { job = d.job; oncomplete?.(); });
.then((r) => r.json())
.then((d) => {
job = d.job;
oncomplete?.();
});
es.close();
});
es.addEventListener('job-failed', (event) => {
const data = JSON.parse(event.data);
if (job) job = { ...job, status: 'failed', error: data.error ?? 'Unknown error' } as IndexingJob;
if (job)
job = { ...job, status: 'failed', error: data.error ?? 'Unknown error' } as IndexingJob;
oncomplete?.();
es.close();
});
es.onerror = () => {
es.close();
void fetch(`/api/v1/jobs/${jobId}`).then(r => r.json()).then(d => { job = d.job; });
void fetch(`/api/v1/jobs/${jobId}`)
.then((r) => r.json())
.then((d) => {
job = d.job;
});
};
return () => es.close();

View File

@@ -30,4 +30,4 @@ describe('RepositoryCard.svelte', () => {
expect(text).toMatch(/1[,.\u00a0\u202f]?200 embeddings/);
expect(text).toContain('Indexed: main, v18.3.0');
});
});
});

View File

@@ -1,8 +1,9 @@
<script lang="ts">
let { rows = 5 }: { rows?: number } = $props();
const rowIndexes = $derived(Array.from({ length: rows }, (_, index) => index));
</script>
{#each Array(rows) as _, i (i)}
{#each rowIndexes as i (i)}
<tr>
<td class="px-6 py-4">
<div class="h-4 w-48 animate-pulse rounded bg-gray-200"></div>

View File

@@ -1,5 +1,6 @@
<script lang="ts">
import { onDestroy } from 'svelte';
import { SvelteMap } from 'svelte/reactivity';
export interface ToastItem {
id: string;
@@ -8,7 +9,7 @@
}
let { toasts = $bindable([]) }: { toasts: ToastItem[] } = $props();
const timers = new Map<string, ReturnType<typeof setTimeout>>();
const timers = new SvelteMap<string, ReturnType<typeof setTimeout>>();
$effect(() => {
for (const toast of toasts) {
@@ -70,8 +71,7 @@
class="ml-2 text-xs opacity-70 hover:opacity-100"
>
x
</button
>
</button>
</div>
{/each}
</div>

View File

@@ -38,4 +38,4 @@ export interface EmbeddingProfileUpsertDto {
export interface EmbeddingSettingsUpdateDto {
activeProfileId: string | null;
profile?: EmbeddingProfileUpsertDto;
}
}

View File

@@ -10,9 +10,7 @@ import { GitHubApiError } from './github-tags.js';
// ---------------------------------------------------------------------------
function mockFetch(status: number, body: unknown): void {
vi.spyOn(global, 'fetch').mockResolvedValueOnce(
new Response(JSON.stringify(body), { status })
);
vi.spyOn(global, 'fetch').mockResolvedValueOnce(new Response(JSON.stringify(body), { status }));
}
beforeEach(() => {
@@ -105,9 +103,9 @@ describe('fetchGitHubChangedFiles', () => {
it('throws GitHubApiError on 422 unprocessable entity', async () => {
mockFetch(422, { message: 'Unprocessable Entity' });
await expect(
fetchGitHubChangedFiles('owner', 'repo', 'bad-ref', 'v1.1.0')
).rejects.toThrow(GitHubApiError);
await expect(fetchGitHubChangedFiles('owner', 'repo', 'bad-ref', 'v1.1.0')).rejects.toThrow(
GitHubApiError
);
});
it('returns empty array when files property is missing', async () => {
@@ -141,9 +139,11 @@ describe('fetchGitHubChangedFiles', () => {
});
it('sends Authorization header when token is provided', async () => {
const fetchSpy = vi.spyOn(global, 'fetch').mockResolvedValueOnce(
new Response(JSON.stringify({ status: 'ahead', files: [] }), { status: 200 })
);
const fetchSpy = vi
.spyOn(global, 'fetch')
.mockResolvedValueOnce(
new Response(JSON.stringify({ status: 'ahead', files: [] }), { status: 200 })
);
await fetchGitHubChangedFiles('owner', 'repo', 'v1.0.0', 'v1.1.0', 'my-token');
const callArgs = fetchSpy.mock.calls[0];
const headers = (callArgs[1] as RequestInit).headers as Record<string, string>;
@@ -151,9 +151,11 @@ describe('fetchGitHubChangedFiles', () => {
});
it('does not send Authorization header when no token provided', async () => {
const fetchSpy = vi.spyOn(global, 'fetch').mockResolvedValueOnce(
new Response(JSON.stringify({ status: 'ahead', files: [] }), { status: 200 })
);
const fetchSpy = vi
.spyOn(global, 'fetch')
.mockResolvedValueOnce(
new Response(JSON.stringify({ status: 'ahead', files: [] }), { status: 200 })
);
await fetchGitHubChangedFiles('owner', 'repo', 'v1.0.0', 'v1.1.0');
const callArgs = fetchSpy.mock.calls[0];
const headers = (callArgs[1] as RequestInit).headers as Record<string, string>;

View File

@@ -4,6 +4,7 @@
*/
import Database from 'better-sqlite3';
import { env } from '$env/dynamic/private';
import { applySqlitePragmas } from './connection';
import { loadSqliteVec } from './sqlite-vec';
let _client: Database.Database | null = null;
@@ -12,14 +13,7 @@ export function getClient(): Database.Database {
if (!_client) {
if (!env.DATABASE_URL) throw new Error('DATABASE_URL is not set');
_client = new Database(env.DATABASE_URL);
_client.pragma('journal_mode = WAL');
_client.pragma('foreign_keys = ON');
_client.pragma('busy_timeout = 5000');
_client.pragma('synchronous = NORMAL');
_client.pragma('cache_size = -65536');
_client.pragma('temp_store = MEMORY');
_client.pragma('mmap_size = 268435456');
_client.pragma('wal_autocheckpoint = 1000');
applySqlitePragmas(_client);
loadSqliteVec(_client);
}
return _client;

View File

@@ -0,0 +1,14 @@
// Shared SQLite connection tuning: every connection (HTTP server, migration
// runner, embed worker) applies the same pragma set through applySqlitePragmas.
import type Database from 'better-sqlite3';
// How long a connection waits for a competing write lock before raising
// SQLITE_BUSY. Raised from the earlier 5 s so reads/HTTP requests survive
// long write transactions held by the indexing pipeline.
export const SQLITE_BUSY_TIMEOUT_MS = 30000;
/**
 * Applies the project-wide pragma configuration to a better-sqlite3
 * connection. Pragmas take effect immediately on the given handle.
 *
 * @param db - An open better-sqlite3 database connection.
 */
export function applySqlitePragmas(db: Database.Database): void {
// Write-ahead logging: readers do not block the single writer.
db.pragma('journal_mode = WAL');
// Enforce foreign key constraints (disabled by default in SQLite).
db.pragma('foreign_keys = ON');
// Wait for the lock instead of failing immediately with SQLITE_BUSY.
db.pragma(`busy_timeout = ${SQLITE_BUSY_TIMEOUT_MS}`);
// NORMAL is durable enough under WAL and avoids an fsync per transaction.
db.pragma('synchronous = NORMAL');
// Negative value = size in KiB: a 64 MiB page cache.
db.pragma('cache_size = -65536');
// Keep temporary tables and indices in memory.
db.pragma('temp_store = MEMORY');
// Memory-map up to 256 MiB of the database file for reads.
db.pragma('mmap_size = 268435456');
// Checkpoint the WAL after roughly 1000 pages.
db.pragma('wal_autocheckpoint = 1000');
}

View File

@@ -5,6 +5,7 @@ import { readFileSync } from 'node:fs';
import { fileURLToPath } from 'node:url';
import { join, dirname } from 'node:path';
import * as schema from './schema';
import { applySqlitePragmas } from './connection';
import { loadSqliteVec } from './sqlite-vec';
import { env } from '$env/dynamic/private';
@@ -12,19 +13,7 @@ if (!env.DATABASE_URL) throw new Error('DATABASE_URL is not set');
const client = new Database(env.DATABASE_URL);
// Enable WAL mode for better concurrent read performance.
client.pragma('journal_mode = WAL');
// Enforce foreign key constraints.
client.pragma('foreign_keys = ON');
// Wait up to 5 s when the DB is locked instead of failing immediately.
// Prevents SQLITE_BUSY errors when the indexing pipeline holds the write lock
// and an HTTP request arrives simultaneously.
client.pragma('busy_timeout = 5000');
client.pragma('synchronous = NORMAL');
client.pragma('cache_size = -65536');
client.pragma('temp_store = MEMORY');
client.pragma('mmap_size = 268435456');
client.pragma('wal_autocheckpoint = 1000');
applySqlitePragmas(client);
loadSqliteVec(client);
export const db = drizzle(client, { schema });

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@@ -1,55 +1,55 @@
{
"version": "7",
"dialect": "sqlite",
"entries": [
{
"idx": 0,
"version": "6",
"when": 1774196053634,
"tag": "0000_large_master_chief",
"breakpoints": true
},
{
"idx": 1,
"version": "6",
"when": 1774448049161,
"tag": "0001_quick_nighthawk",
"breakpoints": true
},
{
"idx": 2,
"version": "6",
"when": 1774461897742,
"tag": "0002_silky_stellaris",
"breakpoints": true
},
{
"idx": 3,
"version": "6",
"when": 1743155877000,
"tag": "0003_multiversion_config",
"breakpoints": true
},
{
"idx": 4,
"version": "6",
"when": 1774880275833,
"tag": "0004_complete_sentry",
"breakpoints": true
},
{
"idx": 5,
"version": "6",
"when": 1774890536284,
"tag": "0005_fix_stage_defaults",
"breakpoints": true
},
{
"idx": 6,
"version": "6",
"when": 1775038799913,
"tag": "0006_yielding_centennial",
"breakpoints": true
}
]
}
"version": "7",
"dialect": "sqlite",
"entries": [
{
"idx": 0,
"version": "6",
"when": 1774196053634,
"tag": "0000_large_master_chief",
"breakpoints": true
},
{
"idx": 1,
"version": "6",
"when": 1774448049161,
"tag": "0001_quick_nighthawk",
"breakpoints": true
},
{
"idx": 2,
"version": "6",
"when": 1774461897742,
"tag": "0002_silky_stellaris",
"breakpoints": true
},
{
"idx": 3,
"version": "6",
"when": 1743155877000,
"tag": "0003_multiversion_config",
"breakpoints": true
},
{
"idx": 4,
"version": "6",
"when": 1774880275833,
"tag": "0004_complete_sentry",
"breakpoints": true
},
{
"idx": 5,
"version": "6",
"when": 1774890536284,
"tag": "0005_fix_stage_defaults",
"breakpoints": true
},
{
"idx": 6,
"version": "6",
"when": 1775038799913,
"tag": "0006_yielding_centennial",
"breakpoints": true
}
]
}

View File

@@ -349,14 +349,14 @@ describe('snippet_embeddings table', () => {
});
it('keeps the relational schema free of vec_embedding and retains the profile index', () => {
const columns = client
.prepare("PRAGMA table_info('snippet_embeddings')")
.all() as Array<{ name: string }>;
const columns = client.prepare("PRAGMA table_info('snippet_embeddings')").all() as Array<{
name: string;
}>;
expect(columns.map((column) => column.name)).not.toContain('vec_embedding');
const indexes = client
.prepare("PRAGMA index_list('snippet_embeddings')")
.all() as Array<{ name: string }>;
const indexes = client.prepare("PRAGMA index_list('snippet_embeddings')").all() as Array<{
name: string;
}>;
expect(indexes.map((index) => index.name)).toContain('idx_embeddings_profile');
});

View File

@@ -13,29 +13,33 @@ import {
// ---------------------------------------------------------------------------
// repositories
// ---------------------------------------------------------------------------
export const repositories = sqliteTable('repositories', {
id: text('id').primaryKey(), // e.g. "/facebook/react" or "/local/my-sdk"
title: text('title').notNull(),
description: text('description'),
source: text('source', { enum: ['github', 'local'] }).notNull(),
sourceUrl: text('source_url').notNull(), // GitHub URL or absolute local path
branch: text('branch').default('main'),
state: text('state', {
enum: ['pending', 'indexing', 'indexed', 'error']
})
.notNull()
.default('pending'),
totalSnippets: integer('total_snippets').default(0),
totalTokens: integer('total_tokens').default(0),
trustScore: real('trust_score').default(0), // 0.0–10.0
benchmarkScore: real('benchmark_score').default(0), // 0.0–100.0; reserved for future quality metrics
stars: integer('stars'),
// TODO: encrypt at rest in production; stored as plaintext for v1
githubToken: text('github_token'),
lastIndexedAt: integer('last_indexed_at', { mode: 'timestamp' }),
createdAt: integer('created_at', { mode: 'timestamp' }).notNull(),
updatedAt: integer('updated_at', { mode: 'timestamp' }).notNull()
}, (t) => [index('idx_repositories_state').on(t.state)]);
export const repositories = sqliteTable(
'repositories',
{
id: text('id').primaryKey(), // e.g. "/facebook/react" or "/local/my-sdk"
title: text('title').notNull(),
description: text('description'),
source: text('source', { enum: ['github', 'local'] }).notNull(),
sourceUrl: text('source_url').notNull(), // GitHub URL or absolute local path
branch: text('branch').default('main'),
state: text('state', {
enum: ['pending', 'indexing', 'indexed', 'error']
})
.notNull()
.default('pending'),
totalSnippets: integer('total_snippets').default(0),
totalTokens: integer('total_tokens').default(0),
trustScore: real('trust_score').default(0), // 0.0–10.0
benchmarkScore: real('benchmark_score').default(0), // 0.0–100.0; reserved for future quality metrics
stars: integer('stars'),
// TODO: encrypt at rest in production; stored as plaintext for v1
githubToken: text('github_token'),
lastIndexedAt: integer('last_indexed_at', { mode: 'timestamp' }),
createdAt: integer('created_at', { mode: 'timestamp' }).notNull(),
updatedAt: integer('updated_at', { mode: 'timestamp' }).notNull()
},
(t) => [index('idx_repositories_state').on(t.state)]
);
// ---------------------------------------------------------------------------
// repository_versions
@@ -61,43 +65,51 @@ export const repositoryVersions = sqliteTable('repository_versions', {
// ---------------------------------------------------------------------------
// documents
// ---------------------------------------------------------------------------
export const documents = sqliteTable('documents', {
id: text('id').primaryKey(), // UUID
repositoryId: text('repository_id')
.notNull()
.references(() => repositories.id, { onDelete: 'cascade' }),
versionId: text('version_id').references(() => repositoryVersions.id, { onDelete: 'cascade' }),
filePath: text('file_path').notNull(), // relative path within repo
title: text('title'),
language: text('language'), // e.g. "typescript", "markdown"
tokenCount: integer('token_count').default(0),
checksum: text('checksum').notNull(), // SHA-256 of file content
indexedAt: integer('indexed_at', { mode: 'timestamp' }).notNull()
}, (t) => [index('idx_documents_repo_version').on(t.repositoryId, t.versionId)]);
export const documents = sqliteTable(
'documents',
{
id: text('id').primaryKey(), // UUID
repositoryId: text('repository_id')
.notNull()
.references(() => repositories.id, { onDelete: 'cascade' }),
versionId: text('version_id').references(() => repositoryVersions.id, { onDelete: 'cascade' }),
filePath: text('file_path').notNull(), // relative path within repo
title: text('title'),
language: text('language'), // e.g. "typescript", "markdown"
tokenCount: integer('token_count').default(0),
checksum: text('checksum').notNull(), // SHA-256 of file content
indexedAt: integer('indexed_at', { mode: 'timestamp' }).notNull()
},
(t) => [index('idx_documents_repo_version').on(t.repositoryId, t.versionId)]
);
// ---------------------------------------------------------------------------
// snippets
// ---------------------------------------------------------------------------
export const snippets = sqliteTable('snippets', {
id: text('id').primaryKey(), // UUID
documentId: text('document_id')
.notNull()
.references(() => documents.id, { onDelete: 'cascade' }),
repositoryId: text('repository_id')
.notNull()
.references(() => repositories.id, { onDelete: 'cascade' }),
versionId: text('version_id').references(() => repositoryVersions.id, { onDelete: 'cascade' }),
type: text('type', { enum: ['code', 'info'] }).notNull(),
title: text('title'),
content: text('content').notNull(), // searchable text / code
language: text('language'),
breadcrumb: text('breadcrumb'), // e.g. "Installation > Getting Started"
tokenCount: integer('token_count').default(0),
createdAt: integer('created_at', { mode: 'timestamp' }).notNull()
}, (t) => [
index('idx_snippets_repo_version').on(t.repositoryId, t.versionId),
index('idx_snippets_repo_type').on(t.repositoryId, t.type),
]);
export const snippets = sqliteTable(
'snippets',
{
id: text('id').primaryKey(), // UUID
documentId: text('document_id')
.notNull()
.references(() => documents.id, { onDelete: 'cascade' }),
repositoryId: text('repository_id')
.notNull()
.references(() => repositories.id, { onDelete: 'cascade' }),
versionId: text('version_id').references(() => repositoryVersions.id, { onDelete: 'cascade' }),
type: text('type', { enum: ['code', 'info'] }).notNull(),
title: text('title'),
content: text('content').notNull(), // searchable text / code
language: text('language'),
breadcrumb: text('breadcrumb'), // e.g. "Installation > Getting Started"
tokenCount: integer('token_count').default(0),
createdAt: integer('created_at', { mode: 'timestamp' }).notNull()
},
(t) => [
index('idx_snippets_repo_version').on(t.repositoryId, t.versionId),
index('idx_snippets_repo_type').on(t.repositoryId, t.type)
]
);
// ---------------------------------------------------------------------------
// embedding_profiles
@@ -134,34 +146,52 @@ export const snippetEmbeddings = sqliteTable(
},
(table) => [
primaryKey({ columns: [table.snippetId, table.profileId] }),
index('idx_embeddings_profile').on(table.profileId, table.snippetId),
index('idx_embeddings_profile').on(table.profileId, table.snippetId)
]
);
// ---------------------------------------------------------------------------
// indexing_jobs
// ---------------------------------------------------------------------------
export const indexingJobs = sqliteTable('indexing_jobs', {
id: text('id').primaryKey(), // UUID
repositoryId: text('repository_id')
.notNull()
.references(() => repositories.id, { onDelete: 'cascade' }),
versionId: text('version_id'),
status: text('status', {
enum: ['queued', 'running', 'paused', 'cancelled', 'done', 'failed']
})
.notNull()
.default('queued'),
progress: integer('progress').default(0), // 0–100
totalFiles: integer('total_files').default(0),
processedFiles: integer('processed_files').default(0),
stage: text('stage', { enum: ['queued', 'differential', 'crawling', 'cloning', 'parsing', 'storing', 'embedding', 'done', 'failed'] }).notNull().default('queued'),
stageDetail: text('stage_detail'),
error: text('error'),
startedAt: integer('started_at', { mode: 'timestamp' }),
completedAt: integer('completed_at', { mode: 'timestamp' }),
createdAt: integer('created_at', { mode: 'timestamp' }).notNull()
}, (t) => [index('idx_jobs_repo_status').on(t.repositoryId, t.status)]);
export const indexingJobs = sqliteTable(
'indexing_jobs',
{
id: text('id').primaryKey(), // UUID
repositoryId: text('repository_id')
.notNull()
.references(() => repositories.id, { onDelete: 'cascade' }),
versionId: text('version_id'),
status: text('status', {
enum: ['queued', 'running', 'paused', 'cancelled', 'done', 'failed']
})
.notNull()
.default('queued'),
progress: integer('progress').default(0), // 0–100
totalFiles: integer('total_files').default(0),
processedFiles: integer('processed_files').default(0),
stage: text('stage', {
enum: [
'queued',
'differential',
'crawling',
'cloning',
'parsing',
'storing',
'embedding',
'done',
'failed'
]
})
.notNull()
.default('queued'),
stageDetail: text('stage_detail'),
error: text('error'),
startedAt: integer('started_at', { mode: 'timestamp' }),
completedAt: integer('completed_at', { mode: 'timestamp' }),
createdAt: integer('created_at', { mode: 'timestamp' }).notNull()
},
(t) => [index('idx_jobs_repo_status').on(t.repositoryId, t.status)]
);
// ---------------------------------------------------------------------------
// repository_configs

View File

@@ -46,4 +46,4 @@ export function loadSqliteVec(db: Database.Database): void {
sqliteVec.load(db);
loadedConnections.add(db);
}
}

View File

@@ -12,11 +12,7 @@ import { migrate } from 'drizzle-orm/better-sqlite3/migrator';
import { readFileSync } from 'node:fs';
import { join } from 'node:path';
import * as schema from '../db/schema.js';
import {
loadSqliteVec,
sqliteVecRowidTableName,
sqliteVecTableName
} from '../db/sqlite-vec.js';
import { loadSqliteVec, sqliteVecRowidTableName, sqliteVecTableName } from '../db/sqlite-vec.js';
import { SqliteVecStore } from '../search/sqlite-vec.store.js';
import { NoopEmbeddingProvider, EmbeddingError, type EmbeddingVector } from './provider.js';
@@ -424,6 +420,25 @@ describe('EmbeddingService', () => {
expect(embedding![2]).toBeCloseTo(0.2, 5);
});
it('can delegate embedding persistence to an injected writer', async () => {
const snippetId = seedSnippet(db, client);
const provider = makeProvider(4);
const persistEmbeddings = vi.fn().mockResolvedValue(undefined);
const service = new EmbeddingService(client, provider, 'local-default', {
persistEmbeddings
});
await service.embedSnippets([snippetId]);
expect(persistEmbeddings).toHaveBeenCalledTimes(1);
const rows = client
.prepare(
'SELECT COUNT(*) AS cnt FROM snippet_embeddings WHERE snippet_id = ? AND profile_id = ?'
)
.get(snippetId, 'local-default') as { cnt: number };
expect(rows.cnt).toBe(0);
});
it('stores embeddings under the configured profile ID', async () => {
client
.prepare(
@@ -431,16 +446,7 @@ describe('EmbeddingService', () => {
(id, provider_kind, title, enabled, is_default, model, dimensions, config, created_at, updated_at)
VALUES (?, ?, ?, ?, ?, ?, ?, ?, unixepoch(), unixepoch())`
)
.run(
'openai-custom',
'openai-compatible',
'OpenAI Custom',
1,
0,
'test-model',
4,
'{}'
);
.run('openai-custom', 'openai-compatible', 'OpenAI Custom', 1, 0, 'test-model', 4, '{}');
const snippetId = seedSnippet(db, client);
const provider = makeProvider(4, 'test-model');

View File

@@ -6,6 +6,10 @@
import type Database from 'better-sqlite3';
import type { EmbeddingProvider } from './provider.js';
import { SqliteVecStore } from '$lib/server/search/sqlite-vec.store.js';
import {
upsertEmbeddings,
type PersistedEmbedding
} from '$lib/server/pipeline/write-operations.js';
interface SnippetRow {
id: string;
@@ -23,7 +27,10 @@ export class EmbeddingService {
constructor(
private readonly db: Database.Database,
private readonly provider: EmbeddingProvider,
private readonly profileId: string = 'local-default'
private readonly profileId: string = 'local-default',
private readonly persistenceDelegate?: {
persistEmbeddings?: (embeddings: PersistedEmbedding[]) => Promise<void>;
}
) {
this.sqliteVecStore = new SqliteVecStore(db);
}
@@ -94,37 +101,31 @@ export class EmbeddingService {
[s.title, s.breadcrumb, s.content].filter(Boolean).join('\n').slice(0, TEXT_MAX_CHARS)
);
const insert = this.db.prepare<[string, string, string, number, Buffer]>(`
INSERT OR REPLACE INTO snippet_embeddings (snippet_id, profile_id, model, dimensions, embedding, created_at)
VALUES (?, ?, ?, ?, ?, unixepoch())
`);
for (let i = 0; i < snippets.length; i += BATCH_SIZE) {
const batchSnippets = snippets.slice(i, i + BATCH_SIZE);
const batchTexts = texts.slice(i, i + BATCH_SIZE);
const embeddings = await this.provider.embed(batchTexts);
const insertMany = this.db.transaction(() => {
for (let j = 0; j < batchSnippets.length; j++) {
const snippet = batchSnippets[j];
const embedding = embeddings[j];
insert.run(
snippet.id,
this.profileId,
embedding.model,
embedding.dimensions,
Buffer.from(
embedding.values.buffer,
embedding.values.byteOffset,
embedding.values.byteLength
)
);
this.sqliteVecStore.upsertEmbedding(this.profileId, snippet.id, embedding.values);
}
const persistedEmbeddings: PersistedEmbedding[] = batchSnippets.map((snippet, index) => {
const embedding = embeddings[index];
return {
snippetId: snippet.id,
profileId: this.profileId,
model: embedding.model,
dimensions: embedding.dimensions,
embedding: Buffer.from(
embedding.values.buffer,
embedding.values.byteOffset,
embedding.values.byteLength
)
};
});
insertMany();
if (this.persistenceDelegate?.persistEmbeddings) {
await this.persistenceDelegate.persistEmbeddings(persistedEmbeddings);
} else {
upsertEmbeddings(this.db, persistedEmbeddings);
}
onProgress?.(Math.min(i + BATCH_SIZE, snippets.length), snippets.length);
}

View File

@@ -1,7 +1,4 @@
import {
EmbeddingProfile,
EmbeddingProfileEntity
} from '$lib/server/models/embedding-profile.js';
import { EmbeddingProfile, EmbeddingProfileEntity } from '$lib/server/models/embedding-profile.js';
function parseConfig(config: Record<string, unknown> | string | null): Record<string, unknown> {
if (!config) {
@@ -35,4 +32,4 @@ export class EmbeddingProfileMapper {
updatedAt: entity.updated_at
});
}
}
}

View File

@@ -68,4 +68,4 @@ export class EmbeddingSettingsDtoMapper {
activeProfile
};
}
}
}

View File

@@ -74,4 +74,4 @@ export class EmbeddingProfile {
this.createdAt = props.createdAt;
this.updatedAt = props.updatedAt;
}
}
}

View File

@@ -17,4 +17,4 @@ export class EmbeddingSettings {
get activeProfileId(): string | null {
return this.activeProfile?.id ?? null;
}
}
}

View File

@@ -44,7 +44,10 @@ function createTestDb(): Database.Database {
'0004_complete_sentry.sql'
]) {
const sql = readFileSync(join(migrationsFolder, migrationFile), 'utf-8');
for (const stmt of sql.split('--> statement-breakpoint').map((s) => s.trim()).filter(Boolean)) {
for (const stmt of sql
.split('--> statement-breakpoint')
.map((s) => s.trim())
.filter(Boolean)) {
client.exec(stmt);
}
}
@@ -113,9 +116,10 @@ function insertDocument(db: Database.Database, versionId: string, filePath: stri
.run(
id,
db
.prepare<[string], { repository_id: string }>(
`SELECT repository_id FROM repository_versions WHERE id = ?`
)
.prepare<
[string],
{ repository_id: string }
>(`SELECT repository_id FROM repository_versions WHERE id = ?`)
.get(versionId)?.repository_id ?? '/test/repo',
versionId,
filePath,
@@ -280,9 +284,9 @@ describe('buildDifferentialPlan', () => {
insertDocument(db, v1Id, 'packages/react/index.js');
insertDocument(db, v1Id, 'packages/react-dom/index.js');
const fetchFn = vi.fn().mockResolvedValue([
{ path: 'packages/react/index.js', status: 'modified' as const }
]);
const fetchFn = vi
.fn()
.mockResolvedValue([{ path: 'packages/react/index.js', status: 'modified' as const }]);
const plan = await buildDifferentialPlan({
repo,
@@ -292,13 +296,7 @@ describe('buildDifferentialPlan', () => {
});
expect(fetchFn).toHaveBeenCalledOnce();
expect(fetchFn).toHaveBeenCalledWith(
'facebook',
'react',
'v18.0.0',
'v18.1.0',
'ghp_test123'
);
expect(fetchFn).toHaveBeenCalledWith('facebook', 'react', 'v18.0.0', 'v18.1.0', 'ghp_test123');
expect(plan).not.toBeNull();
expect(plan!.changedPaths.has('packages/react/index.js')).toBe(true);

View File

@@ -41,9 +41,7 @@ export async function buildDifferentialPlan(params: {
try {
// 1. Load all indexed versions for this repository
const rows = db
.prepare(
`SELECT * FROM repository_versions WHERE repository_id = ? AND state = 'indexed'`
)
.prepare(`SELECT * FROM repository_versions WHERE repository_id = ? AND state = 'indexed'`)
.all(repo.id) as RepositoryVersionEntity[];
const indexedVersions: RepositoryVersion[] = rows.map((row) =>

View File

@@ -1,10 +1,19 @@
import { workerData, parentPort } from 'node:worker_threads';
import Database from 'better-sqlite3';
import { EmbeddingService } from '$lib/server/embeddings/embedding.service.js';
import { applySqlitePragmas } from '$lib/server/db/connection.js';
import { createProviderFromProfile } from '$lib/server/embeddings/registry.js';
import { EmbeddingProfileMapper } from '$lib/server/mappers/embedding-profile.mapper.js';
import { EmbeddingProfileEntity, type EmbeddingProfileEntityProps } from '$lib/server/models/embedding-profile.js';
import type { EmbedWorkerRequest, EmbedWorkerResponse, WorkerInitData } from './worker-types.js';
import {
EmbeddingProfileEntity,
type EmbeddingProfileEntityProps
} from '$lib/server/models/embedding-profile.js';
import type {
EmbedWorkerRequest,
EmbedWorkerResponse,
SerializedEmbedding,
WorkerInitData
} from './worker-types.js';
const { dbPath, embeddingProfileId } = workerData as WorkerInitData;
@@ -18,17 +27,12 @@ if (!embeddingProfileId) {
}
const db = new Database(dbPath);
db.pragma('journal_mode = WAL');
db.pragma('foreign_keys = ON');
db.pragma('busy_timeout = 5000');
db.pragma('synchronous = NORMAL');
db.pragma('cache_size = -65536');
db.pragma('temp_store = MEMORY');
db.pragma('mmap_size = 268435456');
db.pragma('wal_autocheckpoint = 1000');
applySqlitePragmas(db);
// Load the embedding profile from DB
const rawProfile = db.prepare('SELECT * FROM embedding_profiles WHERE id = ?').get(embeddingProfileId);
const rawProfile = db
.prepare('SELECT * FROM embedding_profiles WHERE id = ?')
.get(embeddingProfileId);
if (!rawProfile) {
db.close();
@@ -43,9 +47,55 @@ if (!rawProfile) {
const profileEntity = new EmbeddingProfileEntity(rawProfile as EmbeddingProfileEntityProps);
const profile = EmbeddingProfileMapper.fromEntity(profileEntity);
let pendingWrite: {
jobId: string;
resolve: () => void;
reject: (error: Error) => void;
} | null = null;
let currentJobId: string | null = null;
/**
 * Posts a write request to the parent thread and returns a promise that
 * settles when the parent replies: resolved on `write_ack`, rejected on
 * `write_error`. At most one write may be outstanding; calling again while
 * one is pending rejects immediately without posting.
 */
function requestWrite(
  message: Extract<EmbedWorkerResponse, { type: 'write_embeddings' }>
): Promise<void> {
  if (pendingWrite !== null) {
    return Promise.reject(new Error(`write request already in flight for ${pendingWrite.jobId}`));
  }
  return new Promise<void>((settle, fail) => {
    // Both settle paths clear the in-flight slot before notifying the caller,
    // so a follow-up write can be issued from inside the continuation.
    const settleAndClear = (): void => {
      pendingWrite = null;
      settle();
    };
    const failAndClear = (error: Error): void => {
      pendingWrite = null;
      fail(error);
    };
    pendingWrite = { jobId: message.jobId, resolve: settleAndClear, reject: failAndClear };
    parentPort!.postMessage(message);
  });
}
// Create provider and embedding service
const provider = createProviderFromProfile(profile);
const embeddingService = new EmbeddingService(db, provider, embeddingProfileId);
const embeddingService = new EmbeddingService(db, provider, embeddingProfileId, {
persistEmbeddings: async (embeddings) => {
const serializedEmbeddings: SerializedEmbedding[] = embeddings.map((item) => ({
snippetId: item.snippetId,
profileId: item.profileId,
model: item.model,
dimensions: item.dimensions,
embedding: Uint8Array.from(item.embedding)
}));
await requestWrite({
type: 'write_embeddings',
jobId: currentJobId ?? 'unknown',
embeddings: serializedEmbeddings
});
}
});
// Signal ready after service initialization
parentPort!.postMessage({
@@ -53,12 +103,27 @@ parentPort!.postMessage({
} satisfies EmbedWorkerResponse);
parentPort!.on('message', async (msg: EmbedWorkerRequest) => {
if (msg.type === 'write_ack') {
if (pendingWrite?.jobId === msg.jobId) {
pendingWrite.resolve();
}
return;
}
if (msg.type === 'write_error') {
if (pendingWrite?.jobId === msg.jobId) {
pendingWrite.reject(new Error(msg.error));
}
return;
}
if (msg.type === 'shutdown') {
db.close();
process.exit(0);
}
if (msg.type === 'embed') {
currentJobId = msg.jobId;
try {
const snippetIds = embeddingService.findSnippetIdsMissingEmbeddings(
msg.repositoryId,
@@ -84,6 +149,8 @@ parentPort!.on('message', async (msg: EmbedWorkerRequest) => {
jobId: msg.jobId,
error: err instanceof Error ? err.message : String(err)
} satisfies EmbedWorkerResponse);
} finally {
currentJobId = null;
}
}
});

View File

@@ -466,12 +466,15 @@ describe('IndexingPipeline', () => {
const job1 = makeJob();
await pipeline.run(job1 as never);
const firstSnippetIds = (db.prepare(`SELECT id FROM snippets ORDER BY id`).all() as { id: string }[])
.map((row) => row.id);
const firstSnippetIds = (
db.prepare(`SELECT id FROM snippets ORDER BY id`).all() as { id: string }[]
).map((row) => row.id);
expect(firstSnippetIds.length).toBeGreaterThan(0);
const firstEmbeddingCount = (
db.prepare(`SELECT COUNT(*) as n FROM snippet_embeddings WHERE profile_id = 'local-default'`).get() as {
db
.prepare(`SELECT COUNT(*) as n FROM snippet_embeddings WHERE profile_id = 'local-default'`)
.get() as {
n: number;
}
).n;
@@ -483,11 +486,15 @@ describe('IndexingPipeline', () => {
const job2 = db.prepare(`SELECT * FROM indexing_jobs WHERE id = ?`).get(job2Id) as never;
await pipeline.run(job2);
const secondSnippetIds = (db.prepare(`SELECT id FROM snippets ORDER BY id`).all() as {
id: string;
}[]).map((row) => row.id);
const secondSnippetIds = (
db.prepare(`SELECT id FROM snippets ORDER BY id`).all() as {
id: string;
}[]
).map((row) => row.id);
const secondEmbeddingCount = (
db.prepare(`SELECT COUNT(*) as n FROM snippet_embeddings WHERE profile_id = 'local-default'`).get() as {
db
.prepare(`SELECT COUNT(*) as n FROM snippet_embeddings WHERE profile_id = 'local-default'`)
.get() as {
n: number;
}
).n;
@@ -918,9 +925,9 @@ describe('IndexingPipeline', () => {
await pipeline.run(job as never);
const docs = db
.prepare(`SELECT file_path FROM documents ORDER BY file_path`)
.all() as { file_path: string }[];
const docs = db.prepare(`SELECT file_path FROM documents ORDER BY file_path`).all() as {
file_path: string;
}[];
const filePaths = docs.map((d) => d.file_path);
// migration-guide.md and docs/legacy-api.md must be absent.
@@ -956,7 +963,10 @@ describe('IndexingPipeline', () => {
expect(row).toBeDefined();
const rules = JSON.parse(row!.rules);
expect(rules).toEqual(['Always use TypeScript strict mode', 'Prefer async/await over callbacks']);
expect(rules).toEqual([
'Always use TypeScript strict mode',
'Prefer async/await over callbacks'
]);
});
it('persists version-specific rules under (repositoryId, versionId) when job has versionId', async () => {
@@ -1219,12 +1229,7 @@ describe('differential indexing', () => {
insertSnippet(db, doc1Id, { repository_id: '/test/repo', version_id: ancestorVersionId });
insertSnippet(db, doc2Id, { repository_id: '/test/repo', version_id: ancestorVersionId });
const pipeline = new IndexingPipeline(
db,
vi.fn() as never,
{ crawl: vi.fn() } as never,
null
);
const pipeline = new IndexingPipeline(db, vi.fn() as never, { crawl: vi.fn() } as never, null);
(pipeline as unknown as PipelineInternals).cloneFromAncestor(
ancestorVersionId,
targetVersionId,
@@ -1236,9 +1241,7 @@ describe('differential indexing', () => {
.prepare(`SELECT * FROM documents WHERE version_id = ?`)
.all(targetVersionId) as { id: string; file_path: string }[];
expect(targetDocs).toHaveLength(2);
expect(targetDocs.map((d) => d.file_path).sort()).toEqual(
['README.md', 'src/index.ts'].sort()
);
expect(targetDocs.map((d) => d.file_path).sort()).toEqual(['README.md', 'src/index.ts'].sort());
// New IDs must differ from ancestor doc IDs.
const targetDocIds = targetDocs.map((d) => d.id);
expect(targetDocIds).not.toContain(doc1Id);
@@ -1261,12 +1264,7 @@ describe('differential indexing', () => {
checksum: 'sha-main'
});
const pipeline = new IndexingPipeline(
db,
vi.fn() as never,
{ crawl: vi.fn() } as never,
null
);
const pipeline = new IndexingPipeline(db, vi.fn() as never, { crawl: vi.fn() } as never, null);
(pipeline as unknown as PipelineInternals).cloneFromAncestor(
ancestorVersionId,
targetVersionId,
@@ -1323,9 +1321,9 @@ describe('differential indexing', () => {
await pipeline.run(job);
const updatedJob = db
.prepare(`SELECT status FROM indexing_jobs WHERE id = ?`)
.get(jobId) as { status: string };
const updatedJob = db.prepare(`SELECT status FROM indexing_jobs WHERE id = ?`).get(jobId) as {
status: string;
};
expect(updatedJob.status).toBe('done');
const docs = db
@@ -1375,9 +1373,7 @@ describe('differential indexing', () => {
deletedPaths: new Set<string>(),
unchangedPaths: new Set(['unchanged.md'])
};
const spy = vi
.spyOn(diffStrategy, 'buildDifferentialPlan')
.mockResolvedValueOnce(mockPlan);
const spy = vi.spyOn(diffStrategy, 'buildDifferentialPlan').mockResolvedValueOnce(mockPlan);
const pipeline = new IndexingPipeline(
db,
@@ -1398,9 +1394,9 @@ describe('differential indexing', () => {
spy.mockRestore();
// 6. Assert job completed and both docs exist under the target version.
const finalJob = db
.prepare(`SELECT status FROM indexing_jobs WHERE id = ?`)
.get(jobId) as { status: string };
const finalJob = db.prepare(`SELECT status FROM indexing_jobs WHERE id = ?`).get(jobId) as {
status: string;
};
expect(finalJob.status).toBe('done');
const targetDocs = db

View File

@@ -28,6 +28,14 @@ import { parseFile } from '$lib/server/parser/index.js';
import { computeTrustScore } from '$lib/server/search/trust-score.js';
import { computeDiff } from './diff.js';
import { buildDifferentialPlan, type DifferentialPlan } from './differential-strategy.js';
import {
cloneFromAncestor as cloneFromAncestorInDatabase,
replaceSnippets as replaceSnippetsInDatabase,
updateRepo as updateRepoInDatabase,
updateVersion as updateVersionInDatabase,
type CloneFromAncestorRequest
} from './write-operations.js';
import type { SerializedFields } from './worker-types.js';
// ---------------------------------------------------------------------------
// Progress calculation
@@ -70,7 +78,23 @@ export class IndexingPipeline {
private readonly db: Database.Database,
private readonly githubCrawl: typeof GithubCrawlFn,
private readonly localCrawler: LocalCrawler,
private readonly embeddingService: EmbeddingService | null
private readonly embeddingService: EmbeddingService | null,
private readonly writeDelegate?: {
persistJobUpdates?: boolean;
replaceSnippets?: (
changedDocIds: string[],
newDocuments: NewDocument[],
newSnippets: NewSnippet[]
) => Promise<void>;
cloneFromAncestor?: (request: CloneFromAncestorRequest) => Promise<void>;
updateRepo?: (repositoryId: string, fields: SerializedFields) => Promise<void>;
updateVersion?: (versionId: string, fields: SerializedFields) => Promise<void>;
upsertRepoConfig?: (
repositoryId: string,
versionId: string | null,
rules: string[]
) => Promise<void>;
}
) {
this.sqliteVecStore = new SqliteVecStore(db);
}
@@ -117,14 +141,12 @@ export class IndexingPipeline {
if (!repo) throw new Error(`Repository ${repositoryId} not found`);
// Mark repo as actively indexing.
this.updateRepo(repo.id, { state: 'indexing' });
await this.updateRepo(repo.id, { state: 'indexing' });
if (normJob.versionId) {
this.updateVersion(normJob.versionId, { state: 'indexing' });
await this.updateVersion(normJob.versionId, { state: 'indexing' });
}
const versionTag = normJob.versionId
? this.getVersionTag(normJob.versionId)
: undefined;
const versionTag = normJob.versionId ? this.getVersionTag(normJob.versionId) : undefined;
// ---- Stage 0: Differential strategy (TRUEREF-0021) ----------------------
// When indexing a tagged version, check if we can inherit unchanged files
@@ -147,12 +169,12 @@ export class IndexingPipeline {
// If a differential plan exists, clone unchanged files from ancestor.
if (differentialPlan && differentialPlan.unchangedPaths.size > 0) {
reportStage('cloning');
this.cloneFromAncestor(
differentialPlan.ancestorVersionId,
normJob.versionId!,
repo.id,
differentialPlan.unchangedPaths
);
await this.cloneFromAncestor({
ancestorVersionId: differentialPlan.ancestorVersionId,
targetVersionId: normJob.versionId!,
repositoryId: repo.id,
unchangedPaths: [...differentialPlan.unchangedPaths]
});
console.info(
`[IndexingPipeline] Differential indexing: cloned ${differentialPlan.unchangedPaths.size} unchanged files from ${differentialPlan.ancestorTag}`
);
@@ -174,7 +196,11 @@ export class IndexingPipeline {
if (crawlResult.config) {
// Config was pre-parsed by the crawler — wrap it in a ParsedConfig
// shell so the rest of the pipeline can use it uniformly.
parsedConfig = { config: crawlResult.config, source: 'trueref.json', warnings: [] } satisfies ParsedConfig;
parsedConfig = {
config: crawlResult.config,
source: 'trueref.json',
warnings: []
} satisfies ParsedConfig;
} else {
const configFile = crawlResult.files.find(
(f) => f.path === 'trueref.json' || f.path === 'context7.json'
@@ -189,7 +215,10 @@ export class IndexingPipeline {
const filteredFiles =
excludeFiles.length > 0
? crawlResult.files.filter(
(f) => !excludeFiles.some((pattern) => IndexingPipeline.matchesExcludePattern(f.path, pattern))
(f) =>
!excludeFiles.some((pattern) =>
IndexingPipeline.matchesExcludePattern(f.path, pattern)
)
)
: crawlResult.files;
@@ -303,7 +332,13 @@ export class IndexingPipeline {
this.embeddingService !== null
);
this.updateJob(job.id, { processedFiles: totalProcessed, progress });
reportStage('parsing', `${totalProcessed} / ${totalFiles} files`, progress, totalProcessed, totalFiles);
reportStage(
'parsing',
`${totalProcessed} / ${totalFiles} files`,
progress,
totalProcessed,
totalFiles
);
}
}
@@ -312,7 +347,7 @@ export class IndexingPipeline {
// ---- Stage 3: Atomic replacement ------------------------------------
reportStage('storing');
this.replaceSnippets(repo.id, changedDocIds, newDocuments, newSnippets);
await this.replaceSnippets(repo.id, changedDocIds, newDocuments, newSnippets);
// ---- Stage 4: Embeddings (if provider is configured) ----------------
if (this.embeddingService) {
@@ -325,7 +360,7 @@ export class IndexingPipeline {
if (snippetIds.length === 0) {
// No missing embeddings for the active profile; parsing progress is final.
} else {
const embeddingsTotal = snippetIds.length;
const embeddingsTotal = snippetIds.length;
await this.embeddingService.embedSnippets(snippetIds, (done) => {
const progress = calculateProgress(
@@ -350,7 +385,7 @@ export class IndexingPipeline {
state: 'indexed'
});
this.updateRepo(repo.id, {
await this.updateRepo(repo.id, {
state: 'indexed',
totalSnippets: stats.totalSnippets,
totalTokens: stats.totalTokens,
@@ -360,7 +395,7 @@ export class IndexingPipeline {
if (normJob.versionId) {
const versionStats = this.computeVersionStats(normJob.versionId);
this.updateVersion(normJob.versionId, {
await this.updateVersion(normJob.versionId, {
state: 'indexed',
totalSnippets: versionStats.totalSnippets,
indexedAt: Math.floor(Date.now() / 1000)
@@ -371,12 +406,12 @@ export class IndexingPipeline {
if (parsedConfig?.config.rules?.length) {
if (!normJob.versionId) {
// Main-branch job: write the repo-wide entry only.
this.upsertRepoConfig(repo.id, null, parsedConfig.config.rules);
await this.upsertRepoConfig(repo.id, null, parsedConfig.config.rules);
} else {
// Version job: write only the version-specific entry.
// Writing to the NULL row here would overwrite repo-wide rules
// with whatever the last-indexed version happened to carry.
this.upsertRepoConfig(repo.id, normJob.versionId, parsedConfig.config.rules);
await this.upsertRepoConfig(repo.id, normJob.versionId, parsedConfig.config.rules);
}
}
@@ -398,9 +433,9 @@ export class IndexingPipeline {
});
// Restore repo to error state but preserve any existing indexed data.
this.updateRepo(repositoryId, { state: 'error' });
await this.updateRepo(repositoryId, { state: 'error' });
if (normJob.versionId) {
this.updateVersion(normJob.versionId, { state: 'error' });
await this.updateVersion(normJob.versionId, { state: 'error' });
}
throw error;
@@ -411,7 +446,11 @@ export class IndexingPipeline {
// Private — crawl
// -------------------------------------------------------------------------
private async crawl(repo: Repository, ref?: string, allowedPaths?: Set<string>): Promise<{
private async crawl(
repo: Repository,
ref?: string,
allowedPaths?: Set<string>
): Promise<{
files: Array<{ path: string; content: string; sha: string; size: number; language: string }>;
totalFiles: number;
/** Pre-parsed trueref.json / context7.json, or undefined when absent. */
@@ -473,219 +512,50 @@ export class IndexingPipeline {
*
* Runs in a single SQLite transaction for atomicity.
*/
private cloneFromAncestor(
ancestorVersionId: string,
targetVersionId: string,
repositoryId: string,
unchangedPaths: Set<string>
): void {
this.db.transaction(() => {
const pathList = [...unchangedPaths];
const placeholders = pathList.map(() => '?').join(',');
const ancestorDocs = this.db
.prepare(
`SELECT * FROM documents WHERE version_id = ? AND file_path IN (${placeholders})`
)
.all(ancestorVersionId, ...pathList) as Array<{
id: string;
repository_id: string;
file_path: string;
title: string | null;
language: string | null;
token_count: number;
checksum: string;
indexed_at: number;
}>;
private async cloneFromAncestor(
requestOrAncestorVersionId: CloneFromAncestorRequest | string,
targetVersionId?: string,
repositoryId?: string,
unchangedPaths?: Set<string>
): Promise<void> {
const request: CloneFromAncestorRequest =
typeof requestOrAncestorVersionId === 'string'
? {
ancestorVersionId: requestOrAncestorVersionId,
targetVersionId: targetVersionId!,
repositoryId: repositoryId!,
unchangedPaths: [...(unchangedPaths ?? new Set<string>())]
}
: requestOrAncestorVersionId;
const docIdMap = new Map<string, string>();
const nowEpoch = Math.floor(Date.now() / 1000);
if (request.unchangedPaths.length === 0) {
return;
}
for (const doc of ancestorDocs) {
const newDocId = randomUUID();
docIdMap.set(doc.id, newDocId);
this.db
.prepare(
`INSERT INTO documents (id, repository_id, version_id, file_path, title, language, token_count, checksum, indexed_at)
VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)`
)
.run(
newDocId,
repositoryId,
targetVersionId,
doc.file_path,
doc.title,
doc.language,
doc.token_count,
doc.checksum,
nowEpoch
);
}
if (this.writeDelegate?.cloneFromAncestor) {
await this.writeDelegate.cloneFromAncestor(request);
return;
}
if (docIdMap.size === 0) return;
const oldDocIds = [...docIdMap.keys()];
const snippetPlaceholders = oldDocIds.map(() => '?').join(',');
const ancestorSnippets = this.db
.prepare(
`SELECT * FROM snippets WHERE document_id IN (${snippetPlaceholders})`
)
.all(...oldDocIds) as Array<{
id: string;
document_id: string;
repository_id: string;
version_id: string | null;
type: string;
title: string | null;
content: string;
language: string | null;
breadcrumb: string | null;
token_count: number;
created_at: number;
}>;
const snippetIdMap = new Map<string, string>();
for (const snippet of ancestorSnippets) {
const newSnippetId = randomUUID();
snippetIdMap.set(snippet.id, newSnippetId);
const newDocId = docIdMap.get(snippet.document_id)!;
this.db
.prepare(
`INSERT INTO snippets (id, document_id, repository_id, version_id, type, title, content, language, breadcrumb, token_count, created_at)
VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)`
)
.run(
newSnippetId,
newDocId,
repositoryId,
targetVersionId,
snippet.type,
snippet.title,
snippet.content,
snippet.language,
snippet.breadcrumb,
snippet.token_count,
snippet.created_at
);
}
if (snippetIdMap.size > 0) {
const oldSnippetIds = [...snippetIdMap.keys()];
const embPlaceholders = oldSnippetIds.map(() => '?').join(',');
const ancestorEmbeddings = this.db
.prepare(
`SELECT * FROM snippet_embeddings WHERE snippet_id IN (${embPlaceholders})`
)
.all(...oldSnippetIds) as Array<{
snippet_id: string;
profile_id: string;
model: string;
dimensions: number;
embedding: Buffer;
created_at: number;
}>;
for (const emb of ancestorEmbeddings) {
const newSnippetId = snippetIdMap.get(emb.snippet_id)!;
this.db
.prepare(
`INSERT INTO snippet_embeddings (snippet_id, profile_id, model, dimensions, embedding, created_at)
VALUES (?, ?, ?, ?, ?, ?)`
)
.run(
newSnippetId,
emb.profile_id,
emb.model,
emb.dimensions,
emb.embedding,
emb.created_at
);
this.sqliteVecStore.upsertEmbeddingBuffer(
emb.profile_id,
newSnippetId,
emb.embedding,
emb.dimensions
);
}
}
})();
cloneFromAncestorInDatabase(this.db, request);
}
// -------------------------------------------------------------------------
// Private — atomic snippet replacement
// -------------------------------------------------------------------------
private replaceSnippets(
private async replaceSnippets(
_repositoryId: string,
changedDocIds: string[],
newDocuments: NewDocument[],
newSnippets: NewSnippet[]
): void {
const insertDoc = this.db.prepare(
`INSERT INTO documents
(id, repository_id, version_id, file_path, title, language,
token_count, checksum, indexed_at)
VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)`
);
): Promise<void> {
if (this.writeDelegate?.replaceSnippets) {
await this.writeDelegate.replaceSnippets(changedDocIds, newDocuments, newSnippets);
return;
}
const insertSnippet = this.db.prepare(
`INSERT INTO snippets
(id, document_id, repository_id, version_id, type, title,
content, language, breadcrumb, token_count, created_at)
VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)`
);
this.db.transaction(() => {
this.sqliteVecStore.deleteEmbeddingsForDocumentIds(changedDocIds);
// Delete stale documents (cascade deletes their snippets via FK).
if (changedDocIds.length > 0) {
const placeholders = changedDocIds.map(() => '?').join(',');
this.db
.prepare(`DELETE FROM documents WHERE id IN (${placeholders})`)
.run(...changedDocIds);
}
// Insert new documents.
for (const doc of newDocuments) {
const indexedAtSeconds =
doc.indexedAt instanceof Date
? Math.floor(doc.indexedAt.getTime() / 1000)
: Math.floor(Date.now() / 1000);
insertDoc.run(
doc.id,
doc.repositoryId,
doc.versionId ?? null,
doc.filePath,
doc.title ?? null,
doc.language ?? null,
doc.tokenCount ?? 0,
doc.checksum,
indexedAtSeconds
);
}
// Insert new snippets.
for (const snippet of newSnippets) {
const createdAtSeconds =
snippet.createdAt instanceof Date
? Math.floor(snippet.createdAt.getTime() / 1000)
: Math.floor(Date.now() / 1000);
insertSnippet.run(
snippet.id,
snippet.documentId,
snippet.repositoryId,
snippet.versionId ?? null,
snippet.type,
snippet.title ?? null,
snippet.content,
snippet.language ?? null,
snippet.breadcrumb ?? null,
snippet.tokenCount ?? 0,
createdAtSeconds
);
}
})();
replaceSnippetsInDatabase(this.db, changedDocIds, newDocuments, newSnippets);
}
// -------------------------------------------------------------------------
@@ -709,9 +579,10 @@ export class IndexingPipeline {
private computeVersionStats(versionId: string): { totalSnippets: number } {
const row = this.db
.prepare<[string], { total_snippets: number }>(
`SELECT COUNT(*) as total_snippets FROM snippets WHERE version_id = ?`
)
.prepare<
[string],
{ total_snippets: number }
>(`SELECT COUNT(*) as total_snippets FROM snippets WHERE version_id = ?`)
.get(versionId);
return { totalSnippets: row?.total_snippets ?? 0 };
@@ -750,6 +621,10 @@ export class IndexingPipeline {
}
private updateJob(id: string, fields: Record<string, unknown>): void {
if (this.writeDelegate?.persistJobUpdates === false) {
return;
}
const sets = Object.keys(fields)
.map((k) => `${toSnake(k)} = ?`)
.join(', ');
@@ -757,43 +632,44 @@ export class IndexingPipeline {
this.db.prepare(`UPDATE indexing_jobs SET ${sets} WHERE id = ?`).run(...values);
}
private updateRepo(id: string, fields: Record<string, unknown>): void {
const now = Math.floor(Date.now() / 1000);
const allFields = { ...fields, updatedAt: now };
const sets = Object.keys(allFields)
.map((k) => `${toSnake(k)} = ?`)
.join(', ');
const values = [...Object.values(allFields), id];
this.db.prepare(`UPDATE repositories SET ${sets} WHERE id = ?`).run(...values);
private async updateRepo(id: string, fields: SerializedFields): Promise<void> {
if (this.writeDelegate?.updateRepo) {
await this.writeDelegate.updateRepo(id, fields);
return;
}
updateRepoInDatabase(this.db, id, fields);
}
private updateVersion(id: string, fields: Record<string, unknown>): void {
const sets = Object.keys(fields)
.map((k) => `${toSnake(k)} = ?`)
.join(', ');
const values = [...Object.values(fields), id];
this.db.prepare(`UPDATE repository_versions SET ${sets} WHERE id = ?`).run(...values);
private async updateVersion(id: string, fields: SerializedFields): Promise<void> {
if (this.writeDelegate?.updateVersion) {
await this.writeDelegate.updateVersion(id, fields);
return;
}
updateVersionInDatabase(this.db, id, fields);
}
private upsertRepoConfig(
private async upsertRepoConfig(
repositoryId: string,
versionId: string | null,
rules: string[]
): void {
): Promise<void> {
if (this.writeDelegate?.upsertRepoConfig) {
await this.writeDelegate.upsertRepoConfig(repositoryId, versionId, rules);
return;
}
const now = Math.floor(Date.now() / 1000);
// Use DELETE + INSERT because ON CONFLICT … DO UPDATE doesn't work reliably
// with partial unique indexes in all SQLite versions.
if (versionId === null) {
this.db
.prepare(
`DELETE FROM repository_configs WHERE repository_id = ? AND version_id IS NULL`
)
.prepare(`DELETE FROM repository_configs WHERE repository_id = ? AND version_id IS NULL`)
.run(repositoryId);
} else {
this.db
.prepare(
`DELETE FROM repository_configs WHERE repository_id = ? AND version_id = ?`
)
.prepare(`DELETE FROM repository_configs WHERE repository_id = ? AND version_id = ?`)
.run(repositoryId, versionId);
}
this.db

View File

@@ -36,10 +36,10 @@ function normalizeStatuses(status?: JobStatusFilter): Array<IndexingJob['status'
return [...new Set(statuses)];
}
function buildJobFilterQuery(options?: {
repositoryId?: string;
status?: JobStatusFilter;
}): { where: string; params: unknown[] } {
function buildJobFilterQuery(options?: { repositoryId?: string; status?: JobStatusFilter }): {
where: string;
params: unknown[];
} {
const conditions: string[] = [];
const params: unknown[] = [];
@@ -164,7 +164,9 @@ export class JobQueue {
*/
private async processNext(): Promise<void> {
// Fallback path: no worker pool configured, run directly (used by tests and dev mode)
console.warn('[JobQueue] Running in fallback mode (no worker pool) — direct pipeline execution.');
console.warn(
'[JobQueue] Running in fallback mode (no worker pool) — direct pipeline execution.'
);
const rawJob = this.db
.prepare<[], IndexingJobEntity>(
@@ -176,7 +178,9 @@ export class JobQueue {
if (!rawJob) return;
console.warn('[JobQueue] processNext: no pipeline or pool configured — skipping job processing');
console.warn(
'[JobQueue] processNext: no pipeline or pool configured — skipping job processing'
);
}
/**

View File

@@ -181,7 +181,9 @@ describe('ProgressBroadcaster', () => {
concurrency: 2,
active: 1,
idle: 1,
workers: [{ index: 0, state: 'running', jobId: 'job-1', repositoryId: '/repo/1', versionId: null }]
workers: [
{ index: 0, state: 'running', jobId: 'job-1', repositoryId: '/repo/1', versionId: null }
]
});
const { value } = await reader.read();

View File

@@ -19,6 +19,7 @@ import { WorkerPool } from './worker-pool.js';
import { initBroadcaster } from './progress-broadcaster.js';
import type { ProgressBroadcaster } from './progress-broadcaster.js';
import path from 'node:path';
import { existsSync } from 'node:fs';
import { fileURLToPath } from 'node:url';
// ---------------------------------------------------------------------------
@@ -57,6 +58,21 @@ let _pipeline: IndexingPipeline | null = null;
let _pool: WorkerPool | null = null;
let _broadcaster: ProgressBroadcaster | null = null;
/**
 * Resolve the on-disk location of a compiled worker script.
 *
 * Two locations are probed, in order: relative to the process working
 * directory (production / `node build`), then relative to this module's own
 * directory (dev server, where cwd may differ). The first candidate that
 * exists wins; when neither exists the cwd-relative path is returned so the
 * caller's own missing-file handling produces a meaningful path in errors.
 *
 * @param segments - path segments under the project root, e.g. 'build', 'workers', 'worker-entry.mjs'
 * @returns an absolute path to the worker script (not guaranteed to exist)
 */
function resolveWorkerScript(...segments: string[]): string {
  const fromCwd = path.resolve(process.cwd(), ...segments);
  const fromModule = path.resolve(
    path.dirname(fileURLToPath(import.meta.url)),
    '../../../../',
    ...segments
  );
  const existing = [fromCwd, fromModule].find((candidate) => existsSync(candidate));
  return existing ?? fromCwd;
}
/**
* Initialise (or return the existing) JobQueue + IndexingPipeline pair.
*
@@ -91,19 +107,17 @@ export function initializePipeline(
const getRepositoryIdForJob = (jobId: string): string => {
const row = db
.prepare<[string], { repository_id: string }>(
`SELECT repository_id FROM indexing_jobs WHERE id = ?`
)
.prepare<
[string],
{ repository_id: string }
>(`SELECT repository_id FROM indexing_jobs WHERE id = ?`)
.get(jobId);
return row?.repository_id ?? '';
};
// Resolve worker script paths relative to this file (build/workers/ directory)
const __filename = fileURLToPath(import.meta.url);
const __dirname = path.dirname(__filename);
const workerScript = path.join(__dirname, '../../../build/workers/worker-entry.mjs');
const embedWorkerScript = path.join(__dirname, '../../../build/workers/embed-worker-entry.mjs');
const writeWorkerScript = path.join(__dirname, '../../../build/workers/write-worker-entry.mjs');
const workerScript = resolveWorkerScript('build', 'workers', 'worker-entry.mjs');
const embedWorkerScript = resolveWorkerScript('build', 'workers', 'embed-worker-entry.mjs');
const writeWorkerScript = resolveWorkerScript('build', 'workers', 'write-worker-entry.mjs');
try {
_pool = new WorkerPool({
@@ -113,13 +127,6 @@ export function initializePipeline(
writeWorkerScript,
dbPath: options.dbPath,
onProgress: (jobId, msg) => {
// Update DB with progress
db.prepare(
`UPDATE indexing_jobs
SET stage = ?, stage_detail = ?, progress = ?, processed_files = ?, total_files = ?
WHERE id = ?`
).run(msg.stage, msg.stageDetail ?? null, msg.progress, msg.processedFiles, msg.totalFiles, jobId);
// Broadcast progress event
if (_broadcaster) {
_broadcaster.broadcast(jobId, getRepositoryIdForJob(jobId), 'job-progress', {
@@ -129,11 +136,6 @@ export function initializePipeline(
}
},
onJobDone: (jobId: string) => {
// Update job status to done
db.prepare(`UPDATE indexing_jobs SET status = 'done', completed_at = unixepoch() WHERE id = ?`).run(
jobId
);
// Broadcast done event
if (_broadcaster) {
_broadcaster.broadcast(jobId, getRepositoryIdForJob(jobId), 'job-done', {
@@ -143,11 +145,6 @@ export function initializePipeline(
}
},
onJobFailed: (jobId: string, error: string) => {
// Update job status to failed with error message
db.prepare(
`UPDATE indexing_jobs SET status = 'failed', error = ?, completed_at = unixepoch() WHERE id = ?`
).run(error, jobId);
// Broadcast failed event
if (_broadcaster) {
_broadcaster.broadcast(jobId, getRepositoryIdForJob(jobId), 'job-failed', {
@@ -231,5 +228,3 @@ export function _resetSingletons(): void {
_pool = null;
_broadcaster = null;
}

View File

@@ -5,24 +5,175 @@ import { crawl as githubCrawl } from '$lib/server/crawler/github.crawler.js';
import { LocalCrawler } from '$lib/server/crawler/local.crawler.js';
import { IndexingJobMapper } from '$lib/server/mappers/indexing-job.mapper.js';
import { IndexingJobEntity, type IndexingJobEntityProps } from '$lib/server/models/indexing-job.js';
import type { ParseWorkerRequest, ParseWorkerResponse, WorkerInitData } from './worker-types.js';
import { applySqlitePragmas } from '$lib/server/db/connection.js';
import type {
ParseWorkerRequest,
ParseWorkerResponse,
SerializedDocument,
SerializedSnippet,
WorkerInitData
} from './worker-types.js';
import type { IndexingStage } from '$lib/types.js';
const { dbPath } = workerData as WorkerInitData;
const db = new Database(dbPath);
db.pragma('journal_mode = WAL');
db.pragma('foreign_keys = ON');
db.pragma('busy_timeout = 5000');
db.pragma('synchronous = NORMAL');
db.pragma('cache_size = -65536');
db.pragma('temp_store = MEMORY');
db.pragma('mmap_size = 268435456');
db.pragma('wal_autocheckpoint = 1000');
applySqlitePragmas(db);
const pipeline = new IndexingPipeline(db, githubCrawl, new LocalCrawler(), null);
// The single write request currently awaiting a write_ack / write_error
// reply from the write worker. Only one write may be outstanding at a time:
// requestWrite() rejects immediately when this is non-null, and the message
// handler matches replies against pendingWrite.jobId before settling.
let pendingWrite: {
  jobId: string;
  resolve: () => void;
  reject: (error: Error) => void;
} | null = null;
/**
 * Serialize an in-memory document record for transfer to the write worker.
 *
 * Optional fields are normalized to explicit `null` (token count to `0`) and
 * the `indexedAt` Date is collapsed to unix-epoch seconds, so the payload is
 * plain structured-clone-safe data with a stable shape.
 */
function serializeDocument(document: {
  id: string;
  repositoryId: string;
  versionId?: string | null;
  filePath: string;
  title?: string | null;
  language?: string | null;
  tokenCount?: number | null;
  checksum: string;
  indexedAt: Date;
}): SerializedDocument {
  const { id, repositoryId, versionId, filePath, title, language, tokenCount, checksum, indexedAt } =
    document;
  return {
    id,
    repositoryId,
    versionId: versionId ?? null,
    filePath,
    title: title ?? null,
    language: language ?? null,
    tokenCount: tokenCount ?? 0,
    checksum,
    // Date -> whole epoch seconds, matching the schema's integer timestamps.
    indexedAt: Math.floor(indexedAt.getTime() / 1000)
  };
}
/**
 * Serialize an in-memory snippet record for transfer to the write worker.
 *
 * Mirrors {@link serializeDocument}: optional fields become explicit `null`
 * (token count `0`) and `createdAt` is collapsed to unix-epoch seconds so the
 * message is plain structured-clone-safe data.
 */
function serializeSnippet(snippet: {
  id: string;
  documentId: string;
  repositoryId: string;
  versionId?: string | null;
  type: 'code' | 'info';
  title?: string | null;
  content: string;
  language?: string | null;
  breadcrumb?: string | null;
  tokenCount?: number | null;
  createdAt: Date;
}): SerializedSnippet {
  const { id, documentId, repositoryId, type, content, createdAt } = snippet;
  return {
    id,
    documentId,
    repositoryId,
    versionId: snippet.versionId ?? null,
    type,
    title: snippet.title ?? null,
    content,
    language: snippet.language ?? null,
    breadcrumb: snippet.breadcrumb ?? null,
    tokenCount: snippet.tokenCount ?? 0,
    // Date -> whole epoch seconds, matching the schema's integer timestamps.
    createdAt: Math.floor(createdAt.getTime() / 1000)
  };
}
/**
 * Forward a serialized write request to the write worker and wait for its
 * acknowledgement.
 *
 * The returned promise settles when the parent-thread message handler
 * receives a matching `write_ack` (resolve) or `write_error` (reject) for
 * this jobId. Only one write may be in flight at a time because replies are
 * matched by jobId alone; a second concurrent call rejects immediately.
 *
 * @param message - a write_* ParseWorkerResponse payload to post upstream
 * @returns a promise that settles when the write worker replies
 */
function requestWrite(
  message: Extract<
    ParseWorkerResponse,
    {
      type:
        | 'write_replace'
        | 'write_clone'
        | 'write_repo_update'
        | 'write_version_update'
        | 'write_repo_config';
    }
  >
): Promise<void> {
  // Guard the single-flight invariant before touching any state.
  if (pendingWrite !== null) {
    return Promise.reject(new Error(`write request already in flight for ${pendingWrite.jobId}`));
  }
  return new Promise<void>((resolve, reject) => {
    const clear = () => {
      pendingWrite = null;
    };
    pendingWrite = {
      jobId: message.jobId,
      resolve: () => {
        clear();
        resolve();
      },
      reject: (error: Error) => {
        clear();
        reject(error);
      }
    };
    parentPort!.postMessage(message);
  });
}
// Pipeline instance for this parse worker. All destructive database writes
// are delegated upstream via requestWrite() instead of being executed on this
// worker's own connection, serializing writes through the dedicated write
// worker while parse workers run in parallel.
// NOTE(review): jobId falls back to 'unknown' when currentJobId is null, and
// write acks are matched by jobId — this assumes delegate callbacks only fire
// while a job is running (currentJobId set); confirm against the 'run'
// message handler below.
const pipeline = new IndexingPipeline(db, githubCrawl, new LocalCrawler(), null, {
  // Job-row progress/status updates are persisted by the parent process,
  // not by this worker.
  persistJobUpdates: false,
  replaceSnippets: async (changedDocIds, newDocuments, newSnippets) => {
    await requestWrite({
      type: 'write_replace',
      jobId: currentJobId ?? 'unknown',
      changedDocIds,
      // Serialize to plain structured-clone-safe payloads before posting.
      documents: newDocuments.map(serializeDocument),
      snippets: newSnippets.map(serializeSnippet)
    });
  },
  cloneFromAncestor: async (request) => {
    await requestWrite({
      type: 'write_clone',
      jobId: currentJobId ?? 'unknown',
      ancestorVersionId: request.ancestorVersionId,
      targetVersionId: request.targetVersionId,
      repositoryId: request.repositoryId,
      unchangedPaths: request.unchangedPaths
    });
  },
  updateRepo: async (repositoryId, fields) => {
    await requestWrite({
      type: 'write_repo_update',
      jobId: currentJobId ?? 'unknown',
      repositoryId,
      fields
    });
  },
  updateVersion: async (versionId, fields) => {
    await requestWrite({
      type: 'write_version_update',
      jobId: currentJobId ?? 'unknown',
      versionId,
      fields
    });
  },
  upsertRepoConfig: async (repositoryId, versionId, rules) => {
    await requestWrite({
      type: 'write_repo_config',
      jobId: currentJobId ?? 'unknown',
      repositoryId,
      versionId,
      rules
    });
  }
});
let currentJobId: string | null = null;
parentPort!.on('message', async (msg: ParseWorkerRequest) => {
if (msg.type === 'write_ack') {
if (pendingWrite?.jobId === msg.jobId) {
pendingWrite.resolve();
}
return;
}
if (msg.type === 'write_error') {
if (pendingWrite?.jobId === msg.jobId) {
pendingWrite.reject(new Error(msg.error));
}
return;
}
if (msg.type === 'shutdown') {
db.close();
process.exit(0);
@@ -35,11 +186,19 @@ parentPort!.on('message', async (msg: ParseWorkerRequest) => {
if (!rawJob) {
throw new Error(`Job ${msg.jobId} not found`);
}
const job = IndexingJobMapper.fromEntity(new IndexingJobEntity(rawJob as IndexingJobEntityProps));
const job = IndexingJobMapper.fromEntity(
new IndexingJobEntity(rawJob as IndexingJobEntityProps)
);
await pipeline.run(
job,
(stage: IndexingStage, detail?: string, progress?: number, processedFiles?: number, totalFiles?: number) => {
(
stage: IndexingStage,
detail?: string,
progress?: number,
processedFiles?: number,
totalFiles?: number
) => {
parentPort!.postMessage({
type: 'progress',
jobId: msg.jobId,

View File

@@ -8,7 +8,6 @@
import { describe, it, expect, vi, beforeEach, afterEach } from 'vitest';
import { writeFileSync, unlinkSync, existsSync } from 'node:fs';
import { EventEmitter } from 'node:events';
// ---------------------------------------------------------------------------
// Hoist FakeWorker + registry so vi.mock can reference them.
@@ -36,7 +35,7 @@ const { createdWorkers, FakeWorker } = vi.hoisted(() => {
this.threadId = 0;
});
constructor(_script: string, _opts?: unknown) {
constructor() {
super();
createdWorkers.push(this);
}
@@ -67,6 +66,7 @@ function makeOpts(overrides: Partial<WorkerPoolOptions> = {}): WorkerPoolOptions
concurrency: 2,
workerScript: FAKE_SCRIPT,
embedWorkerScript: MISSING_SCRIPT,
writeWorkerScript: MISSING_SCRIPT,
dbPath: ':memory:',
onProgress: vi.fn(),
onJobDone: vi.fn(),
@@ -142,6 +142,12 @@ describe('WorkerPool normal mode', () => {
expect(createdWorkers).toHaveLength(3);
});
it('spawns a write worker when writeWorkerScript exists', () => {
new WorkerPool(makeOpts({ concurrency: 2, writeWorkerScript: FAKE_SCRIPT }));
expect(createdWorkers).toHaveLength(3);
});
// -------------------------------------------------------------------------
// enqueue dispatches to an idle worker
// -------------------------------------------------------------------------
@@ -208,8 +214,12 @@ describe('WorkerPool normal mode', () => {
const runCalls = createdWorkers.flatMap((w) =>
w.postMessage.mock.calls.filter((c) => (c[0] as { type: string })?.type === 'run')
);
expect(runCalls.filter((c) => (c[0] as unknown as { jobId: string }).jobId === 'job-1')).toHaveLength(1);
expect(runCalls.filter((c) => (c[0] as unknown as { jobId: string }).jobId === 'job-2')).toHaveLength(0);
expect(
runCalls.filter((c) => (c[0] as unknown as { jobId: string }).jobId === 'job-1')
).toHaveLength(1);
expect(
runCalls.filter((c) => (c[0] as unknown as { jobId: string }).jobId === 'job-2')
).toHaveLength(0);
});
it('starts jobs for different repos concurrently', () => {
@@ -227,6 +237,83 @@ describe('WorkerPool normal mode', () => {
expect(dispatchedIds).toContain('job-beta');
});
it('dispatches same-repo jobs concurrently when versionIds differ', () => {
const pool = new WorkerPool(makeOpts({ concurrency: 2 }));
pool.enqueue('job-v1', '/repo/same', 'v1');
pool.enqueue('job-v2', '/repo/same', 'v2');
const runCalls = createdWorkers.flatMap((w) =>
w.postMessage.mock.calls.filter((c) => (c[0] as { type: string })?.type === 'run')
);
const dispatchedIds = runCalls.map((c) => (c[0] as unknown as { jobId: string }).jobId);
expect(dispatchedIds).toContain('job-v1');
expect(dispatchedIds).toContain('job-v2');
});
it('forwards write worker acknowledgements back to the originating parse worker', () => {
new WorkerPool(makeOpts({ concurrency: 1, writeWorkerScript: FAKE_SCRIPT }));
const parseWorker = createdWorkers[0];
const writeWorker = createdWorkers[1];
writeWorker.emit('message', { type: 'ready' });
parseWorker.emit('message', {
type: 'write_replace',
jobId: 'job-write',
changedDocIds: [],
documents: [],
snippets: []
});
writeWorker.emit('message', { type: 'write_ack', jobId: 'job-write' });
expect(writeWorker.postMessage).toHaveBeenCalledWith({
type: 'write_replace',
jobId: 'job-write',
changedDocIds: [],
documents: [],
snippets: []
});
expect(parseWorker.postMessage).toHaveBeenCalledWith({ type: 'write_ack', jobId: 'job-write' });
});
it('forwards write worker acknowledgements back to the embed worker', () => {
new WorkerPool(
makeOpts({
concurrency: 1,
writeWorkerScript: FAKE_SCRIPT,
embedWorkerScript: FAKE_SCRIPT,
embeddingProfileId: 'local-default'
})
);
const parseWorker = createdWorkers[0];
const embedWorker = createdWorkers[1];
const writeWorker = createdWorkers[2];
writeWorker.emit('message', { type: 'ready' });
embedWorker.emit('message', { type: 'ready' });
embedWorker.emit('message', {
type: 'write_embeddings',
jobId: 'job-embed',
embeddings: []
});
writeWorker.emit('message', { type: 'write_ack', jobId: 'job-embed', embeddingCount: 0 });
expect(parseWorker.postMessage).not.toHaveBeenCalledWith({
type: 'write_ack',
jobId: 'job-embed'
});
expect(writeWorker.postMessage).toHaveBeenCalledWith({
type: 'write_embeddings',
jobId: 'job-embed',
embeddings: []
});
expect(embedWorker.postMessage).toHaveBeenCalledWith({
type: 'write_ack',
jobId: 'job-embed',
embeddingCount: 0
});
});
// -------------------------------------------------------------------------
// Worker crash (exit code != 0)
// -------------------------------------------------------------------------
@@ -248,7 +335,7 @@ describe('WorkerPool normal mode', () => {
it('does NOT call onJobFailed when a worker exits cleanly (code 0)', () => {
const opts = makeOpts({ concurrency: 1 });
const pool = new WorkerPool(opts);
new WorkerPool(opts);
// Exit without any running job
const worker = createdWorkers[0];

View File

@@ -6,9 +6,12 @@ import type {
EmbedWorkerRequest,
EmbedWorkerResponse,
WorkerInitData,
WriteWorkerRequest,
WriteWorkerResponse
} from './worker-types.js';
type InFlightWriteRequest = Exclude<WriteWorkerRequest, { type: 'shutdown' }>;
export interface WorkerPoolOptions {
concurrency: number;
workerScript: string;
@@ -68,6 +71,7 @@ export class WorkerPool {
private runningJobs = new Map<Worker, RunningJob>();
private runningJobKeys = new Set<string>();
private embedQueue: EmbedQueuedJob[] = [];
private pendingWriteWorkers = new Map<string, Worker>();
private options: WorkerPoolOptions;
private fallbackMode = false;
private shuttingDown = false;
@@ -179,7 +183,11 @@ export class WorkerPool {
const job = this.jobQueue.splice(jobIdx, 1)[0];
const worker = this.idleWorkers.pop()!;
this.runningJobs.set(worker, { jobId: job.jobId, repositoryId: job.repositoryId, versionId: job.versionId });
this.runningJobs.set(worker, {
jobId: job.jobId,
repositoryId: job.repositoryId,
versionId: job.versionId
});
this.runningJobKeys.add(WorkerPool.jobKey(job.repositoryId, job.versionId));
statusChanged = true;
@@ -192,14 +200,66 @@ export class WorkerPool {
}
}
private postWriteRequest(request: InFlightWriteRequest, worker?: Worker): void {
if (!this.writeWorker || !this.writeReady) {
if (worker) {
worker.postMessage({
type: 'write_error',
jobId: request.jobId,
error: 'Write worker is not ready'
} satisfies ParseWorkerRequest);
}
return;
}
if (worker) {
this.pendingWriteWorkers.set(request.jobId, worker);
}
this.writeWorker.postMessage(request);
}
private onWorkerMessage(worker: Worker, msg: ParseWorkerResponse): void {
if (msg.type === 'progress') {
this.postWriteRequest({
type: 'write_job_update',
jobId: msg.jobId,
fields: {
status: 'running',
startedAt: Math.floor(Date.now() / 1000),
stage: msg.stage,
stageDetail: msg.stageDetail ?? null,
progress: msg.progress,
processedFiles: msg.processedFiles,
totalFiles: msg.totalFiles
}
});
this.options.onProgress(msg.jobId, msg);
} else if (
msg.type === 'write_replace' ||
msg.type === 'write_clone' ||
msg.type === 'write_repo_update' ||
msg.type === 'write_version_update' ||
msg.type === 'write_repo_config'
) {
this.postWriteRequest(msg, worker);
} else if (msg.type === 'done') {
const runningJob = this.runningJobs.get(worker);
this.postWriteRequest({
type: 'write_job_update',
jobId: msg.jobId,
fields: {
status: 'done',
stage: 'done',
progress: 100,
completedAt: Math.floor(Date.now() / 1000)
}
});
if (runningJob) {
this.runningJobs.delete(worker);
this.runningJobKeys.delete(WorkerPool.jobKey(runningJob.repositoryId, runningJob.versionId));
this.runningJobKeys.delete(
WorkerPool.jobKey(runningJob.repositoryId, runningJob.versionId)
);
}
this.idleWorkers.push(worker);
this.options.onJobDone(msg.jobId);
@@ -207,20 +267,32 @@ export class WorkerPool {
// If embedding configured, enqueue embed request
if (this.embedWorker && this.options.embeddingProfileId) {
const runningJobData = runningJob || { jobId: msg.jobId, repositoryId: '', versionId: null };
this.enqueueEmbed(
msg.jobId,
runningJobData.repositoryId,
runningJobData.versionId ?? null
);
const runningJobData = runningJob || {
jobId: msg.jobId,
repositoryId: '',
versionId: null
};
this.enqueueEmbed(msg.jobId, runningJobData.repositoryId, runningJobData.versionId ?? null);
}
this.dispatch();
} else if (msg.type === 'failed') {
const runningJob = this.runningJobs.get(worker);
this.postWriteRequest({
type: 'write_job_update',
jobId: msg.jobId,
fields: {
status: 'failed',
stage: 'failed',
error: msg.error,
completedAt: Math.floor(Date.now() / 1000)
}
});
if (runningJob) {
this.runningJobs.delete(worker);
this.runningJobKeys.delete(WorkerPool.jobKey(runningJob.repositoryId, runningJob.versionId));
this.runningJobKeys.delete(
WorkerPool.jobKey(runningJob.repositoryId, runningJob.versionId)
);
}
this.idleWorkers.push(worker);
this.options.onJobFailed(msg.jobId, msg.error);
@@ -273,6 +345,22 @@ export class WorkerPool {
this.embedReady = true;
// Process any queued embed requests
this.processEmbedQueue();
} else if (msg.type === 'write_embeddings') {
const embedWorker = this.embedWorker;
if (!embedWorker) {
return;
}
if (!this.writeWorker || !this.writeReady) {
embedWorker.postMessage({
type: 'write_error',
jobId: msg.jobId,
error: 'Write worker is not ready'
} satisfies EmbedWorkerRequest);
return;
}
this.postWriteRequest(msg, embedWorker);
} else if (msg.type === 'embed-progress') {
// Progress message - could be tracked but not strictly required
} else if (msg.type === 'embed-done') {
@@ -288,6 +376,12 @@ export class WorkerPool {
return;
}
const worker = this.pendingWriteWorkers.get(msg.jobId);
if (worker) {
this.pendingWriteWorkers.delete(msg.jobId);
worker.postMessage(msg satisfies ParseWorkerRequest);
}
if (msg.type === 'write_error') {
console.error('[WorkerPool] Write worker failed for job:', msg.jobId, msg.error);
}
@@ -433,6 +527,7 @@ export class WorkerPool {
this.idleWorkers = [];
this.embedWorker = null;
this.writeWorker = null;
this.pendingWriteWorkers.clear();
this.emitStatusChanged();
}

View File

@@ -2,29 +2,58 @@ import type { IndexingStage } from '$lib/types.js';
export type ParseWorkerRequest =
| { type: 'run'; jobId: string }
| { type: 'write_ack'; jobId: string }
| { type: 'write_error'; jobId: string; error: string }
| { type: 'shutdown' };
export type ParseWorkerResponse =
| { type: 'progress'; jobId: string; stage: IndexingStage; stageDetail?: string; progress: number; processedFiles: number; totalFiles: number }
| {
type: 'progress';
jobId: string;
stage: IndexingStage;
stageDetail?: string;
progress: number;
processedFiles: number;
totalFiles: number;
}
| { type: 'done'; jobId: string }
| { type: 'failed'; jobId: string; error: string };
| { type: 'failed'; jobId: string; error: string }
| WriteReplaceRequest
| WriteCloneRequest
| WriteRepoUpdateRequest
| WriteVersionUpdateRequest
| WriteRepoConfigRequest;
export type EmbedWorkerRequest =
| { type: 'embed'; jobId: string; repositoryId: string; versionId: string | null }
| {
type: 'write_ack';
jobId: string;
documentCount?: number;
snippetCount?: number;
embeddingCount?: number;
}
| { type: 'write_error'; jobId: string; error: string }
| { type: 'shutdown' };
export type EmbedWorkerResponse =
| { type: 'ready' }
| { type: 'embed-progress'; jobId: string; done: number; total: number }
| { type: 'embed-done'; jobId: string }
| { type: 'embed-failed'; jobId: string; error: string };
| { type: 'embed-failed'; jobId: string; error: string }
| WriteEmbeddingsRequest;
export type WriteWorkerRequest = WriteRequest | { type: 'shutdown' };
export type WriteWorkerRequest =
| ReplaceWriteRequest
| CloneWriteRequest
| JobUpdateWriteRequest
| RepoUpdateWriteRequest
| VersionUpdateWriteRequest
| RepoConfigWriteRequest
| EmbeddingsWriteRequest
| { type: 'shutdown' };
export type WriteWorkerResponse =
| { type: 'ready' }
| WriteAck
| WriteError;
export type WriteWorkerResponse = { type: 'ready' } | WriteAck | WriteError;
export interface WorkerInitData {
dbPath: string;
@@ -58,18 +87,84 @@ export interface SerializedSnippet {
createdAt: number;
}
export type WriteRequest = {
type: 'write';
export interface SerializedEmbedding {
snippetId: string;
profileId: string;
model: string;
dimensions: number;
embedding: Uint8Array;
}
export type SerializedFieldValue = string | number | null;
export type SerializedFields = Record<string, SerializedFieldValue>;
export type ReplaceWriteRequest = {
type: 'write_replace';
jobId: string;
changedDocIds: string[];
documents: SerializedDocument[];
snippets: SerializedSnippet[];
};
export type CloneWriteRequest = {
type: 'write_clone';
jobId: string;
ancestorVersionId: string;
targetVersionId: string;
repositoryId: string;
unchangedPaths: string[];
};
export type WriteReplaceRequest = ReplaceWriteRequest;
export type WriteCloneRequest = CloneWriteRequest;
export type EmbeddingsWriteRequest = {
type: 'write_embeddings';
jobId: string;
embeddings: SerializedEmbedding[];
};
export type RepoUpdateWriteRequest = {
type: 'write_repo_update';
jobId: string;
repositoryId: string;
fields: SerializedFields;
};
export type VersionUpdateWriteRequest = {
type: 'write_version_update';
jobId: string;
versionId: string;
fields: SerializedFields;
};
export type RepoConfigWriteRequest = {
type: 'write_repo_config';
jobId: string;
repositoryId: string;
versionId: string | null;
rules: string[];
};
export type JobUpdateWriteRequest = {
type: 'write_job_update';
jobId: string;
fields: SerializedFields;
};
export type WriteEmbeddingsRequest = EmbeddingsWriteRequest;
export type WriteRepoUpdateRequest = RepoUpdateWriteRequest;
export type WriteVersionUpdateRequest = VersionUpdateWriteRequest;
export type WriteRepoConfigRequest = RepoConfigWriteRequest;
export type WriteAck = {
type: 'write_ack';
jobId: string;
documentCount: number;
snippetCount: number;
documentCount?: number;
snippetCount?: number;
embeddingCount?: number;
};
export type WriteError = {

View File

@@ -0,0 +1,343 @@
import { randomUUID } from 'node:crypto';
import type Database from 'better-sqlite3';
import type { NewDocument, NewSnippet } from '$lib/types';
import { SqliteVecStore } from '$lib/server/search/sqlite-vec.store.js';
import type {
SerializedDocument,
SerializedEmbedding,
SerializedFields,
SerializedSnippet
} from './worker-types.js';
// Structural subset of NewDocument accepted by the write path. `indexedAt`
// is widened to Date | number: worker messages carry epoch seconds over the
// wire, while in-process callers pass Date instances.
type DocumentLike = Pick<
  NewDocument,
  | 'id'
  | 'repositoryId'
  | 'versionId'
  | 'filePath'
  | 'title'
  | 'language'
  | 'tokenCount'
  | 'checksum'
> & {
  indexedAt: Date | number;
};

// Structural subset of NewSnippet, with `createdAt` widened for the same
// wire-format reason as DocumentLike.indexedAt.
type SnippetLike = Pick<
  NewSnippet,
  | 'id'
  | 'documentId'
  | 'repositoryId'
  | 'versionId'
  | 'type'
  | 'title'
  | 'content'
  | 'language'
  | 'breadcrumb'
  | 'tokenCount'
> & {
  createdAt: Date | number;
};

/**
 * Input for cloneFromAncestor(): copy the document/snippet/embedding rows for
 * `unchangedPaths` from `ancestorVersionId` into `targetVersionId`.
 */
export interface CloneFromAncestorRequest {
  ancestorVersionId: string;
  targetVersionId: string;
  repositoryId: string;
  unchangedPaths: string[];
}

/** One embedding row destined for `snippet_embeddings` (and the sqlite-vec mirror). */
export interface PersistedEmbedding {
  snippetId: string;
  profileId: string;
  model: string;
  dimensions: number;
  embedding: Buffer | Uint8Array;
}
/** Normalize a timestamp to whole epoch seconds: Dates are converted, numbers pass through unchanged. */
function toEpochSeconds(value: Date | number): number {
  if (typeof value === 'number') {
    return value;
  }
  return Math.floor(value.getTime() / 1000);
}
/** Convert a camelCase identifier to snake_case, e.g. "tokenCount" -> "token_count". */
function toSnake(key: string): string {
  let out = '';
  for (const ch of key) {
    // Only ASCII uppercase triggers the underscore-and-lowercase rewrite.
    out += ch >= 'A' && ch <= 'Z' ? `_${ch.toLowerCase()}` : ch;
  }
  return out;
}
/**
 * Atomically swap out the indexed content for a set of changed documents.
 *
 * Inside one transaction: vector embeddings for `changedDocIds` are removed,
 * the corresponding `documents` rows are deleted, and the freshly parsed
 * documents/snippets are inserted. Snippet rows for the deleted documents are
 * presumably removed via a FK cascade from `documents` — TODO confirm against schema.
 *
 * @param db            open better-sqlite3 handle
 * @param changedDocIds ids of documents whose previous content must be purged
 * @param newDocuments  replacement document rows
 * @param newSnippets   replacement snippet rows
 */
function replaceSnippetsInternal(
  db: Database.Database,
  changedDocIds: string[],
  newDocuments: DocumentLike[],
  newSnippets: SnippetLike[]
): void {
  const sqliteVecStore = new SqliteVecStore(db);
  // Chunk the IN (...) delete so a large changeset cannot exceed SQLite's
  // host-parameter limit (historically 999 bound variables per statement).
  const DELETE_CHUNK = 500;
  const insertDoc = db.prepare(
    `INSERT INTO documents
       (id, repository_id, version_id, file_path, title, language,
        token_count, checksum, indexed_at)
     VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)`
  );
  const insertSnippet = db.prepare(
    `INSERT INTO snippets
       (id, document_id, repository_id, version_id, type, title,
        content, language, breadcrumb, token_count, created_at)
     VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)`
  );
  db.transaction(() => {
    sqliteVecStore.deleteEmbeddingsForDocumentIds(changedDocIds);
    for (let i = 0; i < changedDocIds.length; i += DELETE_CHUNK) {
      const chunk = changedDocIds.slice(i, i + DELETE_CHUNK);
      const placeholders = chunk.map(() => '?').join(',');
      db.prepare(`DELETE FROM documents WHERE id IN (${placeholders})`).run(...chunk);
    }
    for (const doc of newDocuments) {
      insertDoc.run(
        doc.id,
        doc.repositoryId,
        doc.versionId ?? null,
        doc.filePath,
        doc.title ?? null,
        doc.language ?? null,
        doc.tokenCount ?? 0,
        doc.checksum,
        toEpochSeconds(doc.indexedAt)
      );
    }
    for (const snippet of newSnippets) {
      insertSnippet.run(
        snippet.id,
        snippet.documentId,
        snippet.repositoryId,
        snippet.versionId ?? null,
        snippet.type,
        snippet.title ?? null,
        snippet.content,
        snippet.language ?? null,
        snippet.breadcrumb ?? null,
        snippet.tokenCount ?? 0,
        toEpochSeconds(snippet.createdAt)
      );
    }
  })();
}
/**
 * Replace indexed content using fully-typed domain objects (NewDocument /
 * NewSnippet). Thin wrapper over replaceSnippetsInternal, which performs the
 * delete-then-insert inside a single transaction.
 */
export function replaceSnippets(
  db: Database.Database,
  changedDocIds: string[],
  newDocuments: NewDocument[],
  newSnippets: NewSnippet[]
): void {
  replaceSnippetsInternal(db, changedDocIds, newDocuments, newSnippets);
}
/**
 * Replace indexed content using the serialized row shapes sent over worker
 * messages (epoch-second timestamps instead of Date objects). Thin wrapper
 * over replaceSnippetsInternal, which performs the delete-then-insert inside
 * a single transaction.
 */
export function replaceSerializedSnippets(
  db: Database.Database,
  changedDocIds: string[],
  documents: SerializedDocument[],
  snippets: SerializedSnippet[]
): void {
  replaceSnippetsInternal(db, changedDocIds, documents, snippets);
}
/**
 * Copy documents, snippets, and snippet embeddings for files that did not
 * change between two versions of a repository, assigning fresh UUIDs so the
 * target version owns independent rows. Each copied embedding is also
 * mirrored into the sqlite-vec store. All writes happen inside a single
 * transaction; an empty `unchangedPaths` list is a no-op.
 *
 * Timestamps: cloned documents get a fresh indexed_at (now); cloned snippets
 * and embeddings keep the ancestor's created_at.
 *
 * NOTE(review): the SELECT ... IN (...) queries bind one parameter per
 * path/id — a very large unchanged set could hit SQLite's host-parameter
 * limit; confirm upstream batching if that is reachable.
 */
export function cloneFromAncestor(db: Database.Database, request: CloneFromAncestorRequest): void {
  const sqliteVecStore = new SqliteVecStore(db);
  const { ancestorVersionId, targetVersionId, repositoryId, unchangedPaths } = request;
  // Prepare each INSERT once; previously these were re-prepared on every
  // loop iteration, re-parsing the same SQL per copied row.
  const insertDoc = db.prepare(
    `INSERT INTO documents (id, repository_id, version_id, file_path, title, language, token_count, checksum, indexed_at)
     VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)`
  );
  const insertSnippet = db.prepare(
    `INSERT INTO snippets (id, document_id, repository_id, version_id, type, title, content, language, breadcrumb, token_count, created_at)
     VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)`
  );
  const insertEmbedding = db.prepare(
    `INSERT INTO snippet_embeddings (snippet_id, profile_id, model, dimensions, embedding, created_at)
     VALUES (?, ?, ?, ?, ?, ?)`
  );
  db.transaction(() => {
    const pathList = [...unchangedPaths];
    if (pathList.length === 0) {
      return;
    }
    const placeholders = pathList.map(() => '?').join(',');
    const ancestorDocs = db
      .prepare(`SELECT * FROM documents WHERE version_id = ? AND file_path IN (${placeholders})`)
      .all(ancestorVersionId, ...pathList) as Array<{
      id: string;
      repository_id: string;
      file_path: string;
      title: string | null;
      language: string | null;
      token_count: number;
      checksum: string;
      indexed_at: number;
    }>;
    // ancestor doc id -> freshly generated doc id under the target version
    const docIdMap = new Map<string, string>();
    const nowEpoch = Math.floor(Date.now() / 1000);
    for (const doc of ancestorDocs) {
      const newDocId = randomUUID();
      docIdMap.set(doc.id, newDocId);
      insertDoc.run(
        newDocId,
        repositoryId,
        targetVersionId,
        doc.file_path,
        doc.title,
        doc.language,
        doc.token_count,
        doc.checksum,
        nowEpoch
      );
    }
    if (docIdMap.size === 0) return;
    const oldDocIds = [...docIdMap.keys()];
    const snippetPlaceholders = oldDocIds.map(() => '?').join(',');
    const ancestorSnippets = db
      .prepare(`SELECT * FROM snippets WHERE document_id IN (${snippetPlaceholders})`)
      .all(...oldDocIds) as Array<{
      id: string;
      document_id: string;
      repository_id: string;
      version_id: string | null;
      type: string;
      title: string | null;
      content: string;
      language: string | null;
      breadcrumb: string | null;
      token_count: number;
      created_at: number;
    }>;
    // ancestor snippet id -> freshly generated snippet id
    const snippetIdMap = new Map<string, string>();
    for (const snippet of ancestorSnippets) {
      const newSnippetId = randomUUID();
      snippetIdMap.set(snippet.id, newSnippetId);
      const newDocId = docIdMap.get(snippet.document_id)!; // every snippet's doc was just cloned
      insertSnippet.run(
        newSnippetId,
        newDocId,
        repositoryId,
        targetVersionId,
        snippet.type,
        snippet.title,
        snippet.content,
        snippet.language,
        snippet.breadcrumb,
        snippet.token_count,
        snippet.created_at
      );
    }
    if (snippetIdMap.size === 0) {
      return;
    }
    const oldSnippetIds = [...snippetIdMap.keys()];
    const embPlaceholders = oldSnippetIds.map(() => '?').join(',');
    const ancestorEmbeddings = db
      .prepare(`SELECT * FROM snippet_embeddings WHERE snippet_id IN (${embPlaceholders})`)
      .all(...oldSnippetIds) as Array<{
      snippet_id: string;
      profile_id: string;
      model: string;
      dimensions: number;
      embedding: Buffer;
      created_at: number;
    }>;
    for (const emb of ancestorEmbeddings) {
      const newSnippetId = snippetIdMap.get(emb.snippet_id)!; // its snippet was just cloned
      insertEmbedding.run(
        newSnippetId,
        emb.profile_id,
        emb.model,
        emb.dimensions,
        emb.embedding,
        emb.created_at
      );
      sqliteVecStore.upsertEmbeddingBuffer(emb.profile_id, newSnippetId, emb.embedding, emb.dimensions);
    }
  })();
}
export function upsertEmbeddings(db: Database.Database, embeddings: PersistedEmbedding[]): void {
if (embeddings.length === 0) {
return;
}
const sqliteVecStore = new SqliteVecStore(db);
const insert = db.prepare<[string, string, string, number, Buffer]>(`
INSERT OR REPLACE INTO snippet_embeddings (snippet_id, profile_id, model, dimensions, embedding, created_at)
VALUES (?, ?, ?, ?, ?, unixepoch())
`);
db.transaction(() => {
for (const item of embeddings) {
const embeddingBuffer = Buffer.isBuffer(item.embedding)
? item.embedding
: Buffer.from(item.embedding);
insert.run(item.snippetId, item.profileId, item.model, item.dimensions, embeddingBuffer);
sqliteVecStore.upsertEmbeddingBuffer(
item.profileId,
item.snippetId,
embeddingBuffer,
item.dimensions
);
}
})();
}
export function upsertSerializedEmbeddings(
db: Database.Database,
embeddings: SerializedEmbedding[]
): void {
upsertEmbeddings(
db,
embeddings.map((item) => ({
snippetId: item.snippetId,
profileId: item.profileId,
model: item.model,
dimensions: item.dimensions,
embedding: item.embedding
}))
);
}
export function updateRepo(
db: Database.Database,
repositoryId: string,
fields: SerializedFields
): void {
const now = Math.floor(Date.now() / 1000);
const allFields = { ...fields, updatedAt: now };
const sets = Object.keys(allFields)
.map((key) => `${toSnake(key)} = ?`)
.join(', ');
const values = [...Object.values(allFields), repositoryId];
db.prepare(`UPDATE repositories SET ${sets} WHERE id = ?`).run(...values);
}
/**
 * Apply a partial update to an `indexing_jobs` row.
 *
 * Column names are derived from the camelCase field keys via toSnake; keys
 * come from trusted internal worker messages, values are bound parameters.
 * An empty `fields` object is now a no-op — previously it built the invalid
 * statement `UPDATE indexing_jobs SET  WHERE id = ?` and threw.
 */
export function updateJob(db: Database.Database, jobId: string, fields: SerializedFields): void {
  const keys = Object.keys(fields);
  if (keys.length === 0) {
    return; // nothing to update; avoid generating an empty SET clause
  }
  const sets = keys.map((key) => `${toSnake(key)} = ?`).join(', ');
  const values = [...Object.values(fields), jobId];
  db.prepare(`UPDATE indexing_jobs SET ${sets} WHERE id = ?`).run(...values);
}
/**
 * Apply a partial update to a `repository_versions` row.
 *
 * Column names are derived from the camelCase field keys via toSnake; keys
 * come from trusted internal worker messages, values are bound parameters.
 * An empty `fields` object is now a no-op — previously it built the invalid
 * statement `UPDATE repository_versions SET  WHERE id = ?` and threw.
 */
export function updateVersion(
  db: Database.Database,
  versionId: string,
  fields: SerializedFields
): void {
  const keys = Object.keys(fields);
  if (keys.length === 0) {
    return; // nothing to update; avoid generating an empty SET clause
  }
  const sets = keys.map((key) => `${toSnake(key)} = ?`).join(', ');
  const values = [...Object.values(fields), versionId];
  db.prepare(`UPDATE repository_versions SET ${sets} WHERE id = ?`).run(...values);
}

View File

@@ -1,67 +1,21 @@
import { workerData, parentPort } from 'node:worker_threads';
import Database from 'better-sqlite3';
import type {
SerializedDocument,
SerializedSnippet,
WorkerInitData,
WriteWorkerRequest,
WriteWorkerResponse
} from './worker-types.js';
import { applySqlitePragmas } from '$lib/server/db/connection.js';
import { loadSqliteVec } from '$lib/server/db/sqlite-vec.js';
import type { WorkerInitData, WriteWorkerRequest, WriteWorkerResponse } from './worker-types.js';
import {
cloneFromAncestor,
replaceSerializedSnippets,
updateJob,
updateRepo,
updateVersion,
upsertSerializedEmbeddings
} from './write-operations.js';
const { dbPath } = workerData as WorkerInitData;
const db = new Database(dbPath);
db.pragma('journal_mode = WAL');
db.pragma('foreign_keys = ON');
db.pragma('busy_timeout = 5000');
db.pragma('synchronous = NORMAL');
db.pragma('cache_size = -65536');
db.pragma('temp_store = MEMORY');
db.pragma('mmap_size = 268435456');
db.pragma('wal_autocheckpoint = 1000');
const insertDocument = db.prepare(
`INSERT OR REPLACE INTO documents
(id, repository_id, version_id, file_path, title, language, token_count, checksum, indexed_at)
VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)`
);
const insertSnippet = db.prepare(
`INSERT OR REPLACE INTO snippets
(id, document_id, repository_id, version_id, type, title, content, language, breadcrumb, token_count, created_at)
VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)`
);
const writeBatch = db.transaction((documents: SerializedDocument[], snippets: SerializedSnippet[]) => {
for (const document of documents) {
insertDocument.run(
document.id,
document.repositoryId,
document.versionId,
document.filePath,
document.title,
document.language,
document.tokenCount,
document.checksum,
document.indexedAt
);
}
for (const snippet of snippets) {
insertSnippet.run(
snippet.id,
snippet.documentId,
snippet.repositoryId,
snippet.versionId,
snippet.type,
snippet.title,
snippet.content,
snippet.language,
snippet.breadcrumb,
snippet.tokenCount,
snippet.createdAt
);
}
});
applySqlitePragmas(db);
loadSqliteVec(db);
parentPort?.postMessage({ type: 'ready' } satisfies WriteWorkerResponse);
@@ -71,23 +25,145 @@ parentPort?.on('message', (msg: WriteWorkerRequest) => {
process.exit(0);
}
if (msg.type !== 'write') {
if (msg.type === 'write_replace') {
try {
replaceSerializedSnippets(db, msg.changedDocIds, msg.documents, msg.snippets);
parentPort?.postMessage({
type: 'write_ack',
jobId: msg.jobId,
documentCount: msg.documents.length,
snippetCount: msg.snippets.length
} satisfies WriteWorkerResponse);
} catch (error) {
parentPort?.postMessage({
type: 'write_error',
jobId: msg.jobId,
error: error instanceof Error ? error.message : String(error)
} satisfies WriteWorkerResponse);
}
return;
}
try {
writeBatch(msg.documents, msg.snippets);
parentPort?.postMessage({
type: 'write_ack',
jobId: msg.jobId,
documentCount: msg.documents.length,
snippetCount: msg.snippets.length
} satisfies WriteWorkerResponse);
} catch (error) {
parentPort?.postMessage({
type: 'write_error',
jobId: msg.jobId,
error: error instanceof Error ? error.message : String(error)
} satisfies WriteWorkerResponse);
if (msg.type === 'write_clone') {
try {
cloneFromAncestor(db, {
ancestorVersionId: msg.ancestorVersionId,
targetVersionId: msg.targetVersionId,
repositoryId: msg.repositoryId,
unchangedPaths: msg.unchangedPaths
});
parentPort?.postMessage({
type: 'write_ack',
jobId: msg.jobId
} satisfies WriteWorkerResponse);
} catch (error) {
parentPort?.postMessage({
type: 'write_error',
jobId: msg.jobId,
error: error instanceof Error ? error.message : String(error)
} satisfies WriteWorkerResponse);
}
return;
}
});
if (msg.type === 'write_embeddings') {
try {
upsertSerializedEmbeddings(db, msg.embeddings);
parentPort?.postMessage({
type: 'write_ack',
jobId: msg.jobId,
embeddingCount: msg.embeddings.length
} satisfies WriteWorkerResponse);
} catch (error) {
parentPort?.postMessage({
type: 'write_error',
jobId: msg.jobId,
error: error instanceof Error ? error.message : String(error)
} satisfies WriteWorkerResponse);
}
return;
}
if (msg.type === 'write_job_update') {
try {
updateJob(db, msg.jobId, msg.fields);
parentPort?.postMessage({
type: 'write_ack',
jobId: msg.jobId
} satisfies WriteWorkerResponse);
} catch (error) {
parentPort?.postMessage({
type: 'write_error',
jobId: msg.jobId,
error: error instanceof Error ? error.message : String(error)
} satisfies WriteWorkerResponse);
}
return;
}
if (msg.type === 'write_repo_update') {
try {
updateRepo(db, msg.repositoryId, msg.fields);
parentPort?.postMessage({
type: 'write_ack',
jobId: msg.jobId
} satisfies WriteWorkerResponse);
} catch (error) {
parentPort?.postMessage({
type: 'write_error',
jobId: msg.jobId,
error: error instanceof Error ? error.message : String(error)
} satisfies WriteWorkerResponse);
}
return;
}
if (msg.type === 'write_version_update') {
try {
updateVersion(db, msg.versionId, msg.fields);
parentPort?.postMessage({
type: 'write_ack',
jobId: msg.jobId
} satisfies WriteWorkerResponse);
} catch (error) {
parentPort?.postMessage({
type: 'write_error',
jobId: msg.jobId,
error: error instanceof Error ? error.message : String(error)
} satisfies WriteWorkerResponse);
}
return;
}
if (msg.type === 'write_repo_config') {
try {
const now = Math.floor(Date.now() / 1000);
if (msg.versionId === null) {
db.prepare(
`DELETE FROM repository_configs WHERE repository_id = ? AND version_id IS NULL`
).run(msg.repositoryId);
} else {
db.prepare(`DELETE FROM repository_configs WHERE repository_id = ? AND version_id = ?`).run(
msg.repositoryId,
msg.versionId
);
}
db.prepare(
`INSERT INTO repository_configs (repository_id, version_id, rules, updated_at)
VALUES (?, ?, ?, ?)`
).run(msg.repositoryId, msg.versionId, JSON.stringify(msg.rules), now);
parentPort?.postMessage({
type: 'write_ack',
jobId: msg.jobId
} satisfies WriteWorkerResponse);
} catch (error) {
parentPort?.postMessage({
type: 'write_error',
jobId: msg.jobId,
error: error instanceof Error ? error.message : String(error)
} satisfies WriteWorkerResponse);
}
}
});

View File

@@ -383,7 +383,18 @@ describe('VectorSearch', () => {
`INSERT INTO embedding_profiles (id, provider_kind, title, enabled, is_default, model, dimensions, config, created_at, updated_at)
VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?)`
)
.run('secondary-profile', 'local-transformers', 'Secondary', 1, 0, 'test-model', 2, '{}', NOW_S, NOW_S);
.run(
'secondary-profile',
'local-transformers',
'Secondary',
1,
0,
'test-model',
2,
'{}',
NOW_S,
NOW_S
);
const defaultSnippet = seedSnippet(client, {
repositoryId: repoId,

View File

@@ -90,17 +90,18 @@ export class SqliteVecStore {
this.ensureProfileStore(profileId, tables.dimensions);
const existingRow = this.db
.prepare<[string], SnippetRowidRow>(
`SELECT rowid FROM ${tables.quotedRowidTableName} WHERE snippet_id = ?`
)
.prepare<
[string],
SnippetRowidRow
>(`SELECT rowid FROM ${tables.quotedRowidTableName} WHERE snippet_id = ?`)
.get(snippetId);
const embeddingBuffer = toEmbeddingBuffer(embedding);
if (existingRow) {
this.db
.prepare<[Buffer, number]>(
`UPDATE ${tables.quotedVectorTableName} SET embedding = ? WHERE rowid = ?`
)
.prepare<
[Buffer, number]
>(`UPDATE ${tables.quotedVectorTableName} SET embedding = ? WHERE rowid = ?`)
.run(embeddingBuffer, existingRow.rowid);
return;
}
@@ -109,9 +110,9 @@ export class SqliteVecStore {
.prepare<[Buffer]>(`INSERT INTO ${tables.quotedVectorTableName} (embedding) VALUES (?)`)
.run(embeddingBuffer);
this.db
.prepare<[number, string]>(
`INSERT INTO ${tables.quotedRowidTableName} (rowid, snippet_id) VALUES (?, ?)`
)
.prepare<
[number, string]
>(`INSERT INTO ${tables.quotedRowidTableName} (rowid, snippet_id) VALUES (?, ?)`)
.run(Number(insertResult.lastInsertRowid), snippetId);
}
@@ -134,9 +135,10 @@ export class SqliteVecStore {
this.ensureProfileStore(profileId);
const existingRow = this.db
.prepare<[string], SnippetRowidRow>(
`SELECT rowid FROM ${tables.quotedRowidTableName} WHERE snippet_id = ?`
)
.prepare<
[string],
SnippetRowidRow
>(`SELECT rowid FROM ${tables.quotedRowidTableName} WHERE snippet_id = ?`)
.get(snippetId);
if (!existingRow) {
@@ -280,11 +282,7 @@ export class SqliteVecStore {
this.upsertEmbedding(
profileId,
row.snippet_id,
new Float32Array(
row.embedding.buffer,
row.embedding.byteOffset,
tables.dimensions
)
new Float32Array(row.embedding.buffer, row.embedding.byteOffset, tables.dimensions)
);
}
});
@@ -323,9 +321,10 @@ export class SqliteVecStore {
loadSqliteVec(this.db);
const dimensionsRow = this.db
.prepare<[string], ProfileDimensionsRow>(
'SELECT dimensions FROM embedding_profiles WHERE id = ?'
)
.prepare<
[string],
ProfileDimensionsRow
>('SELECT dimensions FROM embedding_profiles WHERE id = ?')
.get(profileId);
if (!dimensionsRow) {
throw new Error(`Embedding profile not found: ${profileId}`);
@@ -377,10 +376,7 @@ export class SqliteVecStore {
throw new Error(`Stored embedding dimensions are missing for profile ${profileId}`);
}
if (
preferredDimensions !== undefined &&
preferredDimensions !== canonicalDimensions
) {
if (preferredDimensions !== undefined && preferredDimensions !== canonicalDimensions) {
throw new Error(
`Embedding dimension mismatch for profile ${profileId}: expected ${canonicalDimensions}, received ${preferredDimensions}`
);
@@ -391,4 +387,4 @@ export class SqliteVecStore {
return preferredDimensions ?? profileDimensions;
}
}
}

View File

@@ -1,6 +1,9 @@
import type Database from 'better-sqlite3';
import type { EmbeddingSettingsUpdateDto } from '$lib/dtos/embedding-settings.js';
import { createProviderFromProfile, getDefaultLocalProfile } from '$lib/server/embeddings/registry.js';
import {
createProviderFromProfile,
getDefaultLocalProfile
} from '$lib/server/embeddings/registry.js';
import { EmbeddingProfileMapper } from '$lib/server/mappers/embedding-profile.mapper.js';
import { EmbeddingProfile, EmbeddingProfileEntity } from '$lib/server/models/embedding-profile.js';
import { EmbeddingSettings } from '$lib/server/models/embedding-settings.js';
@@ -94,7 +97,10 @@ export class EmbeddingSettingsService {
private getCreatedAt(id: string, fallback: number): number {
return (
this.db
.prepare<[string], { created_at: number }>('SELECT created_at FROM embedding_profiles WHERE id = ?')
.prepare<
[string],
{ created_at: number }
>('SELECT created_at FROM embedding_profiles WHERE id = ?')
.get(id)?.created_at ?? fallback
);
}
@@ -128,4 +134,4 @@ export class EmbeddingSettingsService {
profile.updatedAt
);
}
}
}

View File

@@ -11,7 +11,11 @@ import Database from 'better-sqlite3';
import { readFileSync } from 'node:fs';
import { join } from 'node:path';
import { RepositoryService } from './repository.service';
import { loadSqliteVec, sqliteVecRowidTableName, sqliteVecTableName } from '$lib/server/db/sqlite-vec.js';
import {
loadSqliteVec,
sqliteVecRowidTableName,
sqliteVecTableName
} from '$lib/server/db/sqlite-vec.js';
import { SqliteVecStore } from '$lib/server/search/sqlite-vec.store.js';
import {
AlreadyExistsError,
@@ -465,7 +469,11 @@ describe('RepositoryService.getIndexSummary()', () => {
beforeEach(() => {
client = createTestDb();
service = makeService(client);
service.add({ source: 'github', sourceUrl: 'https://github.com/facebook/react', branch: 'main' });
service.add({
source: 'github',
sourceUrl: 'https://github.com/facebook/react',
branch: 'main'
});
});
it('returns embedding counts and indexed version labels', () => {

View File

@@ -10,7 +10,11 @@ import { describe, it, expect } from 'vitest';
import Database from 'better-sqlite3';
import { readFileSync } from 'node:fs';
import { join } from 'node:path';
import { loadSqliteVec, sqliteVecRowidTableName, sqliteVecTableName } from '$lib/server/db/sqlite-vec.js';
import {
loadSqliteVec,
sqliteVecRowidTableName,
sqliteVecTableName
} from '$lib/server/db/sqlite-vec.js';
import { SqliteVecStore } from '$lib/server/search/sqlite-vec.store.js';
import { VersionService } from './version.service';
import { RepositoryService } from './repository.service';
@@ -206,18 +210,24 @@ describe('VersionService.remove()', () => {
const now = Math.floor(Date.now() / 1000);
const vecStore = new SqliteVecStore(client);
client.prepare(
`INSERT INTO documents (id, repository_id, version_id, file_path, checksum, indexed_at)
client
.prepare(
`INSERT INTO documents (id, repository_id, version_id, file_path, checksum, indexed_at)
VALUES (?, '/facebook/react', ?, 'README.md', 'version-doc', ?)`
).run(docId, version.id, now);
client.prepare(
`INSERT INTO snippets (id, document_id, repository_id, version_id, type, content, created_at)
)
.run(docId, version.id, now);
client
.prepare(
`INSERT INTO snippets (id, document_id, repository_id, version_id, type, content, created_at)
VALUES (?, ?, '/facebook/react', ?, 'info', 'version snippet', ?)`
).run(snippetId, docId, version.id, now);
client.prepare(
`INSERT INTO snippet_embeddings (snippet_id, profile_id, model, dimensions, embedding, created_at)
)
.run(snippetId, docId, version.id, now);
client
.prepare(
`INSERT INTO snippet_embeddings (snippet_id, profile_id, model, dimensions, embedding, created_at)
VALUES (?, 'local-default', 'test-model', 3, ?, ?)`
).run(snippetId, Buffer.from(embedding.buffer), now);
)
.run(snippetId, Buffer.from(embedding.buffer), now);
vecStore.upsertEmbedding('local-default', snippetId, embedding);
versionService.remove('/facebook/react', 'v18.3.0');

View File

@@ -9,7 +9,10 @@ import { RepositoryVersion } from '$lib/server/models/repository-version.js';
// Helpers
// ---------------------------------------------------------------------------
function makeVersion(tag: string, state: RepositoryVersion['state'] = 'indexed'): RepositoryVersion {
function makeVersion(
tag: string,
state: RepositoryVersion['state'] = 'indexed'
): RepositoryVersion {
return new RepositoryVersion({
id: `/facebook/react/${tag}`,
repositoryId: '/facebook/react',
@@ -42,21 +45,13 @@ describe('findBestAncestorVersion', () => {
});
it('returns the nearest semver predecessor from a list', () => {
const candidates = [
makeVersion('v1.0.0'),
makeVersion('v1.1.0'),
makeVersion('v2.0.0')
];
const candidates = [makeVersion('v1.0.0'), makeVersion('v1.1.0'), makeVersion('v2.0.0')];
const result = findBestAncestorVersion('v2.1.0', candidates);
expect(result?.tag).toBe('v2.0.0');
});
it('handles v-prefix stripping correctly', () => {
const candidates = [
makeVersion('v1.0.0'),
makeVersion('v1.5.0'),
makeVersion('v2.0.0')
];
const candidates = [makeVersion('v1.0.0'), makeVersion('v1.5.0'), makeVersion('v2.0.0')];
const result = findBestAncestorVersion('v2.0.1', candidates);
expect(result?.tag).toBe('v2.0.0');
});

View File

@@ -31,7 +31,16 @@ export type RepositorySource = 'github' | 'local';
export type RepositoryState = 'pending' | 'indexing' | 'indexed' | 'error';
export type SnippetType = 'code' | 'info';
export type JobStatus = 'queued' | 'running' | 'done' | 'failed';
export type IndexingStage = 'queued' | 'differential' | 'crawling' | 'cloning' | 'parsing' | 'storing' | 'embedding' | 'done' | 'failed';
export type IndexingStage =
| 'queued'
| 'differential'
| 'crawling'
| 'cloning'
| 'parsing'
| 'storing'
| 'embedding'
| 'done'
| 'failed';
export type VersionState = 'pending' | 'indexing' | 'indexed' | 'error';
export type EmbeddingProviderKind = 'local-transformers' | 'openai-compatible';

View File

@@ -38,6 +38,9 @@
<a href={resolveRoute('/search')} class="text-sm text-gray-600 hover:text-gray-900">
Search
</a>
<a href={resolveRoute('/admin/jobs')} class="text-sm text-gray-600 hover:text-gray-900">
Admin
</a>
<a href={resolveRoute('/settings')} class="text-sm text-gray-600 hover:text-gray-900">
Settings
</a>

View File

@@ -95,7 +95,10 @@
}
function filtersDirty(): boolean {
return repositoryInput.trim() !== appliedRepositoryFilter || !sameStatuses(selectedStatuses, appliedStatuses);
return (
repositoryInput.trim() !== appliedRepositoryFilter ||
!sameStatuses(selectedStatuses, appliedStatuses)
);
}
function isSpecificRepositoryId(repositoryId: string): boolean {
@@ -107,7 +110,8 @@
const repositoryFilter = appliedRepositoryFilter;
const repositoryMatches = isSpecificRepositoryId(repositoryFilter)
? job.repositoryId === repositoryFilter
: job.repositoryId === repositoryFilter || job.repositoryId.startsWith(`${repositoryFilter}/`);
: job.repositoryId === repositoryFilter ||
job.repositoryId.startsWith(`${repositoryFilter}/`);
if (!repositoryMatches) {
return false;
@@ -199,8 +203,8 @@
selectedStatuses = selectedStatuses.includes(status)
? selectedStatuses.filter((candidate) => candidate !== status)
: [...selectedStatuses, status].sort(
(left, right) => filterStatuses.indexOf(left) - filterStatuses.indexOf(right)
);
(left, right) => filterStatuses.indexOf(left) - filterStatuses.indexOf(right)
);
}
function applyFilters(event?: SubmitEvent) {
@@ -316,7 +320,10 @@
<WorkerStatusPanel />
<form class="mb-6 rounded-lg border border-gray-200 bg-white p-4 shadow-sm" onsubmit={applyFilters}>
<form
class="mb-6 rounded-lg border border-gray-200 bg-white p-4 shadow-sm"
onsubmit={applyFilters}
>
<div class="flex flex-col gap-4 lg:flex-row lg:items-end lg:justify-between">
<div class="flex-1">
<label class="mb-2 block text-sm font-medium text-gray-700" for="repository-filter">
@@ -327,10 +334,11 @@
type="text"
bind:value={repositoryInput}
placeholder="/owner or /owner/repo"
class="w-full rounded-md border border-gray-300 px-3 py-2 text-sm text-gray-900 shadow-sm focus:border-blue-500 focus:outline-none focus:ring-2 focus:ring-blue-200"
class="w-full rounded-md border border-gray-300 px-3 py-2 text-sm text-gray-900 shadow-sm focus:border-blue-500 focus:ring-2 focus:ring-blue-200 focus:outline-none"
/>
<p class="mt-2 text-xs text-gray-500">
Use an owner prefix like <code>/facebook</code> or a full repository ID like <code>/facebook/react</code>.
Use an owner prefix like <code>/facebook</code> or a full repository ID like
<code>/facebook/react</code>.
</p>
</div>
@@ -341,7 +349,9 @@
<button
type="button"
onclick={() => toggleStatusFilter(status)}
class="rounded-full border px-3 py-1 text-xs font-semibold uppercase transition {selectedStatuses.includes(status)
class="rounded-full border px-3 py-1 text-xs font-semibold uppercase transition {selectedStatuses.includes(
status
)
? 'border-blue-600 bg-blue-50 text-blue-700'
: 'border-gray-300 text-gray-600 hover:border-gray-400 hover:text-gray-900'}"
>
@@ -370,7 +380,9 @@
</div>
</form>
<div class="mb-4 flex flex-col gap-2 text-sm text-gray-600 md:flex-row md:items-center md:justify-between">
<div
class="mb-4 flex flex-col gap-2 text-sm text-gray-600 md:flex-row md:items-center md:justify-between"
>
<p>
Showing <span class="font-semibold text-gray-900">{jobs.length}</span> of
<span class="font-semibold text-gray-900">{total}</span> jobs
@@ -444,103 +456,105 @@
<JobSkeleton rows={6} />
{:else}
{#each jobs as job (job.id)}
<tr class="hover:bg-gray-50">
<td class="px-6 py-4 text-sm font-medium whitespace-nowrap text-gray-900">
{job.repositoryId}
{#if job.versionId}
<span class="ml-1 text-xs text-gray-500">@{job.versionId}</span>
{/if}
<div class="mt-1 text-xs text-gray-400">{job.id}</div>
</td>
<td class="px-6 py-4 text-sm whitespace-nowrap text-gray-500">
<JobStatusBadge status={job.status} spinning={job.status === 'running'} />
</td>
<td class="px-6 py-4 text-sm whitespace-nowrap text-gray-500">
<div class="flex items-center gap-2">
<span>{getStageLabel(job.stage)}</span>
{#if job.stageDetail}
<span class="text-xs text-gray-400">{job.stageDetail}</span>
<tr class="hover:bg-gray-50">
<td class="px-6 py-4 text-sm font-medium whitespace-nowrap text-gray-900">
{job.repositoryId}
{#if job.versionId}
<span class="ml-1 text-xs text-gray-500">@{job.versionId}</span>
{/if}
</div>
</td>
<td class="px-6 py-4 text-sm whitespace-nowrap text-gray-500">
<div class="space-y-2">
<div class="mt-1 text-xs text-gray-400">{job.id}</div>
</td>
<td class="px-6 py-4 text-sm whitespace-nowrap text-gray-500">
<JobStatusBadge status={job.status} spinning={job.status === 'running'} />
</td>
<td class="px-6 py-4 text-sm whitespace-nowrap text-gray-500">
<div class="flex items-center gap-2">
<span class="w-12 text-right text-xs font-semibold text-gray-600">{job.progress}%</span>
<div class="h-2 w-32 rounded-full bg-gray-200">
<div
class="h-2 rounded-full bg-blue-600 transition-all"
style="width: {job.progress}%"
></div>
</div>
<span>{getStageLabel(job.stage)}</span>
{#if job.stageDetail}
<span class="text-xs text-gray-400">{job.stageDetail}</span>
{/if}
</div>
{#if job.totalFiles > 0}
<div class="text-xs text-gray-400">
{job.processedFiles}/{job.totalFiles} files processed
</td>
<td class="px-6 py-4 text-sm whitespace-nowrap text-gray-500">
<div class="space-y-2">
<div class="flex items-center gap-2">
<span class="w-12 text-right text-xs font-semibold text-gray-600"
>{job.progress}%</span
>
<div class="h-2 w-32 rounded-full bg-gray-200">
<div
class="h-2 rounded-full bg-blue-600 transition-all"
style="width: {job.progress}%"
></div>
</div>
</div>
{/if}
</div>
</td>
<td class="px-6 py-4 text-sm whitespace-nowrap text-gray-500">
{formatDate(job.createdAt)}
</td>
<td class="px-6 py-4 text-right text-sm font-medium whitespace-nowrap">
<div class="flex justify-end gap-2">
{#if pendingCancelJobId === job.id}
<button
type="button"
onclick={() => void runJobAction(job, 'cancel')}
disabled={isRowBusy(job.id)}
class="rounded bg-red-600 px-3 py-1 text-xs font-semibold text-white hover:bg-red-700 disabled:cursor-not-allowed disabled:opacity-50"
>
{rowActions[job.id] === 'cancel' ? 'Cancelling...' : 'Confirm cancel'}
</button>
<button
type="button"
onclick={() => requestCancel(job.id)}
disabled={isRowBusy(job.id)}
class="rounded border border-gray-300 px-3 py-1 text-xs font-semibold text-gray-700 hover:border-gray-400 hover:text-gray-900 disabled:cursor-not-allowed disabled:opacity-50"
>
Keep job
</button>
{:else}
{#if canPause(job.status)}
{#if job.totalFiles > 0}
<div class="text-xs text-gray-400">
{job.processedFiles}/{job.totalFiles} files processed
</div>
{/if}
</div>
</td>
<td class="px-6 py-4 text-sm whitespace-nowrap text-gray-500">
{formatDate(job.createdAt)}
</td>
<td class="px-6 py-4 text-right text-sm font-medium whitespace-nowrap">
<div class="flex justify-end gap-2">
{#if pendingCancelJobId === job.id}
<button
type="button"
onclick={() => void runJobAction(job, 'pause')}
onclick={() => void runJobAction(job, 'cancel')}
disabled={isRowBusy(job.id)}
class="rounded bg-yellow-600 px-3 py-1 text-xs font-semibold text-white hover:bg-yellow-700 disabled:cursor-not-allowed disabled:opacity-50"
class="rounded bg-red-600 px-3 py-1 text-xs font-semibold text-white hover:bg-red-700 disabled:cursor-not-allowed disabled:opacity-50"
>
{rowActions[job.id] === 'pause' ? 'Pausing...' : 'Pause'}
{rowActions[job.id] === 'cancel' ? 'Cancelling...' : 'Confirm cancel'}
</button>
{/if}
{#if canResume(job.status)}
<button
type="button"
onclick={() => void runJobAction(job, 'resume')}
disabled={isRowBusy(job.id)}
class="rounded bg-green-600 px-3 py-1 text-xs font-semibold text-white hover:bg-green-700 disabled:cursor-not-allowed disabled:opacity-50"
>
{rowActions[job.id] === 'resume' ? 'Resuming...' : 'Resume'}
</button>
{/if}
{#if canCancel(job.status)}
<button
type="button"
onclick={() => requestCancel(job.id)}
disabled={isRowBusy(job.id)}
class="rounded bg-red-600 px-3 py-1 text-xs font-semibold text-white hover:bg-red-700 disabled:cursor-not-allowed disabled:opacity-50"
class="rounded border border-gray-300 px-3 py-1 text-xs font-semibold text-gray-700 hover:border-gray-400 hover:text-gray-900 disabled:cursor-not-allowed disabled:opacity-50"
>
Cancel
Keep job
</button>
{:else}
{#if canPause(job.status)}
<button
type="button"
onclick={() => void runJobAction(job, 'pause')}
disabled={isRowBusy(job.id)}
class="rounded bg-yellow-600 px-3 py-1 text-xs font-semibold text-white hover:bg-yellow-700 disabled:cursor-not-allowed disabled:opacity-50"
>
{rowActions[job.id] === 'pause' ? 'Pausing...' : 'Pause'}
</button>
{/if}
{#if canResume(job.status)}
<button
type="button"
onclick={() => void runJobAction(job, 'resume')}
disabled={isRowBusy(job.id)}
class="rounded bg-green-600 px-3 py-1 text-xs font-semibold text-white hover:bg-green-700 disabled:cursor-not-allowed disabled:opacity-50"
>
{rowActions[job.id] === 'resume' ? 'Resuming...' : 'Resume'}
</button>
{/if}
{#if canCancel(job.status)}
<button
type="button"
onclick={() => requestCancel(job.id)}
disabled={isRowBusy(job.id)}
class="rounded bg-red-600 px-3 py-1 text-xs font-semibold text-white hover:bg-red-700 disabled:cursor-not-allowed disabled:opacity-50"
>
Cancel
</button>
{/if}
{#if !canPause(job.status) && !canResume(job.status) && !canCancel(job.status)}
<span class="text-xs text-gray-400"></span>
{/if}
{/if}
{#if !canPause(job.status) && !canResume(job.status) && !canCancel(job.status)}
<span class="text-xs text-gray-400"></span>
{/if}
{/if}
</div>
</td>
</tr>
</div>
</td>
</tr>
{/each}
{/if}
</tbody>
@@ -553,4 +567,4 @@
{/if}
</div>
<Toast bind:toasts={toasts} />
<Toast bind:toasts />

View File

@@ -36,9 +36,10 @@ function getServices(db: ReturnType<typeof getClient>) {
// Load the active embedding profile from the database
const profileRow = db
.prepare<[], EmbeddingProfileEntityProps>(
'SELECT * FROM embedding_profiles WHERE is_default = 1 AND enabled = 1 LIMIT 1'
)
.prepare<
[],
EmbeddingProfileEntityProps
>('SELECT * FROM embedding_profiles WHERE is_default = 1 AND enabled = 1 LIMIT 1')
.get();
const profile = profileRow
@@ -227,10 +228,7 @@ export const GET: RequestHandler = async ({ url }) => {
// Fall back to commit hash prefix match (min 7 chars).
if (!resolvedVersion && parsed.version.length >= 7) {
resolvedVersion = db
.prepare<
[string, string],
RawVersionRow
>(
.prepare<[string, string], RawVersionRow>(
`SELECT id, tag FROM repository_versions
WHERE repository_id = ? AND commit_hash LIKE ?`
)
@@ -261,14 +259,14 @@ export const GET: RequestHandler = async ({ url }) => {
const selectedResults = applyTokenBudget
? (() => {
const snippets = searchResults.map((r) => r.snippet);
const selected = selectSnippetsWithinBudget(snippets, maxTokens);
const snippets = searchResults.map((r) => r.snippet);
const selected = selectSnippetsWithinBudget(snippets, maxTokens);
return selected.map((snippet) => {
const found = searchResults.find((r) => r.snippet.id === snippet.id)!;
return found;
});
})()
return selected.map((snippet) => {
const found = searchResults.find((r) => r.snippet.id === snippet.id)!;
return found;
});
})()
: searchResults;
const snippetVersionIds = Array.from(

View File

@@ -22,17 +22,23 @@ const VALID_JOB_STATUSES: ReadonlySet<IndexingJob['status']> = new Set([
'failed'
]);
function parseStatusFilter(searchValue: string | null): IndexingJob['status'] | Array<IndexingJob['status']> | undefined {
function parseStatusFilter(
searchValue: string | null
): IndexingJob['status'] | Array<IndexingJob['status']> | undefined {
if (!searchValue) {
return undefined;
}
const statuses = [...new Set(
searchValue
.split(',')
.map((value) => value.trim())
.filter((value): value is IndexingJob['status'] => VALID_JOB_STATUSES.has(value as IndexingJob['status']))
)];
const statuses = [
...new Set(
searchValue
.split(',')
.map((value) => value.trim())
.filter((value): value is IndexingJob['status'] =>
VALID_JOB_STATUSES.has(value as IndexingJob['status'])
)
)
];
if (statuses.length === 0) {
return undefined;

View File

@@ -51,7 +51,9 @@ export const GET: RequestHandler = ({ params, request }) => {
if (lastEventId) {
const lastEvent = broadcaster.getLastEvent(jobId);
if (lastEvent && lastEvent.id >= parseInt(lastEventId, 10)) {
controller.enqueue(`id: ${lastEvent.id}\nevent: ${lastEvent.event}\ndata: ${lastEvent.data}\n\n`);
controller.enqueue(
`id: ${lastEvent.id}\nevent: ${lastEvent.event}\ndata: ${lastEvent.data}\n\n`
);
}
}
@@ -80,10 +82,7 @@ export const GET: RequestHandler = ({ params, request }) => {
controller.enqueue(value);
// Check if the incoming event indicates job completion
if (
value.includes('event: job-done') ||
value.includes('event: job-failed')
) {
if (value.includes('event: job-done') || value.includes('event: job-failed')) {
controller.close();
break;
}
@@ -111,7 +110,7 @@ export const GET: RequestHandler = ({ params, request }) => {
headers: {
'Content-Type': 'text/event-stream',
'Cache-Control': 'no-cache',
'Connection': 'keep-alive',
Connection: 'keep-alive',
'X-Accel-Buffering': 'no',
'Access-Control-Allow-Origin': '*'
}

View File

@@ -30,7 +30,7 @@ export const GET: RequestHandler = ({ url }) => {
headers: {
'Content-Type': 'text/event-stream',
'Cache-Control': 'no-cache',
'Connection': 'keep-alive',
Connection: 'keep-alive',
'X-Accel-Buffering': 'no',
'Access-Control-Allow-Origin': '*'
}

View File

@@ -124,9 +124,11 @@ describe('POST /api/v1/libs/:id/index', () => {
versionService.add('/facebook/react', 'v18.3.0', 'React v18.3.0');
versionService.add('/facebook/react', 'v17.0.0', 'React v17.0.0');
const enqueue = vi.fn().mockImplementation(
(repositoryId: string, versionId?: string) => makeEnqueueJob(repositoryId, versionId)
);
const enqueue = vi
.fn()
.mockImplementation((repositoryId: string, versionId?: string) =>
makeEnqueueJob(repositoryId, versionId)
);
mockQueue = { enqueue };
const response = await postIndex({
@@ -158,9 +160,11 @@ describe('POST /api/v1/libs/:id/index', () => {
repoService.add({ source: 'github', sourceUrl: 'https://github.com/facebook/react' });
versionService.add('/facebook/react', 'v18.3.0', 'React v18.3.0');
const enqueue = vi.fn().mockImplementation(
(repositoryId: string, versionId?: string) => makeEnqueueJob(repositoryId, versionId)
);
const enqueue = vi
.fn()
.mockImplementation((repositoryId: string, versionId?: string) =>
makeEnqueueJob(repositoryId, versionId)
);
mockQueue = { enqueue };
const response = await postIndex({

View File

@@ -49,7 +49,10 @@ function createTestDb(): Database.Database {
const client = new Database(':memory:');
client.pragma('foreign_keys = ON');
const migrationsFolder = join(import.meta.dirname, '../../../../../../../lib/server/db/migrations');
const migrationsFolder = join(
import.meta.dirname,
'../../../../../../../lib/server/db/migrations'
);
const ftsFile = join(import.meta.dirname, '../../../../../../../lib/server/db/fts.sql');
const migration0 = readFileSync(join(migrationsFolder, '0000_large_master_chief.sql'), 'utf-8');

View File

@@ -180,4 +180,4 @@ describe('embedding settings routes', () => {
}
});
});
});
});

View File

@@ -18,9 +18,10 @@ export const GET: RequestHandler = () => {
try {
const db = getClient();
const row = db
.prepare<[], { value: string }>(
"SELECT value FROM settings WHERE key = 'indexing.concurrency'"
)
.prepare<
[],
{ value: string }
>("SELECT value FROM settings WHERE key = 'indexing.concurrency'")
.get();
let concurrency = 2;
@@ -54,13 +55,13 @@ export const PUT: RequestHandler = async ({ request }) => {
// Validate and clamp concurrency
const maxConcurrency = Math.max(os.cpus().length - 1, 1);
const concurrency = Math.max(1, Math.min(parseInt(String(body.concurrency ?? 2), 10), maxConcurrency));
const concurrency = Math.max(
1,
Math.min(parseInt(String(body.concurrency ?? 2), 10), maxConcurrency)
);
if (isNaN(concurrency)) {
return json(
{ error: 'Concurrency must be a valid integer' },
{ status: 400 }
);
return json({ error: 'Concurrency must be a valid integer' }, { status: 400 });
}
const db = getClient();

View File

@@ -18,7 +18,8 @@ import type { ProgressBroadcaster as BroadcasterType } from '$lib/server/pipelin
let db: Database.Database;
// Closed over by the vi.mock factory below.
let mockBroadcaster: BroadcasterType | null = null;
let mockPool: { getStatus: () => object; setMaxConcurrency?: (value: number) => void } | null = null;
let mockPool: { getStatus: () => object; setMaxConcurrency?: (value: number) => void } | null =
null;
vi.mock('$lib/server/db/client', () => ({
getClient: () => db
@@ -39,7 +40,8 @@ vi.mock('$lib/server/pipeline/startup.js', () => ({
}));
vi.mock('$lib/server/pipeline/progress-broadcaster', async (importOriginal) => {
const original = await importOriginal<typeof import('$lib/server/pipeline/progress-broadcaster.js')>();
const original =
await importOriginal<typeof import('$lib/server/pipeline/progress-broadcaster.js')>();
return {
...original,
getBroadcaster: () => mockBroadcaster
@@ -47,7 +49,8 @@ vi.mock('$lib/server/pipeline/progress-broadcaster', async (importOriginal) => {
});
vi.mock('$lib/server/pipeline/progress-broadcaster.js', async (importOriginal) => {
const original = await importOriginal<typeof import('$lib/server/pipeline/progress-broadcaster.js')>();
const original =
await importOriginal<typeof import('$lib/server/pipeline/progress-broadcaster.js')>();
return {
...original,
getBroadcaster: () => mockBroadcaster
@@ -62,7 +65,10 @@ import { ProgressBroadcaster } from '$lib/server/pipeline/progress-broadcaster.j
import { GET as getJobsList } from './jobs/+server.js';
import { GET as getJobStream } from './jobs/[id]/stream/+server.js';
import { GET as getJobsStream } from './jobs/stream/+server.js';
import { GET as getIndexingSettings, PUT as putIndexingSettings } from './settings/indexing/+server.js';
import {
GET as getIndexingSettings,
PUT as putIndexingSettings
} from './settings/indexing/+server.js';
import { GET as getWorkers } from './workers/+server.js';
// ---------------------------------------------------------------------------
@@ -84,7 +90,10 @@ function createTestDb(): Database.Database {
'0005_fix_stage_defaults.sql'
]) {
const sql = readFileSync(join(migrationsFolder, migrationFile), 'utf-8');
for (const stmt of sql.split('--> statement-breakpoint').map((s) => s.trim()).filter(Boolean)) {
for (const stmt of sql
.split('--> statement-breakpoint')
.map((s) => s.trim())
.filter(Boolean)) {
client.exec(stmt);
}
}
@@ -201,9 +210,7 @@ describe('GET /api/v1/jobs/:id/stream', () => {
it('returns 404 when the job does not exist', async () => {
seedRepo(db);
const response = await getJobStream(
makeEvent({ params: { id: 'non-existent-job-id' } })
);
const response = await getJobStream(makeEvent({ params: { id: 'non-existent-job-id' } }));
expect(response.status).toBe(404);
});
@@ -363,7 +370,9 @@ describe('GET /api/v1/jobs/stream', () => {
const subscribeSpy = vi.spyOn(mockBroadcaster!, 'subscribeRepository');
await getJobsStream(
makeEvent<Parameters<typeof getJobsStream>[0]>({ url: 'http://localhost/api/v1/jobs/stream?repositoryId=/test/repo' })
makeEvent<Parameters<typeof getJobsStream>[0]>({
url: 'http://localhost/api/v1/jobs/stream?repositoryId=/test/repo'
})
);
expect(subscribeSpy).toHaveBeenCalledWith('/test/repo');
@@ -383,7 +392,9 @@ describe('GET /api/v1/jobs/stream', () => {
seedRepo(db, '/repo/alpha');
const response = await getJobsStream(
makeEvent<Parameters<typeof getJobsStream>[0]>({ url: 'http://localhost/api/v1/jobs/stream?repositoryId=/repo/alpha' })
makeEvent<Parameters<typeof getJobsStream>[0]>({
url: 'http://localhost/api/v1/jobs/stream?repositoryId=/repo/alpha'
})
);
// Broadcast an event for this repository
@@ -521,7 +532,9 @@ describe('GET /api/v1/settings/indexing', () => {
});
it('returns { concurrency: 2 } when no setting exists in DB', async () => {
const response = await getIndexingSettings(makeEvent<Parameters<typeof getIndexingSettings>[0]>({}));
const response = await getIndexingSettings(
makeEvent<Parameters<typeof getIndexingSettings>[0]>({})
);
const body = await response.json();
expect(response.status).toBe(200);
@@ -533,7 +546,9 @@ describe('GET /api/v1/settings/indexing', () => {
"INSERT INTO settings (key, value, updated_at) VALUES ('indexing.concurrency', ?, ?)"
).run(JSON.stringify(4), NOW_S);
const response = await getIndexingSettings(makeEvent<Parameters<typeof getIndexingSettings>[0]>({}));
const response = await getIndexingSettings(
makeEvent<Parameters<typeof getIndexingSettings>[0]>({})
);
const body = await response.json();
expect(body.concurrency).toBe(4);
@@ -544,7 +559,9 @@ describe('GET /api/v1/settings/indexing', () => {
"INSERT INTO settings (key, value, updated_at) VALUES ('indexing.concurrency', ?, ?)"
).run(JSON.stringify({ value: 5 }), NOW_S);
const response = await getIndexingSettings(makeEvent<Parameters<typeof getIndexingSettings>[0]>({}));
const response = await getIndexingSettings(
makeEvent<Parameters<typeof getIndexingSettings>[0]>({})
);
const body = await response.json();
expect(body.concurrency).toBe(5);
@@ -600,9 +617,10 @@ describe('PUT /api/v1/settings/indexing', () => {
await putIndexingSettings(makePutEvent({ concurrency: 3 }));
const row = db
.prepare<[], { value: string }>(
"SELECT value FROM settings WHERE key = 'indexing.concurrency'"
)
.prepare<
[],
{ value: string }
>("SELECT value FROM settings WHERE key = 'indexing.concurrency'")
.get();
expect(row).toBeDefined();
@@ -634,9 +652,7 @@ describe('PUT /api/v1/settings/indexing', () => {
// The actual flow: parseInt('abc') => NaN, Math.max(1, Math.min(NaN, max)) => NaN,
// then `if (isNaN(concurrency))` returns 400.
// We pass the raw string directly.
const response = await putIndexingSettings(
makePutEvent({ concurrency: 'not-a-number' })
);
const response = await putIndexingSettings(makePutEvent({ concurrency: 'not-a-number' }));
// parseInt('not-a-number') = NaN, so the handler should return 400
expect(response.status).toBe(400);

View File

@@ -13,4 +13,4 @@ export const GET: RequestHandler = () => {
} catch (error) {
return handleServiceError(error);
}
};
};

View File

@@ -39,8 +39,11 @@
indexedAt: string | null;
createdAt: string;
}
type VersionStateFilter = VersionDto['state'] | 'all';
let versions = $state<VersionDto[]>([]);
let versionsLoading = $state(false);
let activeVersionFilter = $state<VersionStateFilter>('all');
let bulkReprocessBusy = $state(false);
// Add version form
let addVersionTag = $state('');
@@ -49,7 +52,7 @@
// Discover tags state
let discoverBusy = $state(false);
let discoveredTags = $state<Array<{ tag: string; commitHash: string }>>([]);
let selectedDiscoveredTags = new SvelteSet<string>();
const selectedDiscoveredTags = new SvelteSet<string>();
let showDiscoverPanel = $state(false);
let registerBusy = $state(false);
@@ -76,6 +79,14 @@
error: 'Error'
};
const versionFilterOptions: Array<{ value: VersionStateFilter; label: string }> = [
{ value: 'all', label: 'All' },
{ value: 'pending', label: stateLabels.pending },
{ value: 'indexing', label: stateLabels.indexing },
{ value: 'indexed', label: stateLabels.indexed },
{ value: 'error', label: stateLabels.error }
];
const stageLabels: Record<string, string> = {
queued: 'Queued',
differential: 'Diff',
@@ -88,6 +99,20 @@
failed: 'Failed'
};
const filteredVersions = $derived(
activeVersionFilter === 'all'
? versions
: versions.filter((version) => version.state === activeVersionFilter)
);
const actionableErroredTags = $derived(
versions
.filter((version) => version.state === 'error' && !activeVersionJobs[version.tag])
.map((version) => version.tag)
);
const activeVersionFilterLabel = $derived(
versionFilterOptions.find((option) => option.value === activeVersionFilter)?.label ?? 'All'
);
async function refreshRepo() {
try {
const res = await fetch(`/api/v1/libs/${encodeURIComponent(repo.id)}`);
@@ -123,9 +148,7 @@
if (!repo.id) return;
let stopped = false;
const es = new EventSource(
`/api/v1/jobs/stream?repositoryId=${encodeURIComponent(repo.id)}`
);
const es = new EventSource(`/api/v1/jobs/stream?repositoryId=${encodeURIComponent(repo.id)}`);
es.addEventListener('job-progress', (event) => {
if (stopped) return;
@@ -277,23 +300,58 @@
async function handleIndexVersion(tag: string) {
errorMessage = null;
try {
const res = await fetch(
`/api/v1/libs/${encodeURIComponent(repo.id)}/versions/${encodeURIComponent(tag)}/index`,
{ method: 'POST' }
);
if (!res.ok) {
const d = await res.json();
throw new Error(d.error ?? 'Failed to queue version indexing');
}
const d = await res.json();
if (d.job?.id) {
activeVersionJobs = { ...activeVersionJobs, [tag]: d.job.id };
const jobId = await queueVersionIndex(tag);
if (jobId) {
activeVersionJobs = { ...activeVersionJobs, [tag]: jobId };
}
} catch (e) {
errorMessage = (e as Error).message;
}
}
async function queueVersionIndex(tag: string): Promise<string | null> {
const res = await fetch(
`/api/v1/libs/${encodeURIComponent(repo.id)}/versions/${encodeURIComponent(tag)}/index`,
{ method: 'POST' }
);
if (!res.ok) {
const d = await res.json();
throw new Error(d.error ?? 'Failed to queue version indexing');
}
const d = await res.json();
return d.job?.id ?? null;
}
async function handleBulkReprocessErroredVersions() {
if (actionableErroredTags.length === 0) return;
bulkReprocessBusy = true;
errorMessage = null;
successMessage = null;
try {
const tags = [...actionableErroredTags];
const BATCH_SIZE = 5;
let next = { ...activeVersionJobs };
for (let i = 0; i < tags.length; i += BATCH_SIZE) {
const batch = tags.slice(i, i + BATCH_SIZE);
const jobIds = await Promise.all(batch.map((versionTag) => queueVersionIndex(versionTag)));
for (let j = 0; j < batch.length; j++) {
if (jobIds[j]) {
next = { ...next, [batch[j]]: jobIds[j] ?? undefined };
}
}
activeVersionJobs = next;
}
successMessage = `Queued ${tags.length} errored tag${tags.length === 1 ? '' : 's'} for reprocessing.`;
await loadVersions();
} catch (e) {
errorMessage = (e as Error).message;
} finally {
bulkReprocessBusy = false;
}
}
async function handleRemoveVersion() {
if (!removeTag) return;
const tag = removeTag;
@@ -318,10 +376,9 @@
discoverBusy = true;
errorMessage = null;
try {
const res = await fetch(
`/api/v1/libs/${encodeURIComponent(repo.id)}/versions/discover`,
{ method: 'POST' }
);
const res = await fetch(`/api/v1/libs/${encodeURIComponent(repo.id)}/versions/discover`, {
method: 'POST'
});
if (!res.ok) {
const d = await res.json();
throw new Error(d.error ?? 'Failed to discover tags');
@@ -331,7 +388,10 @@
discoveredTags = (d.tags ?? []).filter(
(t: { tag: string; commitHash: string }) => !registeredTags.has(t.tag)
);
selectedDiscoveredTags = new SvelteSet(discoveredTags.map((t) => t.tag));
selectedDiscoveredTags.clear();
for (const discoveredTag of discoveredTags) {
selectedDiscoveredTags.add(discoveredTag.tag);
}
showDiscoverPanel = true;
} catch (e) {
errorMessage = (e as Error).message;
@@ -380,7 +440,7 @@
activeVersionJobs = next;
showDiscoverPanel = false;
discoveredTags = [];
selectedDiscoveredTags = new SvelteSet();
selectedDiscoveredTags.clear();
await loadVersions();
} catch (e) {
errorMessage = (e as Error).message;
@@ -498,41 +558,69 @@
<!-- Versions -->
<div class="mt-6 rounded-xl border border-gray-200 bg-white p-5">
<div class="mb-4 flex flex-wrap items-center justify-between gap-3">
<h2 class="text-sm font-semibold text-gray-700">Versions</h2>
<div class="flex flex-wrap items-center gap-2">
<!-- Add version inline form -->
<form
onsubmit={(e) => {
e.preventDefault();
handleAddVersion();
}}
class="flex items-center gap-1.5"
>
<input
type="text"
bind:value={addVersionTag}
placeholder="e.g. v2.0.0"
class="rounded-lg border border-gray-200 px-3 py-1.5 text-sm text-gray-900 placeholder-gray-400 focus:border-blue-400 focus:outline-none"
/>
<div class="mb-4 flex flex-col gap-3">
<div class="flex flex-wrap items-center justify-between gap-3">
<div class="flex flex-wrap items-center gap-3">
<h2 class="text-sm font-semibold text-gray-700">Versions</h2>
<div class="flex flex-wrap items-center gap-1 rounded-lg bg-gray-100 p-1">
{#each versionFilterOptions as option (option.value)}
<button
type="button"
onclick={() => (activeVersionFilter = option.value)}
class="rounded-md px-2.5 py-1 text-xs font-medium transition-colors {activeVersionFilter ===
option.value
? 'bg-white text-gray-900 shadow-sm'
: 'text-gray-500 hover:text-gray-700'}"
>
{option.label}
</button>
{/each}
</div>
</div>
<div class="flex flex-wrap items-center gap-2">
<button
type="submit"
disabled={addVersionBusy || !addVersionTag.trim()}
class="rounded-lg bg-blue-600 px-3 py-1.5 text-sm font-medium text-white hover:bg-blue-700 disabled:cursor-not-allowed disabled:opacity-50"
type="button"
onclick={handleBulkReprocessErroredVersions}
disabled={bulkReprocessBusy || actionableErroredTags.length === 0}
class="rounded-lg border border-red-200 px-3 py-1.5 text-sm font-medium text-red-600 hover:bg-red-50 disabled:cursor-not-allowed disabled:opacity-50"
>
Add
{bulkReprocessBusy
? 'Reprocessing...'
: `Reprocess errored${actionableErroredTags.length > 0 ? ` (${actionableErroredTags.length})` : ''}`}
</button>
</form>
<!-- Discover tags button — local repos only -->
{#if repo.source === 'local'}
<button
onclick={handleDiscoverTags}
disabled={discoverBusy}
class="rounded-lg border border-gray-200 px-3 py-1.5 text-sm font-medium text-gray-700 hover:bg-gray-50 disabled:cursor-not-allowed disabled:opacity-50"
<!-- Add version inline form -->
<form
onsubmit={(e) => {
e.preventDefault();
handleAddVersion();
}}
class="flex items-center gap-1.5"
>
{discoverBusy ? 'Discovering...' : 'Discover tags'}
</button>
{/if}
<input
type="text"
bind:value={addVersionTag}
placeholder="e.g. v2.0.0"
class="rounded-lg border border-gray-200 px-3 py-1.5 text-sm text-gray-900 placeholder-gray-400 focus:border-blue-400 focus:outline-none"
/>
<button
type="submit"
disabled={addVersionBusy || !addVersionTag.trim()}
class="rounded-lg bg-blue-600 px-3 py-1.5 text-sm font-medium text-white hover:bg-blue-700 disabled:cursor-not-allowed disabled:opacity-50"
>
Add
</button>
</form>
<!-- Discover tags button — local repos only -->
{#if repo.source === 'local'}
<button
onclick={handleDiscoverTags}
disabled={discoverBusy}
class="rounded-lg border border-gray-200 px-3 py-1.5 text-sm font-medium text-gray-700 hover:bg-gray-50 disabled:cursor-not-allowed disabled:opacity-50"
>
{discoverBusy ? 'Discovering...' : 'Discover tags'}
</button>
{/if}
</div>
</div>
</div>
@@ -549,7 +637,7 @@
onclick={() => {
showDiscoverPanel = false;
discoveredTags = [];
selectedDiscoveredTags = new SvelteSet();
selectedDiscoveredTags.clear();
}}
class="text-xs text-blue-600 hover:underline"
>
@@ -567,7 +655,9 @@
class="rounded border-gray-300"
/>
<span class="font-mono text-gray-800">{discovered.tag}</span>
<span class="font-mono text-xs text-gray-400">{discovered.commitHash.slice(0, 8)}</span>
<span class="font-mono text-xs text-gray-400"
>{discovered.commitHash.slice(0, 8)}</span
>
</label>
{/each}
</div>
@@ -576,9 +666,7 @@
disabled={registerBusy || selectedDiscoveredTags.size === 0}
class="rounded-lg bg-blue-600 px-3 py-1.5 text-sm font-medium text-white hover:bg-blue-700 disabled:cursor-not-allowed disabled:opacity-50"
>
{registerBusy
? 'Registering...'
: `Register ${selectedDiscoveredTags.size} selected`}
{registerBusy ? 'Registering...' : `Register ${selectedDiscoveredTags.size} selected`}
</button>
{/if}
</div>
@@ -589,9 +677,15 @@
<p class="text-sm text-gray-400">Loading versions...</p>
{:else if versions.length === 0}
<p class="text-sm text-gray-400">No versions registered. Add a tag above to get started.</p>
{:else if filteredVersions.length === 0}
<div class="rounded-lg border border-dashed border-gray-200 bg-gray-50 px-4 py-5">
<p class="text-sm text-gray-500">
No versions match the {activeVersionFilterLabel.toLowerCase()} filter.
</p>
</div>
{:else}
<div class="divide-y divide-gray-100">
{#each versions as version (version.id)}
{#each filteredVersions as version (version.id)}
<div class="py-2.5">
<div class="flex items-center justify-between">
<div class="flex items-center gap-3">
@@ -609,7 +703,9 @@
disabled={version.state === 'indexing' || !!activeVersionJobs[version.tag]}
class="rounded-lg border border-blue-200 px-3 py-1 text-xs font-medium text-blue-600 hover:bg-blue-50 disabled:cursor-not-allowed disabled:opacity-50"
>
{version.state === 'indexing' || !!activeVersionJobs[version.tag] ? 'Indexing...' : 'Index'}
{version.state === 'indexing' || !!activeVersionJobs[version.tag]
? 'Indexing...'
: 'Index'}
</button>
<button
onclick={() => (removeTag = version.tag)}
@@ -625,12 +721,8 @@
version.totalSnippets > 0
? { text: `${version.totalSnippets} snippets`, mono: false }
: null,
version.commitHash
? { text: version.commitHash.slice(0, 8), mono: true }
: null,
version.indexedAt
? { text: formatDate(version.indexedAt), mono: false }
: null
version.commitHash ? { text: version.commitHash.slice(0, 8), mono: true } : null,
version.indexedAt ? { text: formatDate(version.indexedAt), mono: false } : null
] as Array<{ text: string; mono: boolean } | null>
).filter((p): p is { text: string; mono: boolean } => p !== null)}
<div class="mt-1 flex items-center gap-1.5">
@@ -638,7 +730,8 @@
{#if i > 0}
<span class="text-xs text-gray-300">·</span>
{/if}
<span class="text-xs text-gray-400{part.mono ? ' font-mono' : ''}">{part.text}</span>
<span class="text-xs text-gray-400{part.mono ? ' font-mono' : ''}">{part.text}</span
>
{/each}
</div>
{/if}
@@ -646,10 +739,12 @@
{@const job = versionJobProgress[activeVersionJobs[version.tag]!]}
<div class="mt-2">
<div class="flex justify-between text-xs text-gray-500">
<span>
{#if job?.stageDetail}{job.stageDetail}{:else}{(job?.processedFiles ?? 0).toLocaleString()} / {(job?.totalFiles ?? 0).toLocaleString()} files{/if}
{#if job?.stage}{' - ' + (stageLabels[job.stage] ?? job.stage)}{/if}
</span>
<span>
{#if job?.stageDetail}{job.stageDetail}{:else}{(
job?.processedFiles ?? 0
).toLocaleString()} / {(job?.totalFiles ?? 0).toLocaleString()} files{/if}
{#if job?.stage}{' - ' + (stageLabels[job.stage] ?? job.stage)}{/if}
</span>
<span>{job?.progress ?? 0}%</span>
</div>
<div class="mt-1 h-1.5 w-full rounded-full bg-gray-200">

View File

@@ -39,4 +39,4 @@ describe('/repos/[id] page server load', () => {
recentJobs: [{ id: 'job-1', repositoryId: '/facebook/react' }]
});
});
});
});

View File

@@ -30,4 +30,4 @@ describe('SvelteKit route file conventions', () => {
expect(reservedTestFiles).toEqual([]);
});
});
});

View File

@@ -6,7 +6,7 @@ import { EmbeddingSettingsService } from '$lib/server/services/embedding-setting
export const load: PageServerLoad = async () => {
const db = getClient();
const service = new EmbeddingSettingsService(db);
const settings = EmbeddingSettingsDtoMapper.toDto(service.getSettings());
@@ -20,11 +20,9 @@ export const load: PageServerLoad = async () => {
// Read indexing concurrency setting
let indexingConcurrency = 2;
const concurrencyRow = db
.prepare<[], { value: string }>(
"SELECT value FROM settings WHERE key = 'indexing.concurrency'"
)
.prepare<[], { value: string }>("SELECT value FROM settings WHERE key = 'indexing.concurrency'")
.get();
if (concurrencyRow && concurrencyRow.value) {
try {
const parsed = JSON.parse(concurrencyRow.value);
@@ -43,4 +41,4 @@ export const load: PageServerLoad = async () => {
localProviderAvailable,
indexingConcurrency
};
};
};

View File

@@ -199,7 +199,9 @@
}
function getOpenAiProfile(settings: EmbeddingSettingsDto): EmbeddingProfileDto | null {
return settings.profiles.find((profile) => profile.providerKind === 'openai-compatible') ?? null;
return (
settings.profiles.find((profile) => profile.providerKind === 'openai-compatible') ?? null
);
}
function resolveProvider(profile: EmbeddingProfileDto | null): 'none' | 'openai' | 'local' {
@@ -210,27 +212,30 @@
}
function resolveBaseUrl(settings: EmbeddingSettingsDto): string {
const profile = settings.activeProfile?.providerKind === 'openai-compatible'
? settings.activeProfile
: getOpenAiProfile(settings);
const profile =
settings.activeProfile?.providerKind === 'openai-compatible'
? settings.activeProfile
: getOpenAiProfile(settings);
return typeof profile?.config.baseUrl === 'string'
? profile.config.baseUrl
: 'https://api.openai.com/v1';
}
function resolveModel(settings: EmbeddingSettingsDto): string {
const profile = settings.activeProfile?.providerKind === 'openai-compatible'
? settings.activeProfile
: getOpenAiProfile(settings);
const profile =
settings.activeProfile?.providerKind === 'openai-compatible'
? settings.activeProfile
: getOpenAiProfile(settings);
return typeof profile?.config.model === 'string'
? profile.config.model
: profile?.model ?? 'text-embedding-3-small';
: (profile?.model ?? 'text-embedding-3-small');
}
function resolveDimensions(settings: EmbeddingSettingsDto): number | undefined {
const profile = settings.activeProfile?.providerKind === 'openai-compatible'
? settings.activeProfile
: getOpenAiProfile(settings);
const profile =
settings.activeProfile?.providerKind === 'openai-compatible'
? settings.activeProfile
: getOpenAiProfile(settings);
return profile?.dimensions ?? 1536;
}
@@ -296,34 +301,38 @@
<dt class="font-medium text-gray-500">Provider</dt>
<dd class="font-semibold text-gray-900">{activeProfile.providerKind}</dd>
<dt class="font-medium text-gray-500">Model</dt>
<dd class="break-all font-semibold text-gray-900">{activeProfile.model}</dd>
<dd class="font-semibold break-all text-gray-900">{activeProfile.model}</dd>
<dt class="font-medium text-gray-500">Dimensions</dt>
<dd class="font-semibold text-gray-900">{activeProfile.dimensions}</dd>
</div>
<div class="grid grid-cols-[110px_1fr] gap-x-4 gap-y-2 pt-3">
<dt class="text-gray-500">Enabled</dt>
<dd class="font-medium text-gray-800">{activeProfile.enabled ? 'Yes' : 'No'}</dd>
<dt class="text-gray-500">Default</dt>
<dd class="font-medium text-gray-800">{activeProfile.isDefault ? 'Yes' : 'No'}</dd>
<dt class="text-gray-500">Updated</dt>
<dd class="font-medium text-gray-800">{formatTimestamp(activeProfile.updatedAt)}</dd>
<dt class="text-gray-500">Enabled</dt>
<dd class="font-medium text-gray-800">{activeProfile.enabled ? 'Yes' : 'No'}</dd>
<dt class="text-gray-500">Default</dt>
<dd class="font-medium text-gray-800">{activeProfile.isDefault ? 'Yes' : 'No'}</dd>
<dt class="text-gray-500">Updated</dt>
<dd class="font-medium text-gray-800">{formatTimestamp(activeProfile.updatedAt)}</dd>
</div>
</dl>
</div>
<div class="rounded-lg border border-gray-200 bg-gray-50 p-4">
<p class="text-sm font-medium text-gray-800">Provider configuration</p>
<p class="mb-3 mt-1 text-sm text-gray-500">
<p class="mt-1 mb-3 text-sm text-gray-500">
These are the provider-specific settings currently saved for the active profile.
</p>
{#if activeConfigEntries.length > 0}
<ul class="space-y-2 text-sm">
{#each activeConfigEntries as entry (entry.key)}
<li class="flex items-start justify-between gap-4 border-b border-gray-200 pb-2 last:border-b-0 last:pb-0">
<li
class="flex items-start justify-between gap-4 border-b border-gray-200 pb-2 last:border-b-0 last:pb-0"
>
<span class="font-medium text-gray-600">{entry.key}</span>
<span class={entry.redacted ? 'text-gray-500' : 'text-gray-800'}>{entry.value}</span>
<span class={entry.redacted ? 'text-gray-500' : 'text-gray-800'}
>{entry.value}</span
>
</li>
{/each}
</ul>
@@ -332,9 +341,9 @@
No provider-specific configuration is stored for this profile.
</p>
<p class="mt-2 text-sm text-gray-500">
For <span class="font-medium text-gray-700">OpenAI-compatible</span> profiles, edit the
settings in the <span class="font-medium text-gray-700">Embedding Provider</span> form
below. The built-in <span class="font-medium text-gray-700">Local Model</span> profile
For <span class="font-medium text-gray-700">OpenAI-compatible</span> profiles, edit
the settings in the <span class="font-medium text-gray-700">Embedding Provider</span>
form below. The built-in <span class="font-medium text-gray-700">Local Model</span> profile
does not currently expose extra configurable fields.
</p>
{/if}
@@ -342,14 +351,17 @@
</div>
{:else}
<div class="rounded-lg border border-amber-200 bg-amber-50 p-4 text-sm text-amber-800">
Embeddings are currently disabled. Keyword search remains available, but no embedding profile is active.
Embeddings are currently disabled. Keyword search remains available, but no embedding
profile is active.
</div>
{/if}
</div>
<div class="rounded-xl border border-gray-200 bg-white p-6">
<h2 class="mb-1 text-base font-semibold text-gray-900">Profile Inventory</h2>
<p class="mb-4 text-sm text-gray-500">Profiles stored in the database and available for activation.</p>
<p class="mb-4 text-sm text-gray-500">
Profiles stored in the database and available for activation.
</p>
<div class="grid grid-cols-2 gap-3">
<StatBadge label="Profiles" value={String(currentSettings.profiles.length)} />
<StatBadge label="Active" value={activeProfile ? '1' : '0'} />
@@ -363,7 +375,9 @@
<p class="text-gray-500">{profile.id}</p>
</div>
{#if profile.id === currentSettings.activeProfileId}
<span class="rounded-full bg-blue-50 px-2 py-0.5 text-xs font-medium text-blue-700">Active</span>
<span class="rounded-full bg-blue-50 px-2 py-0.5 text-xs font-medium text-blue-700"
>Active</span
>
{/if}
</div>
</div>
@@ -379,238 +393,234 @@
</p>
<form class="space-y-4" onsubmit={handleSubmit}>
<!-- Provider selector -->
<div class="mb-4 flex gap-2">
{#each ['none', 'openai', 'local'] as p (p)}
<button
type="button"
onclick={() => {
provider = p as 'none' | 'openai' | 'local';
testStatus = 'idle';
testError = null;
}}
class={[
'rounded-lg px-4 py-2 text-sm',
provider === p
? 'bg-blue-600 text-white'
: 'border border-gray-200 text-gray-700 hover:bg-gray-50'
].join(' ')}
>
{p === 'none'
? 'None (FTS5 only)'
: p === 'openai'
? 'OpenAI-compatible'
: 'Local Model'}
</button>
{/each}
<!-- Provider selector -->
<div class="mb-4 flex gap-2">
{#each ['none', 'openai', 'local'] as p (p)}
<button
type="button"
onclick={() => {
provider = p as 'none' | 'openai' | 'local';
testStatus = 'idle';
testError = null;
}}
class={[
'rounded-lg px-4 py-2 text-sm',
provider === p
? 'bg-blue-600 text-white'
: 'border border-gray-200 text-gray-700 hover:bg-gray-50'
].join(' ')}
>
{p === 'none' ? 'None (FTS5 only)' : p === 'openai' ? 'OpenAI-compatible' : 'Local Model'}
</button>
{/each}
</div>
<!-- None warning -->
{#if provider === 'none'}
<div class="rounded-lg border border-amber-200 bg-amber-50 p-3 text-sm text-amber-700">
Search will use keyword matching only. Results may be less relevant for complex questions.
</div>
{/if}
<!-- None warning -->
{#if provider === 'none'}
<div class="rounded-lg border border-amber-200 bg-amber-50 p-3 text-sm text-amber-700">
Search will use keyword matching only. Results may be less relevant for complex questions.
</div>
{/if}
<!-- OpenAI-compatible form -->
{#if provider === 'openai'}
<div class="space-y-3">
<!-- Preset buttons -->
<div class="flex flex-wrap gap-2">
{#each PROVIDER_PRESETS as preset (preset.name)}
<button
type="button"
onclick={() => applyPreset(preset)}
class="rounded border border-gray-200 px-2.5 py-1 text-xs text-gray-600 hover:bg-gray-50"
>
{preset.name}
</button>
{/each}
</div>
<label class="block" for="embedding-base-url">
<span class="text-sm font-medium text-gray-700">Base URL</span>
<input
id="embedding-base-url"
name="baseUrl"
type="text"
autocomplete="url"
bind:value={baseUrl}
class="mt-1 w-full rounded-lg border border-gray-300 px-3 py-2 text-sm focus:border-blue-500 focus:outline-none"
/>
</label>
<label class="block" for="embedding-api-key">
<span class="text-sm font-medium text-gray-700">API Key</span>
<input
id="embedding-api-key"
name="apiKey"
type="password"
autocomplete="off"
bind:value={apiKey}
placeholder="sk-…"
class="mt-1 w-full rounded-lg border border-gray-300 px-3 py-2 text-sm focus:border-blue-500 focus:outline-none"
/>
</label>
<label class="block" for="embedding-model">
<span class="text-sm font-medium text-gray-700">Model</span>
<input
id="embedding-model"
name="model"
type="text"
autocomplete="off"
bind:value={model}
class="mt-1 w-full rounded-lg border border-gray-300 px-3 py-2 text-sm focus:border-blue-500 focus:outline-none"
/>
</label>
<label class="block" for="embedding-dimensions">
<span class="text-sm font-medium text-gray-700">Dimensions (optional override)</span>
<input
id="embedding-dimensions"
name="dimensions"
type="number"
inputmode="numeric"
bind:value={dimensions}
class="mt-1 w-full rounded-lg border border-gray-300 px-3 py-2 text-sm focus:border-blue-500 focus:outline-none"
/>
</label>
<!-- Test connection row -->
<div class="flex items-center gap-3">
<!-- OpenAI-compatible form -->
{#if provider === 'openai'}
<div class="space-y-3">
<!-- Preset buttons -->
<div class="flex flex-wrap gap-2">
{#each PROVIDER_PRESETS as preset (preset.name)}
<button
type="button"
onclick={testConnection}
disabled={testStatus === 'testing'}
class="rounded-lg border border-gray-300 px-3 py-1.5 text-sm hover:bg-gray-50 disabled:opacity-50"
onclick={() => applyPreset(preset)}
class="rounded border border-gray-200 px-2.5 py-1 text-xs text-gray-600 hover:bg-gray-50"
>
{testStatus === 'testing' ? 'Testing…' : 'Test Connection'}
{preset.name}
</button>
{#if testStatus === 'ok'}
<span class="text-sm text-green-600">
Connection successful
{#if testDimensions}{testDimensions} dimensions{/if}
</span>
{:else if testStatus === 'error'}
<span class="text-sm text-red-600">
{testError}
</span>
{/if}
</div>
</div>
{/if}
<!-- Local model section -->
{#if provider === 'local'}
<div class="rounded-lg border border-gray-200 bg-gray-50 p-4 text-sm">
<p class="font-medium text-gray-800">Local ONNX model via @xenova/transformers</p>
<p class="mt-1 text-gray-500">Model: Xenova/all-MiniLM-L6-v2 · 384 dimensions</p>
{#if getInitialLocalProviderAvailability()}
<p class="mt-2 text-green-600">@xenova/transformers is installed and ready.</p>
{:else}
<p class="mt-2 text-amber-700">
@xenova/transformers is not installed. Run
<code class="rounded bg-amber-100 px-1 py-0.5 font-mono text-xs"
>npm install @xenova/transformers</code
>
to enable local embeddings.
</p>
{/if}
</div>
{/if}
<!-- Indexing section -->
<div class="space-y-3 rounded-lg border border-gray-200 bg-white p-4">
<div>
<label for="concurrency" class="block text-sm font-medium text-gray-700">
Concurrent Workers
</label>
<p class="mt-0.5 text-xs text-gray-500">
Number of parallel indexing workers. Range: 1 to 8.
</p>
{/each}
</div>
<div class="flex items-center gap-3">
<label class="block" for="embedding-base-url">
<span class="text-sm font-medium text-gray-700">Base URL</span>
<input
id="concurrency"
type="number"
min="1"
max="8"
inputmode="numeric"
bind:value={concurrencyInput}
disabled={concurrencySaving}
class="w-20 rounded-lg border border-gray-300 px-3 py-2 text-sm focus:border-blue-500 focus:outline-none disabled:opacity-50"
id="embedding-base-url"
name="baseUrl"
type="text"
autocomplete="url"
bind:value={baseUrl}
class="mt-1 w-full rounded-lg border border-gray-300 px-3 py-2 text-sm focus:border-blue-500 focus:outline-none"
/>
</label>
<label class="block" for="embedding-api-key">
<span class="text-sm font-medium text-gray-700">API Key</span>
<input
id="embedding-api-key"
name="apiKey"
type="password"
autocomplete="off"
bind:value={apiKey}
placeholder="sk-…"
class="mt-1 w-full rounded-lg border border-gray-300 px-3 py-2 text-sm focus:border-blue-500 focus:outline-none"
/>
</label>
<label class="block" for="embedding-model">
<span class="text-sm font-medium text-gray-700">Model</span>
<input
id="embedding-model"
name="model"
type="text"
autocomplete="off"
bind:value={model}
class="mt-1 w-full rounded-lg border border-gray-300 px-3 py-2 text-sm focus:border-blue-500 focus:outline-none"
/>
</label>
<label class="block" for="embedding-dimensions">
<span class="text-sm font-medium text-gray-700">Dimensions (optional override)</span>
<input
id="embedding-dimensions"
name="dimensions"
type="number"
inputmode="numeric"
bind:value={dimensions}
class="mt-1 w-full rounded-lg border border-gray-300 px-3 py-2 text-sm focus:border-blue-500 focus:outline-none"
/>
</label>
<!-- Test connection row -->
<div class="flex items-center gap-3">
<button
type="button"
onclick={saveConcurrency}
disabled={concurrencySaving}
class="rounded-lg bg-blue-600 px-3 py-2 text-sm text-white hover:bg-blue-700 disabled:opacity-50"
onclick={testConnection}
disabled={testStatus === 'testing'}
class="rounded-lg border border-gray-300 px-3 py-1.5 text-sm hover:bg-gray-50 disabled:opacity-50"
>
{concurrencySaving ? 'Saving…' : 'Save'}
{testStatus === 'testing' ? 'Testing…' : 'Test Connection'}
</button>
{#if concurrencySaveStatus === 'ok'}
<span class="text-sm text-green-600">✓ Saved</span>
{:else if concurrencySaveStatus === 'error'}
<span class="text-sm text-red-600">{concurrencySaveError}</span>
{#if testStatus === 'ok'}
<span class="text-sm text-green-600">
Connection successful
{#if testDimensions}{testDimensions} dimensions{/if}
</span>
{:else if testStatus === 'error'}
<span class="text-sm text-red-600">
{testError}
</span>
{/if}
</div>
</div>
{/if}
<!-- Save feedback banners -->
{#if saveStatus === 'ok'}
<div
class="mt-4 flex items-center gap-2 rounded-lg border border-green-200 bg-green-50 px-4 py-3 text-sm font-medium text-green-700"
>
<svg
xmlns="http://www.w3.org/2000/svg"
class="h-4 w-4 shrink-0"
viewBox="0 0 20 20"
fill="currentColor"
aria-hidden="true"
>
<path
fill-rule="evenodd"
d="M16.707 5.293a1 1 0 010 1.414l-8 8a1 1 0 01-1.414 0l-4-4a1 1 0 011.414-1.414L8 12.586l7.293-7.293a1 1 0 011.414 0z"
clip-rule="evenodd"
/>
</svg>
Settings saved successfully.
</div>
{:else if saveStatus === 'error'}
<div
class="mt-4 flex items-center gap-2 rounded-lg border border-red-200 bg-red-50 px-4 py-3 text-sm font-medium text-red-700"
>
<svg
xmlns="http://www.w3.org/2000/svg"
class="h-4 w-4 shrink-0"
viewBox="0 0 20 20"
fill="currentColor"
aria-hidden="true"
>
<path
fill-rule="evenodd"
d="M18 10a8 8 0 11-16 0 8 8 0 0116 0zm-7 4a1 1 0 11-2 0 1 1 0 012 0zm-1-9a1 1 0 00-1 1v4a1 1 0 102 0V6a1 1 0 00-1-1z"
clip-rule="evenodd"
/>
</svg>
{saveError}
</div>
{/if}
<!-- Save row -->
<div class="mt-4 flex items-center justify-end">
<button
type="submit"
disabled={saving}
class="rounded-lg bg-blue-600 px-4 py-2 text-sm text-white hover:bg-blue-700 disabled:opacity-50"
>
{saving ? 'Saving…' : 'Save Settings'}
</button>
<!-- Local model section -->
{#if provider === 'local'}
<div class="rounded-lg border border-gray-200 bg-gray-50 p-4 text-sm">
<p class="font-medium text-gray-800">Local ONNX model via @xenova/transformers</p>
<p class="mt-1 text-gray-500">Model: Xenova/all-MiniLM-L6-v2 · 384 dimensions</p>
{#if getInitialLocalProviderAvailability()}
<p class="mt-2 text-green-600">@xenova/transformers is installed and ready.</p>
{:else}
<p class="mt-2 text-amber-700">
@xenova/transformers is not installed. Run
<code class="rounded bg-amber-100 px-1 py-0.5 font-mono text-xs"
>npm install @xenova/transformers</code
>
to enable local embeddings.
</p>
{/if}
</div>
{/if}
<!-- Indexing section -->
<div class="space-y-3 rounded-lg border border-gray-200 bg-white p-4">
<div>
<label for="concurrency" class="block text-sm font-medium text-gray-700">
Concurrent Workers
</label>
<p class="mt-0.5 text-xs text-gray-500">
Number of parallel indexing workers. Range: 1 to 8.
</p>
</div>
<div class="flex items-center gap-3">
<input
id="concurrency"
type="number"
min="1"
max="8"
inputmode="numeric"
bind:value={concurrencyInput}
disabled={concurrencySaving}
class="w-20 rounded-lg border border-gray-300 px-3 py-2 text-sm focus:border-blue-500 focus:outline-none disabled:opacity-50"
/>
<button
type="button"
onclick={saveConcurrency}
disabled={concurrencySaving}
class="rounded-lg bg-blue-600 px-3 py-2 text-sm text-white hover:bg-blue-700 disabled:opacity-50"
>
{concurrencySaving ? 'Saving…' : 'Save'}
</button>
{#if concurrencySaveStatus === 'ok'}
<span class="text-sm text-green-600">✓ Saved</span>
{:else if concurrencySaveStatus === 'error'}
<span class="text-sm text-red-600">{concurrencySaveError}</span>
{/if}
</div>
</div>
<!-- Save feedback banners -->
{#if saveStatus === 'ok'}
<div
class="mt-4 flex items-center gap-2 rounded-lg border border-green-200 bg-green-50 px-4 py-3 text-sm font-medium text-green-700"
>
<svg
xmlns="http://www.w3.org/2000/svg"
class="h-4 w-4 shrink-0"
viewBox="0 0 20 20"
fill="currentColor"
aria-hidden="true"
>
<path
fill-rule="evenodd"
d="M16.707 5.293a1 1 0 010 1.414l-8 8a1 1 0 01-1.414 0l-4-4a1 1 0 011.414-1.414L8 12.586l7.293-7.293a1 1 0 011.414 0z"
clip-rule="evenodd"
/>
</svg>
Settings saved successfully.
</div>
{:else if saveStatus === 'error'}
<div
class="mt-4 flex items-center gap-2 rounded-lg border border-red-200 bg-red-50 px-4 py-3 text-sm font-medium text-red-700"
>
<svg
xmlns="http://www.w3.org/2000/svg"
class="h-4 w-4 shrink-0"
viewBox="0 0 20 20"
fill="currentColor"
aria-hidden="true"
>
<path
fill-rule="evenodd"
d="M18 10a8 8 0 11-16 0 8 8 0 0116 0zm-7 4a1 1 0 11-2 0 1 1 0 012 0zm-1-9a1 1 0 00-1 1v4a1 1 0 102 0V6a1 1 0 00-1-1z"
clip-rule="evenodd"
/>
</svg>
{saveError}
</div>
{/if}
<!-- Save row -->
<div class="mt-4 flex items-center justify-end">
<button
type="submit"
disabled={saving}
class="rounded-lg bg-blue-600 px-4 py-2 text-sm text-white hover:bg-blue-700 disabled:opacity-50"
>
{saving ? 'Saving…' : 'Save Settings'}
</button>
</div>
</form>
</div>

View File

@@ -100,4 +100,4 @@ describe('/settings page server load', () => {
])
);
});
});
});