feat(TRUEREF-0023): add sqlite-vec search pipeline

This commit is contained in:
Giancarmine Salucci
2026-04-01 14:09:19 +02:00
parent 0752636847
commit 9525c58e9a
45 changed files with 4009 additions and 614 deletions

View File

@@ -7,39 +7,33 @@
/**
 * Live job tracking. Subscribes to the job's SSE stream and mirrors
 * progress / terminal events into local `job` state; on stream error it
 * falls back to a one-shot fetch so the UI is not left stale.
 *
 * NOTE(review): this span previously interleaved the old polling
 * implementation with the new SSE one (two cleanup returns, unbalanced
 * braces); reconstructed here as the coherent SSE-only version.
 */
$effect(() => {
	job = null;
	const es = new EventSource(`/api/v1/jobs/${jobId}/stream`);
	// Merge incremental progress payloads into the current job snapshot.
	es.addEventListener('job-progress', (event) => {
		const data = JSON.parse(event.data);
		job = { ...job, ...data } as IndexingJob;
	});
	// On completion, re-fetch the authoritative job record, then notify.
	es.addEventListener('job-done', () => {
		void fetch(`/api/v1/jobs/${jobId}`)
			.then((r) => r.json())
			.then((d) => {
				job = d.job;
				oncomplete?.();
			});
		es.close();
	});
	es.addEventListener('job-failed', (event) => {
		const data = JSON.parse(event.data);
		if (job) job = { ...job, status: 'failed', error: data.error ?? 'Unknown error' } as IndexingJob;
		oncomplete?.();
		es.close();
	});
	// Stream dropped: close it and take a single snapshot of current state.
	es.onerror = () => {
		es.close();
		void fetch(`/api/v1/jobs/${jobId}`).then((r) => r.json()).then((d) => { job = d.job; });
	};
	// Teardown: close the stream when the effect re-runs or unmounts.
	return () => es.close();
});
const progress = $derived(job?.progress ?? 0);

View File

@@ -0,0 +1,19 @@
<script lang="ts">
// Number of placeholder rows to render while the real table data loads.
let { rows = 5 }: { rows?: number } = $props();
</script>
<!-- Loading skeleton: one <tr> per row, each cell holding a pulsing gray
     placeholder sized roughly like the real content (title + subtitle,
     status pill, two text cells, progress bar, right-aligned action). -->
{#each Array(rows) as _, i (i)}
<tr>
<td class="px-6 py-4">
<div class="h-4 w-48 animate-pulse rounded bg-gray-200"></div>
<div class="mt-1 h-3 w-24 animate-pulse rounded bg-gray-100"></div>
</td>
<td class="px-6 py-4"><div class="h-5 w-16 animate-pulse rounded-full bg-gray-200"></div></td>
<td class="px-6 py-4"><div class="h-4 w-20 animate-pulse rounded bg-gray-200"></div></td>
<td class="px-6 py-4"><div class="h-2 w-32 animate-pulse rounded-full bg-gray-200"></div></td>
<td class="px-6 py-4"><div class="h-4 w-28 animate-pulse rounded bg-gray-200"></div></td>
<td class="px-6 py-4 text-right"
><div class="ml-auto h-7 w-20 animate-pulse rounded bg-gray-200"></div></td
>
</tr>
{/each}

View File

@@ -1,9 +1,10 @@
<script lang="ts">
interface Props {
status: 'queued' | 'running' | 'paused' | 'cancelled' | 'done' | 'failed';
spinning?: boolean;
}
let { status }: Props = $props();
let { status, spinning = false }: Props = $props();
const statusConfig: Record<typeof status, { bg: string; text: string; label: string }> = {
queued: { bg: 'bg-blue-100', text: 'text-blue-800', label: 'Queued' },
@@ -21,4 +22,9 @@
class="inline-flex items-center rounded-full px-2.5 py-0.5 text-xs font-medium {config.bg} {config.text}"
>
{config.label}
{#if spinning}
<span
class="ml-1 inline-block h-3 w-3 animate-spin rounded-full border-2 border-current border-r-transparent"
></span>
{/if}
</span>

View File

@@ -0,0 +1,77 @@
<script lang="ts">
import { onDestroy } from 'svelte';
// A single notification entry; `toasts` is owned by the parent via $bindable.
export interface ToastItem {
id: string;
message: string;
type: 'success' | 'error' | 'info';
}
let { toasts = $bindable([]) }: { toasts: ToastItem[] } = $props();
// One auto-dismiss timer per toast id.
const timers = new Map<string, ReturnType<typeof setTimeout>>();
// Keep timers in sync with the toast list: schedule a 4s auto-dismiss for
// each new toast, and clear timers whose toast was removed externally.
$effect(() => {
for (const toast of toasts) {
if (timers.has(toast.id)) {
continue;
}
const timer = setTimeout(() => {
dismiss(toast.id);
}, 4000);
timers.set(toast.id, timer);
}
for (const [id, timer] of timers.entries()) {
if (toasts.some((toast) => toast.id === id)) {
continue;
}
clearTimeout(timer);
timers.delete(id);
}
});
// Cancel every pending timer when the component unmounts.
onDestroy(() => {
for (const timer of timers.values()) {
clearTimeout(timer);
}
timers.clear();
});
// Remove a toast (and its timer) by id; writes back through the binding.
function dismiss(id: string) {
const timer = timers.get(id);
if (timer) {
clearTimeout(timer);
timers.delete(id);
}
toasts = toasts.filter((toast: ToastItem) => toast.id !== id);
}
</script>
<!-- Bottom-right toast stack; color keyed off toast.type. -->
<div class="fixed right-4 bottom-4 z-50 flex flex-col gap-2">
{#each toasts as toast (toast.id)}
<div
role="status"
aria-live="polite"
class="flex items-center gap-3 rounded-lg px-4 py-3 shadow-lg {toast.type === 'error'
? 'bg-red-600 text-white'
: toast.type === 'info'
? 'bg-blue-600 text-white'
: 'bg-green-600 text-white'}"
>
<span class="text-sm">{toast.message}</span>
<button
type="button"
aria-label="Dismiss notification"
onclick={() => dismiss(toast.id)}
class="ml-2 text-xs opacity-70 hover:opacity-100"
>
x
</button
>
</div>
{/each}
</div>

View File

@@ -0,0 +1,81 @@
<script lang="ts">
// Shape of one worker slot as reported by /api/v1/workers.
interface WorkerStatus {
index: number;
state: 'idle' | 'running';
jobId: string | null;
repositoryId: string | null;
versionId: string | null;
}
// Aggregate worker-pool status payload.
interface WorkersResponse {
concurrency: number;
active: number;
idle: number;
workers: WorkerStatus[];
}
let status = $state<WorkersResponse>({ concurrency: 0, active: 0, idle: 0, workers: [] });
let pollInterval: ReturnType<typeof setInterval> | null = null;
// One-shot snapshot fetch; transient failures are ignored on purpose.
async function fetchStatus() {
try {
const res = await fetch('/api/v1/workers');
if (res.ok) status = await res.json();
} catch {
/* ignore */
}
}
// Fetch once for initial state, then stream 'worker-status' updates over SSE.
// If the stream errors we close it and fall back to 5s polling.
// NOTE(review): after falling back, the SSE connection is never retried —
// confirm that is intended.
$effect(() => {
void fetchStatus();
const es = new EventSource('/api/v1/jobs/stream');
es.addEventListener('worker-status', (event) => {
try {
status = JSON.parse(event.data);
} catch {
/* ignore */
}
});
es.onerror = () => {
es.close();
if (!pollInterval) {
pollInterval = setInterval(() => void fetchStatus(), 5000);
}
};
return () => {
es.close();
if (pollInterval) {
clearInterval(pollInterval);
pollInterval = null;
}
};
});
</script>
<!-- Hidden until the first successful status fetch (concurrency > 0). -->
{#if status.concurrency > 0}
<div class="mb-4 rounded-lg border border-gray-200 bg-white p-4 shadow-sm">
<div class="mb-2 flex items-center justify-between">
<h3 class="text-sm font-semibold text-gray-700">Workers</h3>
<span class="text-xs text-gray-500">{status.active} / {status.concurrency} active</span>
</div>
<div class="space-y-1">
{#each status.workers as worker (worker.index)}
<div class="flex items-center gap-2 text-xs">
<span
class="flex h-2 w-2 rounded-full {worker.state === 'running'
? 'animate-pulse bg-green-500'
: 'bg-gray-300'}"
></span>
<span class="text-gray-600">Worker {worker.index}</span>
{#if worker.state === 'running' && worker.repositoryId}
<span class="truncate text-gray-400"
>{worker.repositoryId}{worker.versionId ? ' / ' + worker.versionId : ''}</span
>
{:else}
<span class="text-gray-400">idle</span>
{/if}
</div>
{/each}
</div>
</div>
{/if}

View File

@@ -4,6 +4,7 @@
*/
import Database from 'better-sqlite3';
import { env } from '$env/dynamic/private';
import { loadSqliteVec } from './sqlite-vec';
let _client: Database.Database | null = null;
@@ -14,6 +15,12 @@ export function getClient(): Database.Database {
_client.pragma('journal_mode = WAL');
_client.pragma('foreign_keys = ON');
_client.pragma('busy_timeout = 5000');
_client.pragma('synchronous = NORMAL');
_client.pragma('cache_size = -65536');
_client.pragma('temp_store = MEMORY');
_client.pragma('mmap_size = 268435456');
_client.pragma('wal_autocheckpoint = 1000');
loadSqliteVec(_client);
}
return _client;
}

View File

@@ -5,6 +5,7 @@ import { readFileSync } from 'node:fs';
import { fileURLToPath } from 'node:url';
import { join, dirname } from 'node:path';
import * as schema from './schema';
import { loadSqliteVec } from './sqlite-vec';
import { env } from '$env/dynamic/private';
if (!env.DATABASE_URL) throw new Error('DATABASE_URL is not set');
@@ -19,6 +20,12 @@ client.pragma('foreign_keys = ON');
// Prevents SQLITE_BUSY errors when the indexing pipeline holds the write lock
// and an HTTP request arrives simultaneously.
client.pragma('busy_timeout = 5000');
client.pragma('synchronous = NORMAL');
client.pragma('cache_size = -65536');
client.pragma('temp_store = MEMORY');
client.pragma('mmap_size = 268435456');
client.pragma('wal_autocheckpoint = 1000');
loadSqliteVec(client);
export const db = drizzle(client, { schema });

View File

@@ -0,0 +1,6 @@
-- Migration 0006: composite indexes supporting the sqlite-vec search
-- pipeline's hot lookup paths (embeddings by profile, documents/snippets
-- scoped to repository+version, jobs by repository+status, repositories
-- filtered by state).
CREATE INDEX `idx_embeddings_profile` ON `snippet_embeddings` (`profile_id`,`snippet_id`);--> statement-breakpoint
CREATE INDEX `idx_documents_repo_version` ON `documents` (`repository_id`,`version_id`);--> statement-breakpoint
CREATE INDEX `idx_jobs_repo_status` ON `indexing_jobs` (`repository_id`,`status`);--> statement-breakpoint
CREATE INDEX `idx_repositories_state` ON `repositories` (`state`);--> statement-breakpoint
CREATE INDEX `idx_snippets_repo_version` ON `snippets` (`repository_id`,`version_id`);--> statement-breakpoint
CREATE INDEX `idx_snippets_repo_type` ON `snippets` (`repository_id`,`type`);

View File

@@ -0,0 +1,948 @@
{
"version": "6",
"dialect": "sqlite",
"id": "b8998bda-f89b-41bc-b923-3f676d153c79",
"prevId": "c326dcbe-1771-4a90-a566-0ebd1eca47ec",
"tables": {
"documents": {
"name": "documents",
"columns": {
"id": {
"name": "id",
"type": "text",
"primaryKey": true,
"notNull": true,
"autoincrement": false
},
"repository_id": {
"name": "repository_id",
"type": "text",
"primaryKey": false,
"notNull": true,
"autoincrement": false
},
"version_id": {
"name": "version_id",
"type": "text",
"primaryKey": false,
"notNull": false,
"autoincrement": false
},
"file_path": {
"name": "file_path",
"type": "text",
"primaryKey": false,
"notNull": true,
"autoincrement": false
},
"title": {
"name": "title",
"type": "text",
"primaryKey": false,
"notNull": false,
"autoincrement": false
},
"language": {
"name": "language",
"type": "text",
"primaryKey": false,
"notNull": false,
"autoincrement": false
},
"token_count": {
"name": "token_count",
"type": "integer",
"primaryKey": false,
"notNull": false,
"autoincrement": false,
"default": 0
},
"checksum": {
"name": "checksum",
"type": "text",
"primaryKey": false,
"notNull": true,
"autoincrement": false
},
"indexed_at": {
"name": "indexed_at",
"type": "integer",
"primaryKey": false,
"notNull": true,
"autoincrement": false
}
},
"indexes": {
"idx_documents_repo_version": {
"name": "idx_documents_repo_version",
"columns": [
"repository_id",
"version_id"
],
"isUnique": false
}
},
"foreignKeys": {
"documents_repository_id_repositories_id_fk": {
"name": "documents_repository_id_repositories_id_fk",
"tableFrom": "documents",
"tableTo": "repositories",
"columnsFrom": [
"repository_id"
],
"columnsTo": [
"id"
],
"onDelete": "cascade",
"onUpdate": "no action"
},
"documents_version_id_repository_versions_id_fk": {
"name": "documents_version_id_repository_versions_id_fk",
"tableFrom": "documents",
"tableTo": "repository_versions",
"columnsFrom": [
"version_id"
],
"columnsTo": [
"id"
],
"onDelete": "cascade",
"onUpdate": "no action"
}
},
"compositePrimaryKeys": {},
"uniqueConstraints": {},
"checkConstraints": {}
},
"embedding_profiles": {
"name": "embedding_profiles",
"columns": {
"id": {
"name": "id",
"type": "text",
"primaryKey": true,
"notNull": true,
"autoincrement": false
},
"provider_kind": {
"name": "provider_kind",
"type": "text",
"primaryKey": false,
"notNull": true,
"autoincrement": false
},
"title": {
"name": "title",
"type": "text",
"primaryKey": false,
"notNull": true,
"autoincrement": false
},
"enabled": {
"name": "enabled",
"type": "integer",
"primaryKey": false,
"notNull": true,
"autoincrement": false,
"default": true
},
"is_default": {
"name": "is_default",
"type": "integer",
"primaryKey": false,
"notNull": true,
"autoincrement": false,
"default": false
},
"model": {
"name": "model",
"type": "text",
"primaryKey": false,
"notNull": true,
"autoincrement": false
},
"dimensions": {
"name": "dimensions",
"type": "integer",
"primaryKey": false,
"notNull": true,
"autoincrement": false
},
"config": {
"name": "config",
"type": "text",
"primaryKey": false,
"notNull": true,
"autoincrement": false
},
"created_at": {
"name": "created_at",
"type": "integer",
"primaryKey": false,
"notNull": true,
"autoincrement": false
},
"updated_at": {
"name": "updated_at",
"type": "integer",
"primaryKey": false,
"notNull": true,
"autoincrement": false
}
},
"indexes": {},
"foreignKeys": {},
"compositePrimaryKeys": {},
"uniqueConstraints": {},
"checkConstraints": {}
},
"indexing_jobs": {
"name": "indexing_jobs",
"columns": {
"id": {
"name": "id",
"type": "text",
"primaryKey": true,
"notNull": true,
"autoincrement": false
},
"repository_id": {
"name": "repository_id",
"type": "text",
"primaryKey": false,
"notNull": true,
"autoincrement": false
},
"version_id": {
"name": "version_id",
"type": "text",
"primaryKey": false,
"notNull": false,
"autoincrement": false
},
"status": {
"name": "status",
"type": "text",
"primaryKey": false,
"notNull": true,
"autoincrement": false,
"default": "'queued'"
},
"progress": {
"name": "progress",
"type": "integer",
"primaryKey": false,
"notNull": false,
"autoincrement": false,
"default": 0
},
"total_files": {
"name": "total_files",
"type": "integer",
"primaryKey": false,
"notNull": false,
"autoincrement": false,
"default": 0
},
"processed_files": {
"name": "processed_files",
"type": "integer",
"primaryKey": false,
"notNull": false,
"autoincrement": false,
"default": 0
},
"stage": {
"name": "stage",
"type": "text",
"primaryKey": false,
"notNull": true,
"autoincrement": false,
"default": "'queued'"
},
"stage_detail": {
"name": "stage_detail",
"type": "text",
"primaryKey": false,
"notNull": false,
"autoincrement": false
},
"error": {
"name": "error",
"type": "text",
"primaryKey": false,
"notNull": false,
"autoincrement": false
},
"started_at": {
"name": "started_at",
"type": "integer",
"primaryKey": false,
"notNull": false,
"autoincrement": false
},
"completed_at": {
"name": "completed_at",
"type": "integer",
"primaryKey": false,
"notNull": false,
"autoincrement": false
},
"created_at": {
"name": "created_at",
"type": "integer",
"primaryKey": false,
"notNull": true,
"autoincrement": false
}
},
"indexes": {
"idx_jobs_repo_status": {
"name": "idx_jobs_repo_status",
"columns": [
"repository_id",
"status"
],
"isUnique": false
}
},
"foreignKeys": {
"indexing_jobs_repository_id_repositories_id_fk": {
"name": "indexing_jobs_repository_id_repositories_id_fk",
"tableFrom": "indexing_jobs",
"tableTo": "repositories",
"columnsFrom": [
"repository_id"
],
"columnsTo": [
"id"
],
"onDelete": "cascade",
"onUpdate": "no action"
}
},
"compositePrimaryKeys": {},
"uniqueConstraints": {},
"checkConstraints": {}
},
"repositories": {
"name": "repositories",
"columns": {
"id": {
"name": "id",
"type": "text",
"primaryKey": true,
"notNull": true,
"autoincrement": false
},
"title": {
"name": "title",
"type": "text",
"primaryKey": false,
"notNull": true,
"autoincrement": false
},
"description": {
"name": "description",
"type": "text",
"primaryKey": false,
"notNull": false,
"autoincrement": false
},
"source": {
"name": "source",
"type": "text",
"primaryKey": false,
"notNull": true,
"autoincrement": false
},
"source_url": {
"name": "source_url",
"type": "text",
"primaryKey": false,
"notNull": true,
"autoincrement": false
},
"branch": {
"name": "branch",
"type": "text",
"primaryKey": false,
"notNull": false,
"autoincrement": false,
"default": "'main'"
},
"state": {
"name": "state",
"type": "text",
"primaryKey": false,
"notNull": true,
"autoincrement": false,
"default": "'pending'"
},
"total_snippets": {
"name": "total_snippets",
"type": "integer",
"primaryKey": false,
"notNull": false,
"autoincrement": false,
"default": 0
},
"total_tokens": {
"name": "total_tokens",
"type": "integer",
"primaryKey": false,
"notNull": false,
"autoincrement": false,
"default": 0
},
"trust_score": {
"name": "trust_score",
"type": "real",
"primaryKey": false,
"notNull": false,
"autoincrement": false,
"default": 0
},
"benchmark_score": {
"name": "benchmark_score",
"type": "real",
"primaryKey": false,
"notNull": false,
"autoincrement": false,
"default": 0
},
"stars": {
"name": "stars",
"type": "integer",
"primaryKey": false,
"notNull": false,
"autoincrement": false
},
"github_token": {
"name": "github_token",
"type": "text",
"primaryKey": false,
"notNull": false,
"autoincrement": false
},
"last_indexed_at": {
"name": "last_indexed_at",
"type": "integer",
"primaryKey": false,
"notNull": false,
"autoincrement": false
},
"created_at": {
"name": "created_at",
"type": "integer",
"primaryKey": false,
"notNull": true,
"autoincrement": false
},
"updated_at": {
"name": "updated_at",
"type": "integer",
"primaryKey": false,
"notNull": true,
"autoincrement": false
}
},
"indexes": {
"idx_repositories_state": {
"name": "idx_repositories_state",
"columns": [
"state"
],
"isUnique": false
}
},
"foreignKeys": {},
"compositePrimaryKeys": {},
"uniqueConstraints": {},
"checkConstraints": {}
},
"repository_configs": {
"name": "repository_configs",
"columns": {
"repository_id": {
"name": "repository_id",
"type": "text",
"primaryKey": false,
"notNull": true,
"autoincrement": false
},
"version_id": {
"name": "version_id",
"type": "text",
"primaryKey": false,
"notNull": false,
"autoincrement": false
},
"project_title": {
"name": "project_title",
"type": "text",
"primaryKey": false,
"notNull": false,
"autoincrement": false
},
"description": {
"name": "description",
"type": "text",
"primaryKey": false,
"notNull": false,
"autoincrement": false
},
"folders": {
"name": "folders",
"type": "text",
"primaryKey": false,
"notNull": false,
"autoincrement": false
},
"exclude_folders": {
"name": "exclude_folders",
"type": "text",
"primaryKey": false,
"notNull": false,
"autoincrement": false
},
"exclude_files": {
"name": "exclude_files",
"type": "text",
"primaryKey": false,
"notNull": false,
"autoincrement": false
},
"rules": {
"name": "rules",
"type": "text",
"primaryKey": false,
"notNull": false,
"autoincrement": false
},
"previous_versions": {
"name": "previous_versions",
"type": "text",
"primaryKey": false,
"notNull": false,
"autoincrement": false
},
"updated_at": {
"name": "updated_at",
"type": "integer",
"primaryKey": false,
"notNull": true,
"autoincrement": false
}
},
"indexes": {
"uniq_repo_config_base": {
"name": "uniq_repo_config_base",
"columns": [
"repository_id"
],
"isUnique": true,
"where": "\"repository_configs\".\"version_id\" IS NULL"
},
"uniq_repo_config_version": {
"name": "uniq_repo_config_version",
"columns": [
"repository_id",
"version_id"
],
"isUnique": true,
"where": "\"repository_configs\".\"version_id\" IS NOT NULL"
}
},
"foreignKeys": {
"repository_configs_repository_id_repositories_id_fk": {
"name": "repository_configs_repository_id_repositories_id_fk",
"tableFrom": "repository_configs",
"tableTo": "repositories",
"columnsFrom": [
"repository_id"
],
"columnsTo": [
"id"
],
"onDelete": "cascade",
"onUpdate": "no action"
}
},
"compositePrimaryKeys": {},
"uniqueConstraints": {},
"checkConstraints": {}
},
"repository_versions": {
"name": "repository_versions",
"columns": {
"id": {
"name": "id",
"type": "text",
"primaryKey": true,
"notNull": true,
"autoincrement": false
},
"repository_id": {
"name": "repository_id",
"type": "text",
"primaryKey": false,
"notNull": true,
"autoincrement": false
},
"tag": {
"name": "tag",
"type": "text",
"primaryKey": false,
"notNull": true,
"autoincrement": false
},
"title": {
"name": "title",
"type": "text",
"primaryKey": false,
"notNull": false,
"autoincrement": false
},
"commit_hash": {
"name": "commit_hash",
"type": "text",
"primaryKey": false,
"notNull": false,
"autoincrement": false
},
"state": {
"name": "state",
"type": "text",
"primaryKey": false,
"notNull": true,
"autoincrement": false,
"default": "'pending'"
},
"total_snippets": {
"name": "total_snippets",
"type": "integer",
"primaryKey": false,
"notNull": false,
"autoincrement": false,
"default": 0
},
"indexed_at": {
"name": "indexed_at",
"type": "integer",
"primaryKey": false,
"notNull": false,
"autoincrement": false
},
"created_at": {
"name": "created_at",
"type": "integer",
"primaryKey": false,
"notNull": true,
"autoincrement": false
}
},
"indexes": {},
"foreignKeys": {
"repository_versions_repository_id_repositories_id_fk": {
"name": "repository_versions_repository_id_repositories_id_fk",
"tableFrom": "repository_versions",
"tableTo": "repositories",
"columnsFrom": [
"repository_id"
],
"columnsTo": [
"id"
],
"onDelete": "cascade",
"onUpdate": "no action"
}
},
"compositePrimaryKeys": {},
"uniqueConstraints": {},
"checkConstraints": {}
},
"settings": {
"name": "settings",
"columns": {
"key": {
"name": "key",
"type": "text",
"primaryKey": true,
"notNull": true,
"autoincrement": false
},
"value": {
"name": "value",
"type": "text",
"primaryKey": false,
"notNull": false,
"autoincrement": false
},
"updated_at": {
"name": "updated_at",
"type": "integer",
"primaryKey": false,
"notNull": true,
"autoincrement": false
}
},
"indexes": {},
"foreignKeys": {},
"compositePrimaryKeys": {},
"uniqueConstraints": {},
"checkConstraints": {}
},
"snippet_embeddings": {
"name": "snippet_embeddings",
"columns": {
"snippet_id": {
"name": "snippet_id",
"type": "text",
"primaryKey": false,
"notNull": true,
"autoincrement": false
},
"profile_id": {
"name": "profile_id",
"type": "text",
"primaryKey": false,
"notNull": true,
"autoincrement": false
},
"model": {
"name": "model",
"type": "text",
"primaryKey": false,
"notNull": true,
"autoincrement": false
},
"dimensions": {
"name": "dimensions",
"type": "integer",
"primaryKey": false,
"notNull": true,
"autoincrement": false
},
"embedding": {
"name": "embedding",
"type": "blob",
"primaryKey": false,
"notNull": true,
"autoincrement": false
},
"created_at": {
"name": "created_at",
"type": "integer",
"primaryKey": false,
"notNull": true,
"autoincrement": false
}
},
"indexes": {
"idx_embeddings_profile": {
"name": "idx_embeddings_profile",
"columns": [
"profile_id",
"snippet_id"
],
"isUnique": false
}
},
"foreignKeys": {
"snippet_embeddings_snippet_id_snippets_id_fk": {
"name": "snippet_embeddings_snippet_id_snippets_id_fk",
"tableFrom": "snippet_embeddings",
"tableTo": "snippets",
"columnsFrom": [
"snippet_id"
],
"columnsTo": [
"id"
],
"onDelete": "cascade",
"onUpdate": "no action"
},
"snippet_embeddings_profile_id_embedding_profiles_id_fk": {
"name": "snippet_embeddings_profile_id_embedding_profiles_id_fk",
"tableFrom": "snippet_embeddings",
"tableTo": "embedding_profiles",
"columnsFrom": [
"profile_id"
],
"columnsTo": [
"id"
],
"onDelete": "cascade",
"onUpdate": "no action"
}
},
"compositePrimaryKeys": {
"snippet_embeddings_snippet_id_profile_id_pk": {
"columns": [
"snippet_id",
"profile_id"
],
"name": "snippet_embeddings_snippet_id_profile_id_pk"
}
},
"uniqueConstraints": {},
"checkConstraints": {}
},
"snippets": {
"name": "snippets",
"columns": {
"id": {
"name": "id",
"type": "text",
"primaryKey": true,
"notNull": true,
"autoincrement": false
},
"document_id": {
"name": "document_id",
"type": "text",
"primaryKey": false,
"notNull": true,
"autoincrement": false
},
"repository_id": {
"name": "repository_id",
"type": "text",
"primaryKey": false,
"notNull": true,
"autoincrement": false
},
"version_id": {
"name": "version_id",
"type": "text",
"primaryKey": false,
"notNull": false,
"autoincrement": false
},
"type": {
"name": "type",
"type": "text",
"primaryKey": false,
"notNull": true,
"autoincrement": false
},
"title": {
"name": "title",
"type": "text",
"primaryKey": false,
"notNull": false,
"autoincrement": false
},
"content": {
"name": "content",
"type": "text",
"primaryKey": false,
"notNull": true,
"autoincrement": false
},
"language": {
"name": "language",
"type": "text",
"primaryKey": false,
"notNull": false,
"autoincrement": false
},
"breadcrumb": {
"name": "breadcrumb",
"type": "text",
"primaryKey": false,
"notNull": false,
"autoincrement": false
},
"token_count": {
"name": "token_count",
"type": "integer",
"primaryKey": false,
"notNull": false,
"autoincrement": false,
"default": 0
},
"created_at": {
"name": "created_at",
"type": "integer",
"primaryKey": false,
"notNull": true,
"autoincrement": false
}
},
"indexes": {
"idx_snippets_repo_version": {
"name": "idx_snippets_repo_version",
"columns": [
"repository_id",
"version_id"
],
"isUnique": false
},
"idx_snippets_repo_type": {
"name": "idx_snippets_repo_type",
"columns": [
"repository_id",
"type"
],
"isUnique": false
}
},
"foreignKeys": {
"snippets_document_id_documents_id_fk": {
"name": "snippets_document_id_documents_id_fk",
"tableFrom": "snippets",
"tableTo": "documents",
"columnsFrom": [
"document_id"
],
"columnsTo": [
"id"
],
"onDelete": "cascade",
"onUpdate": "no action"
},
"snippets_repository_id_repositories_id_fk": {
"name": "snippets_repository_id_repositories_id_fk",
"tableFrom": "snippets",
"tableTo": "repositories",
"columnsFrom": [
"repository_id"
],
"columnsTo": [
"id"
],
"onDelete": "cascade",
"onUpdate": "no action"
},
"snippets_version_id_repository_versions_id_fk": {
"name": "snippets_version_id_repository_versions_id_fk",
"tableFrom": "snippets",
"tableTo": "repository_versions",
"columnsFrom": [
"version_id"
],
"columnsTo": [
"id"
],
"onDelete": "cascade",
"onUpdate": "no action"
}
},
"compositePrimaryKeys": {},
"uniqueConstraints": {},
"checkConstraints": {}
}
},
"views": {},
"enums": {},
"_meta": {
"schemas": {},
"tables": {},
"columns": {}
},
"internal": {
"indexes": {}
}
}

View File

@@ -43,6 +43,13 @@
"when": 1774890536284,
"tag": "0005_fix_stage_defaults",
"breakpoints": true
},
{
"idx": 6,
"version": "6",
"when": 1775038799913,
"tag": "0006_yielding_centennial",
"breakpoints": true
}
]
}

View File

@@ -6,6 +6,7 @@ import { readFileSync } from 'node:fs';
import { join } from 'node:path';
import { eq } from 'drizzle-orm';
import * as schema from './schema';
import { loadSqliteVec, sqliteVecRowidTableName, sqliteVecTableName } from './sqlite-vec';
import {
repositories,
repositoryVersions,
@@ -24,6 +25,7 @@ import {
function createTestDb() {
const client = new Database(':memory:');
client.pragma('foreign_keys = ON');
loadSqliteVec(client);
const db = drizzle(client, { schema });
@@ -266,10 +268,11 @@ describe('snippets table', () => {
describe('snippet_embeddings table', () => {
let db: ReturnType<typeof createTestDb>['db'];
let client: Database.Database;
let snippetId: string;
beforeEach(() => {
({ db } = createTestDb());
({ db, client } = createTestDb());
db.insert(repositories).values(makeRepo()).run();
const docId = crypto.randomUUID();
db.insert(documents)
@@ -344,6 +347,30 @@ describe('snippet_embeddings table', () => {
const result = db.select().from(snippetEmbeddings).all();
expect(result).toHaveLength(0);
});
it('keeps the relational schema free of vec_embedding and retains the profile index', () => {
const columns = client
.prepare("PRAGMA table_info('snippet_embeddings')")
.all() as Array<{ name: string }>;
expect(columns.map((column) => column.name)).not.toContain('vec_embedding');
const indexes = client
.prepare("PRAGMA index_list('snippet_embeddings')")
.all() as Array<{ name: string }>;
expect(indexes.map((index) => index.name)).toContain('idx_embeddings_profile');
});
it('loads sqlite-vec idempotently and derives deterministic per-profile table names', () => {
expect(() => loadSqliteVec(client)).not.toThrow();
const tableName = sqliteVecTableName('local-default');
const rowidTableName = sqliteVecRowidTableName('local-default');
expect(tableName).toMatch(/^snippet_embeddings_vec_local_default_[0-9a-f]{8}$/);
expect(rowidTableName).toMatch(/^snippet_embeddings_vec_rowids_local_default_[0-9a-f]{8}$/);
expect(sqliteVecTableName('local-default')).toBe(tableName);
expect(sqliteVecRowidTableName('local-default')).toBe(rowidTableName);
expect(sqliteVecTableName('local-default')).not.toBe(sqliteVecTableName('openai/custom'));
});
});
describe('indexing_jobs table', () => {

View File

@@ -1,6 +1,7 @@
import { sql } from 'drizzle-orm';
import {
blob,
index,
integer,
primaryKey,
real,
@@ -34,7 +35,7 @@ export const repositories = sqliteTable('repositories', {
lastIndexedAt: integer('last_indexed_at', { mode: 'timestamp' }),
createdAt: integer('created_at', { mode: 'timestamp' }).notNull(),
updatedAt: integer('updated_at', { mode: 'timestamp' }).notNull()
});
}, (t) => [index('idx_repositories_state').on(t.state)]);
// ---------------------------------------------------------------------------
// repository_versions
@@ -72,7 +73,7 @@ export const documents = sqliteTable('documents', {
tokenCount: integer('token_count').default(0),
checksum: text('checksum').notNull(), // SHA-256 of file content
indexedAt: integer('indexed_at', { mode: 'timestamp' }).notNull()
});
}, (t) => [index('idx_documents_repo_version').on(t.repositoryId, t.versionId)]);
// ---------------------------------------------------------------------------
// snippets
@@ -93,7 +94,10 @@ export const snippets = sqliteTable('snippets', {
breadcrumb: text('breadcrumb'), // e.g. "Installation > Getting Started"
tokenCount: integer('token_count').default(0),
createdAt: integer('created_at', { mode: 'timestamp' }).notNull()
});
}, (t) => [
index('idx_snippets_repo_version').on(t.repositoryId, t.versionId),
index('idx_snippets_repo_type').on(t.repositoryId, t.type),
]);
// ---------------------------------------------------------------------------
// embedding_profiles
@@ -128,7 +132,10 @@ export const snippetEmbeddings = sqliteTable(
embedding: blob('embedding').notNull(), // Float32Array as binary blob
createdAt: integer('created_at').notNull()
},
(table) => [primaryKey({ columns: [table.snippetId, table.profileId] })]
(table) => [
primaryKey({ columns: [table.snippetId, table.profileId] }),
index('idx_embeddings_profile').on(table.profileId, table.snippetId),
]
);
// ---------------------------------------------------------------------------
@@ -154,7 +161,7 @@ export const indexingJobs = sqliteTable('indexing_jobs', {
startedAt: integer('started_at', { mode: 'timestamp' }),
completedAt: integer('completed_at', { mode: 'timestamp' }),
createdAt: integer('created_at', { mode: 'timestamp' }).notNull()
});
}, (t) => [index('idx_jobs_repo_status').on(t.repositoryId, t.status)]);
// ---------------------------------------------------------------------------
// repository_configs

View File

@@ -0,0 +1,49 @@
import type Database from 'better-sqlite3';
import * as sqliteVec from 'sqlite-vec';
// Connections that already have the sqlite-vec extension loaded, so
// loadSqliteVec() is idempotent per connection. A WeakSet lets closed
// Database handles be garbage-collected without explicit cleanup.
const loadedConnections = new WeakSet<Database.Database>();
/**
 * 32-bit FNV-1a hash of `value`, rendered as a fixed-width 8-character
 * lowercase hex string. Deterministic across runs and processes, which is
 * what makes the derived table names stable.
 */
function stableHash(value: string): string {
	const FNV_OFFSET_BASIS = 2166136261;
	const FNV_PRIME = 16777619;
	let acc = FNV_OFFSET_BASIS;
	let position = 0;
	while (position < value.length) {
		acc ^= value.charCodeAt(position);
		// Math.imul keeps the multiply in 32-bit integer arithmetic.
		acc = Math.imul(acc, FNV_PRIME);
		position += 1;
	}
	return (acc >>> 0).toString(16).padStart(8, '0');
}
/**
 * Reduces an arbitrary profile id to a SQL-identifier-safe slug:
 * lowercase, runs of non-alphanumerics collapsed to '_', leading/trailing
 * underscores stripped, capped at 32 characters. Falls back to 'profile'
 * when nothing survives sanitization.
 */
function sanitizeIdentifierPart(value: string): string {
	const collapsed = value.toLowerCase().replace(/[^a-z0-9]+/g, '_');
	const trimmed = collapsed.replace(/^_+/, '').replace(/_+$/, '');
	if (trimmed === '') {
		return 'profile';
	}
	return trimmed.slice(0, 32);
}
/**
 * Deterministic per-profile table-name suffix: a readable slug plus a
 * stable hash of the raw id, so distinct profile ids that sanitize to the
 * same slug still get distinct tables.
 */
export function sqliteVecTableSuffix(profileId: string): string {
	const slug = sanitizeIdentifierPart(profileId);
	const digest = stableHash(profileId);
	return slug + '_' + digest;
}
/** Name of the vec0 virtual table holding embeddings for `profileId`. */
export function sqliteVecTableName(profileId: string): string {
	return 'snippet_embeddings_vec_' + sqliteVecTableSuffix(profileId);
}
/** Name of the rowid-mapping table paired with the profile's vec0 table. */
export function sqliteVecRowidTableName(profileId: string): string {
	return 'snippet_embeddings_vec_rowids_' + sqliteVecTableSuffix(profileId);
}
/**
 * Wraps `identifier` in double quotes for safe interpolation into SQL,
 * doubling any embedded double quotes per the SQLite quoting rules.
 */
export function quoteSqliteIdentifier(identifier: string): string {
	const escaped = identifier.split('"').join('""');
	return '"' + escaped + '"';
}
/**
 * Loads the sqlite-vec extension into `db` exactly once per connection;
 * repeat calls on an already-loaded connection are no-ops.
 */
export function loadSqliteVec(db: Database.Database): void {
	if (!loadedConnections.has(db)) {
		sqliteVec.load(db);
		loadedConnections.add(db);
	}
}

View File

@@ -0,0 +1,2 @@
-- Relational vec_embedding bootstrap removed in iteration 2.
-- Downstream sqlite-vec vec0 tables are created on demand in application code.

View File

@@ -12,6 +12,12 @@ import { migrate } from 'drizzle-orm/better-sqlite3/migrator';
import { readFileSync } from 'node:fs';
import { join } from 'node:path';
import * as schema from '../db/schema.js';
import {
loadSqliteVec,
sqliteVecRowidTableName,
sqliteVecTableName
} from '../db/sqlite-vec.js';
import { SqliteVecStore } from '../search/sqlite-vec.store.js';
import { NoopEmbeddingProvider, EmbeddingError, type EmbeddingVector } from './provider.js';
import { OpenAIEmbeddingProvider } from './openai.provider.js';
@@ -31,6 +37,7 @@ import { createProviderFromProfile } from './registry.js';
function createTestDb() {
const client = new Database(':memory:');
client.pragma('foreign_keys = ON');
loadSqliteVec(client);
const db = drizzle(client, { schema });
const migrationsFolder = join(import.meta.dirname, '../db/migrations');
@@ -387,10 +394,19 @@ describe('EmbeddingService', () => {
embedding: Buffer;
profile_id: string;
};
expect((row as Record<string, unknown>).vec_embedding).toBeUndefined();
expect(row.model).toBe('test-model');
expect(row.dimensions).toBe(4);
expect(row.profile_id).toBe('local-default');
expect(row.embedding).toBeInstanceOf(Buffer);
const queryEmbedding = service.getEmbedding(snippetId, 'local-default');
const matches = new SqliteVecStore(client).queryNearestNeighbors(queryEmbedding!, {
repositoryId: '/test/embed-repo',
profileId: 'local-default',
limit: 5
});
expect(matches[0]?.snippetId).toBe(snippetId);
});
it('stores embeddings as retrievable Float32Array blobs', async () => {
@@ -436,6 +452,22 @@ describe('EmbeddingService', () => {
.prepare('SELECT profile_id FROM snippet_embeddings WHERE snippet_id = ?')
.get(snippetId) as { profile_id: string };
expect(row.profile_id).toBe('openai-custom');
const queryEmbedding = service.getEmbedding(snippetId, 'openai-custom');
const store = new SqliteVecStore(client);
const customMatches = store.queryNearestNeighbors(queryEmbedding!, {
repositoryId: '/test/embed-repo',
profileId: 'openai-custom',
limit: 5
});
const defaultMatches = store.queryNearestNeighbors(new Float32Array([1, 0, 0, 0]), {
repositoryId: '/test/embed-repo',
profileId: 'local-default',
limit: 5
});
expect(customMatches[0]?.snippetId).toBe(snippetId);
expect(defaultMatches).toHaveLength(0);
});
it('is idempotent — re-embedding replaces the existing row', async () => {
@@ -450,6 +482,17 @@ describe('EmbeddingService', () => {
.prepare('SELECT COUNT(*) as cnt FROM snippet_embeddings WHERE snippet_id = ?')
.get(snippetId) as { cnt: number };
expect(rows.cnt).toBe(1);
const vecTable = sqliteVecTableName('local-default');
const rowidTable = sqliteVecRowidTableName('local-default');
const vecRows = client.prepare(`SELECT COUNT(*) as cnt FROM "${vecTable}"`).get() as {
cnt: number;
};
const rowidRows = client.prepare(`SELECT COUNT(*) as cnt FROM "${rowidTable}"`).get() as {
cnt: number;
};
expect(vecRows.cnt).toBe(1);
expect(rowidRows.cnt).toBe(1);
});
it('calls onProgress after each batch', async () => {

View File

@@ -5,6 +5,7 @@
import type Database from 'better-sqlite3';
import type { EmbeddingProvider } from './provider.js';
import { SqliteVecStore } from '$lib/server/search/sqlite-vec.store.js';
interface SnippetRow {
id: string;
@@ -17,11 +18,15 @@ const BATCH_SIZE = 50;
const TEXT_MAX_CHARS = 2048;
export class EmbeddingService {
private readonly sqliteVecStore: SqliteVecStore;
constructor(
private readonly db: Database.Database,
private readonly provider: EmbeddingProvider,
private readonly profileId: string = 'local-default'
) {}
) {
this.sqliteVecStore = new SqliteVecStore(db);
}
findSnippetIdsMissingEmbeddings(repositoryId: string, versionId: string | null): string[] {
if (versionId) {
@@ -104,13 +109,19 @@ export class EmbeddingService {
for (let j = 0; j < batchSnippets.length; j++) {
const snippet = batchSnippets[j];
const embedding = embeddings[j];
insert.run(
snippet.id,
this.profileId,
embedding.model,
embedding.dimensions,
Buffer.from(embedding.values.buffer)
Buffer.from(
embedding.values.buffer,
embedding.values.byteOffset,
embedding.values.byteLength
)
);
this.sqliteVecStore.upsertEmbedding(this.profileId, snippet.id, embedding.values);
}
});
insertMany();

View File

@@ -21,6 +21,11 @@ const db = new Database(dbPath);
db.pragma('journal_mode = WAL');
db.pragma('foreign_keys = ON');
db.pragma('busy_timeout = 5000');
db.pragma('synchronous = NORMAL');
db.pragma('cache_size = -65536');
db.pragma('temp_store = MEMORY');
db.pragma('mmap_size = 268435456');
db.pragma('wal_autocheckpoint = 1000');
// Load the embedding profile from DB
const rawProfile = db.prepare('SELECT * FROM embedding_profiles WHERE id = ?').get(embeddingProfileId);

View File

@@ -13,6 +13,9 @@ import { JobQueue } from './job-queue.js';
import { IndexingPipeline } from './indexing.pipeline.js';
import { recoverStaleJobs } from './startup.js';
import { EmbeddingService } from '$lib/server/embeddings/embedding.service.js';
import { loadSqliteVec } from '$lib/server/db/sqlite-vec.js';
import { SqliteVecStore } from '$lib/server/search/sqlite-vec.store.js';
import { sqliteVecRowidTableName, sqliteVecTableName } from '$lib/server/db/sqlite-vec.js';
import * as diffStrategy from './differential-strategy.js';
// ---------------------------------------------------------------------------
@@ -22,6 +25,7 @@ import * as diffStrategy from './differential-strategy.js';
function createTestDb(): Database.Database {
const client = new Database(':memory:');
client.pragma('foreign_keys = ON');
loadSqliteVec(client);
const migrationsFolder = join(import.meta.dirname, '../db/migrations');
for (const migrationFile of [
@@ -29,7 +33,9 @@ function createTestDb(): Database.Database {
'0001_quick_nighthawk.sql',
'0002_silky_stellaris.sql',
'0003_multiversion_config.sql',
'0004_complete_sentry.sql'
'0004_complete_sentry.sql',
'0005_fix_stage_defaults.sql',
'0006_yielding_centennial.sql'
]) {
const migrationSql = readFileSync(join(migrationsFolder, migrationFile), 'utf-8');
@@ -539,6 +545,52 @@ describe('IndexingPipeline', () => {
expect(finalChecksum).toBe('sha-v2');
});
it('removes derived vec rows when changed documents are replaced', async () => {
const docId = crypto.randomUUID();
const snippetId = crypto.randomUUID();
const embedding = Float32Array.from([1, 0, 0]);
const vecStore = new SqliteVecStore(db);
db.prepare(
`INSERT INTO documents (id, repository_id, version_id, file_path, checksum, indexed_at)
VALUES (?, '/test/repo', NULL, 'README.md', 'stale-doc', ?)`
).run(docId, now);
db.prepare(
`INSERT INTO snippets (id, document_id, repository_id, version_id, type, content, created_at)
VALUES (?, ?, '/test/repo', NULL, 'info', 'stale snippet', ?)`
).run(snippetId, docId, now);
db.prepare(
`INSERT INTO snippet_embeddings (snippet_id, profile_id, model, dimensions, embedding, created_at)
VALUES (?, 'local-default', 'test-model', 3, ?, ?)`
).run(snippetId, Buffer.from(embedding.buffer), now);
vecStore.upsertEmbedding('local-default', snippetId, embedding);
const pipeline = makePipeline({
files: [
{
path: 'README.md',
content: '# Updated\n\nFresh content.',
sha: 'sha-fresh',
language: 'markdown'
}
],
totalFiles: 1
});
const job = makeJob();
await pipeline.run(job as never);
const vecTable = sqliteVecTableName('local-default');
const rowidTable = sqliteVecRowidTableName('local-default');
const vecCount = db.prepare(`SELECT COUNT(*) as n FROM "${vecTable}"`).get() as { n: number };
const rowidCount = db.prepare(`SELECT COUNT(*) as n FROM "${rowidTable}"`).get() as {
n: number;
};
expect(vecCount.n).toBe(0);
expect(rowidCount.n).toBe(0);
});
it('updates job progress as files are processed', async () => {
const files = Array.from({ length: 5 }, (_, i) => ({
path: `file${i}.md`,
@@ -700,6 +752,60 @@ describe('IndexingPipeline', () => {
expect(version.indexed_at).not.toBeNull();
});
it('clones ancestor embeddings into the derived vec store for differential indexing', async () => {
const ancestorVersionId = insertVersion(db, { tag: 'v1.0.0', state: 'indexed' });
const targetVersionId = insertVersion(db, { tag: 'v1.1.0', state: 'pending' });
const vecStore = new SqliteVecStore(db);
const docId = crypto.randomUUID();
const snippetId = crypto.randomUUID();
const embedding = Float32Array.from([0.2, 0.4, 0.6]);
db.prepare(
`INSERT INTO documents (id, repository_id, version_id, file_path, checksum, indexed_at)
VALUES (?, '/test/repo', ?, 'README.md', 'ancestor-doc', ?)`
).run(docId, ancestorVersionId, now);
db.prepare(
`INSERT INTO snippets (id, document_id, repository_id, version_id, type, content, created_at)
VALUES (?, ?, '/test/repo', ?, 'info', 'ancestor snippet', ?)`
).run(snippetId, docId, ancestorVersionId, now);
db.prepare(
`INSERT INTO snippet_embeddings (snippet_id, profile_id, model, dimensions, embedding, created_at)
VALUES (?, 'local-default', 'test-model', 3, ?, ?)`
).run(snippetId, Buffer.from(embedding.buffer), now);
vecStore.upsertEmbedding('local-default', snippetId, embedding);
vi.spyOn(diffStrategy, 'buildDifferentialPlan').mockResolvedValue({
ancestorTag: 'v1.0.0',
ancestorVersionId,
changedPaths: new Set<string>(),
unchangedPaths: new Set<string>(['README.md'])
});
const pipeline = makePipeline({ files: [], totalFiles: 0 });
const job = makeJob('/test/repo', targetVersionId);
await pipeline.run(job as never);
const targetRows = db
.prepare(
`SELECT se.snippet_id, se.embedding
FROM snippet_embeddings se
INNER JOIN snippets s ON s.id = se.snippet_id
WHERE s.version_id = ?`
)
.all(targetVersionId) as Array<{ snippet_id: string; embedding: Buffer }>;
expect(targetRows).toHaveLength(1);
const matches = vecStore.queryNearestNeighbors(embedding, {
repositoryId: '/test/repo',
versionId: targetVersionId,
profileId: 'local-default',
limit: 5
});
expect(matches[0]?.snippetId).toBe(targetRows[0].snippet_id);
});
it('updates repository_versions state to error when pipeline throws and job has versionId', async () => {
const versionId = insertVersion(db, { tag: 'v1.0.0', state: 'pending' });
const errorCrawl = vi.fn().mockRejectedValue(new Error('crawl failed'));

View File

@@ -22,6 +22,7 @@ import type { EmbeddingService } from '$lib/server/embeddings/embedding.service.
import { RepositoryMapper } from '$lib/server/mappers/repository.mapper.js';
import { IndexingJob } from '$lib/server/models/indexing-job.js';
import { Repository, RepositoryEntity } from '$lib/server/models/repository.js';
import { SqliteVecStore } from '$lib/server/search/sqlite-vec.store.js';
import { resolveConfig, type ParsedConfig } from '$lib/server/config/config-parser.js';
import { parseFile } from '$lib/server/parser/index.js';
import { computeTrustScore } from '$lib/server/search/trust-score.js';
@@ -63,12 +64,16 @@ function sha256(content: string): string {
// ---------------------------------------------------------------------------
export class IndexingPipeline {
private readonly sqliteVecStore: SqliteVecStore;
constructor(
private readonly db: Database.Database,
private readonly githubCrawl: typeof GithubCrawlFn,
private readonly localCrawler: LocalCrawler,
private readonly embeddingService: EmbeddingService | null
) {}
) {
this.sqliteVecStore = new SqliteVecStore(db);
}
// -------------------------------------------------------------------------
// Public — run a job end to end
@@ -593,6 +598,12 @@ export class IndexingPipeline {
emb.embedding,
emb.created_at
);
this.sqliteVecStore.upsertEmbeddingBuffer(
emb.profile_id,
newSnippetId,
emb.embedding,
emb.dimensions
);
}
}
})();
@@ -623,6 +634,8 @@ export class IndexingPipeline {
);
this.db.transaction(() => {
this.sqliteVecStore.deleteEmbeddingsForDocumentIds(changedDocIds);
// Delete stale documents (cascade deletes their snippets via FK).
if (changedDocIds.length > 0) {
const placeholders = changedDocIds.map(() => '?').join(',');

View File

@@ -17,6 +17,54 @@ import type { WorkerPool } from './worker-pool.js';
const JOB_SELECT = `SELECT * FROM indexing_jobs`;
type JobStatusFilter = IndexingJob['status'] | Array<IndexingJob['status']>;
/**
 * Escapes LIKE wildcards (% and _) and the escape character itself so a
 * value can be used literally inside a LIKE pattern with ESCAPE '\'.
 */
function escapeLikePattern(value: string): string {
  return value.replace(/[\\%_]/g, (match) => `\\${match}`);
}
/**
 * A repository id with at least two non-empty path segments (e.g.
 * "/owner/repo") addresses one concrete repository; fewer segments are
 * treated as a prefix that may cover many repositories.
 */
function isSpecificRepositoryId(repositoryId: string): boolean {
  const segments = repositoryId.split('/').filter((segment) => segment.length > 0);
  return segments.length >= 2;
}
/**
 * Normalizes an optional status filter (single value or array) into a
 * deduplicated array; absent filters become an empty array.
 */
function normalizeStatuses(status?: JobStatusFilter): Array<IndexingJob['status']> {
  if (!status) {
    return [];
  }
  const asArray = Array.isArray(status) ? status : [status];
  return Array.from(new Set(asArray));
}
/**
 * Builds the WHERE clause and bound parameters shared by listJobs and
 * countJobs.
 *
 * - A specific repository id matches exactly; a shorter prefix matches the
 *   prefix itself OR anything nested under it via LIKE 'prefix/%'.
 * - Statuses become a deduplicated IN (...) list.
 *
 * Returns an empty `where` string when no filters apply.
 */
function buildJobFilterQuery(options?: {
  repositoryId?: string;
  status?: JobStatusFilter;
}): { where: string; params: unknown[] } {
  const clauses: string[] = [];
  const bindings: unknown[] = [];
  const repositoryId = options?.repositoryId;
  if (repositoryId) {
    if (isSpecificRepositoryId(repositoryId)) {
      clauses.push('repository_id = ?');
      bindings.push(repositoryId);
    } else {
      // Prefix query: the LIKE pattern is escaped so literal %/_ in the
      // prefix cannot act as wildcards.
      clauses.push(`(repository_id = ? OR repository_id LIKE ? ESCAPE '\\')`);
      bindings.push(repositoryId, `${escapeLikePattern(repositoryId)}/%`);
    }
  }
  const statuses = normalizeStatuses(options?.status);
  if (statuses.length > 0) {
    const placeholders = statuses.map(() => '?').join(', ');
    clauses.push(`status IN (${placeholders})`);
    bindings.push(...statuses);
  }
  const where = clauses.length > 0 ? `WHERE ${clauses.join(' AND ')}` : '';
  return { where, params: bindings };
}
export class JobQueue {
private workerPool: WorkerPool | null = null;
@@ -144,23 +192,11 @@ export class JobQueue {
*/
listJobs(options?: {
repositoryId?: string;
status?: IndexingJob['status'];
status?: JobStatusFilter;
limit?: number;
}): IndexingJob[] {
const limit = Math.min(options?.limit ?? 20, 200);
const conditions: string[] = [];
const params: unknown[] = [];
if (options?.repositoryId) {
conditions.push('repository_id = ?');
params.push(options.repositoryId);
}
if (options?.status) {
conditions.push('status = ?');
params.push(options.status);
}
const where = conditions.length > 0 ? `WHERE ${conditions.join(' AND ')}` : '';
const { where, params } = buildJobFilterQuery(options);
const sql = `${JOB_SELECT} ${where} ORDER BY created_at DESC LIMIT ?`;
params.push(limit);
@@ -194,19 +230,7 @@ export class JobQueue {
* Count all jobs matching optional filters.
*/
countJobs(options?: { repositoryId?: string; status?: IndexingJob['status'] }): number {
const conditions: string[] = [];
const params: unknown[] = [];
if (options?.repositoryId) {
conditions.push('repository_id = ?');
params.push(options.repositoryId);
}
if (options?.status) {
conditions.push('status = ?');
params.push(options.status);
}
const where = conditions.length > 0 ? `WHERE ${conditions.join(' AND')}` : '';
const { where, params } = buildJobFilterQuery(options);
const sql = `SELECT COUNT(*) as n FROM indexing_jobs ${where}`;
const row = this.db.prepare<unknown[], { n: number }>(sql).get(...params);
return row?.n ?? 0;

View File

@@ -171,4 +171,25 @@ describe('ProgressBroadcaster', () => {
reader1.cancel();
reader2.cancel();
});
// Verifies that a snapshot pushed through broadcastWorkerStatus reaches a
// subscriber of the global stream as a formatted 'worker-status' SSE frame
// carrying the serialized payload.
it('broadcastWorkerStatus sends worker-status events to global subscribers', async () => {
  const broadcaster = new ProgressBroadcaster();
  const stream = broadcaster.subscribeAll();
  const reader = stream.getReader();
  broadcaster.broadcastWorkerStatus({
    concurrency: 2,
    active: 1,
    idle: 1,
    workers: [{ index: 0, state: 'running', jobId: 'job-1', repositoryId: '/repo/1', versionId: null }]
  });
  const { value } = await reader.read();
  const text = value as string;
  // The SSE frame is plain text: check the event name and payload fields.
  expect(text).toContain('event: worker-status');
  expect(text).toContain('"active":1');
  reader.cancel();
});
});

View File

@@ -10,6 +10,7 @@ export class ProgressBroadcaster {
private allSubscribers = new Set<ReadableStreamDefaultController<string>>();
private lastEventCache = new Map<string, SSEEvent>();
private eventCounters = new Map<string, number>();
private globalEventCounter = 0;
subscribe(jobId: string): ReadableStream<string> {
return new ReadableStream({
@@ -135,6 +136,24 @@ export class ProgressBroadcaster {
}
}
/**
 * Fans a worker-pool status snapshot out to every subscriber of the global
 * SSE stream as a 'worker-status' event. Unlike per-job events, the snapshot
 * is not written to lastEventCache — only live subscribers receive it.
 *
 * @param data Serializable status object (stringified into the SSE payload).
 */
broadcastWorkerStatus(data: object): void {
  // Monotonic counter dedicated to global events; becomes the SSE `id` field.
  this.globalEventCounter += 1;
  const event: SSEEvent = {
    id: this.globalEventCounter,
    event: 'worker-status',
    data: JSON.stringify(data)
  };
  const sse = this.formatSSE(event);
  for (const controller of this.allSubscribers) {
    try {
      controller.enqueue(sse);
    } catch {
      // Controller might be closed or errored
    }
  }
}
getLastEvent(jobId: string): SSEEvent | null {
return this.lastEventCache.get(jobId) ?? null;
}

View File

@@ -16,7 +16,6 @@ import { LocalCrawler } from '$lib/server/crawler/local.crawler.js';
import { IndexingPipeline } from './indexing.pipeline.js';
import { JobQueue } from './job-queue.js';
import { WorkerPool } from './worker-pool.js';
import type { ParseWorkerResponse } from './worker-types.js';
import { initBroadcaster } from './progress-broadcaster.js';
import type { ProgressBroadcaster } from './progress-broadcaster.js';
import path from 'node:path';
@@ -90,17 +89,28 @@ export function initializePipeline(
if (options?.dbPath) {
_broadcaster = initBroadcaster();
const getRepositoryIdForJob = (jobId: string): string => {
const row = db
.prepare<[string], { repository_id: string }>(
`SELECT repository_id FROM indexing_jobs WHERE id = ?`
)
.get(jobId);
return row?.repository_id ?? '';
};
// Resolve worker script paths relative to this file (build/workers/ directory)
const __filename = fileURLToPath(import.meta.url);
const __dirname = path.dirname(__filename);
const workerScript = path.join(__dirname, '../../../build/workers/worker-entry.mjs');
const embedWorkerScript = path.join(__dirname, '../../../build/workers/embed-worker-entry.mjs');
const writeWorkerScript = path.join(__dirname, '../../../build/workers/write-worker-entry.mjs');
try {
_pool = new WorkerPool({
concurrency: options.concurrency ?? 2,
workerScript,
embedWorkerScript,
writeWorkerScript,
dbPath: options.dbPath,
onProgress: (jobId, msg) => {
// Update DB with progress
@@ -112,7 +122,10 @@ export function initializePipeline(
// Broadcast progress event
if (_broadcaster) {
_broadcaster.broadcast(jobId, '', 'progress', msg);
_broadcaster.broadcast(jobId, getRepositoryIdForJob(jobId), 'job-progress', {
...msg,
status: 'running'
});
}
},
onJobDone: (jobId: string) => {
@@ -123,7 +136,10 @@ export function initializePipeline(
// Broadcast done event
if (_broadcaster) {
_broadcaster.broadcast(jobId, '', 'job-done', { jobId });
_broadcaster.broadcast(jobId, getRepositoryIdForJob(jobId), 'job-done', {
jobId,
status: 'done'
});
}
},
onJobFailed: (jobId: string, error: string) => {
@@ -134,7 +150,11 @@ export function initializePipeline(
// Broadcast failed event
if (_broadcaster) {
_broadcaster.broadcast(jobId, '', 'job-failed', { jobId, error });
_broadcaster.broadcast(jobId, getRepositoryIdForJob(jobId), 'job-failed', {
jobId,
status: 'failed',
error
});
}
},
onEmbedDone: (jobId: string) => {
@@ -142,6 +162,9 @@ export function initializePipeline(
},
onEmbedFailed: (jobId: string, error: string) => {
console.error('[WorkerPool] Embedding failed for job:', jobId, error);
},
onWorkerStatus: (status) => {
_broadcaster?.broadcastWorkerStatus(status);
}
});

View File

@@ -13,6 +13,11 @@ const db = new Database(dbPath);
db.pragma('journal_mode = WAL');
db.pragma('foreign_keys = ON');
db.pragma('busy_timeout = 5000');
db.pragma('synchronous = NORMAL');
db.pragma('cache_size = -65536');
db.pragma('temp_store = MEMORY');
db.pragma('mmap_size = 268435456');
db.pragma('wal_autocheckpoint = 1000');
const pipeline = new IndexingPipeline(db, githubCrawl, new LocalCrawler(), null);
let currentJobId: string | null = null;

View File

@@ -1,11 +1,19 @@
import { Worker } from 'node:worker_threads';
import { existsSync } from 'node:fs';
import type { ParseWorkerRequest, ParseWorkerResponse, EmbedWorkerRequest, EmbedWorkerResponse, WorkerInitData } from './worker-types.js';
import type {
ParseWorkerRequest,
ParseWorkerResponse,
EmbedWorkerRequest,
EmbedWorkerResponse,
WorkerInitData,
WriteWorkerResponse
} from './worker-types.js';
export interface WorkerPoolOptions {
concurrency: number;
workerScript: string;
embedWorkerScript: string;
writeWorkerScript?: string;
dbPath: string;
embeddingProfileId?: string;
onProgress: (jobId: string, msg: Extract<ParseWorkerResponse, { type: 'progress' }>) => void;
@@ -13,6 +21,22 @@ export interface WorkerPoolOptions {
onJobFailed: (jobId: string, error: string) => void;
onEmbedDone: (jobId: string) => void;
onEmbedFailed: (jobId: string, error: string) => void;
onWorkerStatus?: (status: WorkerPoolStatus) => void;
}
export interface WorkerStatusEntry {
index: number;
state: 'idle' | 'running';
jobId: string | null;
repositoryId: string | null;
versionId: string | null;
}
export interface WorkerPoolStatus {
concurrency: number;
active: number;
idle: number;
workers: WorkerStatusEntry[];
}
interface QueuedJob {
@@ -24,6 +48,7 @@ interface QueuedJob {
interface RunningJob {
jobId: string;
repositoryId: string;
versionId?: string | null;
}
interface EmbedQueuedJob {
@@ -36,10 +61,12 @@ export class WorkerPool {
private workers: Worker[] = [];
private idleWorkers: Worker[] = [];
private embedWorker: Worker | null = null;
private writeWorker: Worker | null = null;
private embedReady = false;
private writeReady = false;
private jobQueue: QueuedJob[] = [];
private runningJobs = new Map<Worker, RunningJob>();
private runningRepoIds = new Set<string>();
private runningJobKeys = new Set<string>();
private embedQueue: EmbedQueuedJob[] = [];
private options: WorkerPoolOptions;
private fallbackMode = false;
@@ -66,6 +93,12 @@ export class WorkerPool {
if (options.embeddingProfileId && existsSync(options.embedWorkerScript)) {
this.embedWorker = this.spawnEmbedWorker();
}
if (options.writeWorkerScript && existsSync(options.writeWorkerScript)) {
this.writeWorker = this.spawnWriteWorker(options.writeWorkerScript);
}
this.emitStatusChanged();
}
private spawnParseWorker(): Worker {
@@ -94,6 +127,22 @@ export class WorkerPool {
return worker;
}
/**
 * Spawns the dedicated DB write worker and wires its lifecycle: incoming
 * messages route to onWriteWorkerMessage; on exit the ready flag and the
 * reference are cleared (no automatic respawn is attempted here).
 *
 * @param writeWorkerScript Path to the compiled write-worker entry script.
 * @returns The spawned Worker handle.
 */
private spawnWriteWorker(writeWorkerScript: string): Worker {
  const worker = new Worker(writeWorkerScript, {
    workerData: {
      dbPath: this.options.dbPath
    } satisfies WorkerInitData
  });
  worker.on('message', (msg: WriteWorkerResponse) => this.onWriteWorkerMessage(msg));
  worker.on('exit', () => {
    // Whether crash or clean shutdown: stop routing writes to this worker.
    this.writeReady = false;
    this.writeWorker = null;
  });
  return worker;
}
public enqueue(jobId: string, repositoryId: string, versionId?: string | null): void {
if (this.shuttingDown) {
console.warn('WorkerPool is shutting down, ignoring enqueue request');
@@ -109,10 +158,18 @@ export class WorkerPool {
this.dispatch();
}
/**
 * Builds the compound concurrency key for a (repository, version) pair.
 * A null/undefined version collapses to the empty string so versionless
 * jobs for the same repository still serialize against each other.
 */
private static jobKey(repositoryId: string, versionId?: string | null): string {
  const versionPart = versionId ?? '';
  return repositoryId + ':' + versionPart;
}
private dispatch(): void {
let statusChanged = false;
while (this.idleWorkers.length > 0 && this.jobQueue.length > 0) {
// Find first job whose repositoryId is not currently running
const jobIdx = this.jobQueue.findIndex((j) => !this.runningRepoIds.has(j.repositoryId));
// Find first job whose (repositoryId, versionId) compound key is not currently running
const jobIdx = this.jobQueue.findIndex(
(j) => !this.runningJobKeys.has(WorkerPool.jobKey(j.repositoryId, j.versionId))
);
if (jobIdx === -1) {
// No eligible job found (all repos have running jobs)
@@ -122,12 +179,17 @@ export class WorkerPool {
const job = this.jobQueue.splice(jobIdx, 1)[0];
const worker = this.idleWorkers.pop()!;
this.runningJobs.set(worker, { jobId: job.jobId, repositoryId: job.repositoryId });
this.runningRepoIds.add(job.repositoryId);
this.runningJobs.set(worker, { jobId: job.jobId, repositoryId: job.repositoryId, versionId: job.versionId });
this.runningJobKeys.add(WorkerPool.jobKey(job.repositoryId, job.versionId));
statusChanged = true;
const msg: ParseWorkerRequest = { type: 'run', jobId: job.jobId };
worker.postMessage(msg);
}
if (statusChanged) {
this.emitStatusChanged();
}
}
private onWorkerMessage(worker: Worker, msg: ParseWorkerResponse): void {
@@ -137,15 +199,20 @@ export class WorkerPool {
const runningJob = this.runningJobs.get(worker);
if (runningJob) {
this.runningJobs.delete(worker);
this.runningRepoIds.delete(runningJob.repositoryId);
this.runningJobKeys.delete(WorkerPool.jobKey(runningJob.repositoryId, runningJob.versionId));
}
this.idleWorkers.push(worker);
this.options.onJobDone(msg.jobId);
this.emitStatusChanged();
// If embedding configured, enqueue embed request
if (this.embedWorker && this.options.embeddingProfileId) {
const runningJobData = runningJob || { jobId: msg.jobId, repositoryId: '' };
this.enqueueEmbed(msg.jobId, runningJobData.repositoryId, null);
const runningJobData = runningJob || { jobId: msg.jobId, repositoryId: '', versionId: null };
this.enqueueEmbed(
msg.jobId,
runningJobData.repositoryId,
runningJobData.versionId ?? null
);
}
this.dispatch();
@@ -153,10 +220,11 @@ export class WorkerPool {
const runningJob = this.runningJobs.get(worker);
if (runningJob) {
this.runningJobs.delete(worker);
this.runningRepoIds.delete(runningJob.repositoryId);
this.runningJobKeys.delete(WorkerPool.jobKey(runningJob.repositoryId, runningJob.versionId));
}
this.idleWorkers.push(worker);
this.options.onJobFailed(msg.jobId, msg.error);
this.emitStatusChanged();
this.dispatch();
}
}
@@ -176,13 +244,15 @@ export class WorkerPool {
const runningJob = this.runningJobs.get(worker);
if (runningJob && code !== 0) {
this.runningJobs.delete(worker);
this.runningRepoIds.delete(runningJob.repositoryId);
this.runningJobKeys.delete(WorkerPool.jobKey(runningJob.repositoryId, runningJob.versionId));
this.options.onJobFailed(runningJob.jobId, `Worker crashed with code ${code}`);
} else if (runningJob) {
this.runningJobs.delete(worker);
this.runningRepoIds.delete(runningJob.repositoryId);
this.runningJobKeys.delete(WorkerPool.jobKey(runningJob.repositoryId, runningJob.versionId));
}
this.emitStatusChanged();
// Remove from workers array
const workerIdx = this.workers.indexOf(worker);
if (workerIdx !== -1) {
@@ -212,6 +282,17 @@ export class WorkerPool {
}
}
/**
 * Handles messages from the write worker: 'ready' flips the dispatch gate,
 * 'write_error' is logged, and 'write_ack' is deliberately ignored.
 */
private onWriteWorkerMessage(msg: WriteWorkerResponse): void {
  switch (msg.type) {
    case 'ready':
      this.writeReady = true;
      break;
    case 'write_error':
      console.error('[WorkerPool] Write worker failed for job:', msg.jobId, msg.error);
      break;
    default:
      // write_ack: nothing to do.
      break;
  }
}
private processEmbedQueue(): void {
if (!this.embedWorker || !this.embedReady) {
return;
@@ -250,6 +331,7 @@ export class WorkerPool {
}
public setMaxConcurrency(n: number): void {
this.options.concurrency = n;
const current = this.workers.length;
if (n > current) {
@@ -274,6 +356,8 @@ export class WorkerPool {
}
}
}
this.emitStatusChanged();
}
public async shutdown(): Promise<void> {
@@ -300,6 +384,14 @@ export class WorkerPool {
}
}
if (this.writeWorker) {
try {
this.writeWorker.postMessage({ type: 'shutdown' });
} catch {
// Worker might already be exited
}
}
// Wait for workers to exit with timeout
const timeout = 5000;
const startTime = Date.now();
@@ -329,9 +421,41 @@ export class WorkerPool {
}
}
if (this.writeWorker) {
try {
this.writeWorker.terminate();
} catch {
// Already terminated
}
}
this.workers = [];
this.idleWorkers = [];
this.embedWorker = null;
this.writeWorker = null;
this.emitStatusChanged();
}
/**
 * Snapshots the pool: configured concurrency, counts of active/idle
 * workers, and a per-worker entry describing what (if anything) each one
 * is currently running.
 */
public getStatus(): WorkerPoolStatus {
  const workers: WorkerStatusEntry[] = this.workers.map((worker, index) => {
    const running = this.runningJobs.get(worker);
    if (running) {
      return {
        index,
        state: 'running',
        jobId: running.jobId,
        repositoryId: running.repositoryId,
        versionId: running.versionId ?? null
      };
    }
    return { index, state: 'idle', jobId: null, repositoryId: null, versionId: null };
  });
  return {
    concurrency: this.options.concurrency,
    active: this.runningJobs.size,
    idle: this.idleWorkers.length,
    workers
  };
}
/** Pushes a fresh status snapshot to the optional onWorkerStatus listener. */
private emitStatusChanged(): void {
  this.options.onWorkerStatus?.(this.getStatus());
}
public get isFallbackMode(): boolean {

View File

@@ -19,7 +19,61 @@ export type EmbedWorkerResponse =
| { type: 'embed-done'; jobId: string }
| { type: 'embed-failed'; jobId: string; error: string };
/** Messages the write worker accepts: a batched write or a shutdown signal. */
export type WriteWorkerRequest = WriteRequest | { type: 'shutdown' };

/** Messages the write worker sends back to the pool. */
export type WriteWorkerResponse =
  | { type: 'ready' }
  | WriteAck
  | WriteError;

/** Init payload passed via workerData when spawning a worker thread. */
export interface WorkerInitData {
  // Path of the shared SQLite database file the worker should open.
  dbPath: string;
  // Embedding profile id; omitted when embedding is not configured.
  embeddingProfileId?: string;
}

// Write worker message types (Phase 6)

/**
 * Plain-object form of a documents row, safe to postMessage across
 * thread boundaries (no class instances or Buffers).
 */
export interface SerializedDocument {
  id: string;
  repositoryId: string;
  versionId: string | null;
  filePath: string;
  title: string | null;
  language: string | null;
  tokenCount: number;
  checksum: string;
  // Epoch timestamp; units match the documents.indexed_at column.
  indexedAt: number;
}

/** Plain-object form of a snippets row, mirroring SerializedDocument. */
export interface SerializedSnippet {
  id: string;
  documentId: string;
  repositoryId: string;
  versionId: string | null;
  type: 'code' | 'info';
  title: string | null;
  content: string;
  language: string | null;
  breadcrumb: string | null;
  tokenCount: number;
  // Epoch timestamp; units match the snippets.created_at column.
  createdAt: number;
}

/** Request to persist one batch of documents plus their snippets. */
export type WriteRequest = {
  type: 'write';
  jobId: string;
  documents: SerializedDocument[];
  snippets: SerializedSnippet[];
};

/** Acknowledgement that a batch was committed, with row counts. */
export type WriteAck = {
  type: 'write_ack';
  jobId: string;
  documentCount: number;
  snippetCount: number;
};

/** Failure report for a batch; the transaction was rolled back by throw. */
export type WriteError = {
  type: 'write_error';
  jobId: string;
  error: string;
};

View File

@@ -0,0 +1,93 @@
import { workerData, parentPort } from 'node:worker_threads';
import Database from 'better-sqlite3';
import type {
SerializedDocument,
SerializedSnippet,
WorkerInitData,
WriteWorkerRequest,
WriteWorkerResponse
} from './worker-types.js';
// Worker bootstrap: open the shared SQLite database named in the init
// payload the pool passed via workerData.
const { dbPath } = workerData as WorkerInitData;
const db = new Database(dbPath);
// WAL + busy_timeout allow this worker to write while the main process
// holds its own connection; the remaining pragmas tune bulk-insert
// throughput (see SQLite PRAGMA docs: negative cache_size is in KiB).
db.pragma('journal_mode = WAL');
db.pragma('foreign_keys = ON');
db.pragma('busy_timeout = 5000');
db.pragma('synchronous = NORMAL');
db.pragma('cache_size = -65536');
db.pragma('temp_store = MEMORY');
db.pragma('mmap_size = 268435456');
db.pragma('wal_autocheckpoint = 1000');
// Statements are prepared once at startup and reused for every batch.
const insertDocument = db.prepare(
  `INSERT OR REPLACE INTO documents
  (id, repository_id, version_id, file_path, title, language, token_count, checksum, indexed_at)
  VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)`
);
const insertSnippet = db.prepare(
  `INSERT OR REPLACE INTO snippets
  (id, document_id, repository_id, version_id, type, title, content, language, breadcrumb, token_count, created_at)
  VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)`
);
/**
 * Persists one batch atomically: all documents first, then all snippets,
 * inside a single transaction (better-sqlite3 rolls back on throw).
 * INSERT OR REPLACE keeps re-delivery of the same batch idempotent per id.
 */
const writeBatch = db.transaction(
  (documents: SerializedDocument[], snippets: SerializedSnippet[]) => {
    documents.forEach((doc) =>
      insertDocument.run(
        doc.id,
        doc.repositoryId,
        doc.versionId,
        doc.filePath,
        doc.title,
        doc.language,
        doc.tokenCount,
        doc.checksum,
        doc.indexedAt
      )
    );
    snippets.forEach((snip) =>
      insertSnippet.run(
        snip.id,
        snip.documentId,
        snip.repositoryId,
        snip.versionId,
        snip.type,
        snip.title,
        snip.content,
        snip.language,
        snip.breadcrumb,
        snip.tokenCount,
        snip.createdAt
      )
    );
  }
);
// Signal readiness now that the DB is open and statements are prepared;
// the pool gates write dispatch on receiving this message.
parentPort?.postMessage({ type: 'ready' } satisfies WriteWorkerResponse);
parentPort?.on('message', (msg: WriteWorkerRequest) => {
  // Graceful shutdown: close the DB handle before ending the thread.
  if (msg.type === 'shutdown') {
    db.close();
    process.exit(0);
  }
  if (msg.type !== 'write') {
    return;
  }
  try {
    writeBatch(msg.documents, msg.snippets);
    parentPort?.postMessage({
      type: 'write_ack',
      jobId: msg.jobId,
      documentCount: msg.documents.length,
      snippetCount: msg.snippets.length
    } satisfies WriteWorkerResponse);
  } catch (error) {
    // Report the failure to the pool instead of crashing the worker; the
    // transaction above has already rolled back.
    parentPort?.postMessage({
      type: 'write_error',
      jobId: msg.jobId,
      error: error instanceof Error ? error.message : String(error)
    } satisfies WriteWorkerResponse);
  }
});

View File

@@ -15,6 +15,8 @@ import { HybridSearchService } from './hybrid.search.service.js';
import { VectorSearch, cosineSimilarity } from './vector.search.js';
import { reciprocalRankFusion } from './rrf.js';
import type { EmbeddingProvider, EmbeddingVector } from '../embeddings/provider.js';
import { loadSqliteVec } from '../db/sqlite-vec.js';
import { SqliteVecStore } from './sqlite-vec.store.js';
// ---------------------------------------------------------------------------
// In-memory DB factory
@@ -23,6 +25,7 @@ import type { EmbeddingProvider, EmbeddingVector } from '../embeddings/provider.
function createTestDb(): Database.Database {
const client = new Database(':memory:');
client.pragma('foreign_keys = ON');
loadSqliteVec(client);
const migrationsFolder = join(import.meta.dirname, '../db/migrations');
@@ -30,7 +33,11 @@ function createTestDb(): Database.Database {
const migrations = [
'0000_large_master_chief.sql',
'0001_quick_nighthawk.sql',
'0002_silky_stellaris.sql'
'0002_silky_stellaris.sql',
'0003_multiversion_config.sql',
'0004_complete_sentry.sql',
'0005_fix_stage_defaults.sql',
'0006_yielding_centennial.sql'
];
for (const migrationFile of migrations) {
const migrationSql = readFileSync(join(migrationsFolder, migrationFile), 'utf-8');
@@ -121,6 +128,7 @@ function seedEmbedding(
VALUES (?, ?, ?, ?, ?, ?)`
)
.run(snippetId, profileId, model, values.length, Buffer.from(f32.buffer), NOW_S);
new SqliteVecStore(client).upsertEmbedding(profileId, snippetId, f32);
}
// ---------------------------------------------------------------------------
@@ -368,6 +376,42 @@ describe('VectorSearch', () => {
const results = vs.vectorSearch(new Float32Array([-0.5, 0.5]), { repositoryId: repoId });
expect(results[0].score).toBeCloseTo(1.0, 4);
});
it('filters by profileId using per-profile vec tables', () => {
client
.prepare(
`INSERT INTO embedding_profiles (id, provider_kind, title, enabled, is_default, model, dimensions, config, created_at, updated_at)
VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?)`
)
.run('secondary-profile', 'local-transformers', 'Secondary', 1, 0, 'test-model', 2, '{}', NOW_S, NOW_S);
const defaultSnippet = seedSnippet(client, {
repositoryId: repoId,
documentId: docId,
content: 'default profile snippet'
});
const secondarySnippet = seedSnippet(client, {
repositoryId: repoId,
documentId: docId,
content: 'secondary profile snippet'
});
seedEmbedding(client, defaultSnippet, [1, 0], 'local-default');
seedEmbedding(client, secondarySnippet, [1, 0], 'secondary-profile');
const vs = new VectorSearch(client);
const defaultResults = vs.vectorSearch(new Float32Array([1, 0]), {
repositoryId: repoId,
profileId: 'local-default'
});
const secondaryResults = vs.vectorSearch(new Float32Array([1, 0]), {
repositoryId: repoId,
profileId: 'secondary-profile'
});
expect(defaultResults.map((result) => result.snippetId)).toEqual([defaultSnippet]);
expect(secondaryResults.map((result) => result.snippetId)).toEqual([secondarySnippet]);
});
});
// ===========================================================================

View File

@@ -148,7 +148,12 @@ export class HybridSearchService {
const topIds = vectorResults.slice(0, limit).map((r) => r.snippetId);
return {
results: this.fetchSnippetsByIds(topIds, options.repositoryId, options.type),
results: this.fetchSnippetsByIds(
topIds,
options.repositoryId,
options.versionId,
options.type
),
searchModeUsed: 'semantic'
};
}
@@ -194,7 +199,12 @@ export class HybridSearchService {
const topIds = vectorResults.slice(0, limit).map((r) => r.snippetId);
return {
results: this.fetchSnippetsByIds(topIds, options.repositoryId, options.type),
results: this.fetchSnippetsByIds(
topIds,
options.repositoryId,
options.versionId,
options.type
),
searchModeUsed: 'keyword_fallback'
};
}
@@ -220,7 +230,12 @@ export class HybridSearchService {
if (alpha === 1) {
const topIds = vectorResults.slice(0, limit).map((r) => r.snippetId);
return {
results: this.fetchSnippetsByIds(topIds, options.repositoryId, options.type),
results: this.fetchSnippetsByIds(
topIds,
options.repositoryId,
options.versionId,
options.type
),
searchModeUsed: 'semantic'
};
}
@@ -234,7 +249,12 @@ export class HybridSearchService {
const topIds = fused.slice(0, limit).map((r) => r.id);
return {
results: this.fetchSnippetsByIds(topIds, options.repositoryId, options.type),
results: this.fetchSnippetsByIds(
topIds,
options.repositoryId,
options.versionId,
options.type
),
searchModeUsed: 'hybrid'
};
}
@@ -253,13 +273,19 @@ export class HybridSearchService {
private fetchSnippetsByIds(
ids: string[],
repositoryId: string,
versionId?: string,
type?: 'code' | 'info'
): SnippetSearchResult[] {
if (ids.length === 0) return [];
const placeholders = ids.map(() => '?').join(', ');
const params: unknown[] = [...ids, repositoryId];
let versionClause = '';
let typeClause = '';
if (versionId !== undefined) {
versionClause = ' AND s.version_id = ?';
params.push(versionId);
}
if (type !== undefined) {
typeClause = ' AND s.type = ?';
params.push(type);
@@ -276,7 +302,7 @@ export class HybridSearchService {
FROM snippets s
JOIN repositories r ON r.id = s.repository_id
WHERE s.id IN (${placeholders})
AND s.repository_id = ?${typeClause}`
AND s.repository_id = ?${versionClause}${typeClause}`
)
.all(...params) as RawSnippetById[];

View File

@@ -0,0 +1,394 @@
import type Database from 'better-sqlite3';
import {
loadSqliteVec,
quoteSqliteIdentifier,
sqliteVecRowidTableName,
sqliteVecTableName
} from '$lib/server/db/sqlite-vec.js';
/** Filters and limits for a sqlite-vec nearest-neighbor query. */
export interface SqliteVecQueryOptions {
  /** Repository whose snippets are searched (required). */
  repositoryId: string;
  /** When set, restrict matches to snippets of this repository version. */
  versionId?: string;
  /** Embedding profile to query; queryNearestNeighbors defaults to 'local-default'. */
  profileId?: string;
  /** Maximum number of results; queryNearestNeighbors defaults to 50. */
  limit?: number;
}
/** One nearest-neighbor match from SqliteVecStore.queryNearestNeighbors. */
export interface SqliteVecQueryResult {
  snippetId: string;
  /** Similarity score derived as 1 / (1 + distance); higher means closer. */
  score: number;
  /** Raw sqlite-vec distance; lower means closer. */
  distance: number;
}
/** Row shape for the embedding_profiles.dimensions lookup. */
interface ProfileDimensionsRow {
  dimensions: number;
}
/**
 * Aggregate over snippet_embeddings for one profile: row count plus the
 * min/max stored dimensions (NULL when the profile has no embeddings).
 */
interface StoredDimensionsRow {
  count: number;
  min_dimensions: number | null;
  max_dimensions: number | null;
}
/** Row shape for looking up a snippet's rowid in the per-profile mapping table. */
interface SnippetRowidRow {
  rowid: number;
}
/** Raw row returned by the KNN join (snippet id plus sqlite-vec distance). */
interface RawKnnRow {
  snippet_id: string;
  distance: number;
}
/** Canonical embedding bytes read back from snippet_embeddings for backfill. */
interface CanonicalEmbeddingRow {
  snippet_id: string;
  embedding: Buffer;
}
/** (profile, snippet) pair identifying one stored embedding to delete. */
interface StoredEmbeddingRef {
  profile_id: string;
  snippet_id: string;
}
/**
 * Resolved table names (raw and SQL-quoted) plus the effective vector
 * dimensionality for one embedding profile's derived store.
 */
interface ProfileStoreTables {
  vectorTableName: string;
  rowidTableName: string;
  quotedVectorTableName: string;
  quotedRowidTableName: string;
  dimensions: number;
}
/**
 * View a Float32Array's underlying bytes as a Node Buffer without copying.
 * Honors subarray views by carrying over byteOffset and byteLength.
 */
function toEmbeddingBuffer(values: Float32Array): Buffer {
  const { buffer, byteOffset, byteLength } = values;
  return Buffer.from(buffer, byteOffset, byteLength);
}
/**
 * Map a sqlite-vec distance (lower is closer) onto a similarity score in
 * (0, 1], where a distance of 0 yields exactly 1.
 */
function distanceToScore(distance: number): number {
  const denominator = 1 + distance;
  return 1 / denominator;
}
/**
 * Derived-state store that mirrors canonical snippet embeddings
 * (the snippet_embeddings table) into per-profile sqlite-vec vec0 virtual
 * tables for nearest-neighbor search.
 *
 * Each profile gets two tables: a vec0 virtual table holding the vectors and
 * a companion rowid table mapping vec rowids back to snippet ids (vec0 rows
 * carry no snippet id of their own). snippet_embeddings remains the source of
 * truth; queryNearestNeighbors re-synchronizes the derived tables from it
 * before searching.
 */
export class SqliteVecStore {
  constructor(private readonly db: Database.Database) {}

  /**
   * Create the profile's rowid-mapping table and vec0 virtual table if they
   * do not exist yet.
   *
   * @param profileId - Embedding profile id; must exist in embedding_profiles
   *   (getProfileStoreTables throws otherwise).
   * @param preferredDimensions - Expected vector width, validated against the
   *   profile row and any already-stored embeddings.
   * @returns The effective dimensionality used for the vec0 embedding column.
   */
  ensureProfileStore(profileId: string, preferredDimensions?: number): number {
    const tables = this.getProfileStoreTables(profileId, preferredDimensions);
    // snippet_id is UNIQUE: at most one vector per snippet per profile.
    // ON DELETE CASCADE drops the mapping row when the snippet is deleted;
    // the orphaned vec0 row is swept later by synchronizeProfileStore.
    this.db.exec(`
      CREATE TABLE IF NOT EXISTS ${tables.quotedRowidTableName} (
        rowid INTEGER PRIMARY KEY,
        snippet_id TEXT NOT NULL UNIQUE REFERENCES snippets(id) ON DELETE CASCADE
      );
    `);
    this.db.exec(`
      CREATE VIRTUAL TABLE IF NOT EXISTS ${tables.quotedVectorTableName}
      USING vec0(embedding float[${tables.dimensions}]);
    `);
    return tables.dimensions;
  }

  /**
   * Insert or overwrite the vector for one snippet in the profile's store.
   * Keeps the vec0 row and the rowid-mapping row in sync.
   */
  upsertEmbedding(profileId: string, snippetId: string, embedding: Float32Array): void {
    const tables = this.getProfileStoreTables(profileId, embedding.length);
    this.ensureProfileStore(profileId, tables.dimensions);
    const existingRow = this.db
      .prepare<[string], SnippetRowidRow>(
        `SELECT rowid FROM ${tables.quotedRowidTableName} WHERE snippet_id = ?`
      )
      .get(snippetId);
    const embeddingBuffer = toEmbeddingBuffer(embedding);
    if (existingRow) {
      // Update in place so the existing rowid mapping stays valid.
      this.db
        .prepare<[Buffer, number]>(
          `UPDATE ${tables.quotedVectorTableName} SET embedding = ? WHERE rowid = ?`
        )
        .run(embeddingBuffer, existingRow.rowid);
      return;
    }
    // Insert the vector first, then record the vec rowid -> snippet id mapping.
    const insertResult = this.db
      .prepare<[Buffer]>(`INSERT INTO ${tables.quotedVectorTableName} (embedding) VALUES (?)`)
      .run(embeddingBuffer);
    this.db
      .prepare<[number, string]>(
        `INSERT INTO ${tables.quotedRowidTableName} (rowid, snippet_id) VALUES (?, ?)`
      )
      .run(Number(insertResult.lastInsertRowid), snippetId);
  }

  /**
   * Convenience wrapper accepting raw Float32 bytes (the representation used
   * by snippet_embeddings.embedding) instead of a typed array.
   *
   * @param dimensions - Element count; defaults to byteLength / 4.
   *
   * NOTE(review): new Float32Array(buffer, byteOffset, length) throws when
   * byteOffset is not 4-byte aligned, and Node's pooled Buffers can be
   * unaligned — confirm callers always supply aligned buffers (e.g. from
   * better-sqlite3 BLOB reads) or copy before viewing.
   */
  upsertEmbeddingBuffer(
    profileId: string,
    snippetId: string,
    embedding: Buffer,
    dimensions?: number
  ): void {
    const vector = new Float32Array(
      embedding.buffer,
      embedding.byteOffset,
      dimensions ?? Math.floor(embedding.byteLength / Float32Array.BYTES_PER_ELEMENT)
    );
    this.upsertEmbedding(profileId, snippetId, vector);
  }

  /**
   * Remove one snippet's vector and its rowid mapping from the profile's
   * store. No-op when no mapping exists.
   */
  deleteEmbedding(profileId: string, snippetId: string): void {
    const tables = this.getProfileStoreTables(profileId);
    this.ensureProfileStore(profileId);
    const existingRow = this.db
      .prepare<[string], SnippetRowidRow>(
        `SELECT rowid FROM ${tables.quotedRowidTableName} WHERE snippet_id = ?`
      )
      .get(snippetId);
    if (!existingRow) {
      return;
    }
    // Delete the vec0 row first, then the mapping row that pointed at it.
    this.db
      .prepare<[number]>(`DELETE FROM ${tables.quotedVectorTableName} WHERE rowid = ?`)
      .run(existingRow.rowid);
    this.db
      .prepare<[string]>(`DELETE FROM ${tables.quotedRowidTableName} WHERE snippet_id = ?`)
      .run(snippetId);
  }

  /**
   * Remove all derived vectors (across every profile) whose snippets belong
   * to any of the given documents. No-op for an empty list.
   */
  deleteEmbeddingsForDocumentIds(documentIds: string[]): void {
    if (documentIds.length === 0) {
      return;
    }
    const placeholders = documentIds.map(() => '?').join(', ');
    const rows = this.db
      .prepare<unknown[], StoredEmbeddingRef>(
        `SELECT DISTINCT se.profile_id, se.snippet_id
         FROM snippet_embeddings se
         INNER JOIN snippets s ON s.id = se.snippet_id
         WHERE s.document_id IN (${placeholders})`
      )
      .all(...documentIds);
    this.deleteEmbeddingRefs(rows);
  }

  /**
   * Remove all derived vectors (across every profile) for snippets of one
   * repository. Intended to run before the repository row cascade-deletes
   * its snippets.
   */
  deleteEmbeddingsForRepository(repositoryId: string): void {
    const rows = this.db
      .prepare<[string], StoredEmbeddingRef>(
        `SELECT DISTINCT se.profile_id, se.snippet_id
         FROM snippet_embeddings se
         INNER JOIN snippets s ON s.id = se.snippet_id
         WHERE s.repository_id = ?`
      )
      .all(repositoryId);
    this.deleteEmbeddingRefs(rows);
  }

  /**
   * Remove all derived vectors (across every profile) for snippets of one
   * repository version.
   */
  deleteEmbeddingsForVersion(repositoryId: string, versionId: string): void {
    const rows = this.db
      .prepare<[string, string], StoredEmbeddingRef>(
        `SELECT DISTINCT se.profile_id, se.snippet_id
         FROM snippet_embeddings se
         INNER JOIN snippets s ON s.id = se.snippet_id
         WHERE s.repository_id = ? AND s.version_id = ?`
      )
      .all(repositoryId, versionId);
    this.deleteEmbeddingRefs(rows);
  }

  /**
   * KNN search over one profile's vec0 table, filtered to a repository and
   * optionally a version.
   *
   * The derived tables are re-synchronized from snippet_embeddings first.
   * The MATCH uses k = <total synced rows> so that the repository/version
   * predicates (applied after the KNN) cannot starve the LIMIT — at the cost
   * of ranking the whole profile index on every query.
   *
   * @returns Up to `limit` results ordered by ascending distance; empty when
   *   limit <= 0 or the profile store has no rows.
   */
  queryNearestNeighbors(
    queryEmbedding: Float32Array,
    options: SqliteVecQueryOptions
  ): SqliteVecQueryResult[] {
    const { repositoryId, versionId, profileId = 'local-default', limit = 50 } = options;
    if (limit <= 0) {
      return [];
    }
    const tables = this.getProfileStoreTables(profileId, queryEmbedding.length);
    this.ensureProfileStore(profileId, tables.dimensions);
    const totalRows = this.synchronizeProfileStore(profileId, tables);
    if (totalRows === 0) {
      return [];
    }
    let sql = `
      SELECT rowids.snippet_id, vec.distance
      FROM ${tables.quotedVectorTableName} vec
      JOIN ${tables.quotedRowidTableName} rowids ON rowids.rowid = vec.rowid
      JOIN snippets s ON s.id = rowids.snippet_id
      WHERE vec.embedding MATCH ?
      AND vec.k = ?
      AND s.repository_id = ?
    `;
    const params: unknown[] = [toEmbeddingBuffer(queryEmbedding), totalRows, repositoryId];
    if (versionId !== undefined) {
      sql += ' AND s.version_id = ?';
      params.push(versionId);
    }
    sql += ' ORDER BY vec.distance ASC LIMIT ?';
    params.push(limit);
    const rows = this.db.prepare<unknown[], RawKnnRow>(sql).all(...params);
    return rows.map((row) => ({
      snippetId: row.snippet_id,
      score: distanceToScore(row.distance),
      distance: row.distance
    }));
  }

  /**
   * Bring the derived tables in line with snippet_embeddings for one profile:
   * 1) drop mapping rows whose canonical embedding (matching dimensions) or
   *    vec0 row is gone; 2) drop vec0 rows with no mapping; 3) backfill any
   *    canonical embeddings missing from the mapping table.
   *
   * @returns The number of fully-linked vector rows after synchronization.
   */
  private synchronizeProfileStore(profileId: string, tables: ProfileStoreTables): number {
    this.db
      .prepare<[string, number]>(
        `DELETE FROM ${tables.quotedRowidTableName}
         WHERE rowid IN (
           SELECT rowids.rowid
           FROM ${tables.quotedRowidTableName} rowids
           LEFT JOIN snippet_embeddings se
             ON se.snippet_id = rowids.snippet_id
             AND se.profile_id = ?
             AND se.dimensions = ?
           LEFT JOIN ${tables.quotedVectorTableName} vec ON vec.rowid = rowids.rowid
           WHERE se.snippet_id IS NULL OR vec.rowid IS NULL
         )`
      )
      .run(profileId, tables.dimensions);
    // Sweep vec0 rows orphaned by the mapping cleanup above (or by snippet
    // deletes that cascaded through the mapping table).
    this.db
      .prepare(
        `DELETE FROM ${tables.quotedVectorTableName}
         WHERE rowid NOT IN (SELECT rowid FROM ${tables.quotedRowidTableName})`
      )
      .run();
    const missingRows = this.db
      .prepare<[string, number], CanonicalEmbeddingRow>(
        `SELECT se.snippet_id, se.embedding
         FROM snippet_embeddings se
         LEFT JOIN ${tables.quotedRowidTableName} rowids ON rowids.snippet_id = se.snippet_id
         WHERE se.profile_id = ?
         AND se.dimensions = ?
         AND rowids.snippet_id IS NULL`
      )
      .all(profileId, tables.dimensions);
    if (missingRows.length > 0) {
      // Backfill inside one transaction so a partial sync is never visible.
      const backfill = this.db.transaction((rows: CanonicalEmbeddingRow[]) => {
        for (const row of rows) {
          this.upsertEmbedding(
            profileId,
            row.snippet_id,
            new Float32Array(
              row.embedding.buffer,
              row.embedding.byteOffset,
              tables.dimensions
            )
          );
        }
      });
      backfill(missingRows);
    }
    return (
      this.db
        .prepare<[], { count: number }>(
          `SELECT COUNT(*) AS count
           FROM ${tables.quotedVectorTableName} vec
           JOIN ${tables.quotedRowidTableName} rowids ON rowids.rowid = vec.rowid`
        )
        .get()?.count ?? 0
    );
  }

  /** Delete each referenced embedding atomically within one transaction. */
  private deleteEmbeddingRefs(rows: StoredEmbeddingRef[]): void {
    if (rows.length === 0) {
      return;
    }
    const removeRows = this.db.transaction((refs: StoredEmbeddingRef[]) => {
      for (const ref of refs) {
        this.deleteEmbedding(ref.profile_id, ref.snippet_id);
      }
    });
    removeRows(rows);
  }

  /**
   * Resolve table names and the effective dimensionality for a profile.
   * Loads the sqlite-vec extension as a side effect.
   *
   * @throws Error when the profile does not exist in embedding_profiles, or
   *   when resolveDimensions detects inconsistent/mismatched dimensions.
   */
  private getProfileStoreTables(
    profileId: string,
    preferredDimensions?: number
  ): ProfileStoreTables {
    loadSqliteVec(this.db);
    const dimensionsRow = this.db
      .prepare<[string], ProfileDimensionsRow>(
        'SELECT dimensions FROM embedding_profiles WHERE id = ?'
      )
      .get(profileId);
    if (!dimensionsRow) {
      throw new Error(`Embedding profile not found: ${profileId}`);
    }
    const storedDimensions = this.db
      .prepare<[string], StoredDimensionsRow>(
        `SELECT
           COUNT(*) AS count,
           MIN(dimensions) AS min_dimensions,
           MAX(dimensions) AS max_dimensions
         FROM snippet_embeddings
         WHERE profile_id = ?`
      )
      .get(profileId);
    const effectiveDimensions = this.resolveDimensions(
      profileId,
      dimensionsRow.dimensions,
      storedDimensions,
      preferredDimensions
    );
    const vectorTableName = sqliteVecTableName(profileId);
    const rowidTableName = sqliteVecRowidTableName(profileId);
    return {
      vectorTableName,
      rowidTableName,
      quotedVectorTableName: quoteSqliteIdentifier(vectorTableName),
      quotedRowidTableName: quoteSqliteIdentifier(rowidTableName),
      dimensions: effectiveDimensions
    };
  }

  /**
   * Pick the dimensionality to use for a profile's store.
   *
   * Precedence: dimensions already stored in snippet_embeddings (which must
   * all agree, and must match preferredDimensions when given), then
   * preferredDimensions, then the profile's configured dimensions.
   *
   * @throws Error on inconsistent stored dimensions or a mismatch between
   *   stored and preferred dimensions.
   */
  private resolveDimensions(
    profileId: string,
    profileDimensions: number,
    storedDimensions: StoredDimensionsRow | undefined,
    preferredDimensions?: number
  ): number {
    if (storedDimensions && storedDimensions.count > 0) {
      if (storedDimensions.min_dimensions !== storedDimensions.max_dimensions) {
        throw new Error(`Stored embedding dimensions are inconsistent for profile ${profileId}`);
      }
      const canonicalDimensions = storedDimensions.min_dimensions;
      if (canonicalDimensions === null) {
        throw new Error(`Stored embedding dimensions are missing for profile ${profileId}`);
      }
      if (
        preferredDimensions !== undefined &&
        preferredDimensions !== canonicalDimensions
      ) {
        throw new Error(
          `Embedding dimension mismatch for profile ${profileId}: expected ${canonicalDimensions}, received ${preferredDimensions}`
        );
      }
      return canonicalDimensions;
    }
    return preferredDimensions ?? profileDimensions;
  }
}

View File

@@ -1,16 +1,12 @@
/**
* Vector similarity search over stored snippet embeddings.
*
* SQLite does not natively support vector operations, so cosine similarity is
* computed in JavaScript after loading candidate embeddings from the
* snippet_embeddings table.
*
* Performance note: For repositories with > 50k snippets, pre-filtering by
* FTS5 candidates before computing cosine similarity is recommended. For v1,
* in-memory computation is acceptable.
* Uses sqlite-vec vector_top_k() for ANN search instead of in-memory cosine
* similarity computation over all embeddings.
*/
import type Database from 'better-sqlite3';
import { SqliteVecStore } from './sqlite-vec.store.js';
// ---------------------------------------------------------------------------
// Types
@@ -28,12 +24,6 @@ export interface VectorSearchOptions {
limit?: number;
}
/** Raw DB row from snippet_embeddings joined with snippets. */
interface RawEmbeddingRow {
snippet_id: string;
embedding: Buffer;
}
// ---------------------------------------------------------------------------
// Math helpers
// ---------------------------------------------------------------------------
@@ -69,46 +59,26 @@ export function cosineSimilarity(a: Float32Array, b: Float32Array): number {
// ---------------------------------------------------------------------------
export class VectorSearch {
constructor(private readonly db: Database.Database) {}
private readonly sqliteVecStore: SqliteVecStore;
constructor(private readonly db: Database.Database) {
this.sqliteVecStore = new SqliteVecStore(db);
}
/**
* Search stored embeddings by cosine similarity to the query embedding.
*
* Uses in-memory cosine similarity computation. The vec_embedding column
* stores raw Float32 bytes for forward compatibility with vector-capable
* libSQL builds; scoring is performed in JS using the same bytes.
*
* @param queryEmbedding - The embedded representation of the search query.
* @param options - Search options including repositoryId, optional versionId, profileId, and limit.
* @returns Results sorted by descending cosine similarity score.
*/
vectorSearch(queryEmbedding: Float32Array, options: VectorSearchOptions): VectorSearchResult[] {
const { repositoryId, versionId, profileId = 'local-default', limit = 50 } = options;
let sql = `
SELECT se.snippet_id, se.embedding
FROM snippet_embeddings se
JOIN snippets s ON s.id = se.snippet_id
WHERE s.repository_id = ?
AND se.profile_id = ?
`;
const params: unknown[] = [repositoryId, profileId];
if (versionId) {
sql += ' AND s.version_id = ?';
params.push(versionId);
}
const rows = this.db.prepare<unknown[], RawEmbeddingRow>(sql).all(...params);
const scored: VectorSearchResult[] = rows.map((row) => {
const embedding = new Float32Array(
row.embedding.buffer,
row.embedding.byteOffset,
row.embedding.byteLength / 4
);
return {
snippetId: row.snippet_id,
score: cosineSimilarity(queryEmbedding, embedding)
};
});
return scored.sort((a, b) => b.score - a.score).slice(0, limit);
return this.sqliteVecStore
.queryNearestNeighbors(queryEmbedding, options)
.map((result) => ({ snippetId: result.snippetId, score: result.score }));
}
}

View File

@@ -11,6 +11,8 @@ import Database from 'better-sqlite3';
import { readFileSync } from 'node:fs';
import { join } from 'node:path';
import { RepositoryService } from './repository.service';
import { loadSqliteVec, sqliteVecRowidTableName, sqliteVecTableName } from '$lib/server/db/sqlite-vec.js';
import { SqliteVecStore } from '$lib/server/search/sqlite-vec.store.js';
import {
AlreadyExistsError,
InvalidInputError,
@@ -25,6 +27,7 @@ import {
function createTestDb(): Database.Database {
const client = new Database(':memory:');
client.pragma('foreign_keys = ON');
loadSqliteVec(client);
const migrationsFolder = join(import.meta.dirname, '../db/migrations');
@@ -33,7 +36,9 @@ function createTestDb(): Database.Database {
'0001_quick_nighthawk.sql',
'0002_silky_stellaris.sql',
'0003_multiversion_config.sql',
'0004_complete_sentry.sql'
'0004_complete_sentry.sql',
'0005_fix_stage_defaults.sql',
'0006_yielding_centennial.sql'
]) {
const statements = readFileSync(join(migrationsFolder, migration), 'utf-8')
.split('--> statement-breakpoint')
@@ -331,6 +336,41 @@ describe('RepositoryService.remove()', () => {
it('throws NotFoundError when the repository does not exist', () => {
expect(() => service.remove('/not/found')).toThrow(NotFoundError);
});
it('removes derived vec rows before the repository cascade deletes snippets', () => {
const docId = crypto.randomUUID();
const snippetId = crypto.randomUUID();
const embedding = Float32Array.from([1, 0, 0]);
const vecStore = new SqliteVecStore((service as unknown as { db: Database.Database }).db);
const db = (service as unknown as { db: Database.Database }).db;
const now = Math.floor(Date.now() / 1000);
db.prepare(
`INSERT INTO documents (id, repository_id, version_id, file_path, checksum, indexed_at)
VALUES (?, '/facebook/react', NULL, 'README.md', 'repo-doc', ?)`
).run(docId, now);
db.prepare(
`INSERT INTO snippets (id, document_id, repository_id, version_id, type, content, created_at)
VALUES (?, ?, '/facebook/react', NULL, 'info', 'repo snippet', ?)`
).run(snippetId, docId, now);
db.prepare(
`INSERT INTO snippet_embeddings (snippet_id, profile_id, model, dimensions, embedding, created_at)
VALUES (?, 'local-default', 'test-model', 3, ?, ?)`
).run(snippetId, Buffer.from(embedding.buffer), now);
vecStore.upsertEmbedding('local-default', snippetId, embedding);
service.remove('/facebook/react');
const vecTable = sqliteVecTableName('local-default');
const rowidTable = sqliteVecRowidTableName('local-default');
const vecCount = db.prepare(`SELECT COUNT(*) as n FROM "${vecTable}"`).get() as { n: number };
const rowidCount = db.prepare(`SELECT COUNT(*) as n FROM "${rowidTable}"`).get() as {
n: number;
};
expect(vecCount.n).toBe(0);
expect(rowidCount.n).toBe(0);
});
});
// ---------------------------------------------------------------------------

View File

@@ -8,6 +8,7 @@ import { RepositoryMapper } from '$lib/server/mappers/repository.mapper.js';
import { IndexingJobMapper } from '$lib/server/mappers/indexing-job.mapper.js';
import { Repository, RepositoryEntity } from '$lib/server/models/repository.js';
import { IndexingJob, IndexingJobEntity } from '$lib/server/models/indexing-job.js';
import { SqliteVecStore } from '$lib/server/search/sqlite-vec.store.js';
import { resolveGitHubId, resolveLocalId } from '$lib/server/utils/id-resolver';
import {
AlreadyExistsError,
@@ -230,7 +231,11 @@ export class RepositoryService {
const existing = this.get(id);
if (!existing) throw new NotFoundError(`Repository ${id} not found`);
this.db.prepare(`DELETE FROM repositories WHERE id = ?`).run(id);
const sqliteVecStore = new SqliteVecStore(this.db);
this.db.transaction(() => {
sqliteVecStore.deleteEmbeddingsForRepository(id);
this.db.prepare(`DELETE FROM repositories WHERE id = ?`).run(id);
})();
}
/**

View File

@@ -10,6 +10,8 @@ import { describe, it, expect } from 'vitest';
import Database from 'better-sqlite3';
import { readFileSync } from 'node:fs';
import { join } from 'node:path';
import { loadSqliteVec, sqliteVecRowidTableName, sqliteVecTableName } from '$lib/server/db/sqlite-vec.js';
import { SqliteVecStore } from '$lib/server/search/sqlite-vec.store.js';
import { VersionService } from './version.service';
import { RepositoryService } from './repository.service';
import { AlreadyExistsError, NotFoundError } from '$lib/server/utils/validation';
@@ -21,31 +23,27 @@ import { AlreadyExistsError, NotFoundError } from '$lib/server/utils/validation'
function createTestDb(): Database.Database {
const client = new Database(':memory:');
client.pragma('foreign_keys = ON');
loadSqliteVec(client);
const migrationsFolder = join(import.meta.dirname, '../db/migrations');
// Apply all migration files in order
const migration0 = readFileSync(join(migrationsFolder, '0000_large_master_chief.sql'), 'utf-8');
const migration1 = readFileSync(join(migrationsFolder, '0001_quick_nighthawk.sql'), 'utf-8');
for (const migration of [
'0000_large_master_chief.sql',
'0001_quick_nighthawk.sql',
'0002_silky_stellaris.sql',
'0003_multiversion_config.sql',
'0004_complete_sentry.sql',
'0005_fix_stage_defaults.sql',
'0006_yielding_centennial.sql'
]) {
const statements = readFileSync(join(migrationsFolder, migration), 'utf-8')
.split('--> statement-breakpoint')
.map((statement) => statement.trim())
.filter(Boolean);
// Apply first migration
const statements0 = migration0
.split('--> statement-breakpoint')
.map((s) => s.trim())
.filter(Boolean);
for (const stmt of statements0) {
client.exec(stmt);
}
// Apply second migration
const statements1 = migration1
.split('--> statement-breakpoint')
.map((s) => s.trim())
.filter(Boolean);
for (const stmt of statements1) {
client.exec(stmt);
for (const statement of statements) {
client.exec(statement);
}
}
return client;
@@ -198,6 +196,44 @@ describe('VersionService.remove()', () => {
const doc = client.prepare(`SELECT id FROM documents WHERE id = ?`).get(docId);
expect(doc).toBeUndefined();
});
it('removes derived vec rows before deleting the version', () => {
const { client, versionService } = setup();
const version = versionService.add('/facebook/react', 'v18.3.0');
const docId = crypto.randomUUID();
const snippetId = crypto.randomUUID();
const embedding = Float32Array.from([0.5, 0.25, 0.125]);
const now = Math.floor(Date.now() / 1000);
const vecStore = new SqliteVecStore(client);
client.prepare(
`INSERT INTO documents (id, repository_id, version_id, file_path, checksum, indexed_at)
VALUES (?, '/facebook/react', ?, 'README.md', 'version-doc', ?)`
).run(docId, version.id, now);
client.prepare(
`INSERT INTO snippets (id, document_id, repository_id, version_id, type, content, created_at)
VALUES (?, ?, '/facebook/react', ?, 'info', 'version snippet', ?)`
).run(snippetId, docId, version.id, now);
client.prepare(
`INSERT INTO snippet_embeddings (snippet_id, profile_id, model, dimensions, embedding, created_at)
VALUES (?, 'local-default', 'test-model', 3, ?, ?)`
).run(snippetId, Buffer.from(embedding.buffer), now);
vecStore.upsertEmbedding('local-default', snippetId, embedding);
versionService.remove('/facebook/react', 'v18.3.0');
const vecTable = sqliteVecTableName('local-default');
const rowidTable = sqliteVecRowidTableName('local-default');
const vecCount = client.prepare(`SELECT COUNT(*) as n FROM "${vecTable}"`).get() as {
n: number;
};
const rowidCount = client.prepare(`SELECT COUNT(*) as n FROM "${rowidTable}"`).get() as {
n: number;
};
expect(vecCount.n).toBe(0);
expect(rowidCount.n).toBe(0);
});
});
// ---------------------------------------------------------------------------

View File

@@ -11,6 +11,7 @@ import {
RepositoryVersion,
RepositoryVersionEntity
} from '$lib/server/models/repository-version.js';
import { SqliteVecStore } from '$lib/server/search/sqlite-vec.store.js';
import { AlreadyExistsError, NotFoundError } from '$lib/server/utils/validation';
import { resolveTagToCommit, discoverVersionTags } from '$lib/server/utils/git.js';
@@ -99,9 +100,13 @@ export class VersionService {
throw new NotFoundError(`Version ${tag} not found for repository ${repositoryId}`);
}
this.db
.prepare(`DELETE FROM repository_versions WHERE repository_id = ? AND tag = ?`)
.run(repositoryId, tag);
const sqliteVecStore = new SqliteVecStore(this.db);
this.db.transaction(() => {
sqliteVecStore.deleteEmbeddingsForVersion(repositoryId, version.id);
this.db
.prepare(`DELETE FROM repository_versions WHERE repository_id = ? AND tag = ?`)
.run(repositoryId, tag);
})();
}
/**

View File

@@ -1,5 +1,10 @@
<script lang="ts">
import { onMount } from 'svelte';
import { SvelteURLSearchParams } from 'svelte/reactivity';
import JobSkeleton from '$lib/components/admin/JobSkeleton.svelte';
import JobStatusBadge from '$lib/components/admin/JobStatusBadge.svelte';
import Toast from '$lib/components/admin/Toast.svelte';
import WorkerStatusPanel from '$lib/components/admin/WorkerStatusPanel.svelte';
import type { IndexingJobDto } from '$lib/server/models/indexing-job.js';
interface JobResponse {
@@ -7,174 +12,16 @@
total: number;
}
let jobs = $state<IndexingJobDto[]>([]);
let loading = $state(true);
let error = $state<string | null>(null);
let actionInProgress = $state<string | null>(null);
// Fetch jobs from API
async function fetchJobs() {
try {
const response = await fetch('/api/v1/jobs?limit=50');
if (!response.ok) {
throw new Error(`HTTP ${response.status}`);
}
const data: JobResponse = await response.json();
jobs = data.jobs;
error = null;
} catch (err) {
error = err instanceof Error ? err.message : 'Failed to fetch jobs';
console.error('Failed to fetch jobs:', err);
} finally {
loading = false;
}
interface ToastItem {
id: string;
message: string;
type: 'success' | 'error' | 'info';
}
// Action handlers
async function pauseJob(id: string) {
actionInProgress = id;
try {
const response = await fetch(`/api/v1/jobs/${id}/pause`, { method: 'POST' });
if (!response.ok) {
const errorData = await response.json().catch(() => ({ message: 'Unknown error' }));
throw new Error(errorData.message || `HTTP ${response.status}`);
}
// Optimistic update
jobs = jobs.map((j) => (j.id === id ? { ...j, status: 'paused' as const } : j));
// Show success message
showToast('Job paused successfully');
} catch (err) {
const msg = err instanceof Error ? err.message : 'Failed to pause job';
showToast(msg, 'error');
console.error('Failed to pause job:', err);
} finally {
actionInProgress = null;
// Refresh after a short delay to get the actual state
setTimeout(fetchJobs, 500);
}
}
type FilterStatus = 'queued' | 'running' | 'done' | 'failed';
type JobAction = 'pause' | 'resume' | 'cancel';
async function resumeJob(id: string) {
actionInProgress = id;
try {
const response = await fetch(`/api/v1/jobs/${id}/resume`, { method: 'POST' });
if (!response.ok) {
const errorData = await response.json().catch(() => ({ message: 'Unknown error' }));
throw new Error(errorData.message || `HTTP ${response.status}`);
}
// Optimistic update
jobs = jobs.map((j) => (j.id === id ? { ...j, status: 'queued' as const } : j));
showToast('Job resumed successfully');
} catch (err) {
const msg = err instanceof Error ? err.message : 'Failed to resume job';
showToast(msg, 'error');
console.error('Failed to resume job:', err);
} finally {
actionInProgress = null;
setTimeout(fetchJobs, 500);
}
}
async function cancelJob(id: string) {
if (!confirm('Are you sure you want to cancel this job?')) {
return;
}
actionInProgress = id;
try {
const response = await fetch(`/api/v1/jobs/${id}/cancel`, { method: 'POST' });
if (!response.ok) {
const errorData = await response.json().catch(() => ({ message: 'Unknown error' }));
throw new Error(errorData.message || `HTTP ${response.status}`);
}
// Optimistic update
jobs = jobs.map((j) => (j.id === id ? { ...j, status: 'cancelled' as const } : j));
showToast('Job cancelled successfully');
} catch (err) {
const msg = err instanceof Error ? err.message : 'Failed to cancel job';
showToast(msg, 'error');
console.error('Failed to cancel job:', err);
} finally {
actionInProgress = null;
setTimeout(fetchJobs, 500);
}
}
// Simple toast notification (using alert for v1, can be enhanced later)
function showToast(message: string, type: 'success' | 'error' = 'success') {
// For v1, just use alert. In production, integrate with a toast library.
if (type === 'error') {
alert(`Error: ${message}`);
} else {
console.log(`✓ ${message}`);
}
}
// Auto-refresh with EventSource streaming + fallback polling
$effect(() => {
fetchJobs();
const es = new EventSource('/api/v1/jobs/stream');
let fallbackInterval: ReturnType<typeof setInterval> | null = null;
es.addEventListener('job-progress', (event) => {
const data = JSON.parse(event.data);
jobs = jobs.map((j) =>
j.id === data.jobId
? {
...j,
progress: data.progress,
stage: data.stage,
stageDetail: data.stageDetail,
processedFiles: data.processedFiles,
totalFiles: data.totalFiles
}
: j
);
});
es.addEventListener('job-done', () => {
void fetchJobs();
});
es.addEventListener('job-failed', () => {
void fetchJobs();
});
es.onerror = () => {
es.close();
// Fall back to polling on error
fallbackInterval = setInterval(fetchJobs, 3000);
};
return () => {
es.close();
if (fallbackInterval) {
clearInterval(fallbackInterval);
}
};
});
// Format date for display
function formatDate(date: Date | null): string {
if (!date) return '—';
return new Date(date).toLocaleString();
}
// Determine which actions are available for a job
function canPause(status: IndexingJobDto['status']): boolean {
return status === 'queued' || status === 'running';
}
function canResume(status: IndexingJobDto['status']): boolean {
return status === 'paused';
}
function canCancel(status: IndexingJobDto['status']): boolean {
return status !== 'done' && status !== 'failed';
}
// Map IndexingStage values to display labels
const filterStatuses: FilterStatus[] = ['queued', 'running', 'done', 'failed'];
const stageLabels: Record<string, string> = {
queued: 'Queued',
differential: 'Diff',
@@ -187,9 +34,274 @@
failed: 'Failed'
};
let jobs = $state<IndexingJobDto[]>([]);
let total = $state(0);
let loading = $state(true);
let refreshing = $state(false);
let error = $state<string | null>(null);
let repositoryInput = $state('');
let selectedStatuses = $state<FilterStatus[]>([]);
let appliedRepositoryFilter = $state('');
let appliedStatuses = $state<FilterStatus[]>([]);
let pendingCancelJobId = $state<string | null>(null);
let rowActions = $state<Record<string, JobAction | undefined>>({});
let toasts = $state<ToastItem[]>([]);
let refreshTimer: ReturnType<typeof setTimeout> | null = null;
function buildJobsUrl(): string {
const params = new SvelteURLSearchParams({ limit: '50' });
if (appliedRepositoryFilter) {
params.set('repositoryId', appliedRepositoryFilter);
}
if (appliedStatuses.length > 0) {
params.set('status', appliedStatuses.join(','));
}
return `/api/v1/jobs?${params.toString()}`;
}
function pushToast(message: string, type: ToastItem['type'] = 'success') {
toasts = [...toasts, { id: crypto.randomUUID(), message, type }];
}
function clearRowAction(jobId: string) {
const next = { ...rowActions };
delete next[jobId];
rowActions = next;
}
function setRowAction(jobId: string, action: JobAction) {
rowActions = { ...rowActions, [jobId]: action };
}
function scheduleRefresh(delayMs = 500) {
if (refreshTimer) {
clearTimeout(refreshTimer);
}
refreshTimer = setTimeout(() => {
void fetchJobs({ background: true });
}, delayMs);
}
function hasAppliedFilters(): boolean {
return appliedRepositoryFilter.length > 0 || appliedStatuses.length > 0;
}
/** Order-sensitive equality check between two status arrays. */
function sameStatuses(left: FilterStatus[], right: FilterStatus[]): boolean {
  if (left.length !== right.length) {
    return false;
  }
  for (let i = 0; i < left.length; i++) {
    if (left[i] !== right[i]) {
      return false;
    }
  }
  return true;
}
/** True when the form inputs differ from the currently applied filters. */
function filtersDirty(): boolean {
  const repositoryChanged = repositoryInput.trim() !== appliedRepositoryFilter;
  const statusesChanged = !sameStatuses(selectedStatuses, appliedStatuses);
  return repositoryChanged || statusesChanged;
}
/**
 * A repository ID with at least two non-empty path segments (e.g.
 * "/owner/repo") is specific; a single segment ("/owner") is an owner
 * prefix used for prefix matching.
 */
function isSpecificRepositoryId(repositoryId: string): boolean {
  const segments = repositoryId.split('/').filter((segment) => segment.length > 0);
  return segments.length >= 2;
}
/**
 * Client-side mirror of the server's filter semantics: exact match for a
 * specific repository ID, prefix match for an owner-level filter, and
 * membership in the applied status list (empty list = all statuses).
 */
function matchesAppliedFilters(job: IndexingJobDto): boolean {
  const repositoryFilter = appliedRepositoryFilter;
  if (repositoryFilter) {
    if (isSpecificRepositoryId(repositoryFilter)) {
      if (job.repositoryId !== repositoryFilter) {
        return false;
      }
    } else if (
      job.repositoryId !== repositoryFilter &&
      !job.repositoryId.startsWith(`${repositoryFilter}/`)
    ) {
      return false;
    }
  }
  return appliedStatuses.length === 0 || appliedStatuses.includes(job.status as FilterStatus);
}
/**
 * Drops the pending cancel confirmation when the targeted job has
 * disappeared from the refreshed list or reached a state that can no
 * longer be cancelled.
 */
function syncCancelState(nextJobs: IndexingJobDto[]) {
  if (!pendingCancelJobId) {
    return;
  }
  const target = nextJobs.find((candidate) => candidate.id === pendingCancelJobId);
  if (!target || !canCancel(target.status)) {
    pendingCancelJobId = null;
  }
}
/**
 * Loads the job list from the API using the currently applied filters.
 *
 * Foreground loads (default) drive the full-page `loading` state;
 * background loads set the lighter `refreshing` indicator instead so the
 * table does not flash while polling.
 */
async function fetchJobs(options: { background?: boolean } = {}) {
  const background = options.background ?? false;
  if (background) {
    refreshing = true;
  } else {
    loading = true;
  }
  try {
    const response = await fetch(buildJobsUrl());
    if (!response.ok) {
      throw new Error(`HTTP ${response.status}`);
    }
    const data: JobResponse = await response.json();
    jobs = data.jobs;
    total = data.total;
    error = null;
    // The refreshed list may have removed or completed the job awaiting a
    // cancel confirmation; reconcile that UI state with the new data.
    syncCancelState(data.jobs);
  } catch (err) {
    error = err instanceof Error ? err.message : 'Failed to fetch jobs';
    console.error('Failed to fetch jobs:', err);
  } finally {
    // Clear both flags unconditionally; only one was set above.
    loading = false;
    refreshing = false;
  }
}
/**
 * Runs a pause/resume/cancel action against a job, applies the server's
 * updated job to the list (or removes it when it no longer matches the
 * active filters), and schedules a background refresh to reconcile state.
 *
 * Row-level busy state is set for the duration of the request so the UI
 * can disable that row's buttons.
 */
async function runJobAction(job: IndexingJobDto, action: JobAction) {
  setRowAction(job.id, action);
  try {
    const response = await fetch(`/api/v1/jobs/${job.id}/${action}`, { method: 'POST' });
    const payload = await response.json().catch(() => ({ message: 'Unknown error' }));
    if (!response.ok) {
      throw new Error(payload.message || `HTTP ${response.status}`);
    }
    const updatedJob = payload.job as IndexingJobDto | undefined;
    if (updatedJob) {
      if (matchesAppliedFilters(updatedJob)) {
        jobs = jobs.map((currentJob) =>
          currentJob.id === updatedJob.id ? updatedJob : currentJob
        );
      } else {
        // The updated job no longer matches the active filters; drop it.
        jobs = jobs.filter((currentJob) => currentJob.id !== updatedJob.id);
      }
    }
    pendingCancelJobId = null;
    // Bug fix: naive `${action}d` produced "canceld" for the cancel action.
    const pastTense = action === 'cancel' ? 'cancelled' : `${action}d`;
    pushToast(`Job ${pastTense} successfully`);
  } catch (err) {
    const message = err instanceof Error ? err.message : `Failed to ${action} job`;
    pushToast(message, 'error');
    console.error(`Failed to ${action} job:`, err);
  } finally {
    clearRowAction(job.id);
    scheduleRefresh();
  }
}
/**
 * Toggles a status chip in the pending filter selection, keeping the
 * selection ordered to match the canonical filterStatuses order.
 */
function toggleStatusFilter(status: FilterStatus) {
  if (selectedStatuses.includes(status)) {
    selectedStatuses = selectedStatuses.filter((existing) => existing !== status);
  } else {
    const next = [...selectedStatuses, status];
    next.sort((a, b) => filterStatuses.indexOf(a) - filterStatuses.indexOf(b));
    selectedStatuses = next;
  }
}
/** Applies the pending form filters and reloads the list in the foreground. */
function applyFilters(event?: SubmitEvent) {
  if (event) {
    event.preventDefault();
  }
  appliedRepositoryFilter = repositoryInput.trim();
  appliedStatuses = selectedStatuses.slice();
  // Any open cancel confirmation is stale once the list changes.
  pendingCancelJobId = null;
  void fetchJobs();
}
/** Clears both the pending and applied filters, then reloads the list. */
function resetFilters() {
  // Applied filters first, then the form inputs that mirror them.
  appliedRepositoryFilter = '';
  appliedStatuses = [];
  repositoryInput = '';
  selectedStatuses = [];
  pendingCancelJobId = null;
  void fetchJobs();
}
/** Toggles the inline cancel confirmation for a job row. */
function requestCancel(jobId: string) {
  if (pendingCancelJobId === jobId) {
    pendingCancelJobId = null;
  } else {
    pendingCancelJobId = jobId;
  }
}
/** Formats a timestamp with the user's locale; em dash for missing values. */
function formatDate(date: Date | string | null): string {
  return date ? new Date(date).toLocaleString() : '—';
}
/** Pausing is allowed while a job is waiting or actively running. */
function canPause(status: IndexingJobDto['status']): boolean {
  return ['queued', 'running'].includes(status);
}
/** Only paused jobs can be resumed. */
function canResume(status: IndexingJobDto['status']): boolean {
  switch (status) {
    case 'paused':
      return true;
    default:
      return false;
  }
}
/** Cancel is blocked once a job has reached a terminal state. */
function canCancel(status: IndexingJobDto['status']): boolean {
  const terminalStates = ['done', 'failed', 'cancelled'];
  return !terminalStates.includes(status);
}
/** True while an action request is in flight for the given job row. */
function isRowBusy(jobId: string): boolean {
  return rowActions[jobId] !== undefined;
}
/** Human-readable label for a stage key; em dash when the stage is unset. */
function getStageLabel(stage: string | undefined): string {
  if (!stage) {
    return '—';
  }
  return stageLabels[stage] ?? stage;
}
onMount(() => {
  // Initial foreground load of the job list.
  void fetchJobs();
  // Live updates for all jobs via server-sent events.
  const es = new EventSource('/api/v1/jobs/stream');
  let fallbackInterval: ReturnType<typeof setInterval> | null = null;
  const refreshJobs = () => {
    void fetchJobs({ background: true });
  };
  es.addEventListener('job-progress', (event) => {
    const data = JSON.parse(event.data) as Partial<IndexingJobDto> & { jobId?: string };
    if (!data.jobId) {
      return;
    }
    // Merge the partial progress payload into the matching row, keeping
    // existing values for any fields the event did not include.
    jobs = jobs.map((job) =>
      job.id === data.jobId
        ? {
            ...job,
            progress: data.progress ?? job.progress,
            stage: data.stage ?? job.stage,
            stageDetail: data.stageDetail ?? job.stageDetail,
            processedFiles: data.processedFiles ?? job.processedFiles,
            totalFiles: data.totalFiles ?? job.totalFiles,
            status: data.status ?? job.status
          }
        : job
    );
  });
  // Terminal events carry no full job payload; re-fetch the list instead.
  es.addEventListener('job-done', refreshJobs);
  es.addEventListener('job-failed', refreshJobs);
  es.onerror = () => {
    // The SSE connection failed; fall back to interval polling.
    es.close();
    if (!fallbackInterval) {
      fallbackInterval = setInterval(refreshJobs, 3000);
    }
  };
  return () => {
    // Teardown: close the stream and clear any timers created here.
    es.close();
    if (fallbackInterval) {
      clearInterval(fallbackInterval);
    }
    if (refreshTimer) {
      clearTimeout(refreshTimer);
    }
  };
});
</script>
<svelte:head>
@@ -202,23 +314,92 @@
<p class="mt-2 text-gray-600">Monitor and control indexing jobs</p>
</div>
{#if loading && jobs.length === 0}
<div class="flex items-center justify-center py-12">
<div class="text-center">
<div
class="inline-block h-8 w-8 animate-spin rounded-full border-4 border-solid border-blue-600 border-r-transparent"
></div>
<p class="mt-2 text-gray-600">Loading jobs...</p>
<WorkerStatusPanel />
<form class="mb-6 rounded-lg border border-gray-200 bg-white p-4 shadow-sm" onsubmit={applyFilters}>
<div class="flex flex-col gap-4 lg:flex-row lg:items-end lg:justify-between">
<div class="flex-1">
<label class="mb-2 block text-sm font-medium text-gray-700" for="repository-filter">
Repository filter
</label>
<input
id="repository-filter"
type="text"
bind:value={repositoryInput}
placeholder="/owner or /owner/repo"
class="w-full rounded-md border border-gray-300 px-3 py-2 text-sm text-gray-900 shadow-sm focus:border-blue-500 focus:outline-none focus:ring-2 focus:ring-blue-200"
/>
<p class="mt-2 text-xs text-gray-500">
Use an owner prefix like <code>/facebook</code> or a full repository ID like <code>/facebook/react</code>.
</p>
</div>
<div class="lg:min-w-72">
<span class="mb-2 block text-sm font-medium text-gray-700">Statuses</span>
<div class="flex flex-wrap gap-2">
{#each filterStatuses as status (status)}
<button
type="button"
onclick={() => toggleStatusFilter(status)}
class="rounded-full border px-3 py-1 text-xs font-semibold uppercase transition {selectedStatuses.includes(status)
? 'border-blue-600 bg-blue-50 text-blue-700'
: 'border-gray-300 text-gray-600 hover:border-gray-400 hover:text-gray-900'}"
>
{status}
</button>
{/each}
</div>
</div>
<div class="flex gap-2">
<button
type="submit"
disabled={!filtersDirty()}
class="rounded bg-blue-600 px-4 py-2 text-sm font-semibold text-white hover:bg-blue-700 disabled:cursor-not-allowed disabled:opacity-50"
>
Apply filters
</button>
<button
type="button"
onclick={resetFilters}
class="rounded border border-gray-300 px-4 py-2 text-sm font-semibold text-gray-700 hover:border-gray-400 hover:text-gray-900"
>
Reset
</button>
</div>
</div>
{:else if error && jobs.length === 0}
<div class="rounded-md bg-red-50 p-4">
<p class="text-sm text-red-800">Error: {error}</p>
</form>
<div class="mb-4 flex flex-col gap-2 text-sm text-gray-600 md:flex-row md:items-center md:justify-between">
<p>
Showing <span class="font-semibold text-gray-900">{jobs.length}</span> of
<span class="font-semibold text-gray-900">{total}</span> jobs
</p>
{#if hasAppliedFilters()}
<p class="text-xs text-gray-500">
Active filters:
{appliedRepositoryFilter || 'all repositories'}
{#if appliedStatuses.length > 0}
· {appliedStatuses.join(', ')}
{:else}
· all statuses
{/if}
</p>
{/if}
</div>
{#if error}
<div class="mb-4 rounded-md border border-red-200 bg-red-50 px-4 py-3 text-sm text-red-800">
{error}
</div>
{:else if jobs.length === 0}
{/if}
{#if !loading && jobs.length === 0}
<div class="rounded-md bg-gray-50 p-8 text-center">
<p class="text-gray-600">
No jobs found. Jobs will appear here when repositories are indexed.
{hasAppliedFilters()
? 'No jobs match the current filters.'
: 'No jobs found. Jobs will appear here when repositories are indexed.'}
</p>
</div>
{:else}
@@ -259,86 +440,117 @@
</tr>
</thead>
<tbody class="divide-y divide-gray-200 bg-white">
{#each jobs as job (job.id)}
{#if loading && jobs.length === 0}
<JobSkeleton rows={6} />
{:else}
{#each jobs as job (job.id)}
<tr class="hover:bg-gray-50">
<td class="px-6 py-4 text-sm font-medium whitespace-nowrap text-gray-900">
{job.repositoryId}
{#if job.versionId}
<span class="ml-1 text-xs text-gray-500">@{job.versionId}</span>
{/if}
<div class="mt-1 text-xs text-gray-400">{job.id}</div>
</td>
<td class="px-6 py-4 text-sm whitespace-nowrap text-gray-500">
<JobStatusBadge status={job.status} />
<JobStatusBadge status={job.status} spinning={job.status === 'running'} />
</td>
<td class="px-6 py-4 text-sm whitespace-nowrap text-gray-500">
<div class="flex items-center gap-2">
<span>{getStageLabel(job.stage)}</span>
{#if job.stageDetail}
<span class="text-xs text-gray-400">{job.stageDetail}</span>
{/if}
</div>
</td>
<td class="px-6 py-4 text-sm whitespace-nowrap text-gray-500">
<div class="flex items-center">
<span class="mr-2">{job.progress}%</span>
<div class="h-2 w-32 rounded-full bg-gray-200">
<div
class="h-2 rounded-full bg-blue-600 transition-all"
style="width: {job.progress}%"
></div>
<td class="px-6 py-4 text-sm whitespace-nowrap text-gray-500">
<div class="flex items-center gap-2">
<span>{getStageLabel(job.stage)}</span>
{#if job.stageDetail}
<span class="text-xs text-gray-400">{job.stageDetail}</span>
{/if}
</div>
{#if job.totalFiles > 0}
<span class="ml-2 text-xs text-gray-400">
{job.processedFiles}/{job.totalFiles} files
</span>
{/if}
</div>
</td>
</td>
<td class="px-6 py-4 text-sm whitespace-nowrap text-gray-500">
<div class="space-y-2">
<div class="flex items-center gap-2">
<span class="w-12 text-right text-xs font-semibold text-gray-600">{job.progress}%</span>
<div class="h-2 w-32 rounded-full bg-gray-200">
<div
class="h-2 rounded-full bg-blue-600 transition-all"
style="width: {job.progress}%"
></div>
</div>
</div>
{#if job.totalFiles > 0}
<div class="text-xs text-gray-400">
{job.processedFiles}/{job.totalFiles} files processed
</div>
{/if}
</div>
</td>
<td class="px-6 py-4 text-sm whitespace-nowrap text-gray-500">
{formatDate(job.createdAt)}
</td>
<td class="px-6 py-4 text-right text-sm font-medium whitespace-nowrap">
<div class="flex justify-end gap-2">
{#if canPause(job.status)}
{#if pendingCancelJobId === job.id}
<button
onclick={() => pauseJob(job.id)}
disabled={actionInProgress === job.id}
class="rounded bg-yellow-600 px-3 py-1 text-xs font-semibold text-white hover:bg-yellow-700 disabled:opacity-50"
type="button"
onclick={() => void runJobAction(job, 'cancel')}
disabled={isRowBusy(job.id)}
class="rounded bg-red-600 px-3 py-1 text-xs font-semibold text-white hover:bg-red-700 disabled:cursor-not-allowed disabled:opacity-50"
>
Pause
{rowActions[job.id] === 'cancel' ? 'Cancelling...' : 'Confirm cancel'}
</button>
{/if}
{#if canResume(job.status)}
<button
onclick={() => resumeJob(job.id)}
disabled={actionInProgress === job.id}
class="rounded bg-green-600 px-3 py-1 text-xs font-semibold text-white hover:bg-green-700 disabled:opacity-50"
type="button"
onclick={() => requestCancel(job.id)}
disabled={isRowBusy(job.id)}
class="rounded border border-gray-300 px-3 py-1 text-xs font-semibold text-gray-700 hover:border-gray-400 hover:text-gray-900 disabled:cursor-not-allowed disabled:opacity-50"
>
Resume
Keep job
</button>
{/if}
{#if canCancel(job.status)}
<button
onclick={() => cancelJob(job.id)}
disabled={actionInProgress === job.id}
class="rounded bg-red-600 px-3 py-1 text-xs font-semibold text-white hover:bg-red-700 disabled:opacity-50"
>
Cancel
</button>
{/if}
{#if !canPause(job.status) && !canResume(job.status) && !canCancel(job.status)}
<span class="text-xs text-gray-400"></span>
{:else}
{#if canPause(job.status)}
<button
type="button"
onclick={() => void runJobAction(job, 'pause')}
disabled={isRowBusy(job.id)}
class="rounded bg-yellow-600 px-3 py-1 text-xs font-semibold text-white hover:bg-yellow-700 disabled:cursor-not-allowed disabled:opacity-50"
>
{rowActions[job.id] === 'pause' ? 'Pausing...' : 'Pause'}
</button>
{/if}
{#if canResume(job.status)}
<button
type="button"
onclick={() => void runJobAction(job, 'resume')}
disabled={isRowBusy(job.id)}
class="rounded bg-green-600 px-3 py-1 text-xs font-semibold text-white hover:bg-green-700 disabled:cursor-not-allowed disabled:opacity-50"
>
{rowActions[job.id] === 'resume' ? 'Resuming...' : 'Resume'}
</button>
{/if}
{#if canCancel(job.status)}
<button
type="button"
onclick={() => requestCancel(job.id)}
disabled={isRowBusy(job.id)}
class="rounded bg-red-600 px-3 py-1 text-xs font-semibold text-white hover:bg-red-700 disabled:cursor-not-allowed disabled:opacity-50"
>
Cancel
</button>
{/if}
{#if !canPause(job.status) && !canResume(job.status) && !canCancel(job.status)}
<span class="text-xs text-gray-400"></span>
{/if}
{/if}
</div>
</td>
</tr>
{/each}
{/each}
{/if}
</tbody>
</table>
</div>
{#if loading}
{#if refreshing}
<div class="mt-4 text-center text-sm text-gray-500">Refreshing...</div>
{/if}
{/if}
</div>
<Toast bind:toasts={toasts} />

View File

@@ -15,15 +15,39 @@ import { JobQueue } from '$lib/server/pipeline/job-queue.js';
import { handleServiceError } from '$lib/server/utils/validation.js';
import type { IndexingJob } from '$lib/types';
/** Job statuses accepted by the `status` query parameter. */
const VALID_JOB_STATUSES: ReadonlySet<IndexingJob['status']> = new Set([
  'queued',
  'running',
  'done',
  'failed'
]);

/**
 * Parses the comma-separated `status` query parameter.
 *
 * Values are trimmed, de-duplicated, and restricted to known statuses.
 * Returns undefined when nothing valid remains, a bare status when exactly
 * one is requested, otherwise the list of statuses.
 */
function parseStatusFilter(
  searchValue: string | null
): IndexingJob['status'] | Array<IndexingJob['status']> | undefined {
  if (!searchValue) {
    return undefined;
  }
  const seen = new Set<IndexingJob['status']>();
  for (const piece of searchValue.split(',')) {
    const candidate = piece.trim() as IndexingJob['status'];
    if (VALID_JOB_STATUSES.has(candidate)) {
      seen.add(candidate);
    }
  }
  if (seen.size === 0) {
    return undefined;
  }
  const statuses = [...seen];
  return statuses.length === 1 ? statuses[0] : statuses;
}
export const GET: RequestHandler = ({ url }) => {
try {
const db = getClient();
const queue = new JobQueue(db);
const repositoryId = url.searchParams.get('repositoryId') ?? undefined;
const status = (url.searchParams.get('status') ?? undefined) as
| IndexingJob['status']
| undefined;
const repositoryId = url.searchParams.get('repositoryId')?.trim() || undefined;
const status = parseStatusFilter(url.searchParams.get('status'));
const limit = Math.min(parseInt(url.searchParams.get('limit') ?? '20', 10) || 20, 1000);
const jobs = queue.listJobs({ repositoryId, status, limit });

View File

@@ -44,7 +44,7 @@ export const GET: RequestHandler = ({ params, request }) => {
status: job.status,
error: job.error
};
controller.enqueue(`data: ${JSON.stringify(initialData)}\n\n`);
controller.enqueue(`event: job-progress\ndata: ${JSON.stringify(initialData)}\n\n`);
// Check for Last-Event-ID header for reconnect
const lastEventId = request.headers.get('Last-Event-ID');
@@ -57,6 +57,13 @@ export const GET: RequestHandler = ({ params, request }) => {
// Check if job is already done or failed - close immediately after first event
if (job.status === 'done' || job.status === 'failed') {
if (job.status === 'done') {
controller.enqueue(`event: job-done\ndata: ${JSON.stringify({ jobId })}\n\n`);
} else {
controller.enqueue(
`event: job-failed\ndata: ${JSON.stringify({ jobId, error: job.error })}\n\n`
);
}
controller.close();
return;
}
@@ -73,18 +80,29 @@ export const GET: RequestHandler = ({ params, request }) => {
controller.enqueue(value);
// Check if the incoming event indicates job completion
if (value.includes('event: done') || value.includes('event: failed')) {
if (
value.includes('event: job-done') ||
value.includes('event: job-failed')
) {
controller.close();
break;
}
}
} finally {
reader.releaseLock();
controller.close();
try {
controller.close();
} catch {
// Stream may already be closed after a terminal event.
}
}
} catch (err) {
console.error('SSE stream error:', err);
controller.close();
try {
controller.close();
} catch {
// Stream may already be closed.
}
}
}
});

View File

@@ -18,6 +18,7 @@ import type { ProgressBroadcaster as BroadcasterType } from '$lib/server/pipelin
let db: Database.Database;
// Closed over by the vi.mock factory below.
let mockBroadcaster: BroadcasterType | null = null;
let mockPool: { getStatus: () => object; setMaxConcurrency?: (value: number) => void } | null = null;
vi.mock('$lib/server/db/client', () => ({
getClient: () => db
@@ -29,12 +30,12 @@ vi.mock('$lib/server/db/client.js', () => ({
vi.mock('$lib/server/pipeline/startup', () => ({
getQueue: () => null,
getPool: () => null
getPool: () => mockPool
}));
vi.mock('$lib/server/pipeline/startup.js', () => ({
getQueue: () => null,
getPool: () => null
getPool: () => mockPool
}));
vi.mock('$lib/server/pipeline/progress-broadcaster', async (importOriginal) => {
@@ -58,9 +59,11 @@ vi.mock('$lib/server/pipeline/progress-broadcaster.js', async (importOriginal) =
// ---------------------------------------------------------------------------
import { ProgressBroadcaster } from '$lib/server/pipeline/progress-broadcaster.js';
import { GET as getJobsList } from './jobs/+server.js';
import { GET as getJobStream } from './jobs/[id]/stream/+server.js';
import { GET as getJobsStream } from './jobs/stream/+server.js';
import { GET as getIndexingSettings, PUT as putIndexingSettings } from './settings/indexing/+server.js';
import { GET as getWorkers } from './workers/+server.js';
// ---------------------------------------------------------------------------
// DB factory
@@ -306,6 +309,25 @@ describe('GET /api/v1/jobs/:id/stream', () => {
// The replay event should include the cached event data
expect(fullText).toContain('progress');
});
it('closes after receiving the broadcaster job-done event', async () => {
  seedRepo(db);
  const jobId = seedJob(db, { status: 'running', stage: 'parsing', progress: 10 });

  const response = await getJobStream(makeEvent({ params: { id: jobId } }));
  const reader = response.body!.getReader();

  // The stream opens with a snapshot of the job as a job-progress event.
  const initialChunk = await reader.read();
  expect(String(initialChunk.value ?? '')).toContain('event: job-progress');

  // Broadcasting a terminal event must be forwarded and then end the stream.
  mockBroadcaster!.broadcast(jobId, '/test/repo', 'job-done', { jobId, status: 'done' });

  const completionChunk = await reader.read();
  expect(String(completionChunk.value ?? '')).toContain('event: job-done');

  const closed = await reader.read();
  expect(closed.done).toBe(true);
});
});
// ---------------------------------------------------------------------------
@@ -377,12 +399,125 @@ describe('GET /api/v1/jobs/stream', () => {
});
// ---------------------------------------------------------------------------
// Test group 3: GET /api/v1/settings/indexing
// Test group 3: GET /api/v1/jobs
// ---------------------------------------------------------------------------
describe('GET /api/v1/jobs', () => {
  beforeEach(() => {
    db = createTestDb();
  });

  it('supports repository prefix and comma-separated status filters', async () => {
    // Jobs across two owners and several statuses.
    seedRepo(db, '/facebook/react');
    seedRepo(db, '/facebook/react-native');
    seedRepo(db, '/vitejs/vite');
    seedJob(db, { repository_id: '/facebook/react', status: 'queued' });
    seedJob(db, { repository_id: '/facebook/react-native', status: 'running' });
    seedJob(db, { repository_id: '/facebook/react', status: 'done' });
    seedJob(db, { repository_id: '/vitejs/vite', status: 'queued' });

    // Owner prefix "/facebook" plus a status list containing whitespace
    // ("queued, running" URL-encoded) — both should be accepted.
    const response = await getJobsList(
      makeEvent<Parameters<typeof getJobsList>[0]>({
        url: 'http://localhost/api/v1/jobs?repositoryId=%2Ffacebook&status=queued,%20running'
      })
    );
    const body = await response.json();

    expect(response.status).toBe(200);
    expect(body.total).toBe(2);
    expect(body.jobs).toHaveLength(2);
    expect(body.jobs.map((job: { repositoryId: string }) => job.repositoryId).sort()).toEqual([
      '/facebook/react',
      '/facebook/react-native'
    ]);
    expect(body.jobs.map((job: { status: string }) => job.status).sort()).toEqual([
      'queued',
      'running'
    ]);
  });

  it('keeps exact-match behavior for specific repository IDs', async () => {
    seedRepo(db, '/facebook/react');
    seedRepo(db, '/facebook/react-native');
    seedJob(db, { repository_id: '/facebook/react', status: 'queued' });
    seedJob(db, { repository_id: '/facebook/react-native', status: 'queued' });

    // A full "/owner/repo" ID must not prefix-match sibling repositories.
    const response = await getJobsList(
      makeEvent<Parameters<typeof getJobsList>[0]>({
        url: 'http://localhost/api/v1/jobs?repositoryId=%2Ffacebook%2Freact&status=queued'
      })
    );
    const body = await response.json();

    expect(response.status).toBe(200);
    expect(body.total).toBe(1);
    expect(body.jobs).toHaveLength(1);
    expect(body.jobs[0].repositoryId).toBe('/facebook/react');
  });
});
// ---------------------------------------------------------------------------
// Test group 4: GET /api/v1/workers
// ---------------------------------------------------------------------------
describe('GET /api/v1/workers', () => {
  beforeEach(() => {
    // Default: no worker pool initialized for this test group.
    mockPool = null;
  });

  it('returns 503 when the worker pool is not initialized', async () => {
    const response = await getWorkers(makeEvent<Parameters<typeof getWorkers>[0]>({}));
    expect(response.status).toBe(503);
  });

  it('returns the current worker status snapshot', async () => {
    // Stub pool with one running worker and one idle worker.
    mockPool = {
      getStatus: () => ({
        concurrency: 2,
        active: 1,
        idle: 1,
        workers: [
          {
            index: 0,
            state: 'running',
            jobId: 'job-1',
            repositoryId: '/test/repo',
            versionId: null
          },
          {
            index: 1,
            state: 'idle',
            jobId: null,
            repositoryId: null,
            versionId: null
          }
        ]
      })
    };

    const response = await getWorkers(makeEvent<Parameters<typeof getWorkers>[0]>({}));
    const body = await response.json();

    expect(response.status).toBe(200);
    expect(body.active).toBe(1);
    expect(body.workers[0].jobId).toBe('job-1');
  });
});
// ---------------------------------------------------------------------------
// Test group 5: GET /api/v1/settings/indexing
// ---------------------------------------------------------------------------
describe('GET /api/v1/settings/indexing', () => {
beforeEach(() => {
db = createTestDb();
mockPool = {
getStatus: () => ({ concurrency: 2, active: 0, idle: 2, workers: [] }),
setMaxConcurrency: vi.fn()
};
});
it('returns { concurrency: 2 } when no setting exists in DB', async () => {
@@ -417,12 +552,16 @@ describe('GET /api/v1/settings/indexing', () => {
});
// ---------------------------------------------------------------------------
// Test group 4: PUT /api/v1/settings/indexing
// Test group 6: PUT /api/v1/settings/indexing
// ---------------------------------------------------------------------------
describe('PUT /api/v1/settings/indexing', () => {
beforeEach(() => {
db = createTestDb();
mockPool = {
getStatus: () => ({ concurrency: 2, active: 0, idle: 2, workers: [] }),
setMaxConcurrency: vi.fn()
};
});
function makePutEvent(body: unknown) {

View File

@@ -0,0 +1,16 @@
import type { RequestHandler } from './$types';
import { getPool } from '$lib/server/pipeline/startup.js';
import { handleServiceError } from '$lib/server/utils/validation.js';
/**
 * GET /api/v1/workers — returns the worker pool's status snapshot, or
 * 503 Service Unavailable while the pipeline has no pool initialized.
 */
export const GET: RequestHandler = () => {
  try {
    const pool = getPool();
    if (pool) {
      return Response.json(pool.getStatus());
    }
    return new Response('Service unavailable', { status: 503 });
  } catch (error) {
    return handleServiceError(error);
  }
};