feat(TRUEREF-0023): add sqlite-vec search pipeline
This commit is contained in:
@@ -7,39 +7,33 @@
|
||||
|
||||
$effect(() => {
|
||||
job = null;
|
||||
let stopped = false;
|
||||
let completeFired = false;
|
||||
const es = new EventSource(`/api/v1/jobs/${jobId}/stream`);
|
||||
|
||||
async function poll() {
|
||||
if (stopped) return;
|
||||
try {
|
||||
const res = await fetch(`/api/v1/jobs/${jobId}`);
|
||||
if (res.ok) {
|
||||
const data = await res.json();
|
||||
job = data.job;
|
||||
if (!completeFired && (job?.status === 'done' || job?.status === 'failed')) {
|
||||
completeFired = true;
|
||||
oncomplete?.();
|
||||
}
|
||||
}
|
||||
} catch {
|
||||
// ignore transient errors
|
||||
}
|
||||
}
|
||||
es.addEventListener('job-progress', (event) => {
|
||||
const data = JSON.parse(event.data);
|
||||
job = { ...job, ...data } as IndexingJob;
|
||||
});
|
||||
|
||||
void poll();
|
||||
const interval = setInterval(() => {
|
||||
if (job?.status === 'done' || job?.status === 'failed') {
|
||||
clearInterval(interval);
|
||||
return;
|
||||
}
|
||||
void poll();
|
||||
}, 2000);
|
||||
es.addEventListener('job-done', () => {
|
||||
void fetch(`/api/v1/jobs/${jobId}`)
|
||||
.then(r => r.json())
|
||||
.then(d => { job = d.job; oncomplete?.(); });
|
||||
es.close();
|
||||
});
|
||||
|
||||
return () => {
|
||||
stopped = true;
|
||||
clearInterval(interval);
|
||||
es.addEventListener('job-failed', (event) => {
|
||||
const data = JSON.parse(event.data);
|
||||
if (job) job = { ...job, status: 'failed', error: data.error ?? 'Unknown error' } as IndexingJob;
|
||||
oncomplete?.();
|
||||
es.close();
|
||||
});
|
||||
|
||||
es.onerror = () => {
|
||||
es.close();
|
||||
void fetch(`/api/v1/jobs/${jobId}`).then(r => r.json()).then(d => { job = d.job; });
|
||||
};
|
||||
|
||||
return () => es.close();
|
||||
});
|
||||
|
||||
const progress = $derived(job?.progress ?? 0);
|
||||
|
||||
19
src/lib/components/admin/JobSkeleton.svelte
Normal file
19
src/lib/components/admin/JobSkeleton.svelte
Normal file
@@ -0,0 +1,19 @@
|
||||
<script lang="ts">
|
||||
let { rows = 5 }: { rows?: number } = $props();
|
||||
</script>
|
||||
|
||||
{#each Array(rows) as _, i (i)}
|
||||
<tr>
|
||||
<td class="px-6 py-4">
|
||||
<div class="h-4 w-48 animate-pulse rounded bg-gray-200"></div>
|
||||
<div class="mt-1 h-3 w-24 animate-pulse rounded bg-gray-100"></div>
|
||||
</td>
|
||||
<td class="px-6 py-4"><div class="h-5 w-16 animate-pulse rounded-full bg-gray-200"></div></td>
|
||||
<td class="px-6 py-4"><div class="h-4 w-20 animate-pulse rounded bg-gray-200"></div></td>
|
||||
<td class="px-6 py-4"><div class="h-2 w-32 animate-pulse rounded-full bg-gray-200"></div></td>
|
||||
<td class="px-6 py-4"><div class="h-4 w-28 animate-pulse rounded bg-gray-200"></div></td>
|
||||
<td class="px-6 py-4 text-right"
|
||||
><div class="ml-auto h-7 w-20 animate-pulse rounded bg-gray-200"></div></td
|
||||
>
|
||||
</tr>
|
||||
{/each}
|
||||
@@ -1,9 +1,10 @@
|
||||
<script lang="ts">
|
||||
interface Props {
|
||||
status: 'queued' | 'running' | 'paused' | 'cancelled' | 'done' | 'failed';
|
||||
spinning?: boolean;
|
||||
}
|
||||
|
||||
let { status }: Props = $props();
|
||||
let { status, spinning = false }: Props = $props();
|
||||
|
||||
const statusConfig: Record<typeof status, { bg: string; text: string; label: string }> = {
|
||||
queued: { bg: 'bg-blue-100', text: 'text-blue-800', label: 'Queued' },
|
||||
@@ -21,4 +22,9 @@
|
||||
class="inline-flex items-center rounded-full px-2.5 py-0.5 text-xs font-medium {config.bg} {config.text}"
|
||||
>
|
||||
{config.label}
|
||||
{#if spinning}
|
||||
<span
|
||||
class="ml-1 inline-block h-3 w-3 animate-spin rounded-full border-2 border-current border-r-transparent"
|
||||
></span>
|
||||
{/if}
|
||||
</span>
|
||||
|
||||
77
src/lib/components/admin/Toast.svelte
Normal file
77
src/lib/components/admin/Toast.svelte
Normal file
@@ -0,0 +1,77 @@
|
||||
<script lang="ts">
|
||||
import { onDestroy } from 'svelte';
|
||||
|
||||
export interface ToastItem {
|
||||
id: string;
|
||||
message: string;
|
||||
type: 'success' | 'error' | 'info';
|
||||
}
|
||||
|
||||
let { toasts = $bindable([]) }: { toasts: ToastItem[] } = $props();
|
||||
const timers = new Map<string, ReturnType<typeof setTimeout>>();
|
||||
|
||||
$effect(() => {
|
||||
for (const toast of toasts) {
|
||||
if (timers.has(toast.id)) {
|
||||
continue;
|
||||
}
|
||||
|
||||
const timer = setTimeout(() => {
|
||||
dismiss(toast.id);
|
||||
}, 4000);
|
||||
|
||||
timers.set(toast.id, timer);
|
||||
}
|
||||
|
||||
for (const [id, timer] of timers.entries()) {
|
||||
if (toasts.some((toast) => toast.id === id)) {
|
||||
continue;
|
||||
}
|
||||
|
||||
clearTimeout(timer);
|
||||
timers.delete(id);
|
||||
}
|
||||
});
|
||||
|
||||
onDestroy(() => {
|
||||
for (const timer of timers.values()) {
|
||||
clearTimeout(timer);
|
||||
}
|
||||
timers.clear();
|
||||
});
|
||||
|
||||
function dismiss(id: string) {
|
||||
const timer = timers.get(id);
|
||||
if (timer) {
|
||||
clearTimeout(timer);
|
||||
timers.delete(id);
|
||||
}
|
||||
|
||||
toasts = toasts.filter((toast: ToastItem) => toast.id !== id);
|
||||
}
|
||||
</script>
|
||||
|
||||
<div class="fixed right-4 bottom-4 z-50 flex flex-col gap-2">
|
||||
{#each toasts as toast (toast.id)}
|
||||
<div
|
||||
role="status"
|
||||
aria-live="polite"
|
||||
class="flex items-center gap-3 rounded-lg px-4 py-3 shadow-lg {toast.type === 'error'
|
||||
? 'bg-red-600 text-white'
|
||||
: toast.type === 'info'
|
||||
? 'bg-blue-600 text-white'
|
||||
: 'bg-green-600 text-white'}"
|
||||
>
|
||||
<span class="text-sm">{toast.message}</span>
|
||||
<button
|
||||
type="button"
|
||||
aria-label="Dismiss notification"
|
||||
onclick={() => dismiss(toast.id)}
|
||||
class="ml-2 text-xs opacity-70 hover:opacity-100"
|
||||
>
|
||||
x
|
||||
</button
|
||||
>
|
||||
</div>
|
||||
{/each}
|
||||
</div>
|
||||
81
src/lib/components/admin/WorkerStatusPanel.svelte
Normal file
81
src/lib/components/admin/WorkerStatusPanel.svelte
Normal file
@@ -0,0 +1,81 @@
|
||||
<script lang="ts">
|
||||
interface WorkerStatus {
|
||||
index: number;
|
||||
state: 'idle' | 'running';
|
||||
jobId: string | null;
|
||||
repositoryId: string | null;
|
||||
versionId: string | null;
|
||||
}
|
||||
|
||||
interface WorkersResponse {
|
||||
concurrency: number;
|
||||
active: number;
|
||||
idle: number;
|
||||
workers: WorkerStatus[];
|
||||
}
|
||||
|
||||
let status = $state<WorkersResponse>({ concurrency: 0, active: 0, idle: 0, workers: [] });
|
||||
let pollInterval: ReturnType<typeof setInterval> | null = null;
|
||||
|
||||
async function fetchStatus() {
|
||||
try {
|
||||
const res = await fetch('/api/v1/workers');
|
||||
if (res.ok) status = await res.json();
|
||||
} catch {
|
||||
/* ignore */
|
||||
}
|
||||
}
|
||||
|
||||
$effect(() => {
|
||||
void fetchStatus();
|
||||
const es = new EventSource('/api/v1/jobs/stream');
|
||||
es.addEventListener('worker-status', (event) => {
|
||||
try {
|
||||
status = JSON.parse(event.data);
|
||||
} catch {
|
||||
/* ignore */
|
||||
}
|
||||
});
|
||||
es.onerror = () => {
|
||||
es.close();
|
||||
if (!pollInterval) {
|
||||
pollInterval = setInterval(() => void fetchStatus(), 5000);
|
||||
}
|
||||
};
|
||||
return () => {
|
||||
es.close();
|
||||
if (pollInterval) {
|
||||
clearInterval(pollInterval);
|
||||
pollInterval = null;
|
||||
}
|
||||
};
|
||||
});
|
||||
</script>
|
||||
|
||||
{#if status.concurrency > 0}
|
||||
<div class="mb-4 rounded-lg border border-gray-200 bg-white p-4 shadow-sm">
|
||||
<div class="mb-2 flex items-center justify-between">
|
||||
<h3 class="text-sm font-semibold text-gray-700">Workers</h3>
|
||||
<span class="text-xs text-gray-500">{status.active} / {status.concurrency} active</span>
|
||||
</div>
|
||||
<div class="space-y-1">
|
||||
{#each status.workers as worker (worker.index)}
|
||||
<div class="flex items-center gap-2 text-xs">
|
||||
<span
|
||||
class="flex h-2 w-2 rounded-full {worker.state === 'running'
|
||||
? 'animate-pulse bg-green-500'
|
||||
: 'bg-gray-300'}"
|
||||
></span>
|
||||
<span class="text-gray-600">Worker {worker.index}</span>
|
||||
{#if worker.state === 'running' && worker.repositoryId}
|
||||
<span class="truncate text-gray-400"
|
||||
>{worker.repositoryId}{worker.versionId ? ' / ' + worker.versionId : ''}</span
|
||||
>
|
||||
{:else}
|
||||
<span class="text-gray-400">idle</span>
|
||||
{/if}
|
||||
</div>
|
||||
{/each}
|
||||
</div>
|
||||
</div>
|
||||
{/if}
|
||||
@@ -4,6 +4,7 @@
|
||||
*/
|
||||
import Database from 'better-sqlite3';
|
||||
import { env } from '$env/dynamic/private';
|
||||
import { loadSqliteVec } from './sqlite-vec';
|
||||
|
||||
let _client: Database.Database | null = null;
|
||||
|
||||
@@ -14,6 +15,12 @@ export function getClient(): Database.Database {
|
||||
_client.pragma('journal_mode = WAL');
|
||||
_client.pragma('foreign_keys = ON');
|
||||
_client.pragma('busy_timeout = 5000');
|
||||
_client.pragma('synchronous = NORMAL');
|
||||
_client.pragma('cache_size = -65536');
|
||||
_client.pragma('temp_store = MEMORY');
|
||||
_client.pragma('mmap_size = 268435456');
|
||||
_client.pragma('wal_autocheckpoint = 1000');
|
||||
loadSqliteVec(_client);
|
||||
}
|
||||
return _client;
|
||||
}
|
||||
|
||||
@@ -5,6 +5,7 @@ import { readFileSync } from 'node:fs';
|
||||
import { fileURLToPath } from 'node:url';
|
||||
import { join, dirname } from 'node:path';
|
||||
import * as schema from './schema';
|
||||
import { loadSqliteVec } from './sqlite-vec';
|
||||
import { env } from '$env/dynamic/private';
|
||||
|
||||
if (!env.DATABASE_URL) throw new Error('DATABASE_URL is not set');
|
||||
@@ -19,6 +20,12 @@ client.pragma('foreign_keys = ON');
|
||||
// Prevents SQLITE_BUSY errors when the indexing pipeline holds the write lock
|
||||
// and an HTTP request arrives simultaneously.
|
||||
client.pragma('busy_timeout = 5000');
|
||||
client.pragma('synchronous = NORMAL');
|
||||
client.pragma('cache_size = -65536');
|
||||
client.pragma('temp_store = MEMORY');
|
||||
client.pragma('mmap_size = 268435456');
|
||||
client.pragma('wal_autocheckpoint = 1000');
|
||||
loadSqliteVec(client);
|
||||
|
||||
export const db = drizzle(client, { schema });
|
||||
|
||||
|
||||
@@ -0,0 +1,6 @@
|
||||
CREATE INDEX `idx_embeddings_profile` ON `snippet_embeddings` (`profile_id`,`snippet_id`);--> statement-breakpoint
|
||||
CREATE INDEX `idx_documents_repo_version` ON `documents` (`repository_id`,`version_id`);--> statement-breakpoint
|
||||
CREATE INDEX `idx_jobs_repo_status` ON `indexing_jobs` (`repository_id`,`status`);--> statement-breakpoint
|
||||
CREATE INDEX `idx_repositories_state` ON `repositories` (`state`);--> statement-breakpoint
|
||||
CREATE INDEX `idx_snippets_repo_version` ON `snippets` (`repository_id`,`version_id`);--> statement-breakpoint
|
||||
CREATE INDEX `idx_snippets_repo_type` ON `snippets` (`repository_id`,`type`);
|
||||
948
src/lib/server/db/migrations/meta/0006_snapshot.json
Normal file
948
src/lib/server/db/migrations/meta/0006_snapshot.json
Normal file
@@ -0,0 +1,948 @@
|
||||
{
|
||||
"version": "6",
|
||||
"dialect": "sqlite",
|
||||
"id": "b8998bda-f89b-41bc-b923-3f676d153c79",
|
||||
"prevId": "c326dcbe-1771-4a90-a566-0ebd1eca47ec",
|
||||
"tables": {
|
||||
"documents": {
|
||||
"name": "documents",
|
||||
"columns": {
|
||||
"id": {
|
||||
"name": "id",
|
||||
"type": "text",
|
||||
"primaryKey": true,
|
||||
"notNull": true,
|
||||
"autoincrement": false
|
||||
},
|
||||
"repository_id": {
|
||||
"name": "repository_id",
|
||||
"type": "text",
|
||||
"primaryKey": false,
|
||||
"notNull": true,
|
||||
"autoincrement": false
|
||||
},
|
||||
"version_id": {
|
||||
"name": "version_id",
|
||||
"type": "text",
|
||||
"primaryKey": false,
|
||||
"notNull": false,
|
||||
"autoincrement": false
|
||||
},
|
||||
"file_path": {
|
||||
"name": "file_path",
|
||||
"type": "text",
|
||||
"primaryKey": false,
|
||||
"notNull": true,
|
||||
"autoincrement": false
|
||||
},
|
||||
"title": {
|
||||
"name": "title",
|
||||
"type": "text",
|
||||
"primaryKey": false,
|
||||
"notNull": false,
|
||||
"autoincrement": false
|
||||
},
|
||||
"language": {
|
||||
"name": "language",
|
||||
"type": "text",
|
||||
"primaryKey": false,
|
||||
"notNull": false,
|
||||
"autoincrement": false
|
||||
},
|
||||
"token_count": {
|
||||
"name": "token_count",
|
||||
"type": "integer",
|
||||
"primaryKey": false,
|
||||
"notNull": false,
|
||||
"autoincrement": false,
|
||||
"default": 0
|
||||
},
|
||||
"checksum": {
|
||||
"name": "checksum",
|
||||
"type": "text",
|
||||
"primaryKey": false,
|
||||
"notNull": true,
|
||||
"autoincrement": false
|
||||
},
|
||||
"indexed_at": {
|
||||
"name": "indexed_at",
|
||||
"type": "integer",
|
||||
"primaryKey": false,
|
||||
"notNull": true,
|
||||
"autoincrement": false
|
||||
}
|
||||
},
|
||||
"indexes": {
|
||||
"idx_documents_repo_version": {
|
||||
"name": "idx_documents_repo_version",
|
||||
"columns": [
|
||||
"repository_id",
|
||||
"version_id"
|
||||
],
|
||||
"isUnique": false
|
||||
}
|
||||
},
|
||||
"foreignKeys": {
|
||||
"documents_repository_id_repositories_id_fk": {
|
||||
"name": "documents_repository_id_repositories_id_fk",
|
||||
"tableFrom": "documents",
|
||||
"tableTo": "repositories",
|
||||
"columnsFrom": [
|
||||
"repository_id"
|
||||
],
|
||||
"columnsTo": [
|
||||
"id"
|
||||
],
|
||||
"onDelete": "cascade",
|
||||
"onUpdate": "no action"
|
||||
},
|
||||
"documents_version_id_repository_versions_id_fk": {
|
||||
"name": "documents_version_id_repository_versions_id_fk",
|
||||
"tableFrom": "documents",
|
||||
"tableTo": "repository_versions",
|
||||
"columnsFrom": [
|
||||
"version_id"
|
||||
],
|
||||
"columnsTo": [
|
||||
"id"
|
||||
],
|
||||
"onDelete": "cascade",
|
||||
"onUpdate": "no action"
|
||||
}
|
||||
},
|
||||
"compositePrimaryKeys": {},
|
||||
"uniqueConstraints": {},
|
||||
"checkConstraints": {}
|
||||
},
|
||||
"embedding_profiles": {
|
||||
"name": "embedding_profiles",
|
||||
"columns": {
|
||||
"id": {
|
||||
"name": "id",
|
||||
"type": "text",
|
||||
"primaryKey": true,
|
||||
"notNull": true,
|
||||
"autoincrement": false
|
||||
},
|
||||
"provider_kind": {
|
||||
"name": "provider_kind",
|
||||
"type": "text",
|
||||
"primaryKey": false,
|
||||
"notNull": true,
|
||||
"autoincrement": false
|
||||
},
|
||||
"title": {
|
||||
"name": "title",
|
||||
"type": "text",
|
||||
"primaryKey": false,
|
||||
"notNull": true,
|
||||
"autoincrement": false
|
||||
},
|
||||
"enabled": {
|
||||
"name": "enabled",
|
||||
"type": "integer",
|
||||
"primaryKey": false,
|
||||
"notNull": true,
|
||||
"autoincrement": false,
|
||||
"default": true
|
||||
},
|
||||
"is_default": {
|
||||
"name": "is_default",
|
||||
"type": "integer",
|
||||
"primaryKey": false,
|
||||
"notNull": true,
|
||||
"autoincrement": false,
|
||||
"default": false
|
||||
},
|
||||
"model": {
|
||||
"name": "model",
|
||||
"type": "text",
|
||||
"primaryKey": false,
|
||||
"notNull": true,
|
||||
"autoincrement": false
|
||||
},
|
||||
"dimensions": {
|
||||
"name": "dimensions",
|
||||
"type": "integer",
|
||||
"primaryKey": false,
|
||||
"notNull": true,
|
||||
"autoincrement": false
|
||||
},
|
||||
"config": {
|
||||
"name": "config",
|
||||
"type": "text",
|
||||
"primaryKey": false,
|
||||
"notNull": true,
|
||||
"autoincrement": false
|
||||
},
|
||||
"created_at": {
|
||||
"name": "created_at",
|
||||
"type": "integer",
|
||||
"primaryKey": false,
|
||||
"notNull": true,
|
||||
"autoincrement": false
|
||||
},
|
||||
"updated_at": {
|
||||
"name": "updated_at",
|
||||
"type": "integer",
|
||||
"primaryKey": false,
|
||||
"notNull": true,
|
||||
"autoincrement": false
|
||||
}
|
||||
},
|
||||
"indexes": {},
|
||||
"foreignKeys": {},
|
||||
"compositePrimaryKeys": {},
|
||||
"uniqueConstraints": {},
|
||||
"checkConstraints": {}
|
||||
},
|
||||
"indexing_jobs": {
|
||||
"name": "indexing_jobs",
|
||||
"columns": {
|
||||
"id": {
|
||||
"name": "id",
|
||||
"type": "text",
|
||||
"primaryKey": true,
|
||||
"notNull": true,
|
||||
"autoincrement": false
|
||||
},
|
||||
"repository_id": {
|
||||
"name": "repository_id",
|
||||
"type": "text",
|
||||
"primaryKey": false,
|
||||
"notNull": true,
|
||||
"autoincrement": false
|
||||
},
|
||||
"version_id": {
|
||||
"name": "version_id",
|
||||
"type": "text",
|
||||
"primaryKey": false,
|
||||
"notNull": false,
|
||||
"autoincrement": false
|
||||
},
|
||||
"status": {
|
||||
"name": "status",
|
||||
"type": "text",
|
||||
"primaryKey": false,
|
||||
"notNull": true,
|
||||
"autoincrement": false,
|
||||
"default": "'queued'"
|
||||
},
|
||||
"progress": {
|
||||
"name": "progress",
|
||||
"type": "integer",
|
||||
"primaryKey": false,
|
||||
"notNull": false,
|
||||
"autoincrement": false,
|
||||
"default": 0
|
||||
},
|
||||
"total_files": {
|
||||
"name": "total_files",
|
||||
"type": "integer",
|
||||
"primaryKey": false,
|
||||
"notNull": false,
|
||||
"autoincrement": false,
|
||||
"default": 0
|
||||
},
|
||||
"processed_files": {
|
||||
"name": "processed_files",
|
||||
"type": "integer",
|
||||
"primaryKey": false,
|
||||
"notNull": false,
|
||||
"autoincrement": false,
|
||||
"default": 0
|
||||
},
|
||||
"stage": {
|
||||
"name": "stage",
|
||||
"type": "text",
|
||||
"primaryKey": false,
|
||||
"notNull": true,
|
||||
"autoincrement": false,
|
||||
"default": "'queued'"
|
||||
},
|
||||
"stage_detail": {
|
||||
"name": "stage_detail",
|
||||
"type": "text",
|
||||
"primaryKey": false,
|
||||
"notNull": false,
|
||||
"autoincrement": false
|
||||
},
|
||||
"error": {
|
||||
"name": "error",
|
||||
"type": "text",
|
||||
"primaryKey": false,
|
||||
"notNull": false,
|
||||
"autoincrement": false
|
||||
},
|
||||
"started_at": {
|
||||
"name": "started_at",
|
||||
"type": "integer",
|
||||
"primaryKey": false,
|
||||
"notNull": false,
|
||||
"autoincrement": false
|
||||
},
|
||||
"completed_at": {
|
||||
"name": "completed_at",
|
||||
"type": "integer",
|
||||
"primaryKey": false,
|
||||
"notNull": false,
|
||||
"autoincrement": false
|
||||
},
|
||||
"created_at": {
|
||||
"name": "created_at",
|
||||
"type": "integer",
|
||||
"primaryKey": false,
|
||||
"notNull": true,
|
||||
"autoincrement": false
|
||||
}
|
||||
},
|
||||
"indexes": {
|
||||
"idx_jobs_repo_status": {
|
||||
"name": "idx_jobs_repo_status",
|
||||
"columns": [
|
||||
"repository_id",
|
||||
"status"
|
||||
],
|
||||
"isUnique": false
|
||||
}
|
||||
},
|
||||
"foreignKeys": {
|
||||
"indexing_jobs_repository_id_repositories_id_fk": {
|
||||
"name": "indexing_jobs_repository_id_repositories_id_fk",
|
||||
"tableFrom": "indexing_jobs",
|
||||
"tableTo": "repositories",
|
||||
"columnsFrom": [
|
||||
"repository_id"
|
||||
],
|
||||
"columnsTo": [
|
||||
"id"
|
||||
],
|
||||
"onDelete": "cascade",
|
||||
"onUpdate": "no action"
|
||||
}
|
||||
},
|
||||
"compositePrimaryKeys": {},
|
||||
"uniqueConstraints": {},
|
||||
"checkConstraints": {}
|
||||
},
|
||||
"repositories": {
|
||||
"name": "repositories",
|
||||
"columns": {
|
||||
"id": {
|
||||
"name": "id",
|
||||
"type": "text",
|
||||
"primaryKey": true,
|
||||
"notNull": true,
|
||||
"autoincrement": false
|
||||
},
|
||||
"title": {
|
||||
"name": "title",
|
||||
"type": "text",
|
||||
"primaryKey": false,
|
||||
"notNull": true,
|
||||
"autoincrement": false
|
||||
},
|
||||
"description": {
|
||||
"name": "description",
|
||||
"type": "text",
|
||||
"primaryKey": false,
|
||||
"notNull": false,
|
||||
"autoincrement": false
|
||||
},
|
||||
"source": {
|
||||
"name": "source",
|
||||
"type": "text",
|
||||
"primaryKey": false,
|
||||
"notNull": true,
|
||||
"autoincrement": false
|
||||
},
|
||||
"source_url": {
|
||||
"name": "source_url",
|
||||
"type": "text",
|
||||
"primaryKey": false,
|
||||
"notNull": true,
|
||||
"autoincrement": false
|
||||
},
|
||||
"branch": {
|
||||
"name": "branch",
|
||||
"type": "text",
|
||||
"primaryKey": false,
|
||||
"notNull": false,
|
||||
"autoincrement": false,
|
||||
"default": "'main'"
|
||||
},
|
||||
"state": {
|
||||
"name": "state",
|
||||
"type": "text",
|
||||
"primaryKey": false,
|
||||
"notNull": true,
|
||||
"autoincrement": false,
|
||||
"default": "'pending'"
|
||||
},
|
||||
"total_snippets": {
|
||||
"name": "total_snippets",
|
||||
"type": "integer",
|
||||
"primaryKey": false,
|
||||
"notNull": false,
|
||||
"autoincrement": false,
|
||||
"default": 0
|
||||
},
|
||||
"total_tokens": {
|
||||
"name": "total_tokens",
|
||||
"type": "integer",
|
||||
"primaryKey": false,
|
||||
"notNull": false,
|
||||
"autoincrement": false,
|
||||
"default": 0
|
||||
},
|
||||
"trust_score": {
|
||||
"name": "trust_score",
|
||||
"type": "real",
|
||||
"primaryKey": false,
|
||||
"notNull": false,
|
||||
"autoincrement": false,
|
||||
"default": 0
|
||||
},
|
||||
"benchmark_score": {
|
||||
"name": "benchmark_score",
|
||||
"type": "real",
|
||||
"primaryKey": false,
|
||||
"notNull": false,
|
||||
"autoincrement": false,
|
||||
"default": 0
|
||||
},
|
||||
"stars": {
|
||||
"name": "stars",
|
||||
"type": "integer",
|
||||
"primaryKey": false,
|
||||
"notNull": false,
|
||||
"autoincrement": false
|
||||
},
|
||||
"github_token": {
|
||||
"name": "github_token",
|
||||
"type": "text",
|
||||
"primaryKey": false,
|
||||
"notNull": false,
|
||||
"autoincrement": false
|
||||
},
|
||||
"last_indexed_at": {
|
||||
"name": "last_indexed_at",
|
||||
"type": "integer",
|
||||
"primaryKey": false,
|
||||
"notNull": false,
|
||||
"autoincrement": false
|
||||
},
|
||||
"created_at": {
|
||||
"name": "created_at",
|
||||
"type": "integer",
|
||||
"primaryKey": false,
|
||||
"notNull": true,
|
||||
"autoincrement": false
|
||||
},
|
||||
"updated_at": {
|
||||
"name": "updated_at",
|
||||
"type": "integer",
|
||||
"primaryKey": false,
|
||||
"notNull": true,
|
||||
"autoincrement": false
|
||||
}
|
||||
},
|
||||
"indexes": {
|
||||
"idx_repositories_state": {
|
||||
"name": "idx_repositories_state",
|
||||
"columns": [
|
||||
"state"
|
||||
],
|
||||
"isUnique": false
|
||||
}
|
||||
},
|
||||
"foreignKeys": {},
|
||||
"compositePrimaryKeys": {},
|
||||
"uniqueConstraints": {},
|
||||
"checkConstraints": {}
|
||||
},
|
||||
"repository_configs": {
|
||||
"name": "repository_configs",
|
||||
"columns": {
|
||||
"repository_id": {
|
||||
"name": "repository_id",
|
||||
"type": "text",
|
||||
"primaryKey": false,
|
||||
"notNull": true,
|
||||
"autoincrement": false
|
||||
},
|
||||
"version_id": {
|
||||
"name": "version_id",
|
||||
"type": "text",
|
||||
"primaryKey": false,
|
||||
"notNull": false,
|
||||
"autoincrement": false
|
||||
},
|
||||
"project_title": {
|
||||
"name": "project_title",
|
||||
"type": "text",
|
||||
"primaryKey": false,
|
||||
"notNull": false,
|
||||
"autoincrement": false
|
||||
},
|
||||
"description": {
|
||||
"name": "description",
|
||||
"type": "text",
|
||||
"primaryKey": false,
|
||||
"notNull": false,
|
||||
"autoincrement": false
|
||||
},
|
||||
"folders": {
|
||||
"name": "folders",
|
||||
"type": "text",
|
||||
"primaryKey": false,
|
||||
"notNull": false,
|
||||
"autoincrement": false
|
||||
},
|
||||
"exclude_folders": {
|
||||
"name": "exclude_folders",
|
||||
"type": "text",
|
||||
"primaryKey": false,
|
||||
"notNull": false,
|
||||
"autoincrement": false
|
||||
},
|
||||
"exclude_files": {
|
||||
"name": "exclude_files",
|
||||
"type": "text",
|
||||
"primaryKey": false,
|
||||
"notNull": false,
|
||||
"autoincrement": false
|
||||
},
|
||||
"rules": {
|
||||
"name": "rules",
|
||||
"type": "text",
|
||||
"primaryKey": false,
|
||||
"notNull": false,
|
||||
"autoincrement": false
|
||||
},
|
||||
"previous_versions": {
|
||||
"name": "previous_versions",
|
||||
"type": "text",
|
||||
"primaryKey": false,
|
||||
"notNull": false,
|
||||
"autoincrement": false
|
||||
},
|
||||
"updated_at": {
|
||||
"name": "updated_at",
|
||||
"type": "integer",
|
||||
"primaryKey": false,
|
||||
"notNull": true,
|
||||
"autoincrement": false
|
||||
}
|
||||
},
|
||||
"indexes": {
|
||||
"uniq_repo_config_base": {
|
||||
"name": "uniq_repo_config_base",
|
||||
"columns": [
|
||||
"repository_id"
|
||||
],
|
||||
"isUnique": true,
|
||||
"where": "\"repository_configs\".\"version_id\" IS NULL"
|
||||
},
|
||||
"uniq_repo_config_version": {
|
||||
"name": "uniq_repo_config_version",
|
||||
"columns": [
|
||||
"repository_id",
|
||||
"version_id"
|
||||
],
|
||||
"isUnique": true,
|
||||
"where": "\"repository_configs\".\"version_id\" IS NOT NULL"
|
||||
}
|
||||
},
|
||||
"foreignKeys": {
|
||||
"repository_configs_repository_id_repositories_id_fk": {
|
||||
"name": "repository_configs_repository_id_repositories_id_fk",
|
||||
"tableFrom": "repository_configs",
|
||||
"tableTo": "repositories",
|
||||
"columnsFrom": [
|
||||
"repository_id"
|
||||
],
|
||||
"columnsTo": [
|
||||
"id"
|
||||
],
|
||||
"onDelete": "cascade",
|
||||
"onUpdate": "no action"
|
||||
}
|
||||
},
|
||||
"compositePrimaryKeys": {},
|
||||
"uniqueConstraints": {},
|
||||
"checkConstraints": {}
|
||||
},
|
||||
"repository_versions": {
|
||||
"name": "repository_versions",
|
||||
"columns": {
|
||||
"id": {
|
||||
"name": "id",
|
||||
"type": "text",
|
||||
"primaryKey": true,
|
||||
"notNull": true,
|
||||
"autoincrement": false
|
||||
},
|
||||
"repository_id": {
|
||||
"name": "repository_id",
|
||||
"type": "text",
|
||||
"primaryKey": false,
|
||||
"notNull": true,
|
||||
"autoincrement": false
|
||||
},
|
||||
"tag": {
|
||||
"name": "tag",
|
||||
"type": "text",
|
||||
"primaryKey": false,
|
||||
"notNull": true,
|
||||
"autoincrement": false
|
||||
},
|
||||
"title": {
|
||||
"name": "title",
|
||||
"type": "text",
|
||||
"primaryKey": false,
|
||||
"notNull": false,
|
||||
"autoincrement": false
|
||||
},
|
||||
"commit_hash": {
|
||||
"name": "commit_hash",
|
||||
"type": "text",
|
||||
"primaryKey": false,
|
||||
"notNull": false,
|
||||
"autoincrement": false
|
||||
},
|
||||
"state": {
|
||||
"name": "state",
|
||||
"type": "text",
|
||||
"primaryKey": false,
|
||||
"notNull": true,
|
||||
"autoincrement": false,
|
||||
"default": "'pending'"
|
||||
},
|
||||
"total_snippets": {
|
||||
"name": "total_snippets",
|
||||
"type": "integer",
|
||||
"primaryKey": false,
|
||||
"notNull": false,
|
||||
"autoincrement": false,
|
||||
"default": 0
|
||||
},
|
||||
"indexed_at": {
|
||||
"name": "indexed_at",
|
||||
"type": "integer",
|
||||
"primaryKey": false,
|
||||
"notNull": false,
|
||||
"autoincrement": false
|
||||
},
|
||||
"created_at": {
|
||||
"name": "created_at",
|
||||
"type": "integer",
|
||||
"primaryKey": false,
|
||||
"notNull": true,
|
||||
"autoincrement": false
|
||||
}
|
||||
},
|
||||
"indexes": {},
|
||||
"foreignKeys": {
|
||||
"repository_versions_repository_id_repositories_id_fk": {
|
||||
"name": "repository_versions_repository_id_repositories_id_fk",
|
||||
"tableFrom": "repository_versions",
|
||||
"tableTo": "repositories",
|
||||
"columnsFrom": [
|
||||
"repository_id"
|
||||
],
|
||||
"columnsTo": [
|
||||
"id"
|
||||
],
|
||||
"onDelete": "cascade",
|
||||
"onUpdate": "no action"
|
||||
}
|
||||
},
|
||||
"compositePrimaryKeys": {},
|
||||
"uniqueConstraints": {},
|
||||
"checkConstraints": {}
|
||||
},
|
||||
"settings": {
|
||||
"name": "settings",
|
||||
"columns": {
|
||||
"key": {
|
||||
"name": "key",
|
||||
"type": "text",
|
||||
"primaryKey": true,
|
||||
"notNull": true,
|
||||
"autoincrement": false
|
||||
},
|
||||
"value": {
|
||||
"name": "value",
|
||||
"type": "text",
|
||||
"primaryKey": false,
|
||||
"notNull": false,
|
||||
"autoincrement": false
|
||||
},
|
||||
"updated_at": {
|
||||
"name": "updated_at",
|
||||
"type": "integer",
|
||||
"primaryKey": false,
|
||||
"notNull": true,
|
||||
"autoincrement": false
|
||||
}
|
||||
},
|
||||
"indexes": {},
|
||||
"foreignKeys": {},
|
||||
"compositePrimaryKeys": {},
|
||||
"uniqueConstraints": {},
|
||||
"checkConstraints": {}
|
||||
},
|
||||
"snippet_embeddings": {
|
||||
"name": "snippet_embeddings",
|
||||
"columns": {
|
||||
"snippet_id": {
|
||||
"name": "snippet_id",
|
||||
"type": "text",
|
||||
"primaryKey": false,
|
||||
"notNull": true,
|
||||
"autoincrement": false
|
||||
},
|
||||
"profile_id": {
|
||||
"name": "profile_id",
|
||||
"type": "text",
|
||||
"primaryKey": false,
|
||||
"notNull": true,
|
||||
"autoincrement": false
|
||||
},
|
||||
"model": {
|
||||
"name": "model",
|
||||
"type": "text",
|
||||
"primaryKey": false,
|
||||
"notNull": true,
|
||||
"autoincrement": false
|
||||
},
|
||||
"dimensions": {
|
||||
"name": "dimensions",
|
||||
"type": "integer",
|
||||
"primaryKey": false,
|
||||
"notNull": true,
|
||||
"autoincrement": false
|
||||
},
|
||||
"embedding": {
|
||||
"name": "embedding",
|
||||
"type": "blob",
|
||||
"primaryKey": false,
|
||||
"notNull": true,
|
||||
"autoincrement": false
|
||||
},
|
||||
"created_at": {
|
||||
"name": "created_at",
|
||||
"type": "integer",
|
||||
"primaryKey": false,
|
||||
"notNull": true,
|
||||
"autoincrement": false
|
||||
}
|
||||
},
|
||||
"indexes": {
|
||||
"idx_embeddings_profile": {
|
||||
"name": "idx_embeddings_profile",
|
||||
"columns": [
|
||||
"profile_id",
|
||||
"snippet_id"
|
||||
],
|
||||
"isUnique": false
|
||||
}
|
||||
},
|
||||
"foreignKeys": {
|
||||
"snippet_embeddings_snippet_id_snippets_id_fk": {
|
||||
"name": "snippet_embeddings_snippet_id_snippets_id_fk",
|
||||
"tableFrom": "snippet_embeddings",
|
||||
"tableTo": "snippets",
|
||||
"columnsFrom": [
|
||||
"snippet_id"
|
||||
],
|
||||
"columnsTo": [
|
||||
"id"
|
||||
],
|
||||
"onDelete": "cascade",
|
||||
"onUpdate": "no action"
|
||||
},
|
||||
"snippet_embeddings_profile_id_embedding_profiles_id_fk": {
|
||||
"name": "snippet_embeddings_profile_id_embedding_profiles_id_fk",
|
||||
"tableFrom": "snippet_embeddings",
|
||||
"tableTo": "embedding_profiles",
|
||||
"columnsFrom": [
|
||||
"profile_id"
|
||||
],
|
||||
"columnsTo": [
|
||||
"id"
|
||||
],
|
||||
"onDelete": "cascade",
|
||||
"onUpdate": "no action"
|
||||
}
|
||||
},
|
||||
"compositePrimaryKeys": {
|
||||
"snippet_embeddings_snippet_id_profile_id_pk": {
|
||||
"columns": [
|
||||
"snippet_id",
|
||||
"profile_id"
|
||||
],
|
||||
"name": "snippet_embeddings_snippet_id_profile_id_pk"
|
||||
}
|
||||
},
|
||||
"uniqueConstraints": {},
|
||||
"checkConstraints": {}
|
||||
},
|
||||
"snippets": {
|
||||
"name": "snippets",
|
||||
"columns": {
|
||||
"id": {
|
||||
"name": "id",
|
||||
"type": "text",
|
||||
"primaryKey": true,
|
||||
"notNull": true,
|
||||
"autoincrement": false
|
||||
},
|
||||
"document_id": {
|
||||
"name": "document_id",
|
||||
"type": "text",
|
||||
"primaryKey": false,
|
||||
"notNull": true,
|
||||
"autoincrement": false
|
||||
},
|
||||
"repository_id": {
|
||||
"name": "repository_id",
|
||||
"type": "text",
|
||||
"primaryKey": false,
|
||||
"notNull": true,
|
||||
"autoincrement": false
|
||||
},
|
||||
"version_id": {
|
||||
"name": "version_id",
|
||||
"type": "text",
|
||||
"primaryKey": false,
|
||||
"notNull": false,
|
||||
"autoincrement": false
|
||||
},
|
||||
"type": {
|
||||
"name": "type",
|
||||
"type": "text",
|
||||
"primaryKey": false,
|
||||
"notNull": true,
|
||||
"autoincrement": false
|
||||
},
|
||||
"title": {
|
||||
"name": "title",
|
||||
"type": "text",
|
||||
"primaryKey": false,
|
||||
"notNull": false,
|
||||
"autoincrement": false
|
||||
},
|
||||
"content": {
|
||||
"name": "content",
|
||||
"type": "text",
|
||||
"primaryKey": false,
|
||||
"notNull": true,
|
||||
"autoincrement": false
|
||||
},
|
||||
"language": {
|
||||
"name": "language",
|
||||
"type": "text",
|
||||
"primaryKey": false,
|
||||
"notNull": false,
|
||||
"autoincrement": false
|
||||
},
|
||||
"breadcrumb": {
|
||||
"name": "breadcrumb",
|
||||
"type": "text",
|
||||
"primaryKey": false,
|
||||
"notNull": false,
|
||||
"autoincrement": false
|
||||
},
|
||||
"token_count": {
|
||||
"name": "token_count",
|
||||
"type": "integer",
|
||||
"primaryKey": false,
|
||||
"notNull": false,
|
||||
"autoincrement": false,
|
||||
"default": 0
|
||||
},
|
||||
"created_at": {
|
||||
"name": "created_at",
|
||||
"type": "integer",
|
||||
"primaryKey": false,
|
||||
"notNull": true,
|
||||
"autoincrement": false
|
||||
}
|
||||
},
|
||||
"indexes": {
|
||||
"idx_snippets_repo_version": {
|
||||
"name": "idx_snippets_repo_version",
|
||||
"columns": [
|
||||
"repository_id",
|
||||
"version_id"
|
||||
],
|
||||
"isUnique": false
|
||||
},
|
||||
"idx_snippets_repo_type": {
|
||||
"name": "idx_snippets_repo_type",
|
||||
"columns": [
|
||||
"repository_id",
|
||||
"type"
|
||||
],
|
||||
"isUnique": false
|
||||
}
|
||||
},
|
||||
"foreignKeys": {
|
||||
"snippets_document_id_documents_id_fk": {
|
||||
"name": "snippets_document_id_documents_id_fk",
|
||||
"tableFrom": "snippets",
|
||||
"tableTo": "documents",
|
||||
"columnsFrom": [
|
||||
"document_id"
|
||||
],
|
||||
"columnsTo": [
|
||||
"id"
|
||||
],
|
||||
"onDelete": "cascade",
|
||||
"onUpdate": "no action"
|
||||
},
|
||||
"snippets_repository_id_repositories_id_fk": {
|
||||
"name": "snippets_repository_id_repositories_id_fk",
|
||||
"tableFrom": "snippets",
|
||||
"tableTo": "repositories",
|
||||
"columnsFrom": [
|
||||
"repository_id"
|
||||
],
|
||||
"columnsTo": [
|
||||
"id"
|
||||
],
|
||||
"onDelete": "cascade",
|
||||
"onUpdate": "no action"
|
||||
},
|
||||
"snippets_version_id_repository_versions_id_fk": {
|
||||
"name": "snippets_version_id_repository_versions_id_fk",
|
||||
"tableFrom": "snippets",
|
||||
"tableTo": "repository_versions",
|
||||
"columnsFrom": [
|
||||
"version_id"
|
||||
],
|
||||
"columnsTo": [
|
||||
"id"
|
||||
],
|
||||
"onDelete": "cascade",
|
||||
"onUpdate": "no action"
|
||||
}
|
||||
},
|
||||
"compositePrimaryKeys": {},
|
||||
"uniqueConstraints": {},
|
||||
"checkConstraints": {}
|
||||
}
|
||||
},
|
||||
"views": {},
|
||||
"enums": {},
|
||||
"_meta": {
|
||||
"schemas": {},
|
||||
"tables": {},
|
||||
"columns": {}
|
||||
},
|
||||
"internal": {
|
||||
"indexes": {}
|
||||
}
|
||||
}
|
||||
@@ -43,6 +43,13 @@
|
||||
"when": 1774890536284,
|
||||
"tag": "0005_fix_stage_defaults",
|
||||
"breakpoints": true
|
||||
},
|
||||
{
|
||||
"idx": 6,
|
||||
"version": "6",
|
||||
"when": 1775038799913,
|
||||
"tag": "0006_yielding_centennial",
|
||||
"breakpoints": true
|
||||
}
|
||||
]
|
||||
}
|
||||
@@ -6,6 +6,7 @@ import { readFileSync } from 'node:fs';
|
||||
import { join } from 'node:path';
|
||||
import { eq } from 'drizzle-orm';
|
||||
import * as schema from './schema';
|
||||
import { loadSqliteVec, sqliteVecRowidTableName, sqliteVecTableName } from './sqlite-vec';
|
||||
import {
|
||||
repositories,
|
||||
repositoryVersions,
|
||||
@@ -24,6 +25,7 @@ import {
|
||||
function createTestDb() {
|
||||
const client = new Database(':memory:');
|
||||
client.pragma('foreign_keys = ON');
|
||||
loadSqliteVec(client);
|
||||
|
||||
const db = drizzle(client, { schema });
|
||||
|
||||
@@ -266,10 +268,11 @@ describe('snippets table', () => {
|
||||
|
||||
describe('snippet_embeddings table', () => {
|
||||
let db: ReturnType<typeof createTestDb>['db'];
|
||||
let client: Database.Database;
|
||||
let snippetId: string;
|
||||
|
||||
beforeEach(() => {
|
||||
({ db } = createTestDb());
|
||||
({ db, client } = createTestDb());
|
||||
db.insert(repositories).values(makeRepo()).run();
|
||||
const docId = crypto.randomUUID();
|
||||
db.insert(documents)
|
||||
@@ -344,6 +347,30 @@ describe('snippet_embeddings table', () => {
|
||||
const result = db.select().from(snippetEmbeddings).all();
|
||||
expect(result).toHaveLength(0);
|
||||
});
|
||||
|
||||
it('keeps the relational schema free of vec_embedding and retains the profile index', () => {
|
||||
const columns = client
|
||||
.prepare("PRAGMA table_info('snippet_embeddings')")
|
||||
.all() as Array<{ name: string }>;
|
||||
expect(columns.map((column) => column.name)).not.toContain('vec_embedding');
|
||||
|
||||
const indexes = client
|
||||
.prepare("PRAGMA index_list('snippet_embeddings')")
|
||||
.all() as Array<{ name: string }>;
|
||||
expect(indexes.map((index) => index.name)).toContain('idx_embeddings_profile');
|
||||
});
|
||||
|
||||
it('loads sqlite-vec idempotently and derives deterministic per-profile table names', () => {
|
||||
expect(() => loadSqliteVec(client)).not.toThrow();
|
||||
const tableName = sqliteVecTableName('local-default');
|
||||
const rowidTableName = sqliteVecRowidTableName('local-default');
|
||||
|
||||
expect(tableName).toMatch(/^snippet_embeddings_vec_local_default_[0-9a-f]{8}$/);
|
||||
expect(rowidTableName).toMatch(/^snippet_embeddings_vec_rowids_local_default_[0-9a-f]{8}$/);
|
||||
expect(sqliteVecTableName('local-default')).toBe(tableName);
|
||||
expect(sqliteVecRowidTableName('local-default')).toBe(rowidTableName);
|
||||
expect(sqliteVecTableName('local-default')).not.toBe(sqliteVecTableName('openai/custom'));
|
||||
});
|
||||
});
|
||||
|
||||
describe('indexing_jobs table', () => {
|
||||
|
||||
@@ -1,6 +1,7 @@
|
||||
import { sql } from 'drizzle-orm';
|
||||
import {
|
||||
blob,
|
||||
index,
|
||||
integer,
|
||||
primaryKey,
|
||||
real,
|
||||
@@ -34,7 +35,7 @@ export const repositories = sqliteTable('repositories', {
|
||||
lastIndexedAt: integer('last_indexed_at', { mode: 'timestamp' }),
|
||||
createdAt: integer('created_at', { mode: 'timestamp' }).notNull(),
|
||||
updatedAt: integer('updated_at', { mode: 'timestamp' }).notNull()
|
||||
});
|
||||
}, (t) => [index('idx_repositories_state').on(t.state)]);
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// repository_versions
|
||||
@@ -72,7 +73,7 @@ export const documents = sqliteTable('documents', {
|
||||
tokenCount: integer('token_count').default(0),
|
||||
checksum: text('checksum').notNull(), // SHA-256 of file content
|
||||
indexedAt: integer('indexed_at', { mode: 'timestamp' }).notNull()
|
||||
});
|
||||
}, (t) => [index('idx_documents_repo_version').on(t.repositoryId, t.versionId)]);
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// snippets
|
||||
@@ -93,7 +94,10 @@ export const snippets = sqliteTable('snippets', {
|
||||
breadcrumb: text('breadcrumb'), // e.g. "Installation > Getting Started"
|
||||
tokenCount: integer('token_count').default(0),
|
||||
createdAt: integer('created_at', { mode: 'timestamp' }).notNull()
|
||||
});
|
||||
}, (t) => [
|
||||
index('idx_snippets_repo_version').on(t.repositoryId, t.versionId),
|
||||
index('idx_snippets_repo_type').on(t.repositoryId, t.type),
|
||||
]);
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// embedding_profiles
|
||||
@@ -128,7 +132,10 @@ export const snippetEmbeddings = sqliteTable(
|
||||
embedding: blob('embedding').notNull(), // Float32Array as binary blob
|
||||
createdAt: integer('created_at').notNull()
|
||||
},
|
||||
(table) => [primaryKey({ columns: [table.snippetId, table.profileId] })]
|
||||
(table) => [
|
||||
primaryKey({ columns: [table.snippetId, table.profileId] }),
|
||||
index('idx_embeddings_profile').on(table.profileId, table.snippetId),
|
||||
]
|
||||
);
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
@@ -154,7 +161,7 @@ export const indexingJobs = sqliteTable('indexing_jobs', {
|
||||
startedAt: integer('started_at', { mode: 'timestamp' }),
|
||||
completedAt: integer('completed_at', { mode: 'timestamp' }),
|
||||
createdAt: integer('created_at', { mode: 'timestamp' }).notNull()
|
||||
});
|
||||
}, (t) => [index('idx_jobs_repo_status').on(t.repositoryId, t.status)]);
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// repository_configs
|
||||
|
||||
49
src/lib/server/db/sqlite-vec.ts
Normal file
49
src/lib/server/db/sqlite-vec.ts
Normal file
@@ -0,0 +1,49 @@
|
||||
import type Database from 'better-sqlite3';
|
||||
import * as sqliteVec from 'sqlite-vec';
|
||||
|
||||
const loadedConnections = new WeakSet<Database.Database>();
|
||||
|
||||
function stableHash(value: string): string {
|
||||
let hash = 2166136261;
|
||||
|
||||
for (let index = 0; index < value.length; index += 1) {
|
||||
hash ^= value.charCodeAt(index);
|
||||
hash = Math.imul(hash, 16777619);
|
||||
}
|
||||
|
||||
return (hash >>> 0).toString(16).padStart(8, '0');
|
||||
}
|
||||
|
||||
function sanitizeIdentifierPart(value: string): string {
|
||||
const sanitized = value
|
||||
.toLowerCase()
|
||||
.replace(/[^a-z0-9]+/g, '_')
|
||||
.replace(/^_+|_+$/g, '');
|
||||
|
||||
return sanitized.length > 0 ? sanitized.slice(0, 32) : 'profile';
|
||||
}
|
||||
|
||||
export function sqliteVecTableSuffix(profileId: string): string {
|
||||
return `${sanitizeIdentifierPart(profileId)}_${stableHash(profileId)}`;
|
||||
}
|
||||
|
||||
export function sqliteVecTableName(profileId: string): string {
|
||||
return `snippet_embeddings_vec_${sqliteVecTableSuffix(profileId)}`;
|
||||
}
|
||||
|
||||
export function sqliteVecRowidTableName(profileId: string): string {
|
||||
return `snippet_embeddings_vec_rowids_${sqliteVecTableSuffix(profileId)}`;
|
||||
}
|
||||
|
||||
export function quoteSqliteIdentifier(identifier: string): string {
|
||||
return `"${identifier.replace(/"/g, '""')}"`;
|
||||
}
|
||||
|
||||
export function loadSqliteVec(db: Database.Database): void {
|
||||
if (loadedConnections.has(db)) {
|
||||
return;
|
||||
}
|
||||
|
||||
sqliteVec.load(db);
|
||||
loadedConnections.add(db);
|
||||
}
|
||||
2
src/lib/server/db/vectors.sql
Normal file
2
src/lib/server/db/vectors.sql
Normal file
@@ -0,0 +1,2 @@
|
||||
-- Relational vec_embedding bootstrap removed in iteration 2.
|
||||
-- Downstream sqlite-vec vec0 tables are created on demand in application code.
|
||||
@@ -12,6 +12,12 @@ import { migrate } from 'drizzle-orm/better-sqlite3/migrator';
|
||||
import { readFileSync } from 'node:fs';
|
||||
import { join } from 'node:path';
|
||||
import * as schema from '../db/schema.js';
|
||||
import {
|
||||
loadSqliteVec,
|
||||
sqliteVecRowidTableName,
|
||||
sqliteVecTableName
|
||||
} from '../db/sqlite-vec.js';
|
||||
import { SqliteVecStore } from '../search/sqlite-vec.store.js';
|
||||
|
||||
import { NoopEmbeddingProvider, EmbeddingError, type EmbeddingVector } from './provider.js';
|
||||
import { OpenAIEmbeddingProvider } from './openai.provider.js';
|
||||
@@ -31,6 +37,7 @@ import { createProviderFromProfile } from './registry.js';
|
||||
function createTestDb() {
|
||||
const client = new Database(':memory:');
|
||||
client.pragma('foreign_keys = ON');
|
||||
loadSqliteVec(client);
|
||||
|
||||
const db = drizzle(client, { schema });
|
||||
const migrationsFolder = join(import.meta.dirname, '../db/migrations');
|
||||
@@ -387,10 +394,19 @@ describe('EmbeddingService', () => {
|
||||
embedding: Buffer;
|
||||
profile_id: string;
|
||||
};
|
||||
expect((row as Record<string, unknown>).vec_embedding).toBeUndefined();
|
||||
expect(row.model).toBe('test-model');
|
||||
expect(row.dimensions).toBe(4);
|
||||
expect(row.profile_id).toBe('local-default');
|
||||
expect(row.embedding).toBeInstanceOf(Buffer);
|
||||
|
||||
const queryEmbedding = service.getEmbedding(snippetId, 'local-default');
|
||||
const matches = new SqliteVecStore(client).queryNearestNeighbors(queryEmbedding!, {
|
||||
repositoryId: '/test/embed-repo',
|
||||
profileId: 'local-default',
|
||||
limit: 5
|
||||
});
|
||||
expect(matches[0]?.snippetId).toBe(snippetId);
|
||||
});
|
||||
|
||||
it('stores embeddings as retrievable Float32Array blobs', async () => {
|
||||
@@ -436,6 +452,22 @@ describe('EmbeddingService', () => {
|
||||
.prepare('SELECT profile_id FROM snippet_embeddings WHERE snippet_id = ?')
|
||||
.get(snippetId) as { profile_id: string };
|
||||
expect(row.profile_id).toBe('openai-custom');
|
||||
|
||||
const queryEmbedding = service.getEmbedding(snippetId, 'openai-custom');
|
||||
const store = new SqliteVecStore(client);
|
||||
const customMatches = store.queryNearestNeighbors(queryEmbedding!, {
|
||||
repositoryId: '/test/embed-repo',
|
||||
profileId: 'openai-custom',
|
||||
limit: 5
|
||||
});
|
||||
const defaultMatches = store.queryNearestNeighbors(new Float32Array([1, 0, 0, 0]), {
|
||||
repositoryId: '/test/embed-repo',
|
||||
profileId: 'local-default',
|
||||
limit: 5
|
||||
});
|
||||
|
||||
expect(customMatches[0]?.snippetId).toBe(snippetId);
|
||||
expect(defaultMatches).toHaveLength(0);
|
||||
});
|
||||
|
||||
it('is idempotent — re-embedding replaces the existing row', async () => {
|
||||
@@ -450,6 +482,17 @@ describe('EmbeddingService', () => {
|
||||
.prepare('SELECT COUNT(*) as cnt FROM snippet_embeddings WHERE snippet_id = ?')
|
||||
.get(snippetId) as { cnt: number };
|
||||
expect(rows.cnt).toBe(1);
|
||||
|
||||
const vecTable = sqliteVecTableName('local-default');
|
||||
const rowidTable = sqliteVecRowidTableName('local-default');
|
||||
const vecRows = client.prepare(`SELECT COUNT(*) as cnt FROM "${vecTable}"`).get() as {
|
||||
cnt: number;
|
||||
};
|
||||
const rowidRows = client.prepare(`SELECT COUNT(*) as cnt FROM "${rowidTable}"`).get() as {
|
||||
cnt: number;
|
||||
};
|
||||
expect(vecRows.cnt).toBe(1);
|
||||
expect(rowidRows.cnt).toBe(1);
|
||||
});
|
||||
|
||||
it('calls onProgress after each batch', async () => {
|
||||
|
||||
@@ -5,6 +5,7 @@
|
||||
|
||||
import type Database from 'better-sqlite3';
|
||||
import type { EmbeddingProvider } from './provider.js';
|
||||
import { SqliteVecStore } from '$lib/server/search/sqlite-vec.store.js';
|
||||
|
||||
interface SnippetRow {
|
||||
id: string;
|
||||
@@ -17,11 +18,15 @@ const BATCH_SIZE = 50;
|
||||
const TEXT_MAX_CHARS = 2048;
|
||||
|
||||
export class EmbeddingService {
|
||||
private readonly sqliteVecStore: SqliteVecStore;
|
||||
|
||||
constructor(
|
||||
private readonly db: Database.Database,
|
||||
private readonly provider: EmbeddingProvider,
|
||||
private readonly profileId: string = 'local-default'
|
||||
) {}
|
||||
) {
|
||||
this.sqliteVecStore = new SqliteVecStore(db);
|
||||
}
|
||||
|
||||
findSnippetIdsMissingEmbeddings(repositoryId: string, versionId: string | null): string[] {
|
||||
if (versionId) {
|
||||
@@ -104,13 +109,19 @@ export class EmbeddingService {
|
||||
for (let j = 0; j < batchSnippets.length; j++) {
|
||||
const snippet = batchSnippets[j];
|
||||
const embedding = embeddings[j];
|
||||
|
||||
insert.run(
|
||||
snippet.id,
|
||||
this.profileId,
|
||||
embedding.model,
|
||||
embedding.dimensions,
|
||||
Buffer.from(embedding.values.buffer)
|
||||
Buffer.from(
|
||||
embedding.values.buffer,
|
||||
embedding.values.byteOffset,
|
||||
embedding.values.byteLength
|
||||
)
|
||||
);
|
||||
this.sqliteVecStore.upsertEmbedding(this.profileId, snippet.id, embedding.values);
|
||||
}
|
||||
});
|
||||
insertMany();
|
||||
|
||||
@@ -21,6 +21,11 @@ const db = new Database(dbPath);
|
||||
db.pragma('journal_mode = WAL');
|
||||
db.pragma('foreign_keys = ON');
|
||||
db.pragma('busy_timeout = 5000');
|
||||
db.pragma('synchronous = NORMAL');
|
||||
db.pragma('cache_size = -65536');
|
||||
db.pragma('temp_store = MEMORY');
|
||||
db.pragma('mmap_size = 268435456');
|
||||
db.pragma('wal_autocheckpoint = 1000');
|
||||
|
||||
// Load the embedding profile from DB
|
||||
const rawProfile = db.prepare('SELECT * FROM embedding_profiles WHERE id = ?').get(embeddingProfileId);
|
||||
|
||||
@@ -13,6 +13,9 @@ import { JobQueue } from './job-queue.js';
|
||||
import { IndexingPipeline } from './indexing.pipeline.js';
|
||||
import { recoverStaleJobs } from './startup.js';
|
||||
import { EmbeddingService } from '$lib/server/embeddings/embedding.service.js';
|
||||
import { loadSqliteVec } from '$lib/server/db/sqlite-vec.js';
|
||||
import { SqliteVecStore } from '$lib/server/search/sqlite-vec.store.js';
|
||||
import { sqliteVecRowidTableName, sqliteVecTableName } from '$lib/server/db/sqlite-vec.js';
|
||||
import * as diffStrategy from './differential-strategy.js';
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
@@ -22,6 +25,7 @@ import * as diffStrategy from './differential-strategy.js';
|
||||
function createTestDb(): Database.Database {
|
||||
const client = new Database(':memory:');
|
||||
client.pragma('foreign_keys = ON');
|
||||
loadSqliteVec(client);
|
||||
|
||||
const migrationsFolder = join(import.meta.dirname, '../db/migrations');
|
||||
for (const migrationFile of [
|
||||
@@ -29,7 +33,9 @@ function createTestDb(): Database.Database {
|
||||
'0001_quick_nighthawk.sql',
|
||||
'0002_silky_stellaris.sql',
|
||||
'0003_multiversion_config.sql',
|
||||
'0004_complete_sentry.sql'
|
||||
'0004_complete_sentry.sql',
|
||||
'0005_fix_stage_defaults.sql',
|
||||
'0006_yielding_centennial.sql'
|
||||
]) {
|
||||
const migrationSql = readFileSync(join(migrationsFolder, migrationFile), 'utf-8');
|
||||
|
||||
@@ -539,6 +545,52 @@ describe('IndexingPipeline', () => {
|
||||
expect(finalChecksum).toBe('sha-v2');
|
||||
});
|
||||
|
||||
it('removes derived vec rows when changed documents are replaced', async () => {
|
||||
const docId = crypto.randomUUID();
|
||||
const snippetId = crypto.randomUUID();
|
||||
const embedding = Float32Array.from([1, 0, 0]);
|
||||
const vecStore = new SqliteVecStore(db);
|
||||
|
||||
db.prepare(
|
||||
`INSERT INTO documents (id, repository_id, version_id, file_path, checksum, indexed_at)
|
||||
VALUES (?, '/test/repo', NULL, 'README.md', 'stale-doc', ?)`
|
||||
).run(docId, now);
|
||||
db.prepare(
|
||||
`INSERT INTO snippets (id, document_id, repository_id, version_id, type, content, created_at)
|
||||
VALUES (?, ?, '/test/repo', NULL, 'info', 'stale snippet', ?)`
|
||||
).run(snippetId, docId, now);
|
||||
db.prepare(
|
||||
`INSERT INTO snippet_embeddings (snippet_id, profile_id, model, dimensions, embedding, created_at)
|
||||
VALUES (?, 'local-default', 'test-model', 3, ?, ?)`
|
||||
).run(snippetId, Buffer.from(embedding.buffer), now);
|
||||
vecStore.upsertEmbedding('local-default', snippetId, embedding);
|
||||
|
||||
const pipeline = makePipeline({
|
||||
files: [
|
||||
{
|
||||
path: 'README.md',
|
||||
content: '# Updated\n\nFresh content.',
|
||||
sha: 'sha-fresh',
|
||||
language: 'markdown'
|
||||
}
|
||||
],
|
||||
totalFiles: 1
|
||||
});
|
||||
const job = makeJob();
|
||||
|
||||
await pipeline.run(job as never);
|
||||
|
||||
const vecTable = sqliteVecTableName('local-default');
|
||||
const rowidTable = sqliteVecRowidTableName('local-default');
|
||||
const vecCount = db.prepare(`SELECT COUNT(*) as n FROM "${vecTable}"`).get() as { n: number };
|
||||
const rowidCount = db.prepare(`SELECT COUNT(*) as n FROM "${rowidTable}"`).get() as {
|
||||
n: number;
|
||||
};
|
||||
|
||||
expect(vecCount.n).toBe(0);
|
||||
expect(rowidCount.n).toBe(0);
|
||||
});
|
||||
|
||||
it('updates job progress as files are processed', async () => {
|
||||
const files = Array.from({ length: 5 }, (_, i) => ({
|
||||
path: `file${i}.md`,
|
||||
@@ -700,6 +752,60 @@ describe('IndexingPipeline', () => {
|
||||
expect(version.indexed_at).not.toBeNull();
|
||||
});
|
||||
|
||||
it('clones ancestor embeddings into the derived vec store for differential indexing', async () => {
|
||||
const ancestorVersionId = insertVersion(db, { tag: 'v1.0.0', state: 'indexed' });
|
||||
const targetVersionId = insertVersion(db, { tag: 'v1.1.0', state: 'pending' });
|
||||
const vecStore = new SqliteVecStore(db);
|
||||
const docId = crypto.randomUUID();
|
||||
const snippetId = crypto.randomUUID();
|
||||
const embedding = Float32Array.from([0.2, 0.4, 0.6]);
|
||||
|
||||
db.prepare(
|
||||
`INSERT INTO documents (id, repository_id, version_id, file_path, checksum, indexed_at)
|
||||
VALUES (?, '/test/repo', ?, 'README.md', 'ancestor-doc', ?)`
|
||||
).run(docId, ancestorVersionId, now);
|
||||
db.prepare(
|
||||
`INSERT INTO snippets (id, document_id, repository_id, version_id, type, content, created_at)
|
||||
VALUES (?, ?, '/test/repo', ?, 'info', 'ancestor snippet', ?)`
|
||||
).run(snippetId, docId, ancestorVersionId, now);
|
||||
db.prepare(
|
||||
`INSERT INTO snippet_embeddings (snippet_id, profile_id, model, dimensions, embedding, created_at)
|
||||
VALUES (?, 'local-default', 'test-model', 3, ?, ?)`
|
||||
).run(snippetId, Buffer.from(embedding.buffer), now);
|
||||
vecStore.upsertEmbedding('local-default', snippetId, embedding);
|
||||
|
||||
vi.spyOn(diffStrategy, 'buildDifferentialPlan').mockResolvedValue({
|
||||
ancestorTag: 'v1.0.0',
|
||||
ancestorVersionId,
|
||||
changedPaths: new Set<string>(),
|
||||
unchangedPaths: new Set<string>(['README.md'])
|
||||
});
|
||||
|
||||
const pipeline = makePipeline({ files: [], totalFiles: 0 });
|
||||
const job = makeJob('/test/repo', targetVersionId);
|
||||
|
||||
await pipeline.run(job as never);
|
||||
|
||||
const targetRows = db
|
||||
.prepare(
|
||||
`SELECT se.snippet_id, se.embedding
|
||||
FROM snippet_embeddings se
|
||||
INNER JOIN snippets s ON s.id = se.snippet_id
|
||||
WHERE s.version_id = ?`
|
||||
)
|
||||
.all(targetVersionId) as Array<{ snippet_id: string; embedding: Buffer }>;
|
||||
|
||||
expect(targetRows).toHaveLength(1);
|
||||
const matches = vecStore.queryNearestNeighbors(embedding, {
|
||||
repositoryId: '/test/repo',
|
||||
versionId: targetVersionId,
|
||||
profileId: 'local-default',
|
||||
limit: 5
|
||||
});
|
||||
|
||||
expect(matches[0]?.snippetId).toBe(targetRows[0].snippet_id);
|
||||
});
|
||||
|
||||
it('updates repository_versions state to error when pipeline throws and job has versionId', async () => {
|
||||
const versionId = insertVersion(db, { tag: 'v1.0.0', state: 'pending' });
|
||||
const errorCrawl = vi.fn().mockRejectedValue(new Error('crawl failed'));
|
||||
|
||||
@@ -22,6 +22,7 @@ import type { EmbeddingService } from '$lib/server/embeddings/embedding.service.
|
||||
import { RepositoryMapper } from '$lib/server/mappers/repository.mapper.js';
|
||||
import { IndexingJob } from '$lib/server/models/indexing-job.js';
|
||||
import { Repository, RepositoryEntity } from '$lib/server/models/repository.js';
|
||||
import { SqliteVecStore } from '$lib/server/search/sqlite-vec.store.js';
|
||||
import { resolveConfig, type ParsedConfig } from '$lib/server/config/config-parser.js';
|
||||
import { parseFile } from '$lib/server/parser/index.js';
|
||||
import { computeTrustScore } from '$lib/server/search/trust-score.js';
|
||||
@@ -63,12 +64,16 @@ function sha256(content: string): string {
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
export class IndexingPipeline {
|
||||
private readonly sqliteVecStore: SqliteVecStore;
|
||||
|
||||
constructor(
|
||||
private readonly db: Database.Database,
|
||||
private readonly githubCrawl: typeof GithubCrawlFn,
|
||||
private readonly localCrawler: LocalCrawler,
|
||||
private readonly embeddingService: EmbeddingService | null
|
||||
) {}
|
||||
) {
|
||||
this.sqliteVecStore = new SqliteVecStore(db);
|
||||
}
|
||||
|
||||
// -------------------------------------------------------------------------
|
||||
// Public — run a job end to end
|
||||
@@ -593,6 +598,12 @@ export class IndexingPipeline {
|
||||
emb.embedding,
|
||||
emb.created_at
|
||||
);
|
||||
this.sqliteVecStore.upsertEmbeddingBuffer(
|
||||
emb.profile_id,
|
||||
newSnippetId,
|
||||
emb.embedding,
|
||||
emb.dimensions
|
||||
);
|
||||
}
|
||||
}
|
||||
})();
|
||||
@@ -623,6 +634,8 @@ export class IndexingPipeline {
|
||||
);
|
||||
|
||||
this.db.transaction(() => {
|
||||
this.sqliteVecStore.deleteEmbeddingsForDocumentIds(changedDocIds);
|
||||
|
||||
// Delete stale documents (cascade deletes their snippets via FK).
|
||||
if (changedDocIds.length > 0) {
|
||||
const placeholders = changedDocIds.map(() => '?').join(',');
|
||||
|
||||
@@ -17,6 +17,54 @@ import type { WorkerPool } from './worker-pool.js';
|
||||
|
||||
const JOB_SELECT = `SELECT * FROM indexing_jobs`;
|
||||
|
||||
type JobStatusFilter = IndexingJob['status'] | Array<IndexingJob['status']>;
|
||||
|
||||
function escapeLikePattern(value: string): string {
|
||||
return value.replaceAll('\\', '\\\\').replaceAll('%', '\\%').replaceAll('_', '\\_');
|
||||
}
|
||||
|
||||
function isSpecificRepositoryId(repositoryId: string): boolean {
|
||||
return repositoryId.split('/').filter(Boolean).length >= 2;
|
||||
}
|
||||
|
||||
function normalizeStatuses(status?: JobStatusFilter): Array<IndexingJob['status']> {
|
||||
if (!status) {
|
||||
return [];
|
||||
}
|
||||
|
||||
const statuses = Array.isArray(status) ? status : [status];
|
||||
return [...new Set(statuses)];
|
||||
}
|
||||
|
||||
function buildJobFilterQuery(options?: {
|
||||
repositoryId?: string;
|
||||
status?: JobStatusFilter;
|
||||
}): { where: string; params: unknown[] } {
|
||||
const conditions: string[] = [];
|
||||
const params: unknown[] = [];
|
||||
|
||||
if (options?.repositoryId) {
|
||||
if (isSpecificRepositoryId(options.repositoryId)) {
|
||||
conditions.push('repository_id = ?');
|
||||
params.push(options.repositoryId);
|
||||
} else {
|
||||
conditions.push(`(repository_id = ? OR repository_id LIKE ? ESCAPE '\\')`);
|
||||
params.push(options.repositoryId, `${escapeLikePattern(options.repositoryId)}/%`);
|
||||
}
|
||||
}
|
||||
|
||||
const statuses = normalizeStatuses(options?.status);
|
||||
if (statuses.length > 0) {
|
||||
conditions.push(`status IN (${statuses.map(() => '?').join(', ')})`);
|
||||
params.push(...statuses);
|
||||
}
|
||||
|
||||
return {
|
||||
where: conditions.length > 0 ? `WHERE ${conditions.join(' AND ')}` : '',
|
||||
params
|
||||
};
|
||||
}
|
||||
|
||||
export class JobQueue {
|
||||
private workerPool: WorkerPool | null = null;
|
||||
|
||||
@@ -144,23 +192,11 @@ export class JobQueue {
|
||||
*/
|
||||
listJobs(options?: {
|
||||
repositoryId?: string;
|
||||
status?: IndexingJob['status'];
|
||||
status?: JobStatusFilter;
|
||||
limit?: number;
|
||||
}): IndexingJob[] {
|
||||
const limit = Math.min(options?.limit ?? 20, 200);
|
||||
const conditions: string[] = [];
|
||||
const params: unknown[] = [];
|
||||
|
||||
if (options?.repositoryId) {
|
||||
conditions.push('repository_id = ?');
|
||||
params.push(options.repositoryId);
|
||||
}
|
||||
if (options?.status) {
|
||||
conditions.push('status = ?');
|
||||
params.push(options.status);
|
||||
}
|
||||
|
||||
const where = conditions.length > 0 ? `WHERE ${conditions.join(' AND ')}` : '';
|
||||
const { where, params } = buildJobFilterQuery(options);
|
||||
const sql = `${JOB_SELECT} ${where} ORDER BY created_at DESC LIMIT ?`;
|
||||
params.push(limit);
|
||||
|
||||
@@ -194,19 +230,7 @@ export class JobQueue {
|
||||
* Count all jobs matching optional filters.
|
||||
*/
|
||||
countJobs(options?: { repositoryId?: string; status?: IndexingJob['status'] }): number {
|
||||
const conditions: string[] = [];
|
||||
const params: unknown[] = [];
|
||||
|
||||
if (options?.repositoryId) {
|
||||
conditions.push('repository_id = ?');
|
||||
params.push(options.repositoryId);
|
||||
}
|
||||
if (options?.status) {
|
||||
conditions.push('status = ?');
|
||||
params.push(options.status);
|
||||
}
|
||||
|
||||
const where = conditions.length > 0 ? `WHERE ${conditions.join(' AND')}` : '';
|
||||
const { where, params } = buildJobFilterQuery(options);
|
||||
const sql = `SELECT COUNT(*) as n FROM indexing_jobs ${where}`;
|
||||
const row = this.db.prepare<unknown[], { n: number }>(sql).get(...params);
|
||||
return row?.n ?? 0;
|
||||
|
||||
@@ -171,4 +171,25 @@ describe('ProgressBroadcaster', () => {
|
||||
reader1.cancel();
|
||||
reader2.cancel();
|
||||
});
|
||||
|
||||
it('broadcastWorkerStatus sends worker-status events to global subscribers', async () => {
|
||||
const broadcaster = new ProgressBroadcaster();
|
||||
const stream = broadcaster.subscribeAll();
|
||||
const reader = stream.getReader();
|
||||
|
||||
broadcaster.broadcastWorkerStatus({
|
||||
concurrency: 2,
|
||||
active: 1,
|
||||
idle: 1,
|
||||
workers: [{ index: 0, state: 'running', jobId: 'job-1', repositoryId: '/repo/1', versionId: null }]
|
||||
});
|
||||
|
||||
const { value } = await reader.read();
|
||||
const text = value as string;
|
||||
|
||||
expect(text).toContain('event: worker-status');
|
||||
expect(text).toContain('"active":1');
|
||||
|
||||
reader.cancel();
|
||||
});
|
||||
});
|
||||
|
||||
@@ -10,6 +10,7 @@ export class ProgressBroadcaster {
|
||||
private allSubscribers = new Set<ReadableStreamDefaultController<string>>();
|
||||
private lastEventCache = new Map<string, SSEEvent>();
|
||||
private eventCounters = new Map<string, number>();
|
||||
private globalEventCounter = 0;
|
||||
|
||||
subscribe(jobId: string): ReadableStream<string> {
|
||||
return new ReadableStream({
|
||||
@@ -135,6 +136,24 @@ export class ProgressBroadcaster {
|
||||
}
|
||||
}
|
||||
|
||||
broadcastWorkerStatus(data: object): void {
|
||||
this.globalEventCounter += 1;
|
||||
const event: SSEEvent = {
|
||||
id: this.globalEventCounter,
|
||||
event: 'worker-status',
|
||||
data: JSON.stringify(data)
|
||||
};
|
||||
const sse = this.formatSSE(event);
|
||||
|
||||
for (const controller of this.allSubscribers) {
|
||||
try {
|
||||
controller.enqueue(sse);
|
||||
} catch {
|
||||
// Controller might be closed or errored
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
getLastEvent(jobId: string): SSEEvent | null {
|
||||
return this.lastEventCache.get(jobId) ?? null;
|
||||
}
|
||||
|
||||
@@ -16,7 +16,6 @@ import { LocalCrawler } from '$lib/server/crawler/local.crawler.js';
|
||||
import { IndexingPipeline } from './indexing.pipeline.js';
|
||||
import { JobQueue } from './job-queue.js';
|
||||
import { WorkerPool } from './worker-pool.js';
|
||||
import type { ParseWorkerResponse } from './worker-types.js';
|
||||
import { initBroadcaster } from './progress-broadcaster.js';
|
||||
import type { ProgressBroadcaster } from './progress-broadcaster.js';
|
||||
import path from 'node:path';
|
||||
@@ -90,17 +89,28 @@ export function initializePipeline(
|
||||
if (options?.dbPath) {
|
||||
_broadcaster = initBroadcaster();
|
||||
|
||||
const getRepositoryIdForJob = (jobId: string): string => {
|
||||
const row = db
|
||||
.prepare<[string], { repository_id: string }>(
|
||||
`SELECT repository_id FROM indexing_jobs WHERE id = ?`
|
||||
)
|
||||
.get(jobId);
|
||||
return row?.repository_id ?? '';
|
||||
};
|
||||
|
||||
// Resolve worker script paths relative to this file (build/workers/ directory)
|
||||
const __filename = fileURLToPath(import.meta.url);
|
||||
const __dirname = path.dirname(__filename);
|
||||
const workerScript = path.join(__dirname, '../../../build/workers/worker-entry.mjs');
|
||||
const embedWorkerScript = path.join(__dirname, '../../../build/workers/embed-worker-entry.mjs');
|
||||
const writeWorkerScript = path.join(__dirname, '../../../build/workers/write-worker-entry.mjs');
|
||||
|
||||
try {
|
||||
_pool = new WorkerPool({
|
||||
concurrency: options.concurrency ?? 2,
|
||||
workerScript,
|
||||
embedWorkerScript,
|
||||
writeWorkerScript,
|
||||
dbPath: options.dbPath,
|
||||
onProgress: (jobId, msg) => {
|
||||
// Update DB with progress
|
||||
@@ -112,7 +122,10 @@ export function initializePipeline(
|
||||
|
||||
// Broadcast progress event
|
||||
if (_broadcaster) {
|
||||
_broadcaster.broadcast(jobId, '', 'progress', msg);
|
||||
_broadcaster.broadcast(jobId, getRepositoryIdForJob(jobId), 'job-progress', {
|
||||
...msg,
|
||||
status: 'running'
|
||||
});
|
||||
}
|
||||
},
|
||||
onJobDone: (jobId: string) => {
|
||||
@@ -123,7 +136,10 @@ export function initializePipeline(
|
||||
|
||||
// Broadcast done event
|
||||
if (_broadcaster) {
|
||||
_broadcaster.broadcast(jobId, '', 'job-done', { jobId });
|
||||
_broadcaster.broadcast(jobId, getRepositoryIdForJob(jobId), 'job-done', {
|
||||
jobId,
|
||||
status: 'done'
|
||||
});
|
||||
}
|
||||
},
|
||||
onJobFailed: (jobId: string, error: string) => {
|
||||
@@ -134,7 +150,11 @@ export function initializePipeline(
|
||||
|
||||
// Broadcast failed event
|
||||
if (_broadcaster) {
|
||||
_broadcaster.broadcast(jobId, '', 'job-failed', { jobId, error });
|
||||
_broadcaster.broadcast(jobId, getRepositoryIdForJob(jobId), 'job-failed', {
|
||||
jobId,
|
||||
status: 'failed',
|
||||
error
|
||||
});
|
||||
}
|
||||
},
|
||||
onEmbedDone: (jobId: string) => {
|
||||
@@ -142,6 +162,9 @@ export function initializePipeline(
|
||||
},
|
||||
onEmbedFailed: (jobId: string, error: string) => {
|
||||
console.error('[WorkerPool] Embedding failed for job:', jobId, error);
|
||||
},
|
||||
onWorkerStatus: (status) => {
|
||||
_broadcaster?.broadcastWorkerStatus(status);
|
||||
}
|
||||
});
|
||||
|
||||
|
||||
@@ -13,6 +13,11 @@ const db = new Database(dbPath);
|
||||
db.pragma('journal_mode = WAL');
|
||||
db.pragma('foreign_keys = ON');
|
||||
db.pragma('busy_timeout = 5000');
|
||||
db.pragma('synchronous = NORMAL');
|
||||
db.pragma('cache_size = -65536');
|
||||
db.pragma('temp_store = MEMORY');
|
||||
db.pragma('mmap_size = 268435456');
|
||||
db.pragma('wal_autocheckpoint = 1000');
|
||||
|
||||
const pipeline = new IndexingPipeline(db, githubCrawl, new LocalCrawler(), null);
|
||||
let currentJobId: string | null = null;
|
||||
|
||||
@@ -1,11 +1,19 @@
|
||||
import { Worker } from 'node:worker_threads';
|
||||
import { existsSync } from 'node:fs';
|
||||
import type { ParseWorkerRequest, ParseWorkerResponse, EmbedWorkerRequest, EmbedWorkerResponse, WorkerInitData } from './worker-types.js';
|
||||
import type {
|
||||
ParseWorkerRequest,
|
||||
ParseWorkerResponse,
|
||||
EmbedWorkerRequest,
|
||||
EmbedWorkerResponse,
|
||||
WorkerInitData,
|
||||
WriteWorkerResponse
|
||||
} from './worker-types.js';
|
||||
|
||||
export interface WorkerPoolOptions {
|
||||
concurrency: number;
|
||||
workerScript: string;
|
||||
embedWorkerScript: string;
|
||||
writeWorkerScript?: string;
|
||||
dbPath: string;
|
||||
embeddingProfileId?: string;
|
||||
onProgress: (jobId: string, msg: Extract<ParseWorkerResponse, { type: 'progress' }>) => void;
|
||||
@@ -13,6 +21,22 @@ export interface WorkerPoolOptions {
|
||||
onJobFailed: (jobId: string, error: string) => void;
|
||||
onEmbedDone: (jobId: string) => void;
|
||||
onEmbedFailed: (jobId: string, error: string) => void;
|
||||
onWorkerStatus?: (status: WorkerPoolStatus) => void;
|
||||
}
|
||||
|
||||
/**
 * Per-worker snapshot used in pool status reports (see WorkerPoolStatus).
 * Job/repository/version fields are null while the worker is idle.
 */
export interface WorkerStatusEntry {
  // Position of the worker in the pool's workers array.
  index: number;
  state: 'idle' | 'running';
  jobId: string | null;
  repositoryId: string | null;
  versionId: string | null;
}

/**
 * Aggregate snapshot of the worker pool, emitted through the
 * onWorkerStatus callback whenever occupancy changes.
 */
export interface WorkerPoolStatus {
  // Configured maximum number of parse workers.
  concurrency: number;
  // Workers currently running a job.
  active: number;
  // Workers waiting for work.
  idle: number;
  workers: WorkerStatusEntry[];
}
|
||||
|
||||
interface QueuedJob {
|
||||
@@ -24,6 +48,7 @@ interface QueuedJob {
|
||||
interface RunningJob {
|
||||
jobId: string;
|
||||
repositoryId: string;
|
||||
versionId?: string | null;
|
||||
}
|
||||
|
||||
interface EmbedQueuedJob {
|
||||
@@ -36,10 +61,12 @@ export class WorkerPool {
|
||||
private workers: Worker[] = [];
|
||||
private idleWorkers: Worker[] = [];
|
||||
private embedWorker: Worker | null = null;
|
||||
private writeWorker: Worker | null = null;
|
||||
private embedReady = false;
|
||||
private writeReady = false;
|
||||
private jobQueue: QueuedJob[] = [];
|
||||
private runningJobs = new Map<Worker, RunningJob>();
|
||||
private runningRepoIds = new Set<string>();
|
||||
private runningJobKeys = new Set<string>();
|
||||
private embedQueue: EmbedQueuedJob[] = [];
|
||||
private options: WorkerPoolOptions;
|
||||
private fallbackMode = false;
|
||||
@@ -66,6 +93,12 @@ export class WorkerPool {
|
||||
if (options.embeddingProfileId && existsSync(options.embedWorkerScript)) {
|
||||
this.embedWorker = this.spawnEmbedWorker();
|
||||
}
|
||||
|
||||
if (options.writeWorkerScript && existsSync(options.writeWorkerScript)) {
|
||||
this.writeWorker = this.spawnWriteWorker(options.writeWorkerScript);
|
||||
}
|
||||
|
||||
this.emitStatusChanged();
|
||||
}
|
||||
|
||||
private spawnParseWorker(): Worker {
|
||||
@@ -94,6 +127,22 @@ export class WorkerPool {
|
||||
return worker;
|
||||
}
|
||||
|
||||
/**
 * Spawns the single dedicated write-worker thread that serializes DB writes.
 *
 * The worker receives only the database path via workerData; readiness is
 * signalled back through a 'ready' message handled in onWriteWorkerMessage.
 *
 * @param writeWorkerScript - Path to the compiled write-worker entry script.
 * @returns The spawned Worker instance.
 */
private spawnWriteWorker(writeWorkerScript: string): Worker {
  const worker = new Worker(writeWorkerScript, {
    workerData: {
      dbPath: this.options.dbPath
    } satisfies WorkerInitData
  });

  worker.on('message', (msg: WriteWorkerResponse) => this.onWriteWorkerMessage(msg));
  // On exit, drop the reference so no further writes are routed to a dead
  // worker. NOTE(review): the write worker is not respawned here — confirm
  // a crashed write worker is intentionally left down for the process lifetime.
  worker.on('exit', () => {
    this.writeReady = false;
    this.writeWorker = null;
  });

  return worker;
}
|
||||
|
||||
public enqueue(jobId: string, repositoryId: string, versionId?: string | null): void {
|
||||
if (this.shuttingDown) {
|
||||
console.warn('WorkerPool is shutting down, ignoring enqueue request');
|
||||
@@ -109,10 +158,18 @@ export class WorkerPool {
|
||||
this.dispatch();
|
||||
}
|
||||
|
||||
/**
 * Builds the compound serialization key for a job: at most one job per
 * (repositoryId, versionId) pair may run concurrently. A missing/null
 * versionId collapses to the empty string so versionless jobs share one key.
 */
private static jobKey(repositoryId: string, versionId?: string | null): string {
  return `${repositoryId}:${versionId ?? ''}`;
}
|
||||
|
||||
private dispatch(): void {
|
||||
let statusChanged = false;
|
||||
|
||||
while (this.idleWorkers.length > 0 && this.jobQueue.length > 0) {
|
||||
// Find first job whose repositoryId is not currently running
|
||||
const jobIdx = this.jobQueue.findIndex((j) => !this.runningRepoIds.has(j.repositoryId));
|
||||
// Find first job whose (repositoryId, versionId) compound key is not currently running
|
||||
const jobIdx = this.jobQueue.findIndex(
|
||||
(j) => !this.runningJobKeys.has(WorkerPool.jobKey(j.repositoryId, j.versionId))
|
||||
);
|
||||
|
||||
if (jobIdx === -1) {
|
||||
// No eligible job found (all repos have running jobs)
|
||||
@@ -122,12 +179,17 @@ export class WorkerPool {
|
||||
const job = this.jobQueue.splice(jobIdx, 1)[0];
|
||||
const worker = this.idleWorkers.pop()!;
|
||||
|
||||
this.runningJobs.set(worker, { jobId: job.jobId, repositoryId: job.repositoryId });
|
||||
this.runningRepoIds.add(job.repositoryId);
|
||||
this.runningJobs.set(worker, { jobId: job.jobId, repositoryId: job.repositoryId, versionId: job.versionId });
|
||||
this.runningJobKeys.add(WorkerPool.jobKey(job.repositoryId, job.versionId));
|
||||
statusChanged = true;
|
||||
|
||||
const msg: ParseWorkerRequest = { type: 'run', jobId: job.jobId };
|
||||
worker.postMessage(msg);
|
||||
}
|
||||
|
||||
if (statusChanged) {
|
||||
this.emitStatusChanged();
|
||||
}
|
||||
}
|
||||
|
||||
private onWorkerMessage(worker: Worker, msg: ParseWorkerResponse): void {
|
||||
@@ -137,15 +199,20 @@ export class WorkerPool {
|
||||
const runningJob = this.runningJobs.get(worker);
|
||||
if (runningJob) {
|
||||
this.runningJobs.delete(worker);
|
||||
this.runningRepoIds.delete(runningJob.repositoryId);
|
||||
this.runningJobKeys.delete(WorkerPool.jobKey(runningJob.repositoryId, runningJob.versionId));
|
||||
}
|
||||
this.idleWorkers.push(worker);
|
||||
this.options.onJobDone(msg.jobId);
|
||||
this.emitStatusChanged();
|
||||
|
||||
// If embedding configured, enqueue embed request
|
||||
if (this.embedWorker && this.options.embeddingProfileId) {
|
||||
const runningJobData = runningJob || { jobId: msg.jobId, repositoryId: '' };
|
||||
this.enqueueEmbed(msg.jobId, runningJobData.repositoryId, null);
|
||||
const runningJobData = runningJob || { jobId: msg.jobId, repositoryId: '', versionId: null };
|
||||
this.enqueueEmbed(
|
||||
msg.jobId,
|
||||
runningJobData.repositoryId,
|
||||
runningJobData.versionId ?? null
|
||||
);
|
||||
}
|
||||
|
||||
this.dispatch();
|
||||
@@ -153,10 +220,11 @@ export class WorkerPool {
|
||||
const runningJob = this.runningJobs.get(worker);
|
||||
if (runningJob) {
|
||||
this.runningJobs.delete(worker);
|
||||
this.runningRepoIds.delete(runningJob.repositoryId);
|
||||
this.runningJobKeys.delete(WorkerPool.jobKey(runningJob.repositoryId, runningJob.versionId));
|
||||
}
|
||||
this.idleWorkers.push(worker);
|
||||
this.options.onJobFailed(msg.jobId, msg.error);
|
||||
this.emitStatusChanged();
|
||||
this.dispatch();
|
||||
}
|
||||
}
|
||||
@@ -176,13 +244,15 @@ export class WorkerPool {
|
||||
const runningJob = this.runningJobs.get(worker);
|
||||
if (runningJob && code !== 0) {
|
||||
this.runningJobs.delete(worker);
|
||||
this.runningRepoIds.delete(runningJob.repositoryId);
|
||||
this.runningJobKeys.delete(WorkerPool.jobKey(runningJob.repositoryId, runningJob.versionId));
|
||||
this.options.onJobFailed(runningJob.jobId, `Worker crashed with code ${code}`);
|
||||
} else if (runningJob) {
|
||||
this.runningJobs.delete(worker);
|
||||
this.runningRepoIds.delete(runningJob.repositoryId);
|
||||
this.runningJobKeys.delete(WorkerPool.jobKey(runningJob.repositoryId, runningJob.versionId));
|
||||
}
|
||||
|
||||
this.emitStatusChanged();
|
||||
|
||||
// Remove from workers array
|
||||
const workerIdx = this.workers.indexOf(worker);
|
||||
if (workerIdx !== -1) {
|
||||
@@ -212,6 +282,17 @@ export class WorkerPool {
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Handles messages from the dedicated write worker.
 *
 * 'ready' marks the worker as available for write requests; 'write_error'
 * is logged. NOTE(review): a write_error is only logged here — the owning
 * job is not marked failed; confirm that is the intended contract.
 */
private onWriteWorkerMessage(msg: WriteWorkerResponse): void {
  if (msg.type === 'ready') {
    this.writeReady = true;
    return;
  }

  if (msg.type === 'write_error') {
    console.error('[WorkerPool] Write worker failed for job:', msg.jobId, msg.error);
  }
}
|
||||
|
||||
private processEmbedQueue(): void {
|
||||
if (!this.embedWorker || !this.embedReady) {
|
||||
return;
|
||||
@@ -250,6 +331,7 @@ export class WorkerPool {
|
||||
}
|
||||
|
||||
public setMaxConcurrency(n: number): void {
|
||||
this.options.concurrency = n;
|
||||
const current = this.workers.length;
|
||||
|
||||
if (n > current) {
|
||||
@@ -274,6 +356,8 @@ export class WorkerPool {
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
this.emitStatusChanged();
|
||||
}
|
||||
|
||||
public async shutdown(): Promise<void> {
|
||||
@@ -300,6 +384,14 @@ export class WorkerPool {
|
||||
}
|
||||
}
|
||||
|
||||
if (this.writeWorker) {
|
||||
try {
|
||||
this.writeWorker.postMessage({ type: 'shutdown' });
|
||||
} catch {
|
||||
// Worker might already be exited
|
||||
}
|
||||
}
|
||||
|
||||
// Wait for workers to exit with timeout
|
||||
const timeout = 5000;
|
||||
const startTime = Date.now();
|
||||
@@ -329,9 +421,41 @@ export class WorkerPool {
|
||||
}
|
||||
}
|
||||
|
||||
if (this.writeWorker) {
|
||||
try {
|
||||
this.writeWorker.terminate();
|
||||
} catch {
|
||||
// Already terminated
|
||||
}
|
||||
}
|
||||
|
||||
this.workers = [];
|
||||
this.idleWorkers = [];
|
||||
this.embedWorker = null;
|
||||
this.writeWorker = null;
|
||||
this.emitStatusChanged();
|
||||
}
|
||||
|
||||
/**
 * Returns a point-in-time snapshot of pool occupancy: configured
 * concurrency, active/idle counts, and one entry per worker with the job
 * it is currently running (nulls when idle).
 */
public getStatus(): WorkerPoolStatus {
  return {
    concurrency: this.options.concurrency,
    active: this.runningJobs.size,
    idle: this.idleWorkers.length,
    workers: this.workers.map((worker, index) => {
      // A worker is "running" exactly when it has an entry in runningJobs.
      const runningJob = this.runningJobs.get(worker);
      return {
        index,
        state: runningJob ? 'running' : 'idle',
        jobId: runningJob?.jobId ?? null,
        repositoryId: runningJob?.repositoryId ?? null,
        versionId: runningJob?.versionId ?? null
      };
    })
  };
}
|
||||
|
||||
/**
 * Pushes the current pool snapshot to the optional onWorkerStatus callback.
 * Called after every occupancy change (dispatch, job completion, resize, shutdown).
 */
private emitStatusChanged(): void {
  this.options.onWorkerStatus?.(this.getStatus());
}
|
||||
|
||||
public get isFallbackMode(): boolean {
|
||||
|
||||
@@ -19,7 +19,61 @@ export type EmbedWorkerResponse =
|
||||
| { type: 'embed-done'; jobId: string }
|
||||
| { type: 'embed-failed'; jobId: string; error: string };
|
||||
|
||||
/** Messages the pool sends to the write worker: a write batch or a shutdown signal. */
export type WriteWorkerRequest = WriteRequest | { type: 'shutdown' };

/** Messages the write worker sends back: readiness, per-batch ack, or per-batch error. */
export type WriteWorkerResponse =
  | { type: 'ready' }
  | WriteAck
  | WriteError;

/** workerData payload shared by worker entry scripts. */
export interface WorkerInitData {
  dbPath: string;
  // Present only for the embed worker; parse/write workers ignore it.
  embeddingProfileId?: string;
}

// Write worker message types (Phase 6)

/**
 * Structured-clone-safe row for the documents table.
 * Field names are camelCase here; the write worker maps them to snake_case columns.
 */
export interface SerializedDocument {
  id: string;
  repositoryId: string;
  versionId: string | null;
  filePath: string;
  title: string | null;
  language: string | null;
  tokenCount: number;
  checksum: string;
  // NOTE(review): presumably a unix timestamp — confirm seconds vs milliseconds with the producer.
  indexedAt: number;
}

/** Structured-clone-safe row for the snippets table (see SerializedDocument). */
export interface SerializedSnippet {
  id: string;
  documentId: string;
  repositoryId: string;
  versionId: string | null;
  type: 'code' | 'info';
  title: string | null;
  content: string;
  language: string | null;
  breadcrumb: string | null;
  tokenCount: number;
  // NOTE(review): presumably a unix timestamp — confirm seconds vs milliseconds with the producer.
  createdAt: number;
}

/** One batch of rows to persist atomically in the write worker. */
export type WriteRequest = {
  type: 'write';
  jobId: string;
  documents: SerializedDocument[];
  snippets: SerializedSnippet[];
};

/** Acknowledgement that a write batch committed, with row counts. */
export type WriteAck = {
  type: 'write_ack';
  jobId: string;
  documentCount: number;
  snippetCount: number;
};

/** Reported when a write batch failed; the transaction was rolled back. */
export type WriteError = {
  type: 'write_error';
  jobId: string;
  error: string;
};
|
||||
|
||||
93
src/lib/server/pipeline/write-worker-entry.ts
Normal file
93
src/lib/server/pipeline/write-worker-entry.ts
Normal file
@@ -0,0 +1,93 @@
|
||||
/**
 * Write-worker entry point: runs in a worker thread and owns its own SQLite
 * connection. It receives batches of parsed documents/snippets from the pool
 * and persists each batch in a single transaction, keeping write I/O off the
 * main thread. Protocol: posts { type: 'ready' } once, then answers each
 * 'write' request with a 'write_ack' or 'write_error' message.
 */
import { workerData, parentPort } from 'node:worker_threads';
import Database from 'better-sqlite3';
import type {
  SerializedDocument,
  SerializedSnippet,
  WorkerInitData,
  WriteWorkerRequest,
  WriteWorkerResponse
} from './worker-types.js';

const { dbPath } = workerData as WorkerInitData;
const db = new Database(dbPath);
// WAL lets other connections keep reading while this worker writes.
db.pragma('journal_mode = WAL');
db.pragma('foreign_keys = ON');
// Wait up to 5s on lock contention instead of failing immediately.
db.pragma('busy_timeout = 5000');
// NORMAL is safe under WAL and avoids an fsync per transaction.
db.pragma('synchronous = NORMAL');
// Negative value = size in KiB, i.e. ~64 MiB page cache.
db.pragma('cache_size = -65536');
db.pragma('temp_store = MEMORY');
// 256 MiB memory-mapped I/O window.
db.pragma('mmap_size = 268435456');
db.pragma('wal_autocheckpoint = 1000');

const insertDocument = db.prepare(
  `INSERT OR REPLACE INTO documents
  (id, repository_id, version_id, file_path, title, language, token_count, checksum, indexed_at)
  VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)`
);

const insertSnippet = db.prepare(
  `INSERT OR REPLACE INTO snippets
  (id, document_id, repository_id, version_id, type, title, content, language, breadcrumb, token_count, created_at)
  VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)`
);

// All rows of a batch commit or roll back together.
// NOTE(review): OR REPLACE on documents deletes-then-inserts, which may
// cascade-delete existing snippets via FK before they are re-inserted below —
// confirm every batch always carries the document's full snippet set.
const writeBatch = db.transaction((documents: SerializedDocument[], snippets: SerializedSnippet[]) => {
  for (const document of documents) {
    insertDocument.run(
      document.id,
      document.repositoryId,
      document.versionId,
      document.filePath,
      document.title,
      document.language,
      document.tokenCount,
      document.checksum,
      document.indexedAt
    );
  }

  for (const snippet of snippets) {
    insertSnippet.run(
      snippet.id,
      snippet.documentId,
      snippet.repositoryId,
      snippet.versionId,
      snippet.type,
      snippet.title,
      snippet.content,
      snippet.language,
      snippet.breadcrumb,
      snippet.tokenCount,
      snippet.createdAt
    );
  }
});

// Signal readiness; the pool gates write dispatch on this message.
parentPort?.postMessage({ type: 'ready' } satisfies WriteWorkerResponse);

parentPort?.on('message', (msg: WriteWorkerRequest) => {
  if (msg.type === 'shutdown') {
    // Close cleanly so WAL checkpointing can finish, then exit the thread.
    db.close();
    process.exit(0);
  }

  if (msg.type !== 'write') {
    return;
  }

  try {
    writeBatch(msg.documents, msg.snippets);
    parentPort?.postMessage({
      type: 'write_ack',
      jobId: msg.jobId,
      documentCount: msg.documents.length,
      snippetCount: msg.snippets.length
    } satisfies WriteWorkerResponse);
  } catch (error) {
    // Transaction rolled back by better-sqlite3; surface the failure to the pool.
    parentPort?.postMessage({
      type: 'write_error',
      jobId: msg.jobId,
      error: error instanceof Error ? error.message : String(error)
    } satisfies WriteWorkerResponse);
  }
});
|
||||
@@ -15,6 +15,8 @@ import { HybridSearchService } from './hybrid.search.service.js';
|
||||
import { VectorSearch, cosineSimilarity } from './vector.search.js';
|
||||
import { reciprocalRankFusion } from './rrf.js';
|
||||
import type { EmbeddingProvider, EmbeddingVector } from '../embeddings/provider.js';
|
||||
import { loadSqliteVec } from '../db/sqlite-vec.js';
|
||||
import { SqliteVecStore } from './sqlite-vec.store.js';
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// In-memory DB factory
|
||||
@@ -23,6 +25,7 @@ import type { EmbeddingProvider, EmbeddingVector } from '../embeddings/provider.
|
||||
function createTestDb(): Database.Database {
|
||||
const client = new Database(':memory:');
|
||||
client.pragma('foreign_keys = ON');
|
||||
loadSqliteVec(client);
|
||||
|
||||
const migrationsFolder = join(import.meta.dirname, '../db/migrations');
|
||||
|
||||
@@ -30,7 +33,11 @@ function createTestDb(): Database.Database {
|
||||
const migrations = [
|
||||
'0000_large_master_chief.sql',
|
||||
'0001_quick_nighthawk.sql',
|
||||
'0002_silky_stellaris.sql'
|
||||
'0002_silky_stellaris.sql',
|
||||
'0003_multiversion_config.sql',
|
||||
'0004_complete_sentry.sql',
|
||||
'0005_fix_stage_defaults.sql',
|
||||
'0006_yielding_centennial.sql'
|
||||
];
|
||||
for (const migrationFile of migrations) {
|
||||
const migrationSql = readFileSync(join(migrationsFolder, migrationFile), 'utf-8');
|
||||
@@ -121,6 +128,7 @@ function seedEmbedding(
|
||||
VALUES (?, ?, ?, ?, ?, ?)`
|
||||
)
|
||||
.run(snippetId, profileId, model, values.length, Buffer.from(f32.buffer), NOW_S);
|
||||
new SqliteVecStore(client).upsertEmbedding(profileId, snippetId, f32);
|
||||
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
@@ -368,6 +376,42 @@ describe('VectorSearch', () => {
|
||||
const results = vs.vectorSearch(new Float32Array([-0.5, 0.5]), { repositoryId: repoId });
|
||||
expect(results[0].score).toBeCloseTo(1.0, 4);
|
||||
});
|
||||
|
||||
it('filters by profileId using per-profile vec tables', () => {
|
||||
client
|
||||
.prepare(
|
||||
`INSERT INTO embedding_profiles (id, provider_kind, title, enabled, is_default, model, dimensions, config, created_at, updated_at)
|
||||
VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?)`
|
||||
)
|
||||
.run('secondary-profile', 'local-transformers', 'Secondary', 1, 0, 'test-model', 2, '{}', NOW_S, NOW_S);
|
||||
|
||||
const defaultSnippet = seedSnippet(client, {
|
||||
repositoryId: repoId,
|
||||
documentId: docId,
|
||||
content: 'default profile snippet'
|
||||
});
|
||||
const secondarySnippet = seedSnippet(client, {
|
||||
repositoryId: repoId,
|
||||
documentId: docId,
|
||||
content: 'secondary profile snippet'
|
||||
});
|
||||
|
||||
seedEmbedding(client, defaultSnippet, [1, 0], 'local-default');
|
||||
seedEmbedding(client, secondarySnippet, [1, 0], 'secondary-profile');
|
||||
|
||||
const vs = new VectorSearch(client);
|
||||
const defaultResults = vs.vectorSearch(new Float32Array([1, 0]), {
|
||||
repositoryId: repoId,
|
||||
profileId: 'local-default'
|
||||
});
|
||||
const secondaryResults = vs.vectorSearch(new Float32Array([1, 0]), {
|
||||
repositoryId: repoId,
|
||||
profileId: 'secondary-profile'
|
||||
});
|
||||
|
||||
expect(defaultResults.map((result) => result.snippetId)).toEqual([defaultSnippet]);
|
||||
expect(secondaryResults.map((result) => result.snippetId)).toEqual([secondarySnippet]);
|
||||
});
|
||||
});
|
||||
|
||||
// ===========================================================================
|
||||
|
||||
@@ -148,7 +148,12 @@ export class HybridSearchService {
|
||||
|
||||
const topIds = vectorResults.slice(0, limit).map((r) => r.snippetId);
|
||||
return {
|
||||
results: this.fetchSnippetsByIds(topIds, options.repositoryId, options.type),
|
||||
results: this.fetchSnippetsByIds(
|
||||
topIds,
|
||||
options.repositoryId,
|
||||
options.versionId,
|
||||
options.type
|
||||
),
|
||||
searchModeUsed: 'semantic'
|
||||
};
|
||||
}
|
||||
@@ -194,7 +199,12 @@ export class HybridSearchService {
|
||||
|
||||
const topIds = vectorResults.slice(0, limit).map((r) => r.snippetId);
|
||||
return {
|
||||
results: this.fetchSnippetsByIds(topIds, options.repositoryId, options.type),
|
||||
results: this.fetchSnippetsByIds(
|
||||
topIds,
|
||||
options.repositoryId,
|
||||
options.versionId,
|
||||
options.type
|
||||
),
|
||||
searchModeUsed: 'keyword_fallback'
|
||||
};
|
||||
}
|
||||
@@ -220,7 +230,12 @@ export class HybridSearchService {
|
||||
if (alpha === 1) {
|
||||
const topIds = vectorResults.slice(0, limit).map((r) => r.snippetId);
|
||||
return {
|
||||
results: this.fetchSnippetsByIds(topIds, options.repositoryId, options.type),
|
||||
results: this.fetchSnippetsByIds(
|
||||
topIds,
|
||||
options.repositoryId,
|
||||
options.versionId,
|
||||
options.type
|
||||
),
|
||||
searchModeUsed: 'semantic'
|
||||
};
|
||||
}
|
||||
@@ -234,7 +249,12 @@ export class HybridSearchService {
|
||||
|
||||
const topIds = fused.slice(0, limit).map((r) => r.id);
|
||||
return {
|
||||
results: this.fetchSnippetsByIds(topIds, options.repositoryId, options.type),
|
||||
results: this.fetchSnippetsByIds(
|
||||
topIds,
|
||||
options.repositoryId,
|
||||
options.versionId,
|
||||
options.type
|
||||
),
|
||||
searchModeUsed: 'hybrid'
|
||||
};
|
||||
}
|
||||
@@ -253,13 +273,19 @@ export class HybridSearchService {
|
||||
private fetchSnippetsByIds(
|
||||
ids: string[],
|
||||
repositoryId: string,
|
||||
versionId?: string,
|
||||
type?: 'code' | 'info'
|
||||
): SnippetSearchResult[] {
|
||||
if (ids.length === 0) return [];
|
||||
|
||||
const placeholders = ids.map(() => '?').join(', ');
|
||||
const params: unknown[] = [...ids, repositoryId];
|
||||
let versionClause = '';
|
||||
let typeClause = '';
|
||||
if (versionId !== undefined) {
|
||||
versionClause = ' AND s.version_id = ?';
|
||||
params.push(versionId);
|
||||
}
|
||||
if (type !== undefined) {
|
||||
typeClause = ' AND s.type = ?';
|
||||
params.push(type);
|
||||
@@ -276,7 +302,7 @@ export class HybridSearchService {
|
||||
FROM snippets s
|
||||
JOIN repositories r ON r.id = s.repository_id
|
||||
WHERE s.id IN (${placeholders})
|
||||
AND s.repository_id = ?${typeClause}`
|
||||
AND s.repository_id = ?${versionClause}${typeClause}`
|
||||
)
|
||||
.all(...params) as RawSnippetById[];
|
||||
|
||||
|
||||
394
src/lib/server/search/sqlite-vec.store.ts
Normal file
394
src/lib/server/search/sqlite-vec.store.ts
Normal file
@@ -0,0 +1,394 @@
|
||||
import type Database from 'better-sqlite3';
|
||||
import {
|
||||
loadSqliteVec,
|
||||
quoteSqliteIdentifier,
|
||||
sqliteVecRowidTableName,
|
||||
sqliteVecTableName
|
||||
} from '$lib/server/db/sqlite-vec.js';
|
||||
|
||||
/** Filters and limits for a KNN query against one profile's vec0 table. */
export interface SqliteVecQueryOptions {
  repositoryId: string;
  // When provided, results are restricted to this snippet version.
  versionId?: string;
  // Embedding profile whose vec table is queried; defaults to 'local-default'.
  profileId?: string;
  // Maximum number of results; defaults to 50.
  limit?: number;
}

/** One KNN hit: raw sqlite-vec distance plus the derived 1/(1+d) score. */
export interface SqliteVecQueryResult {
  snippetId: string;
  score: number;
  distance: number;
}

/** Row shape of the dimensions lookup on embedding_profiles. */
interface ProfileDimensionsRow {
  dimensions: number;
}

/** Aggregate over snippet_embeddings used to detect dimension drift per profile. */
interface StoredDimensionsRow {
  count: number;
  min_dimensions: number | null;
  max_dimensions: number | null;
}

/** Row shape of the snippet_id → vec rowid mapping table. */
interface SnippetRowidRow {
  rowid: number;
}

/** Raw row returned by the KNN MATCH query. */
interface RawKnnRow {
  snippet_id: string;
  distance: number;
}

/** Canonical embedding row (raw float32 bytes) from snippet_embeddings. */
interface CanonicalEmbeddingRow {
  snippet_id: string;
  embedding: Buffer;
}

/** (profile, snippet) pair identifying one stored vector to delete. */
interface StoredEmbeddingRef {
  profile_id: string;
  snippet_id: string;
}

/** Resolved per-profile table names (raw and SQL-quoted) plus vector dimensions. */
interface ProfileStoreTables {
  vectorTableName: string;
  rowidTableName: string;
  quotedVectorTableName: string;
  quotedRowidTableName: string;
  dimensions: number;
}
|
||||
|
||||
function toEmbeddingBuffer(values: Float32Array): Buffer {
|
||||
return Buffer.from(values.buffer, values.byteOffset, values.byteLength);
|
||||
}
|
||||
|
||||
function distanceToScore(distance: number): number {
|
||||
return 1 / (1 + distance);
|
||||
}
|
||||
|
||||
export class SqliteVecStore {
|
||||
constructor(private readonly db: Database.Database) {}
|
||||
|
||||
ensureProfileStore(profileId: string, preferredDimensions?: number): number {
|
||||
const tables = this.getProfileStoreTables(profileId, preferredDimensions);
|
||||
|
||||
this.db.exec(`
|
||||
CREATE TABLE IF NOT EXISTS ${tables.quotedRowidTableName} (
|
||||
rowid INTEGER PRIMARY KEY,
|
||||
snippet_id TEXT NOT NULL UNIQUE REFERENCES snippets(id) ON DELETE CASCADE
|
||||
);
|
||||
`);
|
||||
this.db.exec(`
|
||||
CREATE VIRTUAL TABLE IF NOT EXISTS ${tables.quotedVectorTableName}
|
||||
USING vec0(embedding float[${tables.dimensions}]);
|
||||
`);
|
||||
|
||||
return tables.dimensions;
|
||||
}
|
||||
|
||||
upsertEmbedding(profileId: string, snippetId: string, embedding: Float32Array): void {
|
||||
const tables = this.getProfileStoreTables(profileId, embedding.length);
|
||||
|
||||
this.ensureProfileStore(profileId, tables.dimensions);
|
||||
|
||||
const existingRow = this.db
|
||||
.prepare<[string], SnippetRowidRow>(
|
||||
`SELECT rowid FROM ${tables.quotedRowidTableName} WHERE snippet_id = ?`
|
||||
)
|
||||
.get(snippetId);
|
||||
|
||||
const embeddingBuffer = toEmbeddingBuffer(embedding);
|
||||
if (existingRow) {
|
||||
this.db
|
||||
.prepare<[Buffer, number]>(
|
||||
`UPDATE ${tables.quotedVectorTableName} SET embedding = ? WHERE rowid = ?`
|
||||
)
|
||||
.run(embeddingBuffer, existingRow.rowid);
|
||||
return;
|
||||
}
|
||||
|
||||
const insertResult = this.db
|
||||
.prepare<[Buffer]>(`INSERT INTO ${tables.quotedVectorTableName} (embedding) VALUES (?)`)
|
||||
.run(embeddingBuffer);
|
||||
this.db
|
||||
.prepare<[number, string]>(
|
||||
`INSERT INTO ${tables.quotedRowidTableName} (rowid, snippet_id) VALUES (?, ?)`
|
||||
)
|
||||
.run(Number(insertResult.lastInsertRowid), snippetId);
|
||||
}
|
||||
|
||||
upsertEmbeddingBuffer(
|
||||
profileId: string,
|
||||
snippetId: string,
|
||||
embedding: Buffer,
|
||||
dimensions?: number
|
||||
): void {
|
||||
const vector = new Float32Array(
|
||||
embedding.buffer,
|
||||
embedding.byteOffset,
|
||||
dimensions ?? Math.floor(embedding.byteLength / Float32Array.BYTES_PER_ELEMENT)
|
||||
);
|
||||
this.upsertEmbedding(profileId, snippetId, vector);
|
||||
}
|
||||
|
||||
deleteEmbedding(profileId: string, snippetId: string): void {
|
||||
const tables = this.getProfileStoreTables(profileId);
|
||||
this.ensureProfileStore(profileId);
|
||||
|
||||
const existingRow = this.db
|
||||
.prepare<[string], SnippetRowidRow>(
|
||||
`SELECT rowid FROM ${tables.quotedRowidTableName} WHERE snippet_id = ?`
|
||||
)
|
||||
.get(snippetId);
|
||||
|
||||
if (!existingRow) {
|
||||
return;
|
||||
}
|
||||
|
||||
this.db
|
||||
.prepare<[number]>(`DELETE FROM ${tables.quotedVectorTableName} WHERE rowid = ?`)
|
||||
.run(existingRow.rowid);
|
||||
this.db
|
||||
.prepare<[string]>(`DELETE FROM ${tables.quotedRowidTableName} WHERE snippet_id = ?`)
|
||||
.run(snippetId);
|
||||
}
|
||||
|
||||
deleteEmbeddingsForDocumentIds(documentIds: string[]): void {
|
||||
if (documentIds.length === 0) {
|
||||
return;
|
||||
}
|
||||
|
||||
const placeholders = documentIds.map(() => '?').join(', ');
|
||||
const rows = this.db
|
||||
.prepare<unknown[], StoredEmbeddingRef>(
|
||||
`SELECT DISTINCT se.profile_id, se.snippet_id
|
||||
FROM snippet_embeddings se
|
||||
INNER JOIN snippets s ON s.id = se.snippet_id
|
||||
WHERE s.document_id IN (${placeholders})`
|
||||
)
|
||||
.all(...documentIds);
|
||||
|
||||
this.deleteEmbeddingRefs(rows);
|
||||
}
|
||||
|
||||
deleteEmbeddingsForRepository(repositoryId: string): void {
|
||||
const rows = this.db
|
||||
.prepare<[string], StoredEmbeddingRef>(
|
||||
`SELECT DISTINCT se.profile_id, se.snippet_id
|
||||
FROM snippet_embeddings se
|
||||
INNER JOIN snippets s ON s.id = se.snippet_id
|
||||
WHERE s.repository_id = ?`
|
||||
)
|
||||
.all(repositoryId);
|
||||
|
||||
this.deleteEmbeddingRefs(rows);
|
||||
}
|
||||
|
||||
deleteEmbeddingsForVersion(repositoryId: string, versionId: string): void {
|
||||
const rows = this.db
|
||||
.prepare<[string, string], StoredEmbeddingRef>(
|
||||
`SELECT DISTINCT se.profile_id, se.snippet_id
|
||||
FROM snippet_embeddings se
|
||||
INNER JOIN snippets s ON s.id = se.snippet_id
|
||||
WHERE s.repository_id = ? AND s.version_id = ?`
|
||||
)
|
||||
.all(repositoryId, versionId);
|
||||
|
||||
this.deleteEmbeddingRefs(rows);
|
||||
}
|
||||
|
||||
queryNearestNeighbors(
|
||||
queryEmbedding: Float32Array,
|
||||
options: SqliteVecQueryOptions
|
||||
): SqliteVecQueryResult[] {
|
||||
const { repositoryId, versionId, profileId = 'local-default', limit = 50 } = options;
|
||||
if (limit <= 0) {
|
||||
return [];
|
||||
}
|
||||
|
||||
const tables = this.getProfileStoreTables(profileId, queryEmbedding.length);
|
||||
|
||||
this.ensureProfileStore(profileId, tables.dimensions);
|
||||
const totalRows = this.synchronizeProfileStore(profileId, tables);
|
||||
if (totalRows === 0) {
|
||||
return [];
|
||||
}
|
||||
|
||||
let sql = `
|
||||
SELECT rowids.snippet_id, vec.distance
|
||||
FROM ${tables.quotedVectorTableName} vec
|
||||
JOIN ${tables.quotedRowidTableName} rowids ON rowids.rowid = vec.rowid
|
||||
JOIN snippets s ON s.id = rowids.snippet_id
|
||||
WHERE vec.embedding MATCH ?
|
||||
AND vec.k = ?
|
||||
AND s.repository_id = ?
|
||||
`;
|
||||
const params: unknown[] = [toEmbeddingBuffer(queryEmbedding), totalRows, repositoryId];
|
||||
|
||||
if (versionId !== undefined) {
|
||||
sql += ' AND s.version_id = ?';
|
||||
params.push(versionId);
|
||||
}
|
||||
|
||||
sql += ' ORDER BY vec.distance ASC LIMIT ?';
|
||||
params.push(limit);
|
||||
|
||||
const rows = this.db.prepare<unknown[], RawKnnRow>(sql).all(...params);
|
||||
return rows.map((row) => ({
|
||||
snippetId: row.snippet_id,
|
||||
score: distanceToScore(row.distance),
|
||||
distance: row.distance
|
||||
}));
|
||||
}
|
||||
|
||||
private synchronizeProfileStore(profileId: string, tables: ProfileStoreTables): number {
|
||||
this.db
|
||||
.prepare<[string, number]>(
|
||||
`DELETE FROM ${tables.quotedRowidTableName}
|
||||
WHERE rowid IN (
|
||||
SELECT rowids.rowid
|
||||
FROM ${tables.quotedRowidTableName} rowids
|
||||
LEFT JOIN snippet_embeddings se
|
||||
ON se.snippet_id = rowids.snippet_id
|
||||
AND se.profile_id = ?
|
||||
AND se.dimensions = ?
|
||||
LEFT JOIN ${tables.quotedVectorTableName} vec ON vec.rowid = rowids.rowid
|
||||
WHERE se.snippet_id IS NULL OR vec.rowid IS NULL
|
||||
)`
|
||||
)
|
||||
.run(profileId, tables.dimensions);
|
||||
|
||||
this.db
|
||||
.prepare(
|
||||
`DELETE FROM ${tables.quotedVectorTableName}
|
||||
WHERE rowid NOT IN (SELECT rowid FROM ${tables.quotedRowidTableName})`
|
||||
)
|
||||
.run();
|
||||
|
||||
const missingRows = this.db
|
||||
.prepare<[string, number], CanonicalEmbeddingRow>(
|
||||
`SELECT se.snippet_id, se.embedding
|
||||
FROM snippet_embeddings se
|
||||
LEFT JOIN ${tables.quotedRowidTableName} rowids ON rowids.snippet_id = se.snippet_id
|
||||
WHERE se.profile_id = ?
|
||||
AND se.dimensions = ?
|
||||
AND rowids.snippet_id IS NULL`
|
||||
)
|
||||
.all(profileId, tables.dimensions);
|
||||
|
||||
if (missingRows.length > 0) {
|
||||
const backfill = this.db.transaction((rows: CanonicalEmbeddingRow[]) => {
|
||||
for (const row of rows) {
|
||||
this.upsertEmbedding(
|
||||
profileId,
|
||||
row.snippet_id,
|
||||
new Float32Array(
|
||||
row.embedding.buffer,
|
||||
row.embedding.byteOffset,
|
||||
tables.dimensions
|
||||
)
|
||||
);
|
||||
}
|
||||
});
|
||||
backfill(missingRows);
|
||||
}
|
||||
|
||||
return (
|
||||
this.db
|
||||
.prepare<[], { count: number }>(
|
||||
`SELECT COUNT(*) AS count
|
||||
FROM ${tables.quotedVectorTableName} vec
|
||||
JOIN ${tables.quotedRowidTableName} rowids ON rowids.rowid = vec.rowid`
|
||||
)
|
||||
.get()?.count ?? 0
|
||||
);
|
||||
}
|
||||
|
||||
private deleteEmbeddingRefs(rows: StoredEmbeddingRef[]): void {
|
||||
if (rows.length === 0) {
|
||||
return;
|
||||
}
|
||||
|
||||
const removeRows = this.db.transaction((refs: StoredEmbeddingRef[]) => {
|
||||
for (const ref of refs) {
|
||||
this.deleteEmbedding(ref.profile_id, ref.snippet_id);
|
||||
}
|
||||
});
|
||||
|
||||
removeRows(rows);
|
||||
}
|
||||
|
||||
private getProfileStoreTables(
|
||||
profileId: string,
|
||||
preferredDimensions?: number
|
||||
): ProfileStoreTables {
|
||||
loadSqliteVec(this.db);
|
||||
|
||||
const dimensionsRow = this.db
|
||||
.prepare<[string], ProfileDimensionsRow>(
|
||||
'SELECT dimensions FROM embedding_profiles WHERE id = ?'
|
||||
)
|
||||
.get(profileId);
|
||||
if (!dimensionsRow) {
|
||||
throw new Error(`Embedding profile not found: ${profileId}`);
|
||||
}
|
||||
|
||||
const storedDimensions = this.db
|
||||
.prepare<[string], StoredDimensionsRow>(
|
||||
`SELECT
|
||||
COUNT(*) AS count,
|
||||
MIN(dimensions) AS min_dimensions,
|
||||
MAX(dimensions) AS max_dimensions
|
||||
FROM snippet_embeddings
|
||||
WHERE profile_id = ?`
|
||||
)
|
||||
.get(profileId);
|
||||
|
||||
const effectiveDimensions = this.resolveDimensions(
|
||||
profileId,
|
||||
dimensionsRow.dimensions,
|
||||
storedDimensions,
|
||||
preferredDimensions
|
||||
);
|
||||
|
||||
const vectorTableName = sqliteVecTableName(profileId);
|
||||
const rowidTableName = sqliteVecRowidTableName(profileId);
|
||||
|
||||
return {
|
||||
vectorTableName,
|
||||
rowidTableName,
|
||||
quotedVectorTableName: quoteSqliteIdentifier(vectorTableName),
|
||||
quotedRowidTableName: quoteSqliteIdentifier(rowidTableName),
|
||||
dimensions: effectiveDimensions
|
||||
};
|
||||
}
|
||||
|
||||
/**
 * Decide which embedding dimension count applies for a profile.
 *
 * When embeddings already exist for the profile, their stored dimensions are
 * canonical: they must all agree, and any caller-preferred value must match
 * them. Otherwise the caller's preference wins, falling back to the profile's
 * configured dimensions.
 *
 * @throws Error when stored dimensions are inconsistent or missing, or when
 *         the preferred value conflicts with the canonical stored value.
 */
private resolveDimensions(
  profileId: string,
  profileDimensions: number,
  storedDimensions: StoredDimensionsRow | undefined,
  preferredDimensions?: number
): number {
  // No stored embeddings yet: the caller's preference (if any) decides.
  if (!storedDimensions || !(storedDimensions.count > 0)) {
    return preferredDimensions ?? profileDimensions;
  }

  if (storedDimensions.min_dimensions !== storedDimensions.max_dimensions) {
    throw new Error(`Stored embedding dimensions are inconsistent for profile ${profileId}`);
  }

  const canonical = storedDimensions.min_dimensions;
  if (canonical === null) {
    throw new Error(`Stored embedding dimensions are missing for profile ${profileId}`);
  }

  if (preferredDimensions !== undefined && preferredDimensions !== canonical) {
    throw new Error(
      `Embedding dimension mismatch for profile ${profileId}: expected ${canonical}, received ${preferredDimensions}`
    );
  }

  return canonical;
}
|
||||
}
|
||||
@@ -1,16 +1,12 @@
|
||||
/**
|
||||
* Vector similarity search over stored snippet embeddings.
|
||||
*
|
||||
* SQLite does not natively support vector operations, so cosine similarity is
|
||||
* computed in JavaScript after loading candidate embeddings from the
|
||||
* snippet_embeddings table.
|
||||
*
|
||||
* Performance note: For repositories with > 50k snippets, pre-filtering by
|
||||
* FTS5 candidates before computing cosine similarity is recommended. For v1,
|
||||
* in-memory computation is acceptable.
|
||||
* Uses sqlite-vec vector_top_k() for ANN search instead of in-memory cosine
|
||||
* similarity computation over all embeddings.
|
||||
*/
|
||||
|
||||
import type Database from 'better-sqlite3';
|
||||
import { SqliteVecStore } from './sqlite-vec.store.js';
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Types
|
||||
@@ -28,12 +24,6 @@ export interface VectorSearchOptions {
|
||||
limit?: number;
|
||||
}
|
||||
|
||||
/** Raw DB row from snippet_embeddings joined with snippets. */
|
||||
interface RawEmbeddingRow {
|
||||
snippet_id: string;
|
||||
embedding: Buffer;
|
||||
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Math helpers
|
||||
// ---------------------------------------------------------------------------
|
||||
@@ -69,46 +59,26 @@ export function cosineSimilarity(a: Float32Array, b: Float32Array): number {
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
export class VectorSearch {
|
||||
constructor(private readonly db: Database.Database) {}
|
||||
private readonly sqliteVecStore: SqliteVecStore;
|
||||
|
||||
constructor(private readonly db: Database.Database) {
|
||||
this.sqliteVecStore = new SqliteVecStore(db);
|
||||
}
|
||||
|
||||
/**
|
||||
* Search stored embeddings by cosine similarity to the query embedding.
|
||||
*
|
||||
* Uses in-memory cosine similarity computation. The vec_embedding column
|
||||
* stores raw Float32 bytes for forward compatibility with vector-capable
|
||||
* libSQL builds; scoring is performed in JS using the same bytes.
|
||||
*
|
||||
* @param queryEmbedding - The embedded representation of the search query.
|
||||
* @param options - Search options including repositoryId, optional versionId, profileId, and limit.
|
||||
* @returns Results sorted by descending cosine similarity score.
|
||||
*/
|
||||
vectorSearch(queryEmbedding: Float32Array, options: VectorSearchOptions): VectorSearchResult[] {
|
||||
const { repositoryId, versionId, profileId = 'local-default', limit = 50 } = options;
|
||||
|
||||
let sql = `
|
||||
SELECT se.snippet_id, se.embedding
|
||||
FROM snippet_embeddings se
|
||||
JOIN snippets s ON s.id = se.snippet_id
|
||||
WHERE s.repository_id = ?
|
||||
AND se.profile_id = ?
|
||||
`;
|
||||
const params: unknown[] = [repositoryId, profileId];
|
||||
|
||||
if (versionId) {
|
||||
sql += ' AND s.version_id = ?';
|
||||
params.push(versionId);
|
||||
}
|
||||
|
||||
const rows = this.db.prepare<unknown[], RawEmbeddingRow>(sql).all(...params);
|
||||
|
||||
const scored: VectorSearchResult[] = rows.map((row) => {
|
||||
const embedding = new Float32Array(
|
||||
row.embedding.buffer,
|
||||
row.embedding.byteOffset,
|
||||
row.embedding.byteLength / 4
|
||||
);
|
||||
return {
|
||||
snippetId: row.snippet_id,
|
||||
score: cosineSimilarity(queryEmbedding, embedding)
|
||||
};
|
||||
});
|
||||
|
||||
return scored.sort((a, b) => b.score - a.score).slice(0, limit);
|
||||
return this.sqliteVecStore
|
||||
.queryNearestNeighbors(queryEmbedding, options)
|
||||
.map((result) => ({ snippetId: result.snippetId, score: result.score }));
|
||||
}
|
||||
}
|
||||
|
||||
@@ -11,6 +11,8 @@ import Database from 'better-sqlite3';
|
||||
import { readFileSync } from 'node:fs';
|
||||
import { join } from 'node:path';
|
||||
import { RepositoryService } from './repository.service';
|
||||
import { loadSqliteVec, sqliteVecRowidTableName, sqliteVecTableName } from '$lib/server/db/sqlite-vec.js';
|
||||
import { SqliteVecStore } from '$lib/server/search/sqlite-vec.store.js';
|
||||
import {
|
||||
AlreadyExistsError,
|
||||
InvalidInputError,
|
||||
@@ -25,6 +27,7 @@ import {
|
||||
function createTestDb(): Database.Database {
|
||||
const client = new Database(':memory:');
|
||||
client.pragma('foreign_keys = ON');
|
||||
loadSqliteVec(client);
|
||||
|
||||
const migrationsFolder = join(import.meta.dirname, '../db/migrations');
|
||||
|
||||
@@ -33,7 +36,9 @@ function createTestDb(): Database.Database {
|
||||
'0001_quick_nighthawk.sql',
|
||||
'0002_silky_stellaris.sql',
|
||||
'0003_multiversion_config.sql',
|
||||
'0004_complete_sentry.sql'
|
||||
'0004_complete_sentry.sql',
|
||||
'0005_fix_stage_defaults.sql',
|
||||
'0006_yielding_centennial.sql'
|
||||
]) {
|
||||
const statements = readFileSync(join(migrationsFolder, migration), 'utf-8')
|
||||
.split('--> statement-breakpoint')
|
||||
@@ -331,6 +336,41 @@ describe('RepositoryService.remove()', () => {
|
||||
it('throws NotFoundError when the repository does not exist', () => {
|
||||
expect(() => service.remove('/not/found')).toThrow(NotFoundError);
|
||||
});
|
||||
|
||||
// Regression: the vec virtual tables have no FK relationship to snippets, so
// repository removal must explicitly delete derived vec rows rather than
// relying on the ON DELETE cascade that clears documents/snippets.
it('removes derived vec rows before the repository cascade deletes snippets', () => {
  const docId = crypto.randomUUID();
  const snippetId = crypto.randomUUID();
  const embedding = Float32Array.from([1, 0, 0]);
  // Reach into the service's private db handle to seed fixture rows directly.
  const vecStore = new SqliteVecStore((service as unknown as { db: Database.Database }).db);
  const db = (service as unknown as { db: Database.Database }).db;
  const now = Math.floor(Date.now() / 1000);

  // Seed a document -> snippet -> embedding chain plus the mirrored vec row.
  db.prepare(
    `INSERT INTO documents (id, repository_id, version_id, file_path, checksum, indexed_at)
     VALUES (?, '/facebook/react', NULL, 'README.md', 'repo-doc', ?)`
  ).run(docId, now);
  db.prepare(
    `INSERT INTO snippets (id, document_id, repository_id, version_id, type, content, created_at)
     VALUES (?, ?, '/facebook/react', NULL, 'info', 'repo snippet', ?)`
  ).run(snippetId, docId, now);
  db.prepare(
    `INSERT INTO snippet_embeddings (snippet_id, profile_id, model, dimensions, embedding, created_at)
     VALUES (?, 'local-default', 'test-model', 3, ?, ?)`
  ).run(snippetId, Buffer.from(embedding.buffer), now);
  vecStore.upsertEmbedding('local-default', snippetId, embedding);

  service.remove('/facebook/react');

  // Both the vector table and its rowid mapping table must be empty afterwards.
  const vecTable = sqliteVecTableName('local-default');
  const rowidTable = sqliteVecRowidTableName('local-default');
  const vecCount = db.prepare(`SELECT COUNT(*) as n FROM "${vecTable}"`).get() as { n: number };
  const rowidCount = db.prepare(`SELECT COUNT(*) as n FROM "${rowidTable}"`).get() as {
    n: number;
  };

  expect(vecCount.n).toBe(0);
  expect(rowidCount.n).toBe(0);
});
|
||||
});
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
@@ -8,6 +8,7 @@ import { RepositoryMapper } from '$lib/server/mappers/repository.mapper.js';
|
||||
import { IndexingJobMapper } from '$lib/server/mappers/indexing-job.mapper.js';
|
||||
import { Repository, RepositoryEntity } from '$lib/server/models/repository.js';
|
||||
import { IndexingJob, IndexingJobEntity } from '$lib/server/models/indexing-job.js';
|
||||
import { SqliteVecStore } from '$lib/server/search/sqlite-vec.store.js';
|
||||
import { resolveGitHubId, resolveLocalId } from '$lib/server/utils/id-resolver';
|
||||
import {
|
||||
AlreadyExistsError,
|
||||
@@ -230,7 +231,11 @@ export class RepositoryService {
|
||||
const existing = this.get(id);
|
||||
if (!existing) throw new NotFoundError(`Repository ${id} not found`);
|
||||
|
||||
this.db.prepare(`DELETE FROM repositories WHERE id = ?`).run(id);
|
||||
const sqliteVecStore = new SqliteVecStore(this.db);
|
||||
this.db.transaction(() => {
|
||||
sqliteVecStore.deleteEmbeddingsForRepository(id);
|
||||
this.db.prepare(`DELETE FROM repositories WHERE id = ?`).run(id);
|
||||
})();
|
||||
}
|
||||
|
||||
/**
|
||||
|
||||
@@ -10,6 +10,8 @@ import { describe, it, expect } from 'vitest';
|
||||
import Database from 'better-sqlite3';
|
||||
import { readFileSync } from 'node:fs';
|
||||
import { join } from 'node:path';
|
||||
import { loadSqliteVec, sqliteVecRowidTableName, sqliteVecTableName } from '$lib/server/db/sqlite-vec.js';
|
||||
import { SqliteVecStore } from '$lib/server/search/sqlite-vec.store.js';
|
||||
import { VersionService } from './version.service';
|
||||
import { RepositoryService } from './repository.service';
|
||||
import { AlreadyExistsError, NotFoundError } from '$lib/server/utils/validation';
|
||||
@@ -21,31 +23,27 @@ import { AlreadyExistsError, NotFoundError } from '$lib/server/utils/validation'
|
||||
function createTestDb(): Database.Database {
|
||||
const client = new Database(':memory:');
|
||||
client.pragma('foreign_keys = ON');
|
||||
loadSqliteVec(client);
|
||||
|
||||
const migrationsFolder = join(import.meta.dirname, '../db/migrations');
|
||||
|
||||
// Apply all migration files in order
|
||||
const migration0 = readFileSync(join(migrationsFolder, '0000_large_master_chief.sql'), 'utf-8');
|
||||
const migration1 = readFileSync(join(migrationsFolder, '0001_quick_nighthawk.sql'), 'utf-8');
|
||||
for (const migration of [
|
||||
'0000_large_master_chief.sql',
|
||||
'0001_quick_nighthawk.sql',
|
||||
'0002_silky_stellaris.sql',
|
||||
'0003_multiversion_config.sql',
|
||||
'0004_complete_sentry.sql',
|
||||
'0005_fix_stage_defaults.sql',
|
||||
'0006_yielding_centennial.sql'
|
||||
]) {
|
||||
const statements = readFileSync(join(migrationsFolder, migration), 'utf-8')
|
||||
.split('--> statement-breakpoint')
|
||||
.map((statement) => statement.trim())
|
||||
.filter(Boolean);
|
||||
|
||||
// Apply first migration
|
||||
const statements0 = migration0
|
||||
.split('--> statement-breakpoint')
|
||||
.map((s) => s.trim())
|
||||
.filter(Boolean);
|
||||
|
||||
for (const stmt of statements0) {
|
||||
client.exec(stmt);
|
||||
}
|
||||
|
||||
// Apply second migration
|
||||
const statements1 = migration1
|
||||
.split('--> statement-breakpoint')
|
||||
.map((s) => s.trim())
|
||||
.filter(Boolean);
|
||||
|
||||
for (const stmt of statements1) {
|
||||
client.exec(stmt);
|
||||
for (const statement of statements) {
|
||||
client.exec(statement);
|
||||
}
|
||||
}
|
||||
|
||||
return client;
|
||||
@@ -198,6 +196,44 @@ describe('VersionService.remove()', () => {
|
||||
const doc = client.prepare(`SELECT id FROM documents WHERE id = ?`).get(docId);
|
||||
expect(doc).toBeUndefined();
|
||||
});
|
||||
|
||||
// Regression: removing a single version must clear that version's derived
// vec rows, mirroring the repository-level vec cleanup.
it('removes derived vec rows before deleting the version', () => {
  const { client, versionService } = setup();
  const version = versionService.add('/facebook/react', 'v18.3.0');
  const docId = crypto.randomUUID();
  const snippetId = crypto.randomUUID();
  const embedding = Float32Array.from([0.5, 0.25, 0.125]);
  const now = Math.floor(Date.now() / 1000);
  const vecStore = new SqliteVecStore(client);

  // Seed a document -> snippet -> embedding chain tied to the new version.
  client.prepare(
    `INSERT INTO documents (id, repository_id, version_id, file_path, checksum, indexed_at)
     VALUES (?, '/facebook/react', ?, 'README.md', 'version-doc', ?)`
  ).run(docId, version.id, now);
  client.prepare(
    `INSERT INTO snippets (id, document_id, repository_id, version_id, type, content, created_at)
     VALUES (?, ?, '/facebook/react', ?, 'info', 'version snippet', ?)`
  ).run(snippetId, docId, version.id, now);
  client.prepare(
    `INSERT INTO snippet_embeddings (snippet_id, profile_id, model, dimensions, embedding, created_at)
     VALUES (?, 'local-default', 'test-model', 3, ?, ?)`
  ).run(snippetId, Buffer.from(embedding.buffer), now);
  vecStore.upsertEmbedding('local-default', snippetId, embedding);

  versionService.remove('/facebook/react', 'v18.3.0');

  // Both vec tables should be empty once the version is gone.
  const vecTable = sqliteVecTableName('local-default');
  const rowidTable = sqliteVecRowidTableName('local-default');
  const vecCount = client.prepare(`SELECT COUNT(*) as n FROM "${vecTable}"`).get() as {
    n: number;
  };
  const rowidCount = client.prepare(`SELECT COUNT(*) as n FROM "${rowidTable}"`).get() as {
    n: number;
  };

  expect(vecCount.n).toBe(0);
  expect(rowidCount.n).toBe(0);
});
|
||||
});
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
@@ -11,6 +11,7 @@ import {
|
||||
RepositoryVersion,
|
||||
RepositoryVersionEntity
|
||||
} from '$lib/server/models/repository-version.js';
|
||||
import { SqliteVecStore } from '$lib/server/search/sqlite-vec.store.js';
|
||||
import { AlreadyExistsError, NotFoundError } from '$lib/server/utils/validation';
|
||||
import { resolveTagToCommit, discoverVersionTags } from '$lib/server/utils/git.js';
|
||||
|
||||
@@ -99,9 +100,13 @@ export class VersionService {
|
||||
throw new NotFoundError(`Version ${tag} not found for repository ${repositoryId}`);
|
||||
}
|
||||
|
||||
this.db
|
||||
.prepare(`DELETE FROM repository_versions WHERE repository_id = ? AND tag = ?`)
|
||||
.run(repositoryId, tag);
|
||||
const sqliteVecStore = new SqliteVecStore(this.db);
|
||||
this.db.transaction(() => {
|
||||
sqliteVecStore.deleteEmbeddingsForVersion(repositoryId, version.id);
|
||||
this.db
|
||||
.prepare(`DELETE FROM repository_versions WHERE repository_id = ? AND tag = ?`)
|
||||
.run(repositoryId, tag);
|
||||
})();
|
||||
}
|
||||
|
||||
/**
|
||||
|
||||
@@ -1,5 +1,10 @@
|
||||
<script lang="ts">
|
||||
import { onMount } from 'svelte';
|
||||
import { SvelteURLSearchParams } from 'svelte/reactivity';
|
||||
import JobSkeleton from '$lib/components/admin/JobSkeleton.svelte';
|
||||
import JobStatusBadge from '$lib/components/admin/JobStatusBadge.svelte';
|
||||
import Toast from '$lib/components/admin/Toast.svelte';
|
||||
import WorkerStatusPanel from '$lib/components/admin/WorkerStatusPanel.svelte';
|
||||
import type { IndexingJobDto } from '$lib/server/models/indexing-job.js';
|
||||
|
||||
interface JobResponse {
|
||||
@@ -7,174 +12,16 @@
|
||||
total: number;
|
||||
}
|
||||
|
||||
let jobs = $state<IndexingJobDto[]>([]);
|
||||
let loading = $state(true);
|
||||
let error = $state<string | null>(null);
|
||||
let actionInProgress = $state<string | null>(null);
|
||||
|
||||
// Fetch jobs from API
|
||||
async function fetchJobs() {
|
||||
try {
|
||||
const response = await fetch('/api/v1/jobs?limit=50');
|
||||
if (!response.ok) {
|
||||
throw new Error(`HTTP ${response.status}`);
|
||||
}
|
||||
const data: JobResponse = await response.json();
|
||||
jobs = data.jobs;
|
||||
error = null;
|
||||
} catch (err) {
|
||||
error = err instanceof Error ? err.message : 'Failed to fetch jobs';
|
||||
console.error('Failed to fetch jobs:', err);
|
||||
} finally {
|
||||
loading = false;
|
||||
}
|
||||
interface ToastItem {
|
||||
id: string;
|
||||
message: string;
|
||||
type: 'success' | 'error' | 'info';
|
||||
}
|
||||
|
||||
// Action handlers
|
||||
async function pauseJob(id: string) {
|
||||
actionInProgress = id;
|
||||
try {
|
||||
const response = await fetch(`/api/v1/jobs/${id}/pause`, { method: 'POST' });
|
||||
if (!response.ok) {
|
||||
const errorData = await response.json().catch(() => ({ message: 'Unknown error' }));
|
||||
throw new Error(errorData.message || `HTTP ${response.status}`);
|
||||
}
|
||||
// Optimistic update
|
||||
jobs = jobs.map((j) => (j.id === id ? { ...j, status: 'paused' as const } : j));
|
||||
// Show success message
|
||||
showToast('Job paused successfully');
|
||||
} catch (err) {
|
||||
const msg = err instanceof Error ? err.message : 'Failed to pause job';
|
||||
showToast(msg, 'error');
|
||||
console.error('Failed to pause job:', err);
|
||||
} finally {
|
||||
actionInProgress = null;
|
||||
// Refresh after a short delay to get the actual state
|
||||
setTimeout(fetchJobs, 500);
|
||||
}
|
||||
}
|
||||
type FilterStatus = 'queued' | 'running' | 'done' | 'failed';
|
||||
type JobAction = 'pause' | 'resume' | 'cancel';
|
||||
|
||||
async function resumeJob(id: string) {
|
||||
actionInProgress = id;
|
||||
try {
|
||||
const response = await fetch(`/api/v1/jobs/${id}/resume`, { method: 'POST' });
|
||||
if (!response.ok) {
|
||||
const errorData = await response.json().catch(() => ({ message: 'Unknown error' }));
|
||||
throw new Error(errorData.message || `HTTP ${response.status}`);
|
||||
}
|
||||
// Optimistic update
|
||||
jobs = jobs.map((j) => (j.id === id ? { ...j, status: 'queued' as const } : j));
|
||||
showToast('Job resumed successfully');
|
||||
} catch (err) {
|
||||
const msg = err instanceof Error ? err.message : 'Failed to resume job';
|
||||
showToast(msg, 'error');
|
||||
console.error('Failed to resume job:', err);
|
||||
} finally {
|
||||
actionInProgress = null;
|
||||
setTimeout(fetchJobs, 500);
|
||||
}
|
||||
}
|
||||
|
||||
async function cancelJob(id: string) {
|
||||
if (!confirm('Are you sure you want to cancel this job?')) {
|
||||
return;
|
||||
}
|
||||
|
||||
actionInProgress = id;
|
||||
try {
|
||||
const response = await fetch(`/api/v1/jobs/${id}/cancel`, { method: 'POST' });
|
||||
if (!response.ok) {
|
||||
const errorData = await response.json().catch(() => ({ message: 'Unknown error' }));
|
||||
throw new Error(errorData.message || `HTTP ${response.status}`);
|
||||
}
|
||||
// Optimistic update
|
||||
jobs = jobs.map((j) => (j.id === id ? { ...j, status: 'cancelled' as const } : j));
|
||||
showToast('Job cancelled successfully');
|
||||
} catch (err) {
|
||||
const msg = err instanceof Error ? err.message : 'Failed to cancel job';
|
||||
showToast(msg, 'error');
|
||||
console.error('Failed to cancel job:', err);
|
||||
} finally {
|
||||
actionInProgress = null;
|
||||
setTimeout(fetchJobs, 500);
|
||||
}
|
||||
}
|
||||
|
||||
// Simple toast notification (using alert for v1, can be enhanced later)
|
||||
function showToast(message: string, type: 'success' | 'error' = 'success') {
|
||||
// For v1, just use alert. In production, integrate with a toast library.
|
||||
if (type === 'error') {
|
||||
alert(`Error: ${message}`);
|
||||
} else {
|
||||
console.log(`✓ ${message}`);
|
||||
}
|
||||
}
|
||||
|
||||
// Auto-refresh with EventSource streaming + fallback polling
// NOTE(review): no reactive state is read synchronously here, so this effect
// appears to run once on mount; the returned teardown closes the stream and
// stops any fallback polling — confirm intended re-run behavior.
$effect(() => {
  fetchJobs();

  const es = new EventSource('/api/v1/jobs/stream');
  let fallbackInterval: ReturnType<typeof setInterval> | null = null;

  // Patch the matching row's progress fields from the streamed payload.
  es.addEventListener('job-progress', (event) => {
    const data = JSON.parse(event.data);
    jobs = jobs.map((j) =>
      j.id === data.jobId
        ? {
            ...j,
            progress: data.progress,
            stage: data.stage,
            stageDetail: data.stageDetail,
            processedFiles: data.processedFiles,
            totalFiles: data.totalFiles
          }
        : j
    );
  });

  // Terminal events refetch the whole list instead of patching one row.
  es.addEventListener('job-done', () => {
    void fetchJobs();
  });

  es.addEventListener('job-failed', () => {
    void fetchJobs();
  });

  es.onerror = () => {
    es.close();
    // Fall back to polling on error
    fallbackInterval = setInterval(fetchJobs, 3000);
  };

  return () => {
    es.close();
    if (fallbackInterval) {
      clearInterval(fallbackInterval);
    }
  };
});
|
||||
|
||||
// Format date for display
|
||||
function formatDate(date: Date | null): string {
|
||||
if (!date) return '—';
|
||||
return new Date(date).toLocaleString();
|
||||
}
|
||||
|
||||
// Determine which actions are available for a job
|
||||
function canPause(status: IndexingJobDto['status']): boolean {
|
||||
return status === 'queued' || status === 'running';
|
||||
}
|
||||
|
||||
function canResume(status: IndexingJobDto['status']): boolean {
|
||||
return status === 'paused';
|
||||
}
|
||||
|
||||
function canCancel(status: IndexingJobDto['status']): boolean {
|
||||
return status !== 'done' && status !== 'failed';
|
||||
}
|
||||
|
||||
// Map IndexingStage values to display labels
|
||||
const filterStatuses: FilterStatus[] = ['queued', 'running', 'done', 'failed'];
|
||||
const stageLabels: Record<string, string> = {
|
||||
queued: 'Queued',
|
||||
differential: 'Diff',
|
||||
@@ -187,9 +34,274 @@
|
||||
failed: 'Failed'
|
||||
};
|
||||
|
||||
let jobs = $state<IndexingJobDto[]>([]);
|
||||
let total = $state(0);
|
||||
let loading = $state(true);
|
||||
let refreshing = $state(false);
|
||||
let error = $state<string | null>(null);
|
||||
let repositoryInput = $state('');
|
||||
let selectedStatuses = $state<FilterStatus[]>([]);
|
||||
let appliedRepositoryFilter = $state('');
|
||||
let appliedStatuses = $state<FilterStatus[]>([]);
|
||||
let pendingCancelJobId = $state<string | null>(null);
|
||||
let rowActions = $state<Record<string, JobAction | undefined>>({});
|
||||
let toasts = $state<ToastItem[]>([]);
|
||||
let refreshTimer: ReturnType<typeof setTimeout> | null = null;
|
||||
|
||||
function buildJobsUrl(): string {
|
||||
const params = new SvelteURLSearchParams({ limit: '50' });
|
||||
|
||||
if (appliedRepositoryFilter) {
|
||||
params.set('repositoryId', appliedRepositoryFilter);
|
||||
}
|
||||
|
||||
if (appliedStatuses.length > 0) {
|
||||
params.set('status', appliedStatuses.join(','));
|
||||
}
|
||||
|
||||
return `/api/v1/jobs?${params.toString()}`;
|
||||
}
|
||||
|
||||
function pushToast(message: string, type: ToastItem['type'] = 'success') {
|
||||
toasts = [...toasts, { id: crypto.randomUUID(), message, type }];
|
||||
}
|
||||
|
||||
function clearRowAction(jobId: string) {
|
||||
const next = { ...rowActions };
|
||||
delete next[jobId];
|
||||
rowActions = next;
|
||||
}
|
||||
|
||||
function setRowAction(jobId: string, action: JobAction) {
|
||||
rowActions = { ...rowActions, [jobId]: action };
|
||||
}
|
||||
|
||||
function scheduleRefresh(delayMs = 500) {
|
||||
if (refreshTimer) {
|
||||
clearTimeout(refreshTimer);
|
||||
}
|
||||
|
||||
refreshTimer = setTimeout(() => {
|
||||
void fetchJobs({ background: true });
|
||||
}, delayMs);
|
||||
}
|
||||
|
||||
function hasAppliedFilters(): boolean {
|
||||
return appliedRepositoryFilter.length > 0 || appliedStatuses.length > 0;
|
||||
}
|
||||
|
||||
function sameStatuses(left: FilterStatus[], right: FilterStatus[]): boolean {
|
||||
return left.length === right.length && left.every((status, index) => status === right[index]);
|
||||
}
|
||||
|
||||
function filtersDirty(): boolean {
|
||||
return repositoryInput.trim() !== appliedRepositoryFilter || !sameStatuses(selectedStatuses, appliedStatuses);
|
||||
}
|
||||
|
||||
function isSpecificRepositoryId(repositoryId: string): boolean {
|
||||
return repositoryId.split('/').filter(Boolean).length >= 2;
|
||||
}
|
||||
|
||||
function matchesAppliedFilters(job: IndexingJobDto): boolean {
|
||||
if (appliedRepositoryFilter) {
|
||||
const repositoryFilter = appliedRepositoryFilter;
|
||||
const repositoryMatches = isSpecificRepositoryId(repositoryFilter)
|
||||
? job.repositoryId === repositoryFilter
|
||||
: job.repositoryId === repositoryFilter || job.repositoryId.startsWith(`${repositoryFilter}/`);
|
||||
|
||||
if (!repositoryMatches) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
if (appliedStatuses.length === 0) {
|
||||
return true;
|
||||
}
|
||||
|
||||
return appliedStatuses.includes(job.status as FilterStatus);
|
||||
}
|
||||
|
||||
function syncCancelState(nextJobs: IndexingJobDto[]) {
|
||||
if (!pendingCancelJobId) {
|
||||
return;
|
||||
}
|
||||
|
||||
const pendingJob = nextJobs.find((job) => job.id === pendingCancelJobId);
|
||||
if (!pendingJob || !canCancel(pendingJob.status)) {
|
||||
pendingCancelJobId = null;
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Load the job list from the API using the currently applied filters.
 *
 * @param options.background - When true, set the subtle `refreshing` flag
 *   instead of the full-page `loading` state.
 */
async function fetchJobs(options: { background?: boolean } = {}) {
  const background = options.background ?? false;

  if (background) {
    refreshing = true;
  } else {
    loading = true;
  }

  try {
    const response = await fetch(buildJobsUrl());
    if (!response.ok) {
      throw new Error(`HTTP ${response.status}`);
    }

    const data: JobResponse = await response.json();
    jobs = data.jobs;
    total = data.total;
    error = null;
    // A refresh can invalidate an open inline cancel confirmation.
    syncCancelState(data.jobs);
  } catch (err) {
    error = err instanceof Error ? err.message : 'Failed to fetch jobs';
    console.error('Failed to fetch jobs:', err);
  } finally {
    // Clear both flags unconditionally; only one was set above.
    loading = false;
    refreshing = false;
  }
}
|
||||
|
||||
async function runJobAction(job: IndexingJobDto, action: JobAction) {
|
||||
setRowAction(job.id, action);
|
||||
|
||||
try {
|
||||
const response = await fetch(`/api/v1/jobs/${job.id}/${action}`, { method: 'POST' });
|
||||
const payload = await response.json().catch(() => ({ message: 'Unknown error' }));
|
||||
|
||||
if (!response.ok) {
|
||||
throw new Error(payload.message || `HTTP ${response.status}`);
|
||||
}
|
||||
|
||||
const updatedJob = payload.job as IndexingJobDto | undefined;
|
||||
if (updatedJob) {
|
||||
if (matchesAppliedFilters(updatedJob)) {
|
||||
jobs = jobs.map((currentJob) =>
|
||||
currentJob.id === updatedJob.id ? updatedJob : currentJob
|
||||
);
|
||||
} else {
|
||||
jobs = jobs.filter((currentJob) => currentJob.id !== updatedJob.id);
|
||||
}
|
||||
}
|
||||
|
||||
pendingCancelJobId = null;
|
||||
pushToast(`Job ${action}d successfully`);
|
||||
} catch (err) {
|
||||
const message = err instanceof Error ? err.message : `Failed to ${action} job`;
|
||||
pushToast(message, 'error');
|
||||
console.error(`Failed to ${action} job:`, err);
|
||||
} finally {
|
||||
clearRowAction(job.id);
|
||||
scheduleRefresh();
|
||||
}
|
||||
}
|
||||
|
||||
function toggleStatusFilter(status: FilterStatus) {
|
||||
selectedStatuses = selectedStatuses.includes(status)
|
||||
? selectedStatuses.filter((candidate) => candidate !== status)
|
||||
: [...selectedStatuses, status].sort(
|
||||
(left, right) => filterStatuses.indexOf(left) - filterStatuses.indexOf(right)
|
||||
);
|
||||
}
|
||||
|
||||
function applyFilters(event?: SubmitEvent) {
|
||||
event?.preventDefault();
|
||||
appliedRepositoryFilter = repositoryInput.trim();
|
||||
appliedStatuses = [...selectedStatuses];
|
||||
pendingCancelJobId = null;
|
||||
void fetchJobs();
|
||||
}
|
||||
|
||||
function resetFilters() {
|
||||
repositoryInput = '';
|
||||
selectedStatuses = [];
|
||||
appliedRepositoryFilter = '';
|
||||
appliedStatuses = [];
|
||||
pendingCancelJobId = null;
|
||||
void fetchJobs();
|
||||
}
|
||||
|
||||
function requestCancel(jobId: string) {
|
||||
pendingCancelJobId = pendingCancelJobId === jobId ? null : jobId;
|
||||
}
|
||||
|
||||
function formatDate(date: Date | string | null): string {
|
||||
if (!date) {
|
||||
return '—';
|
||||
}
|
||||
|
||||
return new Date(date).toLocaleString();
|
||||
}
|
||||
|
||||
function canPause(status: IndexingJobDto['status']): boolean {
|
||||
return status === 'queued' || status === 'running';
|
||||
}
|
||||
|
||||
function canResume(status: IndexingJobDto['status']): boolean {
|
||||
return status === 'paused';
|
||||
}
|
||||
|
||||
function canCancel(status: IndexingJobDto['status']): boolean {
|
||||
return status !== 'done' && status !== 'failed' && status !== 'cancelled';
|
||||
}
|
||||
|
||||
function isRowBusy(jobId: string): boolean {
|
||||
return Boolean(rowActions[jobId]);
|
||||
}
|
||||
|
||||
function getStageLabel(stage: string | undefined): string {
|
||||
return stage ? (stageLabels[stage] ?? stage) : '—';
|
||||
}
|
||||
|
||||
// Subscribe to the job event stream on mount; fall back to polling if the
// stream errors. The returned cleanup tears down the stream, any polling
// interval, and any pending debounced refresh.
onMount(() => {
  void fetchJobs();

  const es = new EventSource('/api/v1/jobs/stream');
  let fallbackInterval: ReturnType<typeof setInterval> | null = null;
  // Shared handler: terminal events trigger a background refetch.
  const refreshJobs = () => {
    void fetchJobs({ background: true });
  };

  // Merge streamed progress fields into the matching row, keeping existing
  // values for any field absent from the payload.
  es.addEventListener('job-progress', (event) => {
    const data = JSON.parse(event.data) as Partial<IndexingJobDto> & { jobId?: string };
    if (!data.jobId) {
      return;
    }

    jobs = jobs.map((job) =>
      job.id === data.jobId
        ? {
            ...job,
            progress: data.progress ?? job.progress,
            stage: data.stage ?? job.stage,
            stageDetail: data.stageDetail ?? job.stageDetail,
            processedFiles: data.processedFiles ?? job.processedFiles,
            totalFiles: data.totalFiles ?? job.totalFiles,
            status: data.status ?? job.status
          }
        : job
    );
  });

  es.addEventListener('job-done', refreshJobs);
  es.addEventListener('job-failed', refreshJobs);

  es.onerror = () => {
    // Once the stream errors it is closed for good; polling takes over.
    es.close();
    if (!fallbackInterval) {
      fallbackInterval = setInterval(refreshJobs, 3000);
    }
  };

  return () => {
    es.close();
    if (fallbackInterval) {
      clearInterval(fallbackInterval);
    }
    if (refreshTimer) {
      clearTimeout(refreshTimer);
    }
  };
});
|
||||
</script>
|
||||
|
||||
<svelte:head>
|
||||
@@ -202,23 +314,92 @@
|
||||
<p class="mt-2 text-gray-600">Monitor and control indexing jobs</p>
|
||||
</div>
|
||||
|
||||
{#if loading && jobs.length === 0}
|
||||
<div class="flex items-center justify-center py-12">
|
||||
<div class="text-center">
|
||||
<div
|
||||
class="inline-block h-8 w-8 animate-spin rounded-full border-4 border-solid border-blue-600 border-r-transparent"
|
||||
></div>
|
||||
<p class="mt-2 text-gray-600">Loading jobs...</p>
|
||||
<WorkerStatusPanel />
|
||||
|
||||
<form class="mb-6 rounded-lg border border-gray-200 bg-white p-4 shadow-sm" onsubmit={applyFilters}>
|
||||
<div class="flex flex-col gap-4 lg:flex-row lg:items-end lg:justify-between">
|
||||
<div class="flex-1">
|
||||
<label class="mb-2 block text-sm font-medium text-gray-700" for="repository-filter">
|
||||
Repository filter
|
||||
</label>
|
||||
<input
|
||||
id="repository-filter"
|
||||
type="text"
|
||||
bind:value={repositoryInput}
|
||||
placeholder="/owner or /owner/repo"
|
||||
class="w-full rounded-md border border-gray-300 px-3 py-2 text-sm text-gray-900 shadow-sm focus:border-blue-500 focus:outline-none focus:ring-2 focus:ring-blue-200"
|
||||
/>
|
||||
<p class="mt-2 text-xs text-gray-500">
|
||||
Use an owner prefix like <code>/facebook</code> or a full repository ID like <code>/facebook/react</code>.
|
||||
</p>
|
||||
</div>
|
||||
|
||||
<div class="lg:min-w-72">
|
||||
<span class="mb-2 block text-sm font-medium text-gray-700">Statuses</span>
|
||||
<div class="flex flex-wrap gap-2">
|
||||
{#each filterStatuses as status (status)}
|
||||
<button
|
||||
type="button"
|
||||
onclick={() => toggleStatusFilter(status)}
|
||||
class="rounded-full border px-3 py-1 text-xs font-semibold uppercase transition {selectedStatuses.includes(status)
|
||||
? 'border-blue-600 bg-blue-50 text-blue-700'
|
||||
: 'border-gray-300 text-gray-600 hover:border-gray-400 hover:text-gray-900'}"
|
||||
>
|
||||
{status}
|
||||
</button>
|
||||
{/each}
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<div class="flex gap-2">
|
||||
<button
|
||||
type="submit"
|
||||
disabled={!filtersDirty()}
|
||||
class="rounded bg-blue-600 px-4 py-2 text-sm font-semibold text-white hover:bg-blue-700 disabled:cursor-not-allowed disabled:opacity-50"
|
||||
>
|
||||
Apply filters
|
||||
</button>
|
||||
<button
|
||||
type="button"
|
||||
onclick={resetFilters}
|
||||
class="rounded border border-gray-300 px-4 py-2 text-sm font-semibold text-gray-700 hover:border-gray-400 hover:text-gray-900"
|
||||
>
|
||||
Reset
|
||||
</button>
|
||||
</div>
|
||||
</div>
|
||||
{:else if error && jobs.length === 0}
|
||||
<div class="rounded-md bg-red-50 p-4">
|
||||
<p class="text-sm text-red-800">Error: {error}</p>
|
||||
</form>
|
||||
|
||||
<div class="mb-4 flex flex-col gap-2 text-sm text-gray-600 md:flex-row md:items-center md:justify-between">
|
||||
<p>
|
||||
Showing <span class="font-semibold text-gray-900">{jobs.length}</span> of
|
||||
<span class="font-semibold text-gray-900">{total}</span> jobs
|
||||
</p>
|
||||
{#if hasAppliedFilters()}
|
||||
<p class="text-xs text-gray-500">
|
||||
Active filters:
|
||||
{appliedRepositoryFilter || 'all repositories'}
|
||||
{#if appliedStatuses.length > 0}
|
||||
· {appliedStatuses.join(', ')}
|
||||
{:else}
|
||||
· all statuses
|
||||
{/if}
|
||||
</p>
|
||||
{/if}
|
||||
</div>
|
||||
|
||||
{#if error}
|
||||
<div class="mb-4 rounded-md border border-red-200 bg-red-50 px-4 py-3 text-sm text-red-800">
|
||||
{error}
|
||||
</div>
|
||||
{:else if jobs.length === 0}
|
||||
{/if}
|
||||
|
||||
{#if !loading && jobs.length === 0}
|
||||
<div class="rounded-md bg-gray-50 p-8 text-center">
|
||||
<p class="text-gray-600">
|
||||
No jobs found. Jobs will appear here when repositories are indexed.
|
||||
{hasAppliedFilters()
|
||||
? 'No jobs match the current filters.'
|
||||
: 'No jobs found. Jobs will appear here when repositories are indexed.'}
|
||||
</p>
|
||||
</div>
|
||||
{:else}
|
||||
@@ -259,86 +440,117 @@
|
||||
</tr>
|
||||
</thead>
|
||||
<tbody class="divide-y divide-gray-200 bg-white">
|
||||
{#each jobs as job (job.id)}
|
||||
{#if loading && jobs.length === 0}
|
||||
<JobSkeleton rows={6} />
|
||||
{:else}
|
||||
{#each jobs as job (job.id)}
|
||||
<tr class="hover:bg-gray-50">
|
||||
<td class="px-6 py-4 text-sm font-medium whitespace-nowrap text-gray-900">
|
||||
{job.repositoryId}
|
||||
{#if job.versionId}
|
||||
<span class="ml-1 text-xs text-gray-500">@{job.versionId}</span>
|
||||
{/if}
|
||||
<div class="mt-1 text-xs text-gray-400">{job.id}</div>
|
||||
</td>
|
||||
<td class="px-6 py-4 text-sm whitespace-nowrap text-gray-500">
|
||||
<JobStatusBadge status={job.status} />
|
||||
<JobStatusBadge status={job.status} spinning={job.status === 'running'} />
|
||||
</td>
|
||||
<td class="px-6 py-4 text-sm whitespace-nowrap text-gray-500">
|
||||
<div class="flex items-center gap-2">
|
||||
<span>{getStageLabel(job.stage)}</span>
|
||||
{#if job.stageDetail}
|
||||
<span class="text-xs text-gray-400">{job.stageDetail}</span>
|
||||
{/if}
|
||||
</div>
|
||||
</td>
|
||||
<td class="px-6 py-4 text-sm whitespace-nowrap text-gray-500">
|
||||
<div class="flex items-center">
|
||||
<span class="mr-2">{job.progress}%</span>
|
||||
<div class="h-2 w-32 rounded-full bg-gray-200">
|
||||
<div
|
||||
class="h-2 rounded-full bg-blue-600 transition-all"
|
||||
style="width: {job.progress}%"
|
||||
></div>
|
||||
<td class="px-6 py-4 text-sm whitespace-nowrap text-gray-500">
|
||||
<div class="flex items-center gap-2">
|
||||
<span>{getStageLabel(job.stage)}</span>
|
||||
{#if job.stageDetail}
|
||||
<span class="text-xs text-gray-400">{job.stageDetail}</span>
|
||||
{/if}
|
||||
</div>
|
||||
{#if job.totalFiles > 0}
|
||||
<span class="ml-2 text-xs text-gray-400">
|
||||
{job.processedFiles}/{job.totalFiles} files
|
||||
</span>
|
||||
{/if}
|
||||
</div>
|
||||
</td>
|
||||
</td>
|
||||
<td class="px-6 py-4 text-sm whitespace-nowrap text-gray-500">
|
||||
<div class="space-y-2">
|
||||
<div class="flex items-center gap-2">
|
||||
<span class="w-12 text-right text-xs font-semibold text-gray-600">{job.progress}%</span>
|
||||
<div class="h-2 w-32 rounded-full bg-gray-200">
|
||||
<div
|
||||
class="h-2 rounded-full bg-blue-600 transition-all"
|
||||
style="width: {job.progress}%"
|
||||
></div>
|
||||
</div>
|
||||
</div>
|
||||
{#if job.totalFiles > 0}
|
||||
<div class="text-xs text-gray-400">
|
||||
{job.processedFiles}/{job.totalFiles} files processed
|
||||
</div>
|
||||
{/if}
|
||||
</div>
|
||||
</td>
|
||||
<td class="px-6 py-4 text-sm whitespace-nowrap text-gray-500">
|
||||
{formatDate(job.createdAt)}
|
||||
</td>
|
||||
<td class="px-6 py-4 text-right text-sm font-medium whitespace-nowrap">
|
||||
<div class="flex justify-end gap-2">
|
||||
{#if canPause(job.status)}
|
||||
{#if pendingCancelJobId === job.id}
|
||||
<button
|
||||
onclick={() => pauseJob(job.id)}
|
||||
disabled={actionInProgress === job.id}
|
||||
class="rounded bg-yellow-600 px-3 py-1 text-xs font-semibold text-white hover:bg-yellow-700 disabled:opacity-50"
|
||||
type="button"
|
||||
onclick={() => void runJobAction(job, 'cancel')}
|
||||
disabled={isRowBusy(job.id)}
|
||||
class="rounded bg-red-600 px-3 py-1 text-xs font-semibold text-white hover:bg-red-700 disabled:cursor-not-allowed disabled:opacity-50"
|
||||
>
|
||||
Pause
|
||||
{rowActions[job.id] === 'cancel' ? 'Cancelling...' : 'Confirm cancel'}
|
||||
</button>
|
||||
{/if}
|
||||
{#if canResume(job.status)}
|
||||
<button
|
||||
onclick={() => resumeJob(job.id)}
|
||||
disabled={actionInProgress === job.id}
|
||||
class="rounded bg-green-600 px-3 py-1 text-xs font-semibold text-white hover:bg-green-700 disabled:opacity-50"
|
||||
type="button"
|
||||
onclick={() => requestCancel(job.id)}
|
||||
disabled={isRowBusy(job.id)}
|
||||
class="rounded border border-gray-300 px-3 py-1 text-xs font-semibold text-gray-700 hover:border-gray-400 hover:text-gray-900 disabled:cursor-not-allowed disabled:opacity-50"
|
||||
>
|
||||
Resume
|
||||
Keep job
|
||||
</button>
|
||||
{/if}
|
||||
{#if canCancel(job.status)}
|
||||
<button
|
||||
onclick={() => cancelJob(job.id)}
|
||||
disabled={actionInProgress === job.id}
|
||||
class="rounded bg-red-600 px-3 py-1 text-xs font-semibold text-white hover:bg-red-700 disabled:opacity-50"
|
||||
>
|
||||
Cancel
|
||||
</button>
|
||||
{/if}
|
||||
{#if !canPause(job.status) && !canResume(job.status) && !canCancel(job.status)}
|
||||
<span class="text-xs text-gray-400">—</span>
|
||||
{:else}
|
||||
{#if canPause(job.status)}
|
||||
<button
|
||||
type="button"
|
||||
onclick={() => void runJobAction(job, 'pause')}
|
||||
disabled={isRowBusy(job.id)}
|
||||
class="rounded bg-yellow-600 px-3 py-1 text-xs font-semibold text-white hover:bg-yellow-700 disabled:cursor-not-allowed disabled:opacity-50"
|
||||
>
|
||||
{rowActions[job.id] === 'pause' ? 'Pausing...' : 'Pause'}
|
||||
</button>
|
||||
{/if}
|
||||
{#if canResume(job.status)}
|
||||
<button
|
||||
type="button"
|
||||
onclick={() => void runJobAction(job, 'resume')}
|
||||
disabled={isRowBusy(job.id)}
|
||||
class="rounded bg-green-600 px-3 py-1 text-xs font-semibold text-white hover:bg-green-700 disabled:cursor-not-allowed disabled:opacity-50"
|
||||
>
|
||||
{rowActions[job.id] === 'resume' ? 'Resuming...' : 'Resume'}
|
||||
</button>
|
||||
{/if}
|
||||
{#if canCancel(job.status)}
|
||||
<button
|
||||
type="button"
|
||||
onclick={() => requestCancel(job.id)}
|
||||
disabled={isRowBusy(job.id)}
|
||||
class="rounded bg-red-600 px-3 py-1 text-xs font-semibold text-white hover:bg-red-700 disabled:cursor-not-allowed disabled:opacity-50"
|
||||
>
|
||||
Cancel
|
||||
</button>
|
||||
{/if}
|
||||
{#if !canPause(job.status) && !canResume(job.status) && !canCancel(job.status)}
|
||||
<span class="text-xs text-gray-400">—</span>
|
||||
{/if}
|
||||
{/if}
|
||||
</div>
|
||||
</td>
|
||||
</tr>
|
||||
{/each}
|
||||
{/each}
|
||||
{/if}
|
||||
</tbody>
|
||||
</table>
|
||||
</div>
|
||||
|
||||
{#if loading}
|
||||
{#if refreshing}
|
||||
<div class="mt-4 text-center text-sm text-gray-500">Refreshing...</div>
|
||||
{/if}
|
||||
{/if}
|
||||
</div>
|
||||
|
||||
<Toast bind:toasts={toasts} />
|
||||
|
||||
@@ -15,15 +15,39 @@ import { JobQueue } from '$lib/server/pipeline/job-queue.js';
|
||||
import { handleServiceError } from '$lib/server/utils/validation.js';
|
||||
import type { IndexingJob } from '$lib/types';
|
||||
|
||||
// Whitelist of job statuses accepted by the `?status=` query filter; unknown
// values are silently dropped by parseStatusFilter.
// NOTE(review): the admin UI treats 'paused' and 'cancelled' as job statuses
// (see canResume/canCancel) — confirm whether they should be filterable here.
const VALID_JOB_STATUSES: ReadonlySet<IndexingJob['status']> = new Set([
	'queued',
	'running',
	'done',
	'failed'
]);
|
||||
|
||||
function parseStatusFilter(searchValue: string | null): IndexingJob['status'] | Array<IndexingJob['status']> | undefined {
|
||||
if (!searchValue) {
|
||||
return undefined;
|
||||
}
|
||||
|
||||
const statuses = [...new Set(
|
||||
searchValue
|
||||
.split(',')
|
||||
.map((value) => value.trim())
|
||||
.filter((value): value is IndexingJob['status'] => VALID_JOB_STATUSES.has(value as IndexingJob['status']))
|
||||
)];
|
||||
|
||||
if (statuses.length === 0) {
|
||||
return undefined;
|
||||
}
|
||||
|
||||
return statuses.length === 1 ? statuses[0] : statuses;
|
||||
}
|
||||
|
||||
export const GET: RequestHandler = ({ url }) => {
|
||||
try {
|
||||
const db = getClient();
|
||||
const queue = new JobQueue(db);
|
||||
|
||||
const repositoryId = url.searchParams.get('repositoryId') ?? undefined;
|
||||
const status = (url.searchParams.get('status') ?? undefined) as
|
||||
| IndexingJob['status']
|
||||
| undefined;
|
||||
const repositoryId = url.searchParams.get('repositoryId')?.trim() || undefined;
|
||||
const status = parseStatusFilter(url.searchParams.get('status'));
|
||||
const limit = Math.min(parseInt(url.searchParams.get('limit') ?? '20', 10) || 20, 1000);
|
||||
|
||||
const jobs = queue.listJobs({ repositoryId, status, limit });
|
||||
|
||||
@@ -44,7 +44,7 @@ export const GET: RequestHandler = ({ params, request }) => {
|
||||
status: job.status,
|
||||
error: job.error
|
||||
};
|
||||
controller.enqueue(`data: ${JSON.stringify(initialData)}\n\n`);
|
||||
controller.enqueue(`event: job-progress\ndata: ${JSON.stringify(initialData)}\n\n`);
|
||||
|
||||
// Check for Last-Event-ID header for reconnect
|
||||
const lastEventId = request.headers.get('Last-Event-ID');
|
||||
@@ -57,6 +57,13 @@ export const GET: RequestHandler = ({ params, request }) => {
|
||||
|
||||
// Check if job is already done or failed - close immediately after first event
|
||||
if (job.status === 'done' || job.status === 'failed') {
|
||||
if (job.status === 'done') {
|
||||
controller.enqueue(`event: job-done\ndata: ${JSON.stringify({ jobId })}\n\n`);
|
||||
} else {
|
||||
controller.enqueue(
|
||||
`event: job-failed\ndata: ${JSON.stringify({ jobId, error: job.error })}\n\n`
|
||||
);
|
||||
}
|
||||
controller.close();
|
||||
return;
|
||||
}
|
||||
@@ -73,18 +80,29 @@ export const GET: RequestHandler = ({ params, request }) => {
|
||||
controller.enqueue(value);
|
||||
|
||||
// Check if the incoming event indicates job completion
|
||||
if (value.includes('event: done') || value.includes('event: failed')) {
|
||||
if (
|
||||
value.includes('event: job-done') ||
|
||||
value.includes('event: job-failed')
|
||||
) {
|
||||
controller.close();
|
||||
break;
|
||||
}
|
||||
}
|
||||
} finally {
|
||||
reader.releaseLock();
|
||||
controller.close();
|
||||
try {
|
||||
controller.close();
|
||||
} catch {
|
||||
// Stream may already be closed after a terminal event.
|
||||
}
|
||||
}
|
||||
} catch (err) {
|
||||
console.error('SSE stream error:', err);
|
||||
controller.close();
|
||||
try {
|
||||
controller.close();
|
||||
} catch {
|
||||
// Stream may already be closed.
|
||||
}
|
||||
}
|
||||
}
|
||||
});
|
||||
|
||||
@@ -18,6 +18,7 @@ import type { ProgressBroadcaster as BroadcasterType } from '$lib/server/pipelin
|
||||
let db: Database.Database;
|
||||
// Closed over by the vi.mock factory below.
|
||||
let mockBroadcaster: BroadcasterType | null = null;
|
||||
let mockPool: { getStatus: () => object; setMaxConcurrency?: (value: number) => void } | null = null;
|
||||
|
||||
vi.mock('$lib/server/db/client', () => ({
|
||||
getClient: () => db
|
||||
@@ -29,12 +30,12 @@ vi.mock('$lib/server/db/client.js', () => ({
|
||||
|
||||
vi.mock('$lib/server/pipeline/startup', () => ({
|
||||
getQueue: () => null,
|
||||
getPool: () => null
|
||||
getPool: () => mockPool
|
||||
}));
|
||||
|
||||
vi.mock('$lib/server/pipeline/startup.js', () => ({
|
||||
getQueue: () => null,
|
||||
getPool: () => null
|
||||
getPool: () => mockPool
|
||||
}));
|
||||
|
||||
vi.mock('$lib/server/pipeline/progress-broadcaster', async (importOriginal) => {
|
||||
@@ -58,9 +59,11 @@ vi.mock('$lib/server/pipeline/progress-broadcaster.js', async (importOriginal) =
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
import { ProgressBroadcaster } from '$lib/server/pipeline/progress-broadcaster.js';
|
||||
import { GET as getJobsList } from './jobs/+server.js';
|
||||
import { GET as getJobStream } from './jobs/[id]/stream/+server.js';
|
||||
import { GET as getJobsStream } from './jobs/stream/+server.js';
|
||||
import { GET as getIndexingSettings, PUT as putIndexingSettings } from './settings/indexing/+server.js';
|
||||
import { GET as getWorkers } from './workers/+server.js';
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// DB factory
|
||||
@@ -306,6 +309,25 @@ describe('GET /api/v1/jobs/:id/stream', () => {
|
||||
// The replay event should include the cached event data
|
||||
expect(fullText).toContain('progress');
|
||||
});
|
||||
|
||||
// Verifies that the per-job SSE endpoint forwards a broadcaster 'job-done'
// event to the client and then closes the response stream.
it('closes after receiving the broadcaster job-done event', async () => {
	seedRepo(db);
	const jobId = seedJob(db, { status: 'running', stage: 'parsing', progress: 10 });

	const response = await getJobStream(makeEvent({ params: { id: jobId } }));
	const reader = response.body!.getReader();

	// The stream opens with a snapshot of the job's current progress.
	const initialChunk = await reader.read();
	expect(String(initialChunk.value ?? '')).toContain('event: job-progress');

	// Simulate the pipeline reporting completion via the mocked broadcaster.
	mockBroadcaster!.broadcast(jobId, '/test/repo', 'job-done', { jobId, status: 'done' });

	// The terminal event must be forwarded to the client...
	const completionChunk = await reader.read();
	expect(String(completionChunk.value ?? '')).toContain('event: job-done');

	// ...and the stream must close right after it.
	const closed = await reader.read();
	expect(closed.done).toBe(true);
});
|
||||
});
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
@@ -377,12 +399,125 @@ describe('GET /api/v1/jobs/stream', () => {
|
||||
});
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Test group 3: GET /api/v1/settings/indexing
|
||||
// Test group 3: GET /api/v1/jobs
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
// Integration tests for the job-listing endpoint's query filters.
describe('GET /api/v1/jobs', () => {
	beforeEach(() => {
		// Fresh in-memory database per test so seeded rows don't leak between cases.
		db = createTestDb();
	});

	it('supports repository prefix and comma-separated status filters', async () => {
		seedRepo(db, '/facebook/react');
		seedRepo(db, '/facebook/react-native');
		seedRepo(db, '/vitejs/vite');

		seedJob(db, { repository_id: '/facebook/react', status: 'queued' });
		seedJob(db, { repository_id: '/facebook/react-native', status: 'running' });
		seedJob(db, { repository_id: '/facebook/react', status: 'done' });
		seedJob(db, { repository_id: '/vitejs/vite', status: 'queued' });

		// '%2Ffacebook' is the URL-encoded '/facebook' owner prefix; the status
		// list deliberately includes a space after the comma to exercise trimming.
		const response = await getJobsList(
			makeEvent<Parameters<typeof getJobsList>[0]>({
				url: 'http://localhost/api/v1/jobs?repositoryId=%2Ffacebook&status=queued,%20running'
			})
		);
		const body = await response.json();

		// Expect only the two /facebook jobs in 'queued'/'running'; the 'done'
		// job and the /vitejs repository are filtered out.
		expect(response.status).toBe(200);
		expect(body.total).toBe(2);
		expect(body.jobs).toHaveLength(2);
		expect(body.jobs.map((job: { repositoryId: string }) => job.repositoryId).sort()).toEqual([
			'/facebook/react',
			'/facebook/react-native'
		]);
		expect(body.jobs.map((job: { status: string }) => job.status).sort()).toEqual([
			'queued',
			'running'
		]);
	});

	it('keeps exact-match behavior for specific repository IDs', async () => {
		seedRepo(db, '/facebook/react');
		seedRepo(db, '/facebook/react-native');

		seedJob(db, { repository_id: '/facebook/react', status: 'queued' });
		seedJob(db, { repository_id: '/facebook/react-native', status: 'queued' });

		// A full repository ID must not match sibling repos that merely share
		// the prefix (react vs react-native).
		const response = await getJobsList(
			makeEvent<Parameters<typeof getJobsList>[0]>({
				url: 'http://localhost/api/v1/jobs?repositoryId=%2Ffacebook%2Freact&status=queued'
			})
		);
		const body = await response.json();

		expect(response.status).toBe(200);
		expect(body.total).toBe(1);
		expect(body.jobs).toHaveLength(1);
		expect(body.jobs[0].repositoryId).toBe('/facebook/react');
	});
});
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Test group 4: GET /api/v1/workers
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
// Integration tests for the worker-status endpoint, driven entirely through
// the mocked `getPool()` (module-level `mockPool` is read by the vi.mock above).
describe('GET /api/v1/workers', () => {
	beforeEach(() => {
		// Default to "no pool" so each test opts in to a pool explicitly.
		mockPool = null;
	});

	it('returns 503 when the worker pool is not initialized', async () => {
		const response = await getWorkers(makeEvent<Parameters<typeof getWorkers>[0]>({}));

		expect(response.status).toBe(503);
	});

	it('returns the current worker status snapshot', async () => {
		// One running worker and one idle worker in the snapshot.
		mockPool = {
			getStatus: () => ({
				concurrency: 2,
				active: 1,
				idle: 1,
				workers: [
					{
						index: 0,
						state: 'running',
						jobId: 'job-1',
						repositoryId: '/test/repo',
						versionId: null
					},
					{
						index: 1,
						state: 'idle',
						jobId: null,
						repositoryId: null,
						versionId: null
					}
				]
			})
		};

		const response = await getWorkers(makeEvent<Parameters<typeof getWorkers>[0]>({}));
		const body = await response.json();

		// The endpoint should pass the snapshot through unchanged.
		expect(response.status).toBe(200);
		expect(body.active).toBe(1);
		expect(body.workers[0].jobId).toBe('job-1');
	});
});
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Test group 5: GET /api/v1/settings/indexing
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
describe('GET /api/v1/settings/indexing', () => {
|
||||
beforeEach(() => {
|
||||
db = createTestDb();
|
||||
mockPool = {
|
||||
getStatus: () => ({ concurrency: 2, active: 0, idle: 2, workers: [] }),
|
||||
setMaxConcurrency: vi.fn()
|
||||
};
|
||||
});
|
||||
|
||||
it('returns { concurrency: 2 } when no setting exists in DB', async () => {
|
||||
@@ -417,12 +552,16 @@ describe('GET /api/v1/settings/indexing', () => {
|
||||
});
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Test group 4: PUT /api/v1/settings/indexing
|
||||
// Test group 6: PUT /api/v1/settings/indexing
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
describe('PUT /api/v1/settings/indexing', () => {
|
||||
beforeEach(() => {
|
||||
db = createTestDb();
|
||||
mockPool = {
|
||||
getStatus: () => ({ concurrency: 2, active: 0, idle: 2, workers: [] }),
|
||||
setMaxConcurrency: vi.fn()
|
||||
};
|
||||
});
|
||||
|
||||
function makePutEvent(body: unknown) {
|
||||
|
||||
16
src/routes/api/v1/workers/+server.ts
Normal file
16
src/routes/api/v1/workers/+server.ts
Normal file
@@ -0,0 +1,16 @@
|
||||
import type { RequestHandler } from './$types';
|
||||
import { getPool } from '$lib/server/pipeline/startup.js';
|
||||
import { handleServiceError } from '$lib/server/utils/validation.js';
|
||||
|
||||
export const GET: RequestHandler = () => {
|
||||
try {
|
||||
const pool = getPool();
|
||||
if (!pool) {
|
||||
return new Response('Service unavailable', { status: 503 });
|
||||
}
|
||||
|
||||
return Response.json(pool.getStatus());
|
||||
} catch (error) {
|
||||
return handleServiceError(error);
|
||||
}
|
||||
};
|
||||
Reference in New Issue
Block a user