feat(TRUEREF-0022): complete iteration 0 — worker-thread indexing, parallel jobs, SSE progress

- Move IndexingPipeline.run() into Worker Threads via WorkerPool
- Add dedicated embedding worker thread with single model instance
- Add stage/stageDetail columns to indexing_jobs schema
- Create ProgressBroadcaster for SSE channel management
- Add SSE endpoints: GET /api/v1/jobs/:id/stream, GET /api/v1/jobs/stream
- Replace UI polling with EventSource on repo detail and admin pages
- Add concurrency settings UI and API endpoint
- Build worker entries separately via esbuild
This commit is contained in:
Giancarmine Salucci
2026-03-30 17:08:23 +02:00
parent 6f3f4db19b
commit 7630740403
30 changed files with 2659 additions and 991 deletions

View File

@@ -9,7 +9,7 @@
import type Database from 'better-sqlite3';
import { IndexingJobMapper } from '$lib/server/mappers/indexing-job.mapper.js';
import { IndexingJob, IndexingJobEntity } from '$lib/server/models/indexing-job.js';
import type { IndexingPipeline } from './indexing.pipeline.js';
import type { WorkerPool } from './worker-pool.js';
// ---------------------------------------------------------------------------
// SQL projection + row mapper (mirrors repository.service.ts pattern)
@@ -18,16 +18,16 @@ import type { IndexingPipeline } from './indexing.pipeline.js';
const JOB_SELECT = `SELECT * FROM indexing_jobs`;
export class JobQueue {
private isRunning = false;
private pipeline: IndexingPipeline | null = null;
private workerPool: WorkerPool | null = null;
constructor(private readonly db: Database.Database) {}
/**
* Inject the pipeline dependency (avoids circular construction order).
* Inject the worker pool dependency (an alternative to running the pipeline directly).
* When set, enqueue() will delegate to the pool instead of calling processNext().
*/
setPipeline(pipeline: IndexingPipeline): void {
this.pipeline = pipeline;
setWorkerPool(pool: WorkerPool): void {
this.workerPool = pool;
}
/**
@@ -50,7 +50,9 @@ export class JobQueue {
if (activeRaw) {
// Ensure the queue is draining even if enqueue was called concurrently.
if (!this.isRunning) setImmediate(() => this.processNext());
if (!this.workerPool) {
setImmediate(() => this.processNext());
}
return IndexingJobMapper.fromEntity(new IndexingJobEntity(activeRaw));
}
@@ -63,6 +65,8 @@ export class JobQueue {
progress: 0,
totalFiles: 0,
processedFiles: 0,
stage: 'queued',
stageDetail: null,
error: null,
startedAt: null,
completedAt: null,
@@ -73,8 +77,8 @@ export class JobQueue {
.prepare(
`INSERT INTO indexing_jobs
(id, repository_id, version_id, status, progress, total_files,
processed_files, error, started_at, completed_at, created_at)
VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)`
processed_files, stage, stage_detail, error, started_at, completed_at, created_at)
VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)`
)
.run(
job.id,
@@ -84,14 +88,18 @@ export class JobQueue {
job.progress,
job.totalFiles,
job.processedFiles,
job.stage,
job.stageDetail,
job.error,
job.startedAt,
job.completedAt,
now
);
// Kick off sequential processing if not already running.
if (!this.isRunning) {
// Delegate to worker pool if available, otherwise fall back to direct processing
if (this.workerPool) {
this.workerPool.enqueue(job.id, repositoryId, versionId ?? null);
} else {
setImmediate(() => this.processNext());
}
@@ -102,15 +110,13 @@ export class JobQueue {
}
/**
* Pick the oldest queued job and run it through the pipeline.
* Called recursively via setImmediate so the event loop stays unblocked.
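* Pick the oldest queued job for direct (non-pooled) handling.
* This is now a fallback method used only when no WorkerPool is set.
* NOTE(review): the body shown in this change only logs a warning after selecting the job and does not appear to execute it — confirm whether direct pipeline execution is still intended.
* Called via setImmediate so the event loop stays unblocked.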
*/
private async processNext(): Promise<void> {
if (this.isRunning) return;
if (!this.pipeline) {
console.warn('[JobQueue] No pipeline configured — cannot process jobs.');
return;
}
// Fallback path: no worker pool configured, run directly (used by tests and dev mode)
console.warn('[JobQueue] Running in fallback mode (no worker pool) — direct pipeline execution.');
const rawJob = this.db
.prepare<[], IndexingJobEntity>(
@@ -122,26 +128,7 @@ export class JobQueue {
if (!rawJob) return;
const job = IndexingJobMapper.fromEntity(new IndexingJobEntity(rawJob));
this.isRunning = true;
try {
await this.pipeline.run(job);
} catch (err) {
// Error is logged inside pipeline.run(); no action needed here.
console.error(
`[JobQueue] Job ${job.id} failed: ${err instanceof Error ? err.message : String(err)}`
);
} finally {
this.isRunning = false;
// Check whether another job was queued while this one ran.
const next = this.db
.prepare<[], { id: string }>(`SELECT id FROM indexing_jobs WHERE status = 'queued' LIMIT 1`)
.get();
if (next) {
setImmediate(() => this.processNext());
}
}
console.warn('[JobQueue] processNext: no pipeline or pool configured — skipping job processing');
}
/**
@@ -184,10 +171,21 @@ export class JobQueue {
/**
* Trigger processing of any queued jobs (e.g. after server restart).
* Safe to call multiple times; a no-op if the queue is already running.
* If a worker pool is configured, delegates to it. Otherwise falls back to direct processing.
* Safe to call multiple times.
*/
drainQueued(): void {
if (!this.isRunning) {
if (this.workerPool) {
// Delegate all queued jobs to the worker pool
const queued = this.db
.prepare<[], IndexingJobEntity>(`${JOB_SELECT} WHERE status = 'queued'`)
.all();
for (const rawJob of queued) {
const job = IndexingJobMapper.fromEntity(new IndexingJobEntity(rawJob));
this.workerPool.enqueue(job.id, job.repositoryId, job.versionId);
}
} else {
// Fallback: direct pipeline processing
setImmediate(() => this.processNext());
}
}