fix: harden queue lifecycle and publish gate
- Preserve phase results on partial retry and keep interrupted phase context after restart. - Avoid webhook bookkeeping crashes when retention deletes stale jobs. - Add deeper unit, integration, and e2e coverage around queue seams. - Require verify job to pass before publish runs. Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
This commit is contained in:
125
src/JobQueue.ts
125
src/JobQueue.ts
@@ -74,6 +74,7 @@ export class JobQueue<TData extends JobData = JobData> {
|
||||
private readonly handlers = new Map<string, PhaseHandler<TData>>();
|
||||
private readonly serializer = new SseSerializer();
|
||||
private readonly controllers = new Map<string, AbortController>();
|
||||
private readonly pendingWebhookDispatches = new Set<Promise<void>>();
|
||||
private readonly webhookDispatcher?: WebhookDispatcher<TData>;
|
||||
private readonly retentionScheduler?: RetentionScheduler<TData>;
|
||||
private wakeupTimer: NodeJS.Timeout | null = null;
|
||||
@@ -177,6 +178,7 @@ export class JobQueue<TData extends JobData = JobData> {
|
||||
const retried = this.storage.resetForRetry(
|
||||
id,
|
||||
phases,
|
||||
options.fromStart === false ? existing.phaseResults : {},
|
||||
existing.maxAttempts,
|
||||
options.scheduledAt instanceof Date ? options.scheduledAt.toISOString() : options.scheduledAt ?? null,
|
||||
);
|
||||
@@ -194,22 +196,14 @@ export class JobQueue<TData extends JobData = JobData> {
|
||||
return job;
|
||||
}
|
||||
|
||||
const phases = clonePhases(job.phases);
|
||||
const activePhaseIndex = phases.findIndex((phase) => phase.status === 'active');
|
||||
if (activePhaseIndex >= 0) {
|
||||
phases[activePhaseIndex] = {
|
||||
...phases[activePhaseIndex],
|
||||
status: 'cancelled',
|
||||
completedAt: now(),
|
||||
};
|
||||
}
|
||||
const phases = this.markUnfinishedPhasesCancelled(clonePhases(job.phases));
|
||||
|
||||
const controller = this.controllers.get(id);
|
||||
controller?.abort(new CancellationError());
|
||||
|
||||
const cancelled = this.storage.cancelJob(id, phases);
|
||||
this.events.emit('job:cancelled', cancelled);
|
||||
void this.dispatchWebhook('job:cancelled', cancelled);
|
||||
this.queueWebhookDispatch('job:cancelled', cancelled);
|
||||
this.requestPump();
|
||||
|
||||
return cancelled;
|
||||
@@ -378,9 +372,33 @@ export class JobQueue<TData extends JobData = JobData> {
|
||||
this.wakeupTimer = null;
|
||||
}
|
||||
|
||||
await this.workerPool.drain(timeoutMs);
|
||||
this.events.removeAllListeners();
|
||||
this.storage.close();
|
||||
let drainError: unknown;
|
||||
|
||||
try {
|
||||
await this.workerPool.drain(timeoutMs);
|
||||
} catch (error) {
|
||||
drainError = error;
|
||||
|
||||
for (const controller of this.controllers.values()) {
|
||||
if (!controller.signal.aborted) {
|
||||
controller.abort(new CancellationError('Job queue shut down before active jobs finished'));
|
||||
}
|
||||
}
|
||||
|
||||
try {
|
||||
await this.workerPool.drain(250);
|
||||
} catch {
|
||||
// Best effort: cleanup still needs to run after timeout.
|
||||
}
|
||||
} finally {
|
||||
await this.drainWebhookDispatches();
|
||||
this.events.removeAllListeners();
|
||||
this.storage.close();
|
||||
}
|
||||
|
||||
if (drainError) {
|
||||
throw drainError;
|
||||
}
|
||||
}
|
||||
|
||||
private async pump(): Promise<void> {
|
||||
@@ -508,19 +526,18 @@ export class JobQueue<TData extends JobData = JobData> {
|
||||
this.events.emit('job:phase:completed', updated, completedPhase);
|
||||
return updated;
|
||||
},
|
||||
onCancelled: async (phaseName, phases) => {
|
||||
const activePhases = clonePhases(phases);
|
||||
if (phaseName) {
|
||||
const active = activePhases.find((phase) => phase.name === phaseName);
|
||||
if (active) {
|
||||
active.status = 'cancelled';
|
||||
active.completedAt = now();
|
||||
}
|
||||
onCancelled: async (_phaseName, phases) => {
|
||||
const current = this.requireJob(jobId);
|
||||
if (current.status === 'cancelled') {
|
||||
return;
|
||||
}
|
||||
|
||||
const cancelled = this.storage.cancelJob(jobId, activePhases);
|
||||
const cancelled = this.storage.cancelJob(
|
||||
jobId,
|
||||
this.markUnfinishedPhasesCancelled(clonePhases(phases)),
|
||||
);
|
||||
this.events.emit('job:cancelled', cancelled);
|
||||
void this.dispatchWebhook('job:cancelled', cancelled);
|
||||
this.queueWebhookDispatch('job:cancelled', cancelled);
|
||||
},
|
||||
});
|
||||
|
||||
@@ -533,14 +550,17 @@ export class JobQueue<TData extends JobData = JobData> {
|
||||
|
||||
const completed = this.storage.completeJob(jobId, result.phases, result.phaseResults);
|
||||
this.events.emit('job:completed', completed);
|
||||
void this.dispatchWebhook('job:completed', completed);
|
||||
this.queueWebhookDispatch('job:completed', completed);
|
||||
} catch (error) {
|
||||
if (error instanceof CancellationError) {
|
||||
const current = this.requireJob(jobId);
|
||||
if (current.status !== 'cancelled') {
|
||||
const cancelled = this.storage.cancelJob(jobId, clonePhases(current.phases));
|
||||
const cancelled = this.storage.cancelJob(
|
||||
jobId,
|
||||
this.markUnfinishedPhasesCancelled(clonePhases(current.phases)),
|
||||
);
|
||||
this.events.emit('job:cancelled', cancelled);
|
||||
void this.dispatchWebhook('job:cancelled', cancelled);
|
||||
this.queueWebhookDispatch('job:cancelled', cancelled);
|
||||
}
|
||||
|
||||
return;
|
||||
@@ -596,7 +616,7 @@ export class JobQueue<TData extends JobData = JobData> {
|
||||
};
|
||||
|
||||
this.events.emit('job:retrying', pending, retry);
|
||||
void this.dispatchWebhook('job:retrying', pending);
|
||||
this.queueWebhookDispatch('job:retrying', pending);
|
||||
this.requestPump();
|
||||
return;
|
||||
}
|
||||
@@ -608,7 +628,7 @@ export class JobQueue<TData extends JobData = JobData> {
|
||||
|
||||
const failed = this.storage.failJob(jobId, phases, failure);
|
||||
this.events.emit('job:failed', failed, failure);
|
||||
void this.dispatchWebhook('job:failed', failed);
|
||||
this.queueWebhookDispatch('job:failed', failed);
|
||||
}
|
||||
|
||||
private async markStaleJobs(cutoffIso: string): Promise<JobRecord<TData>[]> {
|
||||
@@ -620,7 +640,7 @@ export class JobQueue<TData extends JobData = JobData> {
|
||||
}
|
||||
|
||||
this.events.emit('job:stale', job);
|
||||
void this.dispatchWebhook('job:stale', job);
|
||||
this.queueWebhookDispatch('job:stale', job);
|
||||
}
|
||||
|
||||
return staleJobs;
|
||||
@@ -648,8 +668,10 @@ export class JobQueue<TData extends JobData = JobData> {
|
||||
try {
|
||||
const result = await this.webhookDispatcher.dispatch(event, job);
|
||||
this.storage.markWebhookSent(job.id);
|
||||
const refreshed = this.requireJob(job.id);
|
||||
this.events.emit('job:webhook:delivered', refreshed, result);
|
||||
const refreshed = this.getJob(job.id);
|
||||
if (refreshed) {
|
||||
this.events.emit('job:webhook:delivered', refreshed, result);
|
||||
}
|
||||
} catch (error) {
|
||||
const dispatchError =
|
||||
error instanceof Error && 'dispatchError' in error
|
||||
@@ -666,11 +688,48 @@ export class JobQueue<TData extends JobData = JobData> {
|
||||
finalAttempt: 1,
|
||||
};
|
||||
|
||||
const refreshed = this.requireJob(job.id);
|
||||
this.events.emit('job:webhook:failed', refreshed, dispatchError);
|
||||
const refreshed = this.getJob(job.id);
|
||||
if (refreshed) {
|
||||
this.events.emit('job:webhook:failed', refreshed, dispatchError);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
private queueWebhookDispatch(event: WebhookEventName, job: JobRecord<TData>): void {
|
||||
if (!this.webhookDispatcher?.supports(event, job)) {
|
||||
return;
|
||||
}
|
||||
|
||||
const pending = this.dispatchWebhook(event, job).finally(() => {
|
||||
this.pendingWebhookDispatches.delete(pending);
|
||||
});
|
||||
|
||||
this.pendingWebhookDispatches.add(pending);
|
||||
}
|
||||
|
||||
private async drainWebhookDispatches(): Promise<void> {
|
||||
while (this.pendingWebhookDispatches.size > 0) {
|
||||
await Promise.allSettled([...this.pendingWebhookDispatches]);
|
||||
}
|
||||
}
|
||||
|
||||
private markUnfinishedPhasesCancelled(phases: JobPhaseState[]): JobPhaseState[] {
|
||||
const cancelledAt = now();
|
||||
|
||||
return phases.map((phase) => {
|
||||
if (phase.status === 'completed' || phase.status === 'failed' || phase.status === 'cancelled') {
|
||||
return phase;
|
||||
}
|
||||
|
||||
return {
|
||||
...phase,
|
||||
status: 'cancelled',
|
||||
completedAt: phase.completedAt ?? cancelledAt,
|
||||
error: null,
|
||||
};
|
||||
});
|
||||
}
|
||||
|
||||
private pushJobEvent(
|
||||
controller: ReadableStreamDefaultController<Uint8Array>,
|
||||
encoder: { encode: (input?: string) => Uint8Array },
|
||||
|
||||
Reference in New Issue
Block a user