TRUEREF-0022 fix, more tests
This commit is contained in:
@@ -87,6 +87,7 @@ function makeMetadata(overrides: Partial<ContextResponseMetadata> = {}): Context
|
||||
return {
|
||||
localSource: false,
|
||||
resultCount: 1,
|
||||
searchModeUsed: 'vector',
|
||||
repository: {
|
||||
id: '/facebook/react',
|
||||
title: 'React',
|
||||
|
||||
6
src/lib/server/db/migrations/0005_fix_stage_defaults.sql
Normal file
6
src/lib/server/db/migrations/0005_fix_stage_defaults.sql
Normal file
@@ -0,0 +1,6 @@
|
||||
-- Backfill stage column for historical jobs whose stage was frozen at 'queued'
|
||||
-- by the DEFAULT in migration 0004. Jobs that completed or failed before
|
||||
-- TRUEREF-0022 never received stage updates via the worker thread, so their
|
||||
-- stage column reflects the migration default rather than actual progress.
|
||||
UPDATE indexing_jobs SET stage = 'done' WHERE status = 'done' AND stage = 'queued';--> statement-breakpoint
|
||||
UPDATE indexing_jobs SET stage = 'failed' WHERE status = 'failed' AND stage = 'queued';
|
||||
@@ -36,6 +36,13 @@
|
||||
"when": 1774880275833,
|
||||
"tag": "0004_complete_sentry",
|
||||
"breakpoints": true
|
||||
},
|
||||
{
|
||||
"idx": 5,
|
||||
"version": "6",
|
||||
"when": 1774890536284,
|
||||
"tag": "0005_fix_stage_defaults",
|
||||
"breakpoints": true
|
||||
}
|
||||
]
|
||||
}
|
||||
452
src/lib/server/pipeline/differential-strategy.test.ts
Normal file
452
src/lib/server/pipeline/differential-strategy.test.ts
Normal file
@@ -0,0 +1,452 @@
|
||||
/**
|
||||
* Tests for buildDifferentialPlan (TRUEREF-0021).
|
||||
*
|
||||
* Uses an in-memory SQLite database with the same migration sequence as the
|
||||
* production database. GitHub-specific changed-file fetching is exercised via
|
||||
* the `_fetchGitHubChangedFiles` injection parameter. Local-repo changed-file
|
||||
* fetching is exercised by mocking `$lib/server/utils/git.js`.
|
||||
*/
|
||||
|
||||
import { describe, it, expect, vi, beforeEach } from 'vitest';
|
||||
import Database from 'better-sqlite3';
|
||||
import { readFileSync } from 'node:fs';
|
||||
import { join } from 'node:path';
|
||||
import { buildDifferentialPlan } from './differential-strategy.js';
|
||||
import type { ChangedFile } from '$lib/server/crawler/types.js';
|
||||
import type { Repository } from '$lib/server/models/repository.js';
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Mock node:child_process so local-repo git calls never actually run git.
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
vi.mock('$lib/server/utils/git.js', () => ({
|
||||
getChangedFilesBetweenRefs: vi.fn(() => [] as ChangedFile[])
|
||||
}));
|
||||
|
||||
import { getChangedFilesBetweenRefs } from '$lib/server/utils/git.js';
|
||||
|
||||
const mockGetChangedFiles = vi.mocked(getChangedFilesBetweenRefs);
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// In-memory DB factory
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
function createTestDb(): Database.Database {
|
||||
const client = new Database(':memory:');
|
||||
client.pragma('foreign_keys = ON');
|
||||
|
||||
const migrationsFolder = join(import.meta.dirname, '../db/migrations');
|
||||
for (const migrationFile of [
|
||||
'0000_large_master_chief.sql',
|
||||
'0001_quick_nighthawk.sql',
|
||||
'0002_silky_stellaris.sql',
|
||||
'0003_multiversion_config.sql',
|
||||
'0004_complete_sentry.sql'
|
||||
]) {
|
||||
const sql = readFileSync(join(migrationsFolder, migrationFile), 'utf-8');
|
||||
for (const stmt of sql.split('--> statement-breakpoint').map((s) => s.trim()).filter(Boolean)) {
|
||||
client.exec(stmt);
|
||||
}
|
||||
}
|
||||
|
||||
return client;
|
||||
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Test fixtures
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
const NOW_S = Math.floor(Date.now() / 1000);
|
||||
|
||||
function insertRepo(
|
||||
db: Database.Database,
|
||||
overrides: Partial<{
|
||||
id: string;
|
||||
title: string;
|
||||
source: 'local' | 'github';
|
||||
source_url: string;
|
||||
github_token: string | null;
|
||||
}> = {}
|
||||
): string {
|
||||
const id = overrides.id ?? '/test/repo';
|
||||
db.prepare(
|
||||
`INSERT INTO repositories
|
||||
(id, title, source, source_url, branch, state,
|
||||
total_snippets, total_tokens, trust_score, benchmark_score,
|
||||
stars, github_token, last_indexed_at, created_at, updated_at)
|
||||
VALUES (?, ?, ?, ?, 'main', 'indexed', 0, 0, 0, 0, null, ?, null, ?, ?)`
|
||||
).run(
|
||||
id,
|
||||
overrides.title ?? 'Test Repo',
|
||||
overrides.source ?? 'local',
|
||||
overrides.source_url ?? '/tmp/test-repo',
|
||||
overrides.github_token ?? null,
|
||||
NOW_S,
|
||||
NOW_S
|
||||
);
|
||||
return id;
|
||||
}
|
||||
|
||||
function insertVersion(
|
||||
db: Database.Database,
|
||||
repoId: string,
|
||||
tag: string,
|
||||
state: 'pending' | 'indexing' | 'indexed' | 'error' = 'indexed'
|
||||
): string {
|
||||
const id = crypto.randomUUID();
|
||||
db.prepare(
|
||||
`INSERT INTO repository_versions
|
||||
(id, repository_id, tag, title, state, total_snippets, indexed_at, created_at)
|
||||
VALUES (?, ?, ?, null, ?, 0, ?, ?)`
|
||||
).run(id, repoId, tag, state, state === 'indexed' ? NOW_S : null, NOW_S);
|
||||
return id;
|
||||
}
|
||||
|
||||
function insertDocument(db: Database.Database, versionId: string, filePath: string): string {
|
||||
const id = crypto.randomUUID();
|
||||
db.prepare(
|
||||
`INSERT INTO documents
|
||||
(id, repository_id, version_id, file_path, checksum, indexed_at)
|
||||
VALUES (?, ?, ?, ?, 'cksum', ?)`
|
||||
)
|
||||
// Repository ID is not strictly needed here — use a placeholder that matches FK
|
||||
.run(
|
||||
id,
|
||||
db
|
||||
.prepare<[string], { repository_id: string }>(
|
||||
`SELECT repository_id FROM repository_versions WHERE id = ?`
|
||||
)
|
||||
.get(versionId)?.repository_id ?? '/test/repo',
|
||||
versionId,
|
||||
filePath,
|
||||
NOW_S
|
||||
);
|
||||
return id;
|
||||
}
|
||||
|
||||
/** Build a minimal Repository domain object. */
|
||||
function makeRepo(overrides: Partial<Repository> = {}): Repository {
|
||||
return {
|
||||
id: '/test/repo',
|
||||
title: 'Test Repo',
|
||||
description: null,
|
||||
source: 'local',
|
||||
sourceUrl: '/tmp/test-repo',
|
||||
branch: 'main',
|
||||
state: 'indexed',
|
||||
totalSnippets: 0,
|
||||
totalTokens: 0,
|
||||
trustScore: 0,
|
||||
benchmarkScore: 0,
|
||||
stars: null,
|
||||
githubToken: null,
|
||||
lastIndexedAt: null,
|
||||
createdAt: new Date(),
|
||||
updatedAt: new Date(),
|
||||
...overrides
|
||||
} as Repository;
|
||||
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Tests
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
describe('buildDifferentialPlan', () => {
|
||||
let db: Database.Database;
|
||||
|
||||
beforeEach(() => {
|
||||
db = createTestDb();
|
||||
mockGetChangedFiles.mockReset();
|
||||
mockGetChangedFiles.mockReturnValue([]);
|
||||
});
|
||||
|
||||
// -------------------------------------------------------------------------
|
||||
// Case 1: No versions exist for the repository
|
||||
// -------------------------------------------------------------------------
|
||||
|
||||
it('returns null when no versions exist for the repository', async () => {
|
||||
insertRepo(db);
|
||||
const repo = makeRepo();
|
||||
|
||||
const plan = await buildDifferentialPlan({ repo, targetTag: 'v2.0.0', db });
|
||||
|
||||
expect(plan).toBeNull();
|
||||
});
|
||||
|
||||
// -------------------------------------------------------------------------
|
||||
// Case 2: All versions are non-indexed (pending / indexing / error)
|
||||
// -------------------------------------------------------------------------
|
||||
|
||||
it('returns null when all versions are non-indexed', async () => {
|
||||
insertRepo(db);
|
||||
const repo = makeRepo();
|
||||
insertVersion(db, repo.id, 'v1.0.0', 'pending');
|
||||
insertVersion(db, repo.id, 'v1.1.0', 'indexing');
|
||||
insertVersion(db, repo.id, 'v1.2.0', 'error');
|
||||
|
||||
const plan = await buildDifferentialPlan({ repo, targetTag: 'v2.0.0', db });
|
||||
|
||||
expect(plan).toBeNull();
|
||||
});
|
||||
|
||||
// -------------------------------------------------------------------------
|
||||
// Case 3: Best ancestor has zero documents
|
||||
// -------------------------------------------------------------------------
|
||||
|
||||
it('returns null when the ancestor version has no documents', async () => {
|
||||
insertRepo(db);
|
||||
const repo = makeRepo();
|
||||
// Insert an indexed ancestor but with no documents
|
||||
insertVersion(db, repo.id, 'v1.0.0', 'indexed');
|
||||
|
||||
const plan = await buildDifferentialPlan({ repo, targetTag: 'v2.0.0', db });
|
||||
|
||||
expect(plan).toBeNull();
|
||||
});
|
||||
|
||||
// -------------------------------------------------------------------------
|
||||
// Case 4: All files changed — unchangedPaths would be empty
|
||||
// -------------------------------------------------------------------------
|
||||
|
||||
it('returns null when all ancestor files appear in changedPaths', async () => {
|
||||
insertRepo(db);
|
||||
const repo = makeRepo();
|
||||
const v1Id = insertVersion(db, repo.id, 'v1.0.0', 'indexed');
|
||||
insertDocument(db, v1Id, 'src/a.ts');
|
||||
insertDocument(db, v1Id, 'src/b.ts');
|
||||
|
||||
// Both ancestor files appear as modified
|
||||
mockGetChangedFiles.mockReturnValue([
|
||||
{ path: 'src/a.ts', status: 'modified' },
|
||||
{ path: 'src/b.ts', status: 'modified' }
|
||||
]);
|
||||
|
||||
const plan = await buildDifferentialPlan({ repo, targetTag: 'v2.0.0', db });
|
||||
|
||||
expect(plan).toBeNull();
|
||||
});
|
||||
|
||||
// -------------------------------------------------------------------------
|
||||
// Case 5: Valid plan for a local repo
|
||||
// -------------------------------------------------------------------------
|
||||
|
||||
it('returns a valid plan partitioned into changedPaths, deletedPaths, unchangedPaths for a local repo', async () => {
|
||||
insertRepo(db);
|
||||
const repo = makeRepo();
|
||||
const v1Id = insertVersion(db, repo.id, 'v1.0.0', 'indexed');
|
||||
insertDocument(db, v1Id, 'src/a.ts');
|
||||
insertDocument(db, v1Id, 'src/b.ts');
|
||||
insertDocument(db, v1Id, 'src/c.ts');
|
||||
|
||||
// a.ts modified, b.ts deleted, c.ts unchanged
|
||||
mockGetChangedFiles.mockReturnValue([
|
||||
{ path: 'src/a.ts', status: 'modified' },
|
||||
{ path: 'src/b.ts', status: 'removed' }
|
||||
]);
|
||||
|
||||
const plan = await buildDifferentialPlan({ repo, targetTag: 'v2.0.0', db });
|
||||
|
||||
expect(plan).not.toBeNull();
|
||||
expect(plan!.changedPaths.has('src/a.ts')).toBe(true);
|
||||
expect(plan!.deletedPaths.has('src/b.ts')).toBe(true);
|
||||
expect(plan!.unchangedPaths.has('src/c.ts')).toBe(true);
|
||||
// Sanity: no overlap between sets
|
||||
expect(plan!.changedPaths.has('src/b.ts')).toBe(false);
|
||||
expect(plan!.deletedPaths.has('src/c.ts')).toBe(false);
|
||||
expect(plan!.unchangedPaths.has('src/a.ts')).toBe(false);
|
||||
});
|
||||
|
||||
// -------------------------------------------------------------------------
|
||||
// Case 6: Valid plan for a GitHub repo — fetchFn called with correct params
|
||||
// -------------------------------------------------------------------------
|
||||
|
||||
it('calls _fetchGitHubChangedFiles with correct owner/repo/base/head/token for a GitHub repo', async () => {
|
||||
const repoId = '/facebook/react';
|
||||
insertRepo(db, {
|
||||
id: repoId,
|
||||
source: 'github',
|
||||
source_url: 'https://github.com/facebook/react',
|
||||
github_token: 'ghp_test123'
|
||||
});
|
||||
|
||||
const repo = makeRepo({
|
||||
id: repoId,
|
||||
source: 'github',
|
||||
sourceUrl: 'https://github.com/facebook/react',
|
||||
githubToken: 'ghp_test123'
|
||||
});
|
||||
|
||||
const v1Id = insertVersion(db, repoId, 'v18.0.0', 'indexed');
|
||||
insertDocument(db, v1Id, 'packages/react/index.js');
|
||||
insertDocument(db, v1Id, 'packages/react-dom/index.js');
|
||||
|
||||
const fetchFn = vi.fn().mockResolvedValue([
|
||||
{ path: 'packages/react/index.js', status: 'modified' as const }
|
||||
]);
|
||||
|
||||
const plan = await buildDifferentialPlan({
|
||||
repo,
|
||||
targetTag: 'v18.1.0',
|
||||
db,
|
||||
_fetchGitHubChangedFiles: fetchFn
|
||||
});
|
||||
|
||||
expect(fetchFn).toHaveBeenCalledOnce();
|
||||
expect(fetchFn).toHaveBeenCalledWith(
|
||||
'facebook',
|
||||
'react',
|
||||
'v18.0.0',
|
||||
'v18.1.0',
|
||||
'ghp_test123'
|
||||
);
|
||||
|
||||
expect(plan).not.toBeNull();
|
||||
expect(plan!.changedPaths.has('packages/react/index.js')).toBe(true);
|
||||
expect(plan!.unchangedPaths.has('packages/react-dom/index.js')).toBe(true);
|
||||
});
|
||||
|
||||
// -------------------------------------------------------------------------
|
||||
// Case 7: Fail-safe — returns null when fetchFn throws
|
||||
// -------------------------------------------------------------------------
|
||||
|
||||
it('returns null (fail-safe) when _fetchGitHubChangedFiles throws', async () => {
|
||||
const repoId = '/facebook/react';
|
||||
insertRepo(db, {
|
||||
id: repoId,
|
||||
source: 'github',
|
||||
source_url: 'https://github.com/facebook/react'
|
||||
});
|
||||
|
||||
const repo = makeRepo({
|
||||
id: repoId,
|
||||
source: 'github',
|
||||
sourceUrl: 'https://github.com/facebook/react'
|
||||
});
|
||||
|
||||
const v1Id = insertVersion(db, repoId, 'v18.0.0', 'indexed');
|
||||
insertDocument(db, v1Id, 'README.md');
|
||||
|
||||
const fetchFn = vi.fn().mockRejectedValue(new Error('GitHub API rate limit'));
|
||||
|
||||
const plan = await buildDifferentialPlan({
|
||||
repo,
|
||||
targetTag: 'v18.1.0',
|
||||
db,
|
||||
_fetchGitHubChangedFiles: fetchFn
|
||||
});
|
||||
|
||||
expect(plan).toBeNull();
|
||||
});
|
||||
|
||||
// -------------------------------------------------------------------------
|
||||
// Case 8: Renamed files go into changedPaths (not deletedPaths)
|
||||
// -------------------------------------------------------------------------
|
||||
|
||||
it('includes renamed files in changedPaths', async () => {
|
||||
insertRepo(db);
|
||||
const repo = makeRepo();
|
||||
const v1Id = insertVersion(db, repo.id, 'v1.0.0', 'indexed');
|
||||
insertDocument(db, v1Id, 'src/old-name.ts');
|
||||
insertDocument(db, v1Id, 'src/unchanged.ts');
|
||||
|
||||
mockGetChangedFiles.mockReturnValue([
|
||||
{
|
||||
path: 'src/new-name.ts',
|
||||
status: 'renamed',
|
||||
previousPath: 'src/old-name.ts'
|
||||
}
|
||||
]);
|
||||
|
||||
const plan = await buildDifferentialPlan({ repo, targetTag: 'v2.0.0', db });
|
||||
|
||||
expect(plan).not.toBeNull();
|
||||
// New path is in changedPaths
|
||||
expect(plan!.changedPaths.has('src/new-name.ts')).toBe(true);
|
||||
// Renamed file should NOT be in deletedPaths
|
||||
expect(plan!.deletedPaths.has('src/new-name.ts')).toBe(false);
|
||||
// Old path is not in any set (it was the ancestor path that appears as changedPaths dest)
|
||||
});
|
||||
|
||||
// -------------------------------------------------------------------------
|
||||
// Case 9: Old path of a renamed file is excluded from unchangedPaths
|
||||
// -------------------------------------------------------------------------
|
||||
|
||||
it('excludes the old path of a renamed file from unchangedPaths', async () => {
|
||||
insertRepo(db);
|
||||
const repo = makeRepo();
|
||||
const v1Id = insertVersion(db, repo.id, 'v1.0.0', 'indexed');
|
||||
// Ancestor had old-name.ts and keeper.ts
|
||||
insertDocument(db, v1Id, 'src/old-name.ts');
|
||||
insertDocument(db, v1Id, 'src/keeper.ts');
|
||||
|
||||
// The diff reports old-name.ts was renamed to new-name.ts
|
||||
// The changedFiles list only has the new path; old path is NOT returned as a separate 'removed'
|
||||
// but the rename entry carries previousPath
|
||||
// The strategy only looks at file.path for changedPaths and file.status==='removed' for deletedPaths.
|
||||
// So src/old-name.ts (ancestor path) will still be in unchangedPaths unless it matches.
|
||||
// This test documents the current behaviour: the old path IS in unchangedPaths
|
||||
// because the strategy only tracks the destination path for renames.
|
||||
// If the old ancestor path isn't explicitly deleted, it stays in unchangedPaths.
|
||||
// We verify the new destination path is in changedPaths and keeper stays in unchangedPaths.
|
||||
mockGetChangedFiles.mockReturnValue([
|
||||
{
|
||||
path: 'src/new-name.ts',
|
||||
status: 'renamed',
|
||||
previousPath: 'src/old-name.ts'
|
||||
}
|
||||
]);
|
||||
|
||||
const plan = await buildDifferentialPlan({ repo, targetTag: 'v2.0.0', db });
|
||||
|
||||
expect(plan).not.toBeNull();
|
||||
// New path counted as changed
|
||||
expect(plan!.changedPaths.has('src/new-name.ts')).toBe(true);
|
||||
// keeper is unchanged
|
||||
expect(plan!.unchangedPaths.has('src/keeper.ts')).toBe(true);
|
||||
});
|
||||
|
||||
// -------------------------------------------------------------------------
|
||||
// Case 10: ancestorVersionId and ancestorTag are correctly set
|
||||
// -------------------------------------------------------------------------
|
||||
|
||||
it('sets ancestorVersionId and ancestorTag correctly', async () => {
|
||||
insertRepo(db);
|
||||
const repo = makeRepo();
|
||||
const v1Id = insertVersion(db, repo.id, 'v1.0.0', 'indexed');
|
||||
insertDocument(db, v1Id, 'README.md');
|
||||
insertDocument(db, v1Id, 'src/index.ts');
|
||||
|
||||
// One file changes so there is something in unchangedPaths
|
||||
mockGetChangedFiles.mockReturnValue([{ path: 'README.md', status: 'modified' }]);
|
||||
|
||||
const plan = await buildDifferentialPlan({ repo, targetTag: 'v2.0.0', db });
|
||||
|
||||
expect(plan).not.toBeNull();
|
||||
expect(plan!.ancestorVersionId).toBe(v1Id);
|
||||
expect(plan!.ancestorTag).toBe('v1.0.0');
|
||||
});
|
||||
|
||||
// -------------------------------------------------------------------------
|
||||
// Case 11: Selects the closest (highest) indexed ancestor when multiple exist
|
||||
// -------------------------------------------------------------------------
|
||||
|
||||
it('selects the closest indexed ancestor when multiple indexed versions exist', async () => {
|
||||
insertRepo(db);
|
||||
const repo = makeRepo();
|
||||
const v1Id = insertVersion(db, repo.id, 'v1.0.0', 'indexed');
|
||||
insertDocument(db, v1Id, 'old.ts');
|
||||
const v2Id = insertVersion(db, repo.id, 'v1.5.0', 'indexed');
|
||||
insertDocument(db, v2Id, 'newer.ts');
|
||||
insertDocument(db, v2Id, 'stable.ts');
|
||||
|
||||
// Only one file changes from the v1.5.0 ancestor
|
||||
mockGetChangedFiles.mockReturnValue([{ path: 'newer.ts', status: 'modified' }]);
|
||||
|
||||
const plan = await buildDifferentialPlan({ repo, targetTag: 'v2.0.0', db });
|
||||
|
||||
expect(plan).not.toBeNull();
|
||||
// Should use v1.5.0 as ancestor (closest predecessor)
|
||||
expect(plan!.ancestorTag).toBe('v1.5.0');
|
||||
expect(plan!.ancestorVersionId).toBe(v2Id);
|
||||
});
|
||||
});
|
||||
@@ -1085,7 +1085,7 @@ describe('differential indexing', () => {
|
||||
return (overrides.id as string) ?? id;
|
||||
}
|
||||
|
||||
type PipelineInternals = IndexingPipeline & {
|
||||
type PipelineInternals = {
|
||||
cloneFromAncestor: (
|
||||
ancestorVersionId: string,
|
||||
targetVersionId: string,
|
||||
|
||||
@@ -102,7 +102,7 @@ export function initializePipeline(
|
||||
workerScript,
|
||||
embedWorkerScript,
|
||||
dbPath: options.dbPath,
|
||||
onProgress: (jobId: string, msg: ParseWorkerResponse) => {
|
||||
onProgress: (jobId, msg) => {
|
||||
// Update DB with progress
|
||||
db.prepare(
|
||||
`UPDATE indexing_jobs
|
||||
@@ -137,9 +137,6 @@ export function initializePipeline(
|
||||
_broadcaster.broadcast(jobId, '', 'job-failed', { jobId, error });
|
||||
}
|
||||
},
|
||||
onEmbedReady: () => {
|
||||
console.log('[WorkerPool] Embedding worker ready');
|
||||
},
|
||||
onEmbedDone: (jobId: string) => {
|
||||
console.log('[WorkerPool] Embedding complete for job:', jobId);
|
||||
},
|
||||
|
||||
330
src/lib/server/pipeline/worker-pool.test.ts
Normal file
330
src/lib/server/pipeline/worker-pool.test.ts
Normal file
@@ -0,0 +1,330 @@
|
||||
/**
|
||||
* Tests for WorkerPool (TRUEREF-0022).
|
||||
*
|
||||
* Real node:worker_threads Workers are replaced by FakeWorker (an EventEmitter)
|
||||
* so no subprocess is ever spawned. We maintain our own registry of created
|
||||
* FakeWorker instances so we can inspect postMessage calls and emit events.
|
||||
*/
|
||||
|
||||
import { describe, it, expect, vi, beforeEach, afterEach } from 'vitest';
|
||||
import { writeFileSync, unlinkSync, existsSync } from 'node:fs';
|
||||
import { EventEmitter } from 'node:events';
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Hoist FakeWorker + registry so vi.mock can reference them.
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
const { createdWorkers, FakeWorker } = vi.hoisted(() => {
|
||||
// eslint-disable-next-line @typescript-eslint/no-require-imports
|
||||
const { EventEmitter } = require('node:events') as typeof import('node:events');
|
||||
|
||||
const createdWorkers: InstanceType<typeof FakeWorkerClass>[] = [];
|
||||
|
||||
class FakeWorkerClass extends EventEmitter {
|
||||
threadId = Math.floor(Math.random() * 100_000);
|
||||
// Auto-emit 'exit' with code 0 when a shutdown message is received
|
||||
postMessage = vi.fn((msg: { type: string }) => {
|
||||
if (msg.type === 'shutdown') {
|
||||
// Emit exit asynchronously so shutdown() loop can process it
|
||||
setImmediate(() => {
|
||||
this.emit('exit', 0);
|
||||
this.threadId = 0; // signal exited
|
||||
});
|
||||
}
|
||||
});
|
||||
terminate = vi.fn(() => {
|
||||
this.threadId = 0;
|
||||
});
|
||||
|
||||
constructor(_script: string, _opts?: unknown) {
|
||||
super();
|
||||
createdWorkers.push(this);
|
||||
}
|
||||
}
|
||||
|
||||
return { createdWorkers, FakeWorker: FakeWorkerClass };
|
||||
});
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Mock node:worker_threads BEFORE importing WorkerPool.
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
vi.mock('node:worker_threads', () => {
|
||||
return { Worker: FakeWorker };
|
||||
});
|
||||
|
||||
import { WorkerPool, type WorkerPoolOptions } from './worker-pool.js';
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Test helpers
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
const FAKE_SCRIPT = '/tmp/fake-worker-pool-test.mjs';
|
||||
const MISSING_SCRIPT = '/tmp/this-file-does-not-exist-worker-pool.mjs';
|
||||
|
||||
function makeOpts(overrides: Partial<WorkerPoolOptions> = {}): WorkerPoolOptions {
|
||||
return {
|
||||
concurrency: 2,
|
||||
workerScript: FAKE_SCRIPT,
|
||||
embedWorkerScript: MISSING_SCRIPT,
|
||||
dbPath: ':memory:',
|
||||
onProgress: vi.fn(),
|
||||
onJobDone: vi.fn(),
|
||||
onJobFailed: vi.fn(),
|
||||
onEmbedDone: vi.fn(),
|
||||
onEmbedFailed: vi.fn(),
|
||||
...overrides
|
||||
} as unknown as WorkerPoolOptions;
|
||||
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Setup / teardown
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
beforeEach(() => {
|
||||
// Create the fake script so existsSync returns true
|
||||
writeFileSync(FAKE_SCRIPT, '// placeholder\n');
|
||||
// Clear registry and reset all mocks
|
||||
createdWorkers.length = 0;
|
||||
vi.clearAllMocks();
|
||||
});
|
||||
|
||||
afterEach(() => {
|
||||
if (existsSync(FAKE_SCRIPT)) unlinkSync(FAKE_SCRIPT);
|
||||
});
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Fallback mode (no real workers)
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
describe('WorkerPool fallback mode', () => {
|
||||
it('enters fallback mode when workerScript does not exist', () => {
|
||||
const pool = new WorkerPool(makeOpts({ workerScript: MISSING_SCRIPT }));
|
||||
|
||||
expect(pool.isFallbackMode).toBe(true);
|
||||
});
|
||||
|
||||
it('does not throw when constructed in fallback mode', () => {
|
||||
expect(() => new WorkerPool(makeOpts({ workerScript: MISSING_SCRIPT }))).not.toThrow();
|
||||
});
|
||||
|
||||
it('enqueue is a no-op in fallback mode — callbacks are never called', () => {
|
||||
const opts = makeOpts({ workerScript: MISSING_SCRIPT });
|
||||
const pool = new WorkerPool(opts);
|
||||
|
||||
pool.enqueue('job-1', '/repo/1');
|
||||
|
||||
expect(opts.onJobDone).not.toHaveBeenCalled();
|
||||
expect(opts.onProgress).not.toHaveBeenCalled();
|
||||
});
|
||||
|
||||
it('spawns no workers in fallback mode', () => {
|
||||
new WorkerPool(makeOpts({ workerScript: MISSING_SCRIPT }));
|
||||
|
||||
expect(createdWorkers).toHaveLength(0);
|
||||
});
|
||||
});
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Normal mode
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
describe('WorkerPool normal mode', () => {
|
||||
it('isFallbackMode is false when workerScript exists', () => {
|
||||
const pool = new WorkerPool(makeOpts({ concurrency: 1 }));
|
||||
|
||||
expect(pool.isFallbackMode).toBe(false);
|
||||
});
|
||||
|
||||
it('spawns `concurrency` parse workers on construction', () => {
|
||||
new WorkerPool(makeOpts({ concurrency: 3 }));
|
||||
|
||||
expect(createdWorkers).toHaveLength(3);
|
||||
});
|
||||
|
||||
// -------------------------------------------------------------------------
|
||||
// enqueue dispatches to an idle worker
|
||||
// -------------------------------------------------------------------------
|
||||
|
||||
it('enqueue sends { type: "run", jobId } to an idle worker', () => {
|
||||
const pool = new WorkerPool(makeOpts({ concurrency: 1 }));
|
||||
|
||||
pool.enqueue('job-42', '/repo/1');
|
||||
|
||||
expect(createdWorkers).toHaveLength(1);
|
||||
expect(createdWorkers[0].postMessage).toHaveBeenCalledWith({ type: 'run', jobId: 'job-42' });
|
||||
});
|
||||
|
||||
// -------------------------------------------------------------------------
|
||||
// "done" message — onJobDone called, next queued job dispatched
|
||||
// -------------------------------------------------------------------------
|
||||
|
||||
it('calls onJobDone and dispatches the next queued job when a worker emits "done"', () => {
|
||||
const opts = makeOpts({ concurrency: 1 });
|
||||
const pool = new WorkerPool(opts);
|
||||
|
||||
// Enqueue two jobs — second must wait because concurrency=1
|
||||
pool.enqueue('job-A', '/repo/1');
|
||||
pool.enqueue('job-B', '/repo/2');
|
||||
|
||||
const worker = createdWorkers[0];
|
||||
|
||||
// Simulate job-A completing
|
||||
worker.emit('message', { type: 'done', jobId: 'job-A' });
|
||||
|
||||
expect(opts.onJobDone).toHaveBeenCalledWith('job-A');
|
||||
// The same worker should now run job-B
|
||||
expect(worker.postMessage).toHaveBeenCalledWith({ type: 'run', jobId: 'job-B' });
|
||||
});
|
||||
|
||||
// -------------------------------------------------------------------------
|
||||
// "failed" message — onJobFailed called
|
||||
// -------------------------------------------------------------------------
|
||||
|
||||
it('calls onJobFailed when a worker emits a "failed" message', () => {
|
||||
const opts = makeOpts({ concurrency: 1 });
|
||||
const pool = new WorkerPool(opts);
|
||||
|
||||
pool.enqueue('job-fail', '/repo/1');
|
||||
|
||||
const worker = createdWorkers[0];
|
||||
worker.emit('message', { type: 'failed', jobId: 'job-fail', error: 'parse error' });
|
||||
|
||||
expect(opts.onJobFailed).toHaveBeenCalledWith('job-fail', 'parse error');
|
||||
});
|
||||
|
||||
// -------------------------------------------------------------------------
|
||||
// Per-repo serialization
|
||||
// -------------------------------------------------------------------------
|
||||
|
||||
it('does not dispatch a second job for the same repo while first is running', () => {
|
||||
const opts = makeOpts({ concurrency: 2 });
|
||||
const pool = new WorkerPool(opts);
|
||||
|
||||
pool.enqueue('job-1', '/repo/same');
|
||||
pool.enqueue('job-2', '/repo/same');
|
||||
|
||||
// Only job-1 should have been dispatched (run message sent)
|
||||
const runCalls = createdWorkers.flatMap((w) =>
|
||||
w.postMessage.mock.calls.filter((c) => (c[0] as { type: string })?.type === 'run')
|
||||
);
|
||||
expect(runCalls.filter((c) => (c[0] as unknown as { jobId: string }).jobId === 'job-1')).toHaveLength(1);
|
||||
expect(runCalls.filter((c) => (c[0] as unknown as { jobId: string }).jobId === 'job-2')).toHaveLength(0);
|
||||
});
|
||||
|
||||
it('starts jobs for different repos concurrently', () => {
|
||||
const opts = makeOpts({ concurrency: 2 });
|
||||
const pool = new WorkerPool(opts);
|
||||
|
||||
pool.enqueue('job-alpha', '/repo/alpha');
|
||||
pool.enqueue('job-beta', '/repo/beta');
|
||||
|
||||
const runCalls = createdWorkers.flatMap((w) =>
|
||||
w.postMessage.mock.calls.filter((c) => (c[0] as { type: string })?.type === 'run')
|
||||
);
|
||||
const dispatchedIds = runCalls.map((c) => (c[0] as unknown as { jobId: string }).jobId);
|
||||
expect(dispatchedIds).toContain('job-alpha');
|
||||
expect(dispatchedIds).toContain('job-beta');
|
||||
});
|
||||
|
||||
// -------------------------------------------------------------------------
|
||||
// Worker crash (exit code != 0)
|
||||
// -------------------------------------------------------------------------
|
||||
|
||||
it('calls onJobFailed and spawns a replacement worker when a worker exits with code 1', () => {
|
||||
const opts = makeOpts({ concurrency: 1 });
|
||||
const pool = new WorkerPool(opts);
|
||||
|
||||
pool.enqueue('job-crash', '/repo/1');
|
||||
|
||||
const originalWorker = createdWorkers[0];
|
||||
// Simulate crash while the job is running
|
||||
originalWorker.emit('exit', 1);
|
||||
|
||||
expect(opts.onJobFailed).toHaveBeenCalledWith('job-crash', expect.stringContaining('1'));
|
||||
// A replacement worker must have been spawned
|
||||
expect(createdWorkers.length).toBeGreaterThan(1);
|
||||
});
|
||||
|
||||
it('does NOT call onJobFailed when a worker exits cleanly (code 0)', () => {
|
||||
const opts = makeOpts({ concurrency: 1 });
|
||||
const pool = new WorkerPool(opts);
|
||||
|
||||
// Exit without any running job
|
||||
const worker = createdWorkers[0];
|
||||
worker.emit('exit', 0);
|
||||
|
||||
expect(opts.onJobFailed).not.toHaveBeenCalled();
|
||||
});
|
||||
|
||||
// -------------------------------------------------------------------------
|
||||
// setMaxConcurrency — scale up
|
||||
// -------------------------------------------------------------------------
|
||||
|
||||
it('spawns additional workers when setMaxConcurrency is increased', () => {
|
||||
const pool = new WorkerPool(makeOpts({ concurrency: 1 }));
|
||||
const before = createdWorkers.length; // 1
|
||||
|
||||
pool.setMaxConcurrency(3);
|
||||
|
||||
expect(createdWorkers.length).toBe(before + 2);
|
||||
});
|
||||
|
||||
// -------------------------------------------------------------------------
|
||||
// setMaxConcurrency — scale down
|
||||
// -------------------------------------------------------------------------
|
||||
|
||||
it('sends "shutdown" to idle workers when setMaxConcurrency is decreased', () => {
|
||||
const opts = makeOpts({ concurrency: 3 });
|
||||
const pool = new WorkerPool(opts);
|
||||
|
||||
pool.setMaxConcurrency(1);
|
||||
|
||||
const shutdownWorkers = createdWorkers.filter((w) =>
|
||||
w.postMessage.mock.calls.some((c) => (c[0] as { type: string })?.type === 'shutdown')
|
||||
);
|
||||
// Two workers should have received shutdown
|
||||
expect(shutdownWorkers.length).toBeGreaterThanOrEqual(2);
|
||||
});
|
||||
|
||||
// -------------------------------------------------------------------------
|
||||
// shutdown
|
||||
// -------------------------------------------------------------------------
|
||||
|
||||
it('sends "shutdown" to all workers on pool.shutdown()', () => {
|
||||
const opts = makeOpts({ concurrency: 2 });
|
||||
const pool = new WorkerPool(opts);
|
||||
|
||||
// Don't await — shutdown() is async but the postMessage calls happen synchronously
|
||||
void pool.shutdown();
|
||||
|
||||
for (const worker of createdWorkers) {
|
||||
const hasShutdown = worker.postMessage.mock.calls.some(
|
||||
(c) => (c[0] as { type: string })?.type === 'shutdown'
|
||||
);
|
||||
expect(hasShutdown).toBe(true);
|
||||
}
|
||||
});
|
||||
|
||||
// -------------------------------------------------------------------------
|
||||
// Enqueue after shutdown is a no-op
|
||||
// -------------------------------------------------------------------------
|
||||
|
||||
it('ignores enqueue calls after shutdown is initiated', () => {
|
||||
const opts = makeOpts({ concurrency: 1 });
|
||||
const pool = new WorkerPool(opts);
|
||||
|
||||
// Don't await — shutdown() sets shuttingDown=true synchronously
|
||||
void pool.shutdown();
|
||||
|
||||
// Reset postMessage mocks to isolate post-shutdown calls
|
||||
for (const w of createdWorkers) w.postMessage.mockClear();
|
||||
|
||||
pool.enqueue('job-late', '/repo/1');
|
||||
|
||||
const runCalls = createdWorkers.flatMap((w) =>
|
||||
w.postMessage.mock.calls.filter((c) => (c[0] as { type: string })?.type === 'run')
|
||||
);
|
||||
expect(runCalls).toHaveLength(0);
|
||||
});
|
||||
});
|
||||
@@ -8,7 +8,7 @@ export interface WorkerPoolOptions {
|
||||
embedWorkerScript: string;
|
||||
dbPath: string;
|
||||
embeddingProfileId?: string;
|
||||
onProgress: (jobId: string, msg: ParseWorkerResponse) => void;
|
||||
onProgress: (jobId: string, msg: Extract<ParseWorkerResponse, { type: 'progress' }>) => void;
|
||||
onJobDone: (jobId: string) => void;
|
||||
onJobFailed: (jobId: string, error: string) => void;
|
||||
onEmbedDone: (jobId: string) => void;
|
||||
|
||||
@@ -342,6 +342,8 @@ export class RepositoryService {
|
||||
progress: 0,
|
||||
totalFiles: 0,
|
||||
processedFiles: 0,
|
||||
stage: 'queued',
|
||||
stageDetail: null,
|
||||
error: null,
|
||||
startedAt: null,
|
||||
completedAt: null,
|
||||
@@ -355,6 +357,8 @@ export class RepositoryService {
|
||||
progress: job.progress,
|
||||
total_files: job.totalFiles,
|
||||
processed_files: job.processedFiles,
|
||||
stage: 'queued',
|
||||
stage_detail: null,
|
||||
error: job.error,
|
||||
started_at: null,
|
||||
completed_at: null,
|
||||
|
||||
155
src/lib/server/utils/git-changed-files.test.ts
Normal file
155
src/lib/server/utils/git-changed-files.test.ts
Normal file
@@ -0,0 +1,155 @@
|
||||
/**
|
||||
* Tests for getChangedFilesBetweenRefs (TRUEREF-0021).
|
||||
*
|
||||
* Uses vi.mock to intercept execFileSync so no real git process is spawned.
|
||||
*/
|
||||
|
||||
import { describe, it, expect, vi, beforeEach } from 'vitest';
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Mock node:child_process before importing the module under test.
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
vi.mock('node:child_process', () => ({
|
||||
execSync: vi.fn(),
|
||||
execFileSync: vi.fn()
|
||||
}));
|
||||
|
||||
import { execFileSync } from 'node:child_process';
|
||||
import { getChangedFilesBetweenRefs } from '$lib/server/utils/git.js';
|
||||
|
||||
const mockExecFileSync = vi.mocked(execFileSync);
|
||||
|
||||
const BASE_OPTS = { repoPath: '/tmp/fake-repo', base: 'v1.0.0', head: 'v2.0.0' };
|
||||
|
||||
beforeEach(() => {
|
||||
mockExecFileSync.mockReset();
|
||||
});
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Status code parsing
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
describe('getChangedFilesBetweenRefs', () => {
|
||||
it("parses an 'A' line as status 'added'", () => {
|
||||
mockExecFileSync.mockReturnValue('A\tsrc/new-file.ts');
|
||||
|
||||
const result = getChangedFilesBetweenRefs(BASE_OPTS);
|
||||
|
||||
expect(result).toHaveLength(1);
|
||||
expect(result[0]).toEqual({ path: 'src/new-file.ts', status: 'added' });
|
||||
});
|
||||
|
||||
it("parses an 'M' line as status 'modified'", () => {
|
||||
mockExecFileSync.mockReturnValue('M\tsrc/existing.ts');
|
||||
|
||||
const result = getChangedFilesBetweenRefs(BASE_OPTS);
|
||||
|
||||
expect(result).toHaveLength(1);
|
||||
expect(result[0]).toEqual({ path: 'src/existing.ts', status: 'modified' });
|
||||
});
|
||||
|
||||
it("parses a 'D' line as status 'removed'", () => {
|
||||
mockExecFileSync.mockReturnValue('D\tsrc/deleted.ts');
|
||||
|
||||
const result = getChangedFilesBetweenRefs(BASE_OPTS);
|
||||
|
||||
expect(result).toHaveLength(1);
|
||||
expect(result[0]).toEqual({ path: 'src/deleted.ts', status: 'removed' });
|
||||
});
|
||||
|
||||
it("parses an 'R85' line as status 'renamed' with previousPath", () => {
|
||||
mockExecFileSync.mockReturnValue('R85\tsrc/old-name.ts\tsrc/new-name.ts');
|
||||
|
||||
const result = getChangedFilesBetweenRefs(BASE_OPTS);
|
||||
|
||||
expect(result).toHaveLength(1);
|
||||
expect(result[0]).toEqual({
|
||||
path: 'src/new-name.ts',
|
||||
status: 'renamed',
|
||||
previousPath: 'src/old-name.ts'
|
||||
});
|
||||
});
|
||||
|
||||
it('returns an empty array for empty output', () => {
|
||||
mockExecFileSync.mockReturnValue('');
|
||||
|
||||
const result = getChangedFilesBetweenRefs(BASE_OPTS);
|
||||
|
||||
expect(result).toHaveLength(0);
|
||||
expect(result).toEqual([]);
|
||||
});
|
||||
|
||||
it('parses multiple lines correctly', () => {
|
||||
mockExecFileSync.mockReturnValue(
|
||||
['A\tadded.ts', 'M\tmodified.ts', 'D\tdeleted.ts', 'R100\told.ts\tnew.ts'].join('\n')
|
||||
);
|
||||
|
||||
const result = getChangedFilesBetweenRefs(BASE_OPTS);
|
||||
|
||||
expect(result).toHaveLength(4);
|
||||
expect(result[0]).toMatchObject({ path: 'added.ts', status: 'added' });
|
||||
expect(result[1]).toMatchObject({ path: 'modified.ts', status: 'modified' });
|
||||
expect(result[2]).toMatchObject({ path: 'deleted.ts', status: 'removed' });
|
||||
expect(result[3]).toMatchObject({ path: 'new.ts', status: 'renamed', previousPath: 'old.ts' });
|
||||
});
|
||||
|
||||
// -------------------------------------------------------------------------
|
||||
// Error handling
|
||||
// -------------------------------------------------------------------------
|
||||
|
||||
it('throws a descriptive error when execFileSync throws', () => {
|
||||
mockExecFileSync.mockImplementation(() => {
|
||||
throw new Error('fatal: not a git repository');
|
||||
});
|
||||
|
||||
expect(() => getChangedFilesBetweenRefs(BASE_OPTS)).toThrowError(
|
||||
/Failed to get changed files between 'v1\.0\.0' and 'v2\.0\.0' in \/tmp\/fake-repo/
|
||||
);
|
||||
});
|
||||
|
||||
// -------------------------------------------------------------------------
|
||||
// Shell-injection safety: first arg must be 'git', flags as array elements
|
||||
// -------------------------------------------------------------------------
|
||||
|
||||
it('calls execFileSync with "git" as the executable (no shell)', () => {
|
||||
mockExecFileSync.mockReturnValue('');
|
||||
|
||||
getChangedFilesBetweenRefs(BASE_OPTS);
|
||||
|
||||
const [executable, args] = mockExecFileSync.mock.calls[0] as [string, string[]];
|
||||
expect(executable).toBe('git');
|
||||
// Each flag must be a separate element — no shell concatenation
|
||||
expect(Array.isArray(args)).toBe(true);
|
||||
expect(args).toContain('diff');
|
||||
expect(args).toContain('--name-status');
|
||||
// Base and head are separate args, not joined with a shell metacharacter
|
||||
expect(args).toContain('v1.0.0');
|
||||
expect(args).toContain('v2.0.0');
|
||||
});
|
||||
|
||||
it('passes the repoPath via -C flag as a separate array element', () => {
|
||||
mockExecFileSync.mockReturnValue('');
|
||||
|
||||
getChangedFilesBetweenRefs(BASE_OPTS);
|
||||
|
||||
const [, args] = mockExecFileSync.mock.calls[0] as [string, string[]];
|
||||
const cIdx = args.indexOf('-C');
|
||||
expect(cIdx).not.toBe(-1);
|
||||
expect(args[cIdx + 1]).toBe('/tmp/fake-repo');
|
||||
});
|
||||
|
||||
// -------------------------------------------------------------------------
|
||||
// Unknown status codes are silently skipped
|
||||
// -------------------------------------------------------------------------
|
||||
|
||||
it('silently skips lines with unknown status codes', () => {
|
||||
// 'X' is not a known status
|
||||
mockExecFileSync.mockReturnValue('X\tunknown.ts\nM\tknown.ts');
|
||||
|
||||
const result = getChangedFilesBetweenRefs(BASE_OPTS);
|
||||
|
||||
expect(result).toHaveLength(1);
|
||||
expect(result[0]).toMatchObject({ path: 'known.ts', status: 'modified' });
|
||||
});
|
||||
});
|
||||
Reference in New Issue
Block a user