TRUEREF-0023 rewrite indexing pipeline - parallel reads - serialized writes

This commit is contained in:
Giancarmine Salucci
2026-04-02 09:49:38 +02:00
parent 9525c58e9a
commit f86be4106b
68 changed files with 5042 additions and 3131 deletions

View File

@@ -466,12 +466,15 @@ describe('IndexingPipeline', () => {
const job1 = makeJob();
await pipeline.run(job1 as never);
const firstSnippetIds = (db.prepare(`SELECT id FROM snippets ORDER BY id`).all() as { id: string }[])
.map((row) => row.id);
const firstSnippetIds = (
db.prepare(`SELECT id FROM snippets ORDER BY id`).all() as { id: string }[]
).map((row) => row.id);
expect(firstSnippetIds.length).toBeGreaterThan(0);
const firstEmbeddingCount = (
db.prepare(`SELECT COUNT(*) as n FROM snippet_embeddings WHERE profile_id = 'local-default'`).get() as {
db
.prepare(`SELECT COUNT(*) as n FROM snippet_embeddings WHERE profile_id = 'local-default'`)
.get() as {
n: number;
}
).n;
@@ -483,11 +486,15 @@ describe('IndexingPipeline', () => {
const job2 = db.prepare(`SELECT * FROM indexing_jobs WHERE id = ?`).get(job2Id) as never;
await pipeline.run(job2);
const secondSnippetIds = (db.prepare(`SELECT id FROM snippets ORDER BY id`).all() as {
id: string;
}[]).map((row) => row.id);
const secondSnippetIds = (
db.prepare(`SELECT id FROM snippets ORDER BY id`).all() as {
id: string;
}[]
).map((row) => row.id);
const secondEmbeddingCount = (
db.prepare(`SELECT COUNT(*) as n FROM snippet_embeddings WHERE profile_id = 'local-default'`).get() as {
db
.prepare(`SELECT COUNT(*) as n FROM snippet_embeddings WHERE profile_id = 'local-default'`)
.get() as {
n: number;
}
).n;
@@ -918,9 +925,9 @@ describe('IndexingPipeline', () => {
await pipeline.run(job as never);
const docs = db
.prepare(`SELECT file_path FROM documents ORDER BY file_path`)
.all() as { file_path: string }[];
const docs = db.prepare(`SELECT file_path FROM documents ORDER BY file_path`).all() as {
file_path: string;
}[];
const filePaths = docs.map((d) => d.file_path);
// migration-guide.md and docs/legacy-api.md must be absent.
@@ -956,7 +963,10 @@ describe('IndexingPipeline', () => {
expect(row).toBeDefined();
const rules = JSON.parse(row!.rules);
expect(rules).toEqual(['Always use TypeScript strict mode', 'Prefer async/await over callbacks']);
expect(rules).toEqual([
'Always use TypeScript strict mode',
'Prefer async/await over callbacks'
]);
});
it('persists version-specific rules under (repositoryId, versionId) when job has versionId', async () => {
@@ -1219,12 +1229,7 @@ describe('differential indexing', () => {
insertSnippet(db, doc1Id, { repository_id: '/test/repo', version_id: ancestorVersionId });
insertSnippet(db, doc2Id, { repository_id: '/test/repo', version_id: ancestorVersionId });
const pipeline = new IndexingPipeline(
db,
vi.fn() as never,
{ crawl: vi.fn() } as never,
null
);
const pipeline = new IndexingPipeline(db, vi.fn() as never, { crawl: vi.fn() } as never, null);
(pipeline as unknown as PipelineInternals).cloneFromAncestor(
ancestorVersionId,
targetVersionId,
@@ -1236,9 +1241,7 @@ describe('differential indexing', () => {
.prepare(`SELECT * FROM documents WHERE version_id = ?`)
.all(targetVersionId) as { id: string; file_path: string }[];
expect(targetDocs).toHaveLength(2);
expect(targetDocs.map((d) => d.file_path).sort()).toEqual(
['README.md', 'src/index.ts'].sort()
);
expect(targetDocs.map((d) => d.file_path).sort()).toEqual(['README.md', 'src/index.ts'].sort());
// New IDs must differ from ancestor doc IDs.
const targetDocIds = targetDocs.map((d) => d.id);
expect(targetDocIds).not.toContain(doc1Id);
@@ -1261,12 +1264,7 @@ describe('differential indexing', () => {
checksum: 'sha-main'
});
const pipeline = new IndexingPipeline(
db,
vi.fn() as never,
{ crawl: vi.fn() } as never,
null
);
const pipeline = new IndexingPipeline(db, vi.fn() as never, { crawl: vi.fn() } as never, null);
(pipeline as unknown as PipelineInternals).cloneFromAncestor(
ancestorVersionId,
targetVersionId,
@@ -1323,9 +1321,9 @@ describe('differential indexing', () => {
await pipeline.run(job);
const updatedJob = db
.prepare(`SELECT status FROM indexing_jobs WHERE id = ?`)
.get(jobId) as { status: string };
const updatedJob = db.prepare(`SELECT status FROM indexing_jobs WHERE id = ?`).get(jobId) as {
status: string;
};
expect(updatedJob.status).toBe('done');
const docs = db
@@ -1375,9 +1373,7 @@ describe('differential indexing', () => {
deletedPaths: new Set<string>(),
unchangedPaths: new Set(['unchanged.md'])
};
const spy = vi
.spyOn(diffStrategy, 'buildDifferentialPlan')
.mockResolvedValueOnce(mockPlan);
const spy = vi.spyOn(diffStrategy, 'buildDifferentialPlan').mockResolvedValueOnce(mockPlan);
const pipeline = new IndexingPipeline(
db,
@@ -1398,9 +1394,9 @@ describe('differential indexing', () => {
spy.mockRestore();
// 6. Assert job completed and both docs exist under the target version.
const finalJob = db
.prepare(`SELECT status FROM indexing_jobs WHERE id = ?`)
.get(jobId) as { status: string };
const finalJob = db.prepare(`SELECT status FROM indexing_jobs WHERE id = ?`).get(jobId) as {
status: string;
};
expect(finalJob.status).toBe('done');
const targetDocs = db