123 lines
4.0 KiB
TypeScript
123 lines
4.0 KiB
TypeScript
/**
 * Differential indexing strategy coordinator (TRUEREF-0021).
 *
 * Determines whether differential indexing can be used for a given version tag,
 * and if so, builds a plan describing which files to clone from the ancestor
 * and which files to crawl fresh.
 */
|
|
import type Database from 'better-sqlite3';
|
|
import type { Repository } from '$lib/server/models/repository.js';
|
|
import type { RepositoryVersion } from '$lib/server/models/repository-version.js';
|
|
import { RepositoryVersionMapper } from '$lib/server/mappers/repository-version.mapper.js';
|
|
import type { RepositoryVersionEntity } from '$lib/server/models/repository-version.js';
|
|
import { findBestAncestorVersion } from '$lib/server/utils/tag-order.js';
|
|
import { fetchGitHubChangedFiles } from '$lib/server/crawler/github-compare.js';
|
|
import { getChangedFilesBetweenRefs } from '$lib/server/utils/git.js';
|
|
import type { ChangedFile } from '$lib/server/crawler/types.js';
|
|
|
|
/**
 * Result of planning a differential index run against an already-indexed
 * ancestor version.
 *
 * The three path sets are disjoint views over the diff between the ancestor
 * tag and the target tag, as computed by `buildDifferentialPlan`.
 */
export interface DifferentialPlan {
	/** ID of the indexed version chosen as the ancestor to diff against. */
	ancestorVersionId: string;

	/** Tag of that ancestor; used as the base ref for git diff / GitHub compare. */
	ancestorTag: string;

	/** Paths reported as anything other than removed (added, modified, renamed destination). */
	changedPaths: Set<string>;

	/** Paths reported as removed in the target relative to the ancestor. */
	deletedPaths: Set<string>;

	/** Ancestor document paths that are neither changed nor deleted; these must be cloned. */
	unchangedPaths: Set<string>;
}
|
|
|
|
/**
 * Builds a differential indexing plan for `targetTag`, or returns `null` when
 * differential indexing is not applicable and the caller should fall back to
 * a full crawl.
 *
 * The plan is derived by:
 * 1. loading every `indexed` version of the repository,
 * 2. picking the best ancestor of `targetTag` among them,
 * 3. loading the distinct document file paths stored for that ancestor,
 * 4. fetching the files changed between the ancestor tag and `targetTag`
 *    (GitHub compare for `github` sources, local git diff otherwise),
 * 5. partitioning those files into changed / deleted sets, and
 * 6. treating every remaining ancestor path as unchanged.
 *
 * `null` is returned when no ancestor exists, the ancestor has no documents,
 * the GitHub owner/repository cannot be derived from `repo.sourceUrl`, no
 * ancestor file is left unchanged, or any step throws.
 *
 * @param params.repo - Repository being indexed.
 * @param params.targetTag - Version tag that is about to be indexed.
 * @param params.db - SQLite handle used to read versions and documents.
 * @param params._fetchGitHubChangedFiles - Test-only override of the GitHub compare call.
 * @returns The differential plan, or `null` to request a full crawl.
 */
export async function buildDifferentialPlan(params: {
	repo: Repository;
	targetTag: string;
	db: Database.Database;
	/** Override for testing only */
	_fetchGitHubChangedFiles?: typeof fetchGitHubChangedFiles;
}): Promise<DifferentialPlan | null> {
	const { repo, targetTag, db } = params;
	const fetchFn = params._fetchGitHubChangedFiles ?? fetchGitHubChangedFiles;

	try {
		// 1. Load all indexed versions for this repository
		const rows = db
			.prepare(
				`SELECT * FROM repository_versions WHERE repository_id = ? AND state = 'indexed'`
			)
			.all(repo.id) as RepositoryVersionEntity[];

		const indexedVersions: RepositoryVersion[] = rows.map((row) =>
			RepositoryVersionMapper.fromEntity(row)
		);

		// 2. Find the best ancestor version
		const ancestor = findBestAncestorVersion(targetTag, indexedVersions);
		if (!ancestor) return null;

		// 3. Load ancestor's document file paths
		const docRows = db
			.prepare(`SELECT DISTINCT file_path FROM documents WHERE version_id = ?`)
			.all(ancestor.id) as Array<{ file_path: string }>;

		const ancestorFilePaths = new Set(docRows.map((r) => r.file_path));
		// Nothing to clone from an ancestor that has no documents.
		if (ancestorFilePaths.size === 0) return null;

		// 4. Fetch changed files between ancestor and target
		let changedFiles: ChangedFile[];

		if (repo.source === 'github') {
			const [owner, rawRepoName] = new URL(repo.sourceUrl).pathname
				.split('/')
				.filter(Boolean);
			// Clone-style URLs may carry a `.git` suffix that is not part of the repository name.
			const repoName = rawRepoName?.replace(/\.git$/, '');

			// Without both path segments there is no repository to compare against;
			// bail out rather than calling the API with undefined identifiers.
			if (!owner || !repoName) return null;

			changedFiles = await fetchFn(
				owner,
				repoName,
				ancestor.tag,
				targetTag,
				repo.githubToken ?? undefined
			);
		} else {
			changedFiles = getChangedFilesBetweenRefs({
				repoPath: repo.sourceUrl,
				base: ancestor.tag,
				head: targetTag
			});
		}

		// 5. Partition changed files into changed and deleted sets
		// NOTE(review): a renamed file is recorded under `file.path` only. If the
		// diff source does not also report the old path as 'removed', that old
		// path stays in `unchangedPaths` below — confirm against ChangedFile.
		const changedPaths = new Set<string>();
		const deletedPaths = new Set<string>();

		for (const file of changedFiles) {
			if (file.status === 'removed') {
				deletedPaths.add(file.path);
			} else {
				changedPaths.add(file.path);
			}
		}

		// 6. Compute unchanged paths: ancestor paths minus changed minus deleted
		const unchangedPaths = new Set<string>();
		for (const p of ancestorFilePaths) {
			if (!changedPaths.has(p) && !deletedPaths.has(p)) {
				unchangedPaths.add(p);
			}
		}

		// 7. Return null when there's nothing to clone (all files changed)
		if (unchangedPaths.size === 0) return null;

		return {
			ancestorVersionId: ancestor.id,
			ancestorTag: ancestor.tag,
			changedPaths,
			deletedPaths,
			unchangedPaths
		};
	} catch (error: unknown) {
		// Fail-safe: fall back to full crawl on any error, but leave a trace so
		// that persistent failures (bad token, missing tag, schema drift) are visible.
		const reason = error instanceof Error ? error.message : String(error);
		console.warn(
			`[differential-strategy] Falling back to full crawl for tag "${targetTag}": ${reason}`
		);
		return null;
	}
}
|