chore(FEEDBACK-0001): linting
This commit is contained in:
@@ -30,39 +30,39 @@ Optimize re-indexing by skipping files that haven't changed since the last index
|
||||
|
||||
```typescript
|
||||
interface FileDiff {
|
||||
added: CrawledFile[]; // new files not in DB
|
||||
modified: CrawledFile[]; // files with changed checksum
|
||||
deleted: string[]; // file paths in DB but not in crawl
|
||||
unchanged: string[]; // file paths with matching checksum
|
||||
added: CrawledFile[]; // new files not in DB
|
||||
modified: CrawledFile[]; // files with changed checksum
|
||||
deleted: string[]; // file paths in DB but not in crawl
|
||||
unchanged: string[]; // file paths with matching checksum
|
||||
}
|
||||
|
||||
function computeDiff(
|
||||
crawledFiles: CrawledFile[],
|
||||
existingDocs: Document[] // documents currently in DB for this repo
|
||||
crawledFiles: CrawledFile[],
|
||||
existingDocs: Document[] // documents currently in DB for this repo
|
||||
): FileDiff {
|
||||
const existingMap = new Map(existingDocs.map(d => [d.filePath, d]));
|
||||
const crawledMap = new Map(crawledFiles.map(f => [f.path, f]));
|
||||
const existingMap = new Map(existingDocs.map((d) => [d.filePath, d]));
|
||||
const crawledMap = new Map(crawledFiles.map((f) => [f.path, f]));
|
||||
|
||||
const added: CrawledFile[] = [];
|
||||
const modified: CrawledFile[] = [];
|
||||
const unchanged: string[] = [];
|
||||
const added: CrawledFile[] = [];
|
||||
const modified: CrawledFile[] = [];
|
||||
const unchanged: string[] = [];
|
||||
|
||||
for (const file of crawledFiles) {
|
||||
const existing = existingMap.get(file.path);
|
||||
if (!existing) {
|
||||
added.push(file);
|
||||
} else if (existing.checksum !== file.sha) {
|
||||
modified.push(file);
|
||||
} else {
|
||||
unchanged.push(file.path);
|
||||
}
|
||||
}
|
||||
for (const file of crawledFiles) {
|
||||
const existing = existingMap.get(file.path);
|
||||
if (!existing) {
|
||||
added.push(file);
|
||||
} else if (existing.checksum !== file.sha) {
|
||||
modified.push(file);
|
||||
} else {
|
||||
unchanged.push(file.path);
|
||||
}
|
||||
}
|
||||
|
||||
const deleted = existingDocs
|
||||
.filter(doc => !crawledMap.has(doc.filePath))
|
||||
.map(doc => doc.filePath);
|
||||
const deleted = existingDocs
|
||||
.filter((doc) => !crawledMap.has(doc.filePath))
|
||||
.map((doc) => doc.filePath);
|
||||
|
||||
return { added, modified, deleted, unchanged };
|
||||
return { added, modified, deleted, unchanged };
|
||||
}
|
||||
```
|
||||
|
||||
@@ -78,7 +78,7 @@ const diff = computeDiff(crawledResult.files, existingDocs);
|
||||
|
||||
// Log diff summary
|
||||
this.updateJob(job.id, {
|
||||
totalFiles: crawledResult.files.length,
|
||||
totalFiles: crawledResult.files.length
|
||||
});
|
||||
|
||||
// Process only changed/new files
|
||||
@@ -89,29 +89,29 @@ const docIdsToDelete: string[] = [];
|
||||
|
||||
// Map modified files to their existing document IDs for deletion
|
||||
for (const file of diff.modified) {
|
||||
const existing = existingDocs.find(d => d.filePath === file.path);
|
||||
if (existing) docIdsToDelete.push(existing.id);
|
||||
const existing = existingDocs.find((d) => d.filePath === file.path);
|
||||
if (existing) docIdsToDelete.push(existing.id);
|
||||
}
|
||||
|
||||
// Map deleted file paths to document IDs
|
||||
for (const filePath of diff.deleted) {
|
||||
const existing = existingDocs.find(d => d.filePath === filePath);
|
||||
if (existing) docIdsToDelete.push(existing.id);
|
||||
const existing = existingDocs.find((d) => d.filePath === filePath);
|
||||
if (existing) docIdsToDelete.push(existing.id);
|
||||
}
|
||||
|
||||
// Parse new/modified files
|
||||
for (const [i, file] of filesToProcess.entries()) {
|
||||
const docId = crypto.randomUUID();
|
||||
newDocuments.push({ id: docId, ...buildDocument(file, repo.id, job.versionId) });
|
||||
newSnippets.push(...parseFile(file, { repositoryId: repo.id, documentId: docId }));
|
||||
const docId = crypto.randomUUID();
|
||||
newDocuments.push({ id: docId, ...buildDocument(file, repo.id, job.versionId) });
|
||||
newSnippets.push(...parseFile(file, { repositoryId: repo.id, documentId: docId }));
|
||||
|
||||
// Count ALL files (including skipped) in progress
|
||||
const totalProcessed = diff.unchanged.length + i + 1;
|
||||
const progress = Math.round((totalProcessed / crawledResult.files.length) * 80);
|
||||
this.updateJob(job.id, {
|
||||
processedFiles: totalProcessed,
|
||||
progress,
|
||||
});
|
||||
// Count ALL files (including skipped) in progress
|
||||
const totalProcessed = diff.unchanged.length + i + 1;
|
||||
const progress = Math.round((totalProcessed / crawledResult.files.length) * 80);
|
||||
this.updateJob(job.id, {
|
||||
processedFiles: totalProcessed,
|
||||
progress
|
||||
});
|
||||
}
|
||||
|
||||
// Atomic replacement of only changed documents
|
||||
@@ -123,6 +123,7 @@ this.replaceSnippets(repo.id, docIdsToDelete, newDocuments, newSnippets);
|
||||
## Performance Impact

For a typical repository with 1,000 files where 50 changed:

- **Without incremental**: 1,000 files parsed + 1,000 embed batches
- **With incremental**: 50 files parsed + 50 embed batches
- Estimated speedup: ~20x for re-indexing
|
||||
|
||||
Reference in New Issue
Block a user