feat(MULTIVERSION-0001): wire trueref.json into pipeline + per-version rules
- Add migration 0003: recreate repository_configs with a nullable version_id column and two partial unique indexes (repo-wide: version_id IS NULL; per-version: (repository_id, version_id) WHERE version_id IS NOT NULL)
- Update schema.ts to reflect the new composite structure, expressing the partial uniqueIndex constraints via the drizzle-orm sql helper
- IndexingPipeline: parse trueref.json / context7.json after the crawl, apply the excludeFiles filter before diff computation, and update totalFiles accordingly
- IndexingPipeline: persist repo-wide rules (version_id = null) and version-specific rules (when versionId is set) via the upsertRepoConfig helper
- Add a matchesExcludePattern static helper supporting plain filenames, glob prefixes (docs/legacy*), and exact path patterns
- context endpoint: split getRules into repo-wide and version-specific lookups with a dedup merge; pass versionId at the call site
- Update test DB loaders to include migration 0003
- Add pipeline tests for excludeFiles, repo-wide rules persistence, and per-version rules persistence
- Add integration tests for merged rules, repo-only rules, and dedup logic

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -26,7 +26,8 @@ function createTestDb(): Database.Database {
|
||||
for (const migrationFile of [
|
||||
'0000_large_master_chief.sql',
|
||||
'0001_quick_nighthawk.sql',
|
||||
'0002_silky_stellaris.sql'
|
||||
'0002_silky_stellaris.sql',
|
||||
'0003_multiversion_config.sql'
|
||||
]) {
|
||||
const migrationSql = readFileSync(join(migrationsFolder, migrationFile), 'utf-8');
|
||||
|
||||
@@ -771,4 +772,117 @@ describe('IndexingPipeline', () => {
|
||||
ref: undefined
|
||||
});
|
||||
});
|
||||
|
||||
it('excludes files matching excludeFiles patterns from trueref.json', async () => {
|
||||
const truerefConfig = JSON.stringify({
|
||||
excludeFiles: ['migration-guide.md', 'docs/legacy*']
|
||||
});
|
||||
const files = [
|
||||
{
|
||||
path: 'trueref.json',
|
||||
content: truerefConfig,
|
||||
sha: 'sha-config',
|
||||
language: 'json'
|
||||
},
|
||||
{
|
||||
path: 'README.md',
|
||||
content: '# Hello\n\nThis is documentation.',
|
||||
sha: 'sha-readme',
|
||||
language: 'markdown'
|
||||
},
|
||||
{
|
||||
path: 'migration-guide.md',
|
||||
content: '# Migration Guide\n\nThis should be excluded.',
|
||||
sha: 'sha-migration',
|
||||
language: 'markdown'
|
||||
},
|
||||
{
|
||||
path: 'docs/legacy-api.md',
|
||||
content: '# Legacy API\n\nShould be excluded by glob prefix.',
|
||||
sha: 'sha-legacy',
|
||||
language: 'markdown'
|
||||
}
|
||||
];
|
||||
const pipeline = makePipeline({ files, totalFiles: files.length });
|
||||
const job = makeJob();
|
||||
|
||||
await pipeline.run(job as never);
|
||||
|
||||
const docs = db
|
||||
.prepare(`SELECT file_path FROM documents ORDER BY file_path`)
|
||||
.all() as { file_path: string }[];
|
||||
const filePaths = docs.map((d) => d.file_path);
|
||||
|
||||
// migration-guide.md and docs/legacy-api.md must be absent.
|
||||
expect(filePaths).not.toContain('migration-guide.md');
|
||||
expect(filePaths).not.toContain('docs/legacy-api.md');
|
||||
|
||||
// README.md must still be indexed.
|
||||
expect(filePaths).toContain('README.md');
|
||||
});
|
||||
|
||||
it('persists repo-wide rules from trueref.json to repository_configs after indexing', async () => {
|
||||
const truerefConfig = JSON.stringify({
|
||||
rules: ['Always use TypeScript strict mode', 'Prefer async/await over callbacks']
|
||||
});
|
||||
const files = [
|
||||
{
|
||||
path: 'trueref.json',
|
||||
content: truerefConfig,
|
||||
sha: 'sha-config',
|
||||
language: 'json'
|
||||
}
|
||||
];
|
||||
const pipeline = makePipeline({ files, totalFiles: files.length });
|
||||
const job = makeJob();
|
||||
|
||||
await pipeline.run(job as never);
|
||||
|
||||
const row = db
|
||||
.prepare(
|
||||
`SELECT rules FROM repository_configs WHERE repository_id = '/test/repo' AND version_id IS NULL`
|
||||
)
|
||||
.get() as { rules: string } | undefined;
|
||||
|
||||
expect(row).toBeDefined();
|
||||
const rules = JSON.parse(row!.rules);
|
||||
expect(rules).toEqual(['Always use TypeScript strict mode', 'Prefer async/await over callbacks']);
|
||||
});
|
||||
|
||||
it('persists version-specific rules under (repositoryId, versionId) when job has versionId', async () => {
|
||||
const versionId = insertVersion(db, { tag: 'v2.0.0', state: 'pending' });
|
||||
const truerefConfig = JSON.stringify({
|
||||
rules: ['This is v2. Use the new Builder API.']
|
||||
});
|
||||
const files = [
|
||||
{
|
||||
path: 'trueref.json',
|
||||
content: truerefConfig,
|
||||
sha: 'sha-config',
|
||||
language: 'json'
|
||||
}
|
||||
];
|
||||
const pipeline = makePipeline({ files, totalFiles: files.length });
|
||||
const job = makeJob('/test/repo', versionId);
|
||||
|
||||
await pipeline.run(job as never);
|
||||
|
||||
// Repo-wide row (version_id IS NULL) must exist.
|
||||
const repoRow = db
|
||||
.prepare(
|
||||
`SELECT rules FROM repository_configs WHERE repository_id = '/test/repo' AND version_id IS NULL`
|
||||
)
|
||||
.get() as { rules: string } | undefined;
|
||||
expect(repoRow).toBeDefined();
|
||||
|
||||
// Version-specific row must also exist.
|
||||
const versionRow = db
|
||||
.prepare(
|
||||
`SELECT rules FROM repository_configs WHERE repository_id = '/test/repo' AND version_id = ?`
|
||||
)
|
||||
.get(versionId) as { rules: string } | undefined;
|
||||
expect(versionRow).toBeDefined();
|
||||
const rules = JSON.parse(versionRow!.rules);
|
||||
expect(rules).toEqual(['This is v2. Use the new Builder API.']);
|
||||
});
|
||||
});
|
||||
|
||||
@@ -22,6 +22,7 @@ import type { EmbeddingService } from '$lib/server/embeddings/embedding.service.
|
||||
import { RepositoryMapper } from '$lib/server/mappers/repository.mapper.js';
|
||||
import { IndexingJob } from '$lib/server/models/indexing-job.js';
|
||||
import { Repository, RepositoryEntity } from '$lib/server/models/repository.js';
|
||||
import { resolveConfig } from '$lib/server/config/config-parser.js';
|
||||
import { parseFile } from '$lib/server/parser/index.js';
|
||||
import { computeTrustScore } from '$lib/server/search/trust-score.js';
|
||||
import { computeDiff } from './diff.js';
|
||||
@@ -99,15 +100,32 @@ export class IndexingPipeline {
|
||||
? this.getVersionTag(normJob.versionId)
|
||||
: undefined;
|
||||
const crawlResult = await this.crawl(repo, versionTag);
|
||||
const totalFiles = crawlResult.totalFiles;
|
||||
|
||||
// Parse trueref.json / context7.json if present in the crawl results.
|
||||
const configFile = crawlResult.files.find(
|
||||
(f) => f.path === 'trueref.json' || f.path === 'context7.json'
|
||||
);
|
||||
const parsedConfig = configFile
|
||||
? resolveConfig([{ filename: configFile.path, content: configFile.content }])
|
||||
: null;
|
||||
const excludeFiles: string[] = parsedConfig?.config.excludeFiles ?? [];
|
||||
|
||||
// Filter out excluded files before diff computation.
|
||||
const filteredFiles =
|
||||
excludeFiles.length > 0
|
||||
? crawlResult.files.filter(
|
||||
(f) => !excludeFiles.some((pattern) => IndexingPipeline.matchesExcludePattern(f.path, pattern))
|
||||
)
|
||||
: crawlResult.files;
|
||||
|
||||
const totalFiles = filteredFiles.length;
|
||||
this.updateJob(job.id, { totalFiles });
|
||||
|
||||
// ---- Stage 2: Parse & diff ------------------------------------------
|
||||
// Load all existing documents for this repo so computeDiff can
|
||||
// classify every crawled file and detect deletions.
|
||||
const existingDocs = this.getExistingDocuments(repo.id, normJob.versionId);
|
||||
const diff = computeDiff(crawlResult.files, existingDocs);
|
||||
const diff = computeDiff(filteredFiles, existingDocs);
|
||||
|
||||
// Accumulate new documents/snippets; skip unchanged files.
|
||||
const newDocuments: NewDocument[] = [];
|
||||
@@ -244,6 +262,16 @@ export class IndexingPipeline {
|
||||
});
|
||||
}
|
||||
|
||||
// ---- Stage 6: Persist rules from config ----------------------------
|
||||
if (parsedConfig?.config.rules?.length) {
|
||||
// Repo-wide rules (versionId = null).
|
||||
this.upsertRepoConfig(repo.id, null, parsedConfig.config.rules);
|
||||
// Version-specific rules stored separately when indexing a version.
|
||||
if (normJob.versionId) {
|
||||
this.upsertRepoConfig(repo.id, normJob.versionId, parsedConfig.config.rules);
|
||||
}
|
||||
}
|
||||
|
||||
this.updateJob(job.id, {
|
||||
status: 'done',
|
||||
progress: 100,
|
||||
@@ -476,6 +504,65 @@ export class IndexingPipeline {
|
||||
const values = [...Object.values(fields), id];
|
||||
this.db.prepare(`UPDATE repository_versions SET ${sets} WHERE id = ?`).run(...values);
|
||||
}
|
||||
|
||||
private upsertRepoConfig(
|
||||
repositoryId: string,
|
||||
versionId: string | null,
|
||||
rules: string[]
|
||||
): void {
|
||||
const now = Math.floor(Date.now() / 1000);
|
||||
// Use DELETE + INSERT because ON CONFLICT … DO UPDATE doesn't work reliably
|
||||
// with partial unique indexes in all SQLite versions.
|
||||
if (versionId === null) {
|
||||
this.db
|
||||
.prepare(
|
||||
`DELETE FROM repository_configs WHERE repository_id = ? AND version_id IS NULL`
|
||||
)
|
||||
.run(repositoryId);
|
||||
} else {
|
||||
this.db
|
||||
.prepare(
|
||||
`DELETE FROM repository_configs WHERE repository_id = ? AND version_id = ?`
|
||||
)
|
||||
.run(repositoryId, versionId);
|
||||
}
|
||||
this.db
|
||||
.prepare(
|
||||
`INSERT INTO repository_configs (repository_id, version_id, rules, updated_at)
|
||||
VALUES (?, ?, ?, ?)`
|
||||
)
|
||||
.run(repositoryId, versionId, JSON.stringify(rules), now);
|
||||
}
|
||||
|
||||
// -------------------------------------------------------------------------
|
||||
// Private — static helpers
|
||||
// -------------------------------------------------------------------------
|
||||
|
||||
/**
|
||||
* Returns true when `filePath` matches the given exclude `pattern`.
|
||||
*
|
||||
* Supported patterns:
|
||||
* - Plain filename: `migration-guide.md` matches any path ending in `/migration-guide.md`
|
||||
* or equal to `migration-guide.md`.
|
||||
* - Glob prefix with wildcard: `docs/migration*` matches paths that start with `docs/migration`.
|
||||
* - Exact path: `src/legacy/old-api.ts` matches exactly that path.
|
||||
*/
|
||||
private static matchesExcludePattern(filePath: string, pattern: string): boolean {
|
||||
if (pattern.includes('*')) {
|
||||
// Glob-style: treat everything before the '*' as a required prefix.
|
||||
const prefix = pattern.slice(0, pattern.indexOf('*'));
|
||||
return filePath.startsWith(prefix);
|
||||
}
|
||||
|
||||
// No wildcard — treat as plain name or exact path.
|
||||
if (!pattern.includes('/')) {
|
||||
// Plain filename: match basename (path ends with /<pattern> or equals pattern).
|
||||
return filePath === pattern || filePath.endsWith('/' + pattern);
|
||||
}
|
||||
|
||||
// Contains a slash — exact path match.
|
||||
return filePath === pattern;
|
||||
}
|
||||
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
Reference in New Issue
Block a user