feat(MULTIVERSION-0001): wire trueref.json into pipeline + per-version rules

- Add migration 0003: recreate repository_configs with nullable version_id
  column and two partial unique indexes (repo-wide: version_id IS NULL,
  per-version: (repository_id, version_id) WHERE version_id IS NOT NULL)
- Update schema.ts to reflect the new composite structure with uniqueIndex
  partial constraints via drizzle-orm sql helper
- IndexingPipeline: parse trueref.json / context7.json after crawl, apply
  excludeFiles filter before diff computation, update totalFiles accordingly
- IndexingPipeline: persist repo-wide rules (version_id=null) and
  version-specific rules (when versionId set) via upsertRepoConfig helper
- Add matchesExcludePattern static helper supporting plain filename,
  glob prefix (docs/legacy*), and exact path patterns
- context endpoint: split getRules into repo-wide + version-specific lookup
  with dedup merge; pass versionId at call site
- Update test DB loaders to include migration 0003
- Add pipeline tests for excludeFiles, repo-wide rules persistence, and
  per-version rules persistence
- Add integration tests for merged rules, repo-only rules, and dedup logic

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
Giancarmine Salucci
2026-03-28 10:44:30 +01:00
parent 255838dcc0
commit 666ec7d55f
7 changed files with 418 additions and 32 deletions

View File

@@ -54,24 +54,52 @@ interface RawRepoConfig {
rules: string | null;
}
function getRules(db: ReturnType<typeof getClient>, repositoryId: string): string[] {
const row = db
.prepare<
[string],
RawRepoConfig
>(`SELECT rules FROM repository_configs WHERE repository_id = ?`)
.get(repositoryId);
if (!row?.rules) return [];
/**
 * Parse the JSON-encoded `rules` column of a `repository_configs` row.
 *
 * The column is expected to hold a JSON array of strings. Anything else is
 * treated as "no rules" rather than an error, so a malformed config row never
 * breaks the context response.
 *
 * @param raw - Raw column value; `null`, `undefined` and `''` mean no rules.
 * @returns The string entries of the parsed array, in their original order.
 *   Returns an empty array when the value is missing, is not valid JSON, or
 *   does not decode to an array. Non-string array entries are dropped.
 */
function parseRulesJson(raw: string | null | undefined): string[] {
	if (!raw) return [];
	try {
		const parsed: unknown = JSON.parse(raw);
		if (!Array.isArray(parsed)) return [];
		// Validate element types instead of blindly casting: only real strings
		// are usable as rules downstream.
		return parsed.filter((item): item is string => typeof item === 'string');
	} catch {
		// Invalid JSON is deliberately treated as an empty rule set.
		return [];
	}
}
/**
 * Load the rules that apply to a repository, optionally merged with the rules
 * of one specific version.
 *
 * Repo-wide rules are stored in the `repository_configs` row whose
 * `version_id` is NULL; version-specific rules are stored in the row whose
 * `version_id` equals `versionId`.
 *
 * @param db - Database client used to run the lookups.
 * @param repositoryId - Repository whose rules are requested.
 * @param versionId - Optional version; when omitted (or empty) only the
 *   repo-wide rules are returned.
 * @returns Repo-wide rules first, followed by version-specific rules that
 *   have not already been emitted. The repo-wide list is returned as stored.
 */
function getRules(
	db: ReturnType<typeof getClient>,
	repositoryId: string,
	versionId?: string
): string[] {
	// Repo-wide rules (version_id IS NULL).
	const repoRow = db
		.prepare<
			[string],
			RawRepoConfig
		>(`SELECT rules FROM repository_configs WHERE repository_id = ? AND version_id IS NULL`)
		.get(repositoryId);
	const repoRules = parseRulesJson(repoRow?.rules);

	if (!versionId) return repoRules;

	// Version-specific rules.
	const versionRow = db
		.prepare<
			[string, string],
			RawRepoConfig
		>(`SELECT rules FROM repository_configs WHERE repository_id = ? AND version_id = ?`)
		.get(repositoryId, versionId);
	const versionRules = parseRulesJson(versionRow?.rules);

	// Merge: repo-wide first, then version-specific (deduped by content).
	const seen = new Set(repoRules);
	const merged = [...repoRules];
	for (const rule of versionRules) {
		if (seen.has(rule)) continue;
		// Record the rule so a repeat later in the version-specific list is
		// skipped too, not only repeats of repo-wide rules.
		seen.add(rule);
		merged.push(rule);
	}
	return merged;
}
interface RawRepoState {
state: 'pending' | 'indexing' | 'indexed' | 'error';
id: string;
@@ -283,8 +311,8 @@ export const GET: RequestHandler = async ({ url }) => {
snippetVersions
};
// Load rules from repository_configs.
const rules = getRules(db, parsed.repositoryId);
// Load rules from repository_configs (repo-wide + version-specific merged).
const rules = getRules(db, parsed.repositoryId, versionId);
if (responseType === 'txt') {
const text = formatContextTxt(selectedResults, rules, metadata);