fix(MULTIVERSION-0001): fix version isolation, 404 on unknown version, commit-hash lookup, and searchModeUsed
Bug 1: Thread version tag from run() into crawl() via getVersionTag() helper so
LocalCrawler and GithubCrawler receive the correct ref when indexing a named
version instead of always crawling HEAD.
Bug 2: Return HTTP 404 with code VERSION_NOT_FOUND when a requested version tag
is not found in repository_versions, instead of silently falling back to a
cross-version mixed result set.
Bug 4: Before returning 404, attempt a commit_hash prefix match (min 7 chars)
so callers can request a version by full or short SHA.
Bug 3: Change HybridSearchService.search() to return
{ results, searchModeUsed } and propagate searchModeUsed through
ContextResponseMetadata and ContextJsonResponseDto so callers can see which
strategy (keyword / semantic / hybrid / keyword_fallback) was actually used.
keyword_fallback is reported when the keyword (FTS) pass returned no results
and the vector-search fallback path was taken instead.
Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -143,6 +143,9 @@ export function formatContextTxt(
|
||||
}
|
||||
|
||||
noResults.push(`Result count: ${metadata?.resultCount ?? 0}`);
|
||||
if (metadata?.searchModeUsed) {
|
||||
noResults.push(`Search mode: ${metadata.searchModeUsed}`);
|
||||
}
|
||||
parts.push(noResults.join('\n'));
|
||||
|
||||
return parts.join('\n\n');
|
||||
|
||||
@@ -15,6 +15,7 @@ import { LibrarySearchResult, SnippetSearchResult } from '$lib/server/models/sea
|
||||
export interface ContextResponseMetadata {
|
||||
localSource: boolean;
|
||||
resultCount: number;
|
||||
searchModeUsed: string;
|
||||
repository: {
|
||||
id: string;
|
||||
title: string;
|
||||
@@ -130,7 +131,8 @@ export class ContextResponseMapper {
|
||||
id: metadata.version.id
|
||||
})
|
||||
: null,
|
||||
resultCount: metadata?.resultCount ?? snippets.length
|
||||
resultCount: metadata?.resultCount ?? snippets.length,
|
||||
searchModeUsed: metadata?.searchModeUsed ?? 'keyword'
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
@@ -173,6 +173,7 @@ export class ContextJsonResponseDto {
|
||||
repository: ContextRepositoryJsonDto | null;
|
||||
version: ContextVersionJsonDto | null;
|
||||
resultCount: number;
|
||||
searchModeUsed: string;
|
||||
|
||||
constructor(props: ContextJsonResponseDto) {
|
||||
this.snippets = props.snippets;
|
||||
@@ -182,5 +183,6 @@ export class ContextJsonResponseDto {
|
||||
this.repository = props.repository;
|
||||
this.version = props.version;
|
||||
this.resultCount = props.resultCount;
|
||||
this.searchModeUsed = props.searchModeUsed;
|
||||
}
|
||||
}
|
||||
|
||||
@@ -729,4 +729,46 @@ describe('IndexingPipeline', () => {
|
||||
// State should remain 'pending' — pipeline with no versionId must not touch it
|
||||
expect(version.state).toBe('pending');
|
||||
});
|
||||
|
||||
it('calls LocalCrawler with ref=v1.2.0 when job has a versionId with tag v1.2.0', async () => {
|
||||
const versionId = insertVersion(db, { tag: 'v1.2.0', state: 'pending' });
|
||||
|
||||
const crawl = vi.fn().mockResolvedValue({
|
||||
files: [],
|
||||
totalFiles: 0,
|
||||
skippedFiles: 0,
|
||||
branch: 'main',
|
||||
commitSha: 'abc'
|
||||
});
|
||||
|
||||
const pipeline = new IndexingPipeline(db, vi.fn() as never, { crawl } as never, null);
|
||||
const job = makeJob('/test/repo', versionId);
|
||||
|
||||
await pipeline.run(job as never);
|
||||
|
||||
expect(crawl).toHaveBeenCalledWith({
|
||||
rootPath: '/tmp/test-repo',
|
||||
ref: 'v1.2.0'
|
||||
});
|
||||
});
|
||||
|
||||
it('calls LocalCrawler with ref=undefined when job has no versionId (main-branch)', async () => {
|
||||
const crawl = vi.fn().mockResolvedValue({
|
||||
files: [],
|
||||
totalFiles: 0,
|
||||
skippedFiles: 0,
|
||||
branch: 'main',
|
||||
commitSha: 'abc'
|
||||
});
|
||||
|
||||
const pipeline = new IndexingPipeline(db, vi.fn() as never, { crawl } as never, null);
|
||||
const job = makeJob('/test/repo'); // no versionId
|
||||
|
||||
await pipeline.run(job as never);
|
||||
|
||||
expect(crawl).toHaveBeenCalledWith({
|
||||
rootPath: '/tmp/test-repo',
|
||||
ref: undefined
|
||||
});
|
||||
});
|
||||
});
|
||||
|
||||
@@ -95,7 +95,10 @@ export class IndexingPipeline {
|
||||
}
|
||||
|
||||
// ---- Stage 1: Crawl -------------------------------------------------
|
||||
const crawlResult = await this.crawl(repo);
|
||||
const versionTag = normJob.versionId
|
||||
? this.getVersionTag(normJob.versionId)
|
||||
: undefined;
|
||||
const crawlResult = await this.crawl(repo, versionTag);
|
||||
const totalFiles = crawlResult.totalFiles;
|
||||
|
||||
this.updateJob(job.id, { totalFiles });
|
||||
@@ -270,7 +273,7 @@ export class IndexingPipeline {
|
||||
// Private — crawl
|
||||
// -------------------------------------------------------------------------
|
||||
|
||||
private async crawl(repo: Repository): Promise<{
|
||||
private async crawl(repo: Repository, ref?: string): Promise<{
|
||||
files: Array<{ path: string; content: string; sha: string; size: number; language: string }>;
|
||||
totalFiles: number;
|
||||
}> {
|
||||
@@ -287,7 +290,7 @@ export class IndexingPipeline {
|
||||
const result = await this.githubCrawl({
|
||||
owner,
|
||||
repo: repoName,
|
||||
ref: repo.branch ?? undefined,
|
||||
ref: ref ?? repo.branch ?? undefined,
|
||||
token: repo.githubToken ?? undefined
|
||||
});
|
||||
|
||||
@@ -296,13 +299,20 @@ export class IndexingPipeline {
|
||||
// Local filesystem crawl.
|
||||
const result = await this.localCrawler.crawl({
|
||||
rootPath: repo.sourceUrl,
|
||||
ref: repo.branch !== 'main' ? (repo.branch ?? undefined) : undefined
|
||||
ref: ref ?? (repo.branch !== 'main' ? (repo.branch ?? undefined) : undefined)
|
||||
});
|
||||
|
||||
return { files: result.files, totalFiles: result.totalFiles };
|
||||
}
|
||||
}
|
||||
|
||||
private getVersionTag(versionId: string): string | undefined {
|
||||
const row = this.db
|
||||
.prepare<[string], { tag: string }>(`SELECT tag FROM repository_versions WHERE id = ?`)
|
||||
.get(versionId);
|
||||
return row?.tag;
|
||||
}
|
||||
|
||||
// -------------------------------------------------------------------------
|
||||
// Private — atomic snippet replacement
|
||||
// -------------------------------------------------------------------------
|
||||
|
||||
@@ -395,7 +395,7 @@ describe('HybridSearchService', () => {
|
||||
seedSnippet(client, { repositoryId: repoId, documentId: docId, content: 'hello world' });
|
||||
|
||||
const svc = new HybridSearchService(client, searchService, null);
|
||||
const results = await svc.search('hello', { repositoryId: repoId });
|
||||
const { results } = await svc.search('hello', { repositoryId: repoId });
|
||||
|
||||
expect(results.length).toBeGreaterThan(0);
|
||||
expect(results[0].snippet.content).toBe('hello world');
|
||||
@@ -406,14 +406,14 @@ describe('HybridSearchService', () => {
|
||||
|
||||
const provider = makeMockProvider([[1, 0]]);
|
||||
const svc = new HybridSearchService(client, searchService, provider);
|
||||
const results = await svc.search('alpha zero', { repositoryId: repoId, alpha: 0 });
|
||||
const { results } = await svc.search('alpha zero', { repositoryId: repoId, alpha: 0 });
|
||||
|
||||
expect(results.length).toBeGreaterThan(0);
|
||||
});
|
||||
|
||||
it('returns empty array when FTS5 query is blank and no provider', async () => {
|
||||
const svc = new HybridSearchService(client, searchService, null);
|
||||
const results = await svc.search(' ', { repositoryId: repoId });
|
||||
const { results } = await svc.search(' ', { repositoryId: repoId });
|
||||
expect(results).toHaveLength(0);
|
||||
});
|
||||
|
||||
@@ -425,7 +425,7 @@ describe('HybridSearchService', () => {
|
||||
});
|
||||
|
||||
const svc = new HybridSearchService(client, searchService, makeNoopProvider());
|
||||
const results = await svc.search('noop fallback', { repositoryId: repoId });
|
||||
const { results } = await svc.search('noop fallback', { repositoryId: repoId });
|
||||
|
||||
expect(results.length).toBeGreaterThan(0);
|
||||
});
|
||||
@@ -445,7 +445,7 @@ describe('HybridSearchService', () => {
|
||||
const provider = makeMockProvider([[1, 0, 0, 0]]);
|
||||
const svc = new HybridSearchService(client, searchService, provider);
|
||||
|
||||
const results = await svc.search('hybrid search', {
|
||||
const { results } = await svc.search('hybrid search', {
|
||||
repositoryId: repoId,
|
||||
alpha: 0.5
|
||||
});
|
||||
@@ -464,7 +464,7 @@ describe('HybridSearchService', () => {
|
||||
const provider = makeMockProvider([[1, 0]]);
|
||||
const svc = new HybridSearchService(client, searchService, provider);
|
||||
|
||||
const results = await svc.search('deduplicate snippet', {
|
||||
const { results } = await svc.search('deduplicate snippet', {
|
||||
repositoryId: repoId,
|
||||
alpha: 0.5
|
||||
});
|
||||
@@ -487,7 +487,7 @@ describe('HybridSearchService', () => {
|
||||
const provider = makeMockProvider([[1, 0]]);
|
||||
const svc = new HybridSearchService(client, searchService, provider);
|
||||
|
||||
const results = await svc.search('pagination test', {
|
||||
const { results } = await svc.search('pagination test', {
|
||||
repositoryId: repoId,
|
||||
limit: 3,
|
||||
alpha: 0.5
|
||||
@@ -519,7 +519,7 @@ describe('HybridSearchService', () => {
|
||||
const provider = makeMockProvider([[1, 0]]);
|
||||
const svc = new HybridSearchService(client, searchService, provider);
|
||||
|
||||
const results = await svc.search('anything', {
|
||||
const { results } = await svc.search('anything', {
|
||||
repositoryId: repoId,
|
||||
alpha: 1
|
||||
});
|
||||
@@ -543,7 +543,7 @@ describe('HybridSearchService', () => {
|
||||
const provider = makeMockProvider([[1, 0]]);
|
||||
const svc = new HybridSearchService(client, searchService, provider);
|
||||
|
||||
const results = await svc.search('metadata check', {
|
||||
const { results } = await svc.search('metadata check', {
|
||||
repositoryId: repoId,
|
||||
alpha: 0.5
|
||||
});
|
||||
@@ -580,7 +580,7 @@ describe('HybridSearchService', () => {
|
||||
const provider = makeMockProvider([[1, 0]]);
|
||||
const svc = new HybridSearchService(client, searchService, provider);
|
||||
|
||||
const results = await svc.search('repository keyword', {
|
||||
const { results } = await svc.search('repository keyword', {
|
||||
repositoryId: repoId,
|
||||
alpha: 0.5
|
||||
});
|
||||
@@ -607,7 +607,7 @@ describe('HybridSearchService', () => {
|
||||
const provider = makeMockProvider([[1, 0]]);
|
||||
const svc = new HybridSearchService(client, searchService, provider);
|
||||
|
||||
const codeResults = await svc.search('function example', {
|
||||
const { results: codeResults } = await svc.search('function example', {
|
||||
repositoryId: repoId,
|
||||
type: 'code',
|
||||
alpha: 0.5
|
||||
@@ -632,7 +632,7 @@ describe('HybridSearchService', () => {
|
||||
const svc = new HybridSearchService(client, searchService, provider);
|
||||
|
||||
// Should not throw and should return results.
|
||||
const results = await svc.search('default alpha hybrid', { repositoryId: repoId });
|
||||
const { results } = await svc.search('default alpha hybrid', { repositoryId: repoId });
|
||||
expect(Array.isArray(results)).toBe(true);
|
||||
});
|
||||
|
||||
@@ -761,7 +761,7 @@ describe('HybridSearchService', () => {
|
||||
const searchService = new SearchService(client);
|
||||
const hybridService = new HybridSearchService(client, searchService, mockProvider);
|
||||
|
||||
const results = await hybridService.search('keyword', {
|
||||
const { results } = await hybridService.search('keyword', {
|
||||
repositoryId: repoId,
|
||||
searchMode: 'keyword'
|
||||
});
|
||||
@@ -820,7 +820,7 @@ describe('HybridSearchService', () => {
|
||||
const searchService = new SearchService(client);
|
||||
const hybridService = new HybridSearchService(client, searchService, mockProvider);
|
||||
|
||||
const results = await hybridService.search('semantic', {
|
||||
const { results } = await hybridService.search('semantic', {
|
||||
repositoryId: repoId,
|
||||
searchMode: 'semantic',
|
||||
profileId: 'test-profile'
|
||||
@@ -848,7 +848,7 @@ describe('HybridSearchService', () => {
|
||||
const searchService = new SearchService(client);
|
||||
const hybridService = new HybridSearchService(client, searchService, null);
|
||||
|
||||
const results = await hybridService.search('test query', {
|
||||
const { results } = await hybridService.search('test query', {
|
||||
repositoryId: repoId,
|
||||
searchMode: 'semantic'
|
||||
});
|
||||
@@ -867,7 +867,7 @@ describe('HybridSearchService', () => {
|
||||
const searchService = new SearchService(client);
|
||||
const hybridService = new HybridSearchService(client, searchService, mockProvider);
|
||||
|
||||
const results = await hybridService.search(' ', {
|
||||
const { results } = await hybridService.search(' ', {
|
||||
repositoryId: repoId,
|
||||
searchMode: 'semantic'
|
||||
});
|
||||
@@ -885,7 +885,7 @@ describe('HybridSearchService', () => {
|
||||
const searchService = new SearchService(client);
|
||||
const hybridService = new HybridSearchService(client, searchService, noopProvider);
|
||||
|
||||
const results = await hybridService.search('test query', {
|
||||
const { results } = await hybridService.search('test query', {
|
||||
repositoryId: repoId,
|
||||
searchMode: 'semantic'
|
||||
});
|
||||
@@ -951,7 +951,7 @@ describe('HybridSearchService', () => {
|
||||
const hybridService = new HybridSearchService(client, searchService, mockProvider);
|
||||
|
||||
// Query with heavy punctuation that preprocesses to nothing.
|
||||
const results = await hybridService.search('!!!@@@###', {
|
||||
const { results } = await hybridService.search('!!!@@@###', {
|
||||
repositoryId: repoId,
|
||||
searchMode: 'auto',
|
||||
profileId: 'test-profile'
|
||||
@@ -978,7 +978,7 @@ describe('HybridSearchService', () => {
|
||||
const searchService = new SearchService(client);
|
||||
const hybridService = new HybridSearchService(client, searchService, mockProvider);
|
||||
|
||||
const results = await hybridService.search('hello', {
|
||||
const { results } = await hybridService.search('hello', {
|
||||
repositoryId: repoId,
|
||||
searchMode: 'auto'
|
||||
});
|
||||
@@ -1038,7 +1038,7 @@ describe('HybridSearchService', () => {
|
||||
const hybridService = new HybridSearchService(client, searchService, mockProvider);
|
||||
|
||||
// Query that won't match through FTS after punctuation normalization.
|
||||
const results = await hybridService.search('%%%vector%%%', {
|
||||
const { results } = await hybridService.search('%%%vector%%%', {
|
||||
repositoryId: repoId,
|
||||
searchMode: 'hybrid',
|
||||
alpha: 0.5,
|
||||
@@ -1064,7 +1064,7 @@ describe('HybridSearchService', () => {
|
||||
const searchService = new SearchService(client);
|
||||
const hybridService = new HybridSearchService(client, searchService, null);
|
||||
|
||||
const results = await hybridService.search('!!!@@@###$$$', {
|
||||
const { results } = await hybridService.search('!!!@@@###$$$', {
|
||||
repositoryId: repoId
|
||||
});
|
||||
|
||||
|
||||
@@ -101,9 +101,12 @@ export class HybridSearchService {
|
||||
*
|
||||
* @param query - Raw search string (preprocessing handled by SearchService).
|
||||
* @param options - Search parameters including repositoryId and alpha blend.
|
||||
* @returns Ranked array of SnippetSearchResult, deduplicated by snippet ID.
|
||||
* @returns Object with ranked results array and the search mode actually used.
|
||||
*/
|
||||
async search(query: string, options: HybridSearchOptions): Promise<SnippetSearchResult[]> {
|
||||
async search(
|
||||
query: string,
|
||||
options: HybridSearchOptions
|
||||
): Promise<{ results: SnippetSearchResult[]; searchModeUsed: string }> {
|
||||
const limit = options.limit ?? 20;
|
||||
const mode = options.searchMode ?? 'auto';
|
||||
|
||||
@@ -127,12 +130,12 @@ export class HybridSearchService {
|
||||
// Semantic mode: skip FTS entirely and use vector search only.
|
||||
if (mode === 'semantic') {
|
||||
if (!this.embeddingProvider || !query.trim()) {
|
||||
return [];
|
||||
return { results: [], searchModeUsed: 'semantic' };
|
||||
}
|
||||
|
||||
const embeddings = await this.embeddingProvider.embed([query]);
|
||||
if (embeddings.length === 0) {
|
||||
return [];
|
||||
return { results: [], searchModeUsed: 'semantic' };
|
||||
}
|
||||
|
||||
const queryEmbedding = embeddings[0].values;
|
||||
@@ -144,7 +147,10 @@ export class HybridSearchService {
|
||||
});
|
||||
|
||||
const topIds = vectorResults.slice(0, limit).map((r) => r.snippetId);
|
||||
return this.fetchSnippetsByIds(topIds, options.repositoryId, options.type);
|
||||
return {
|
||||
results: this.fetchSnippetsByIds(topIds, options.repositoryId, options.type),
|
||||
searchModeUsed: 'semantic'
|
||||
};
|
||||
}
|
||||
|
||||
// FTS5 mode (keyword) or hybrid/auto modes: try FTS first.
|
||||
@@ -157,7 +163,7 @@ export class HybridSearchService {
|
||||
|
||||
// Degenerate cases: no provider or pure FTS5 mode.
|
||||
if (!this.embeddingProvider || alpha === 0) {
|
||||
return ftsResults.slice(0, limit);
|
||||
return { results: ftsResults.slice(0, limit), searchModeUsed: 'keyword' };
|
||||
}
|
||||
|
||||
// For auto/hybrid modes: if FTS yielded results, use them; otherwise try vector.
|
||||
@@ -168,14 +174,14 @@ export class HybridSearchService {
|
||||
// No FTS results: try vector search as a fallback in auto/hybrid modes.
|
||||
if (!query.trim()) {
|
||||
// Query is empty; no point embedding it.
|
||||
return [];
|
||||
return { results: [], searchModeUsed: 'keyword_fallback' };
|
||||
}
|
||||
|
||||
const embeddings = await this.embeddingProvider.embed([query]);
|
||||
|
||||
// If provider fails (Noop returns empty array), we're done.
|
||||
if (embeddings.length === 0) {
|
||||
return [];
|
||||
return { results: [], searchModeUsed: 'keyword_fallback' };
|
||||
}
|
||||
|
||||
const queryEmbedding = embeddings[0].values;
|
||||
@@ -187,7 +193,10 @@ export class HybridSearchService {
|
||||
});
|
||||
|
||||
const topIds = vectorResults.slice(0, limit).map((r) => r.snippetId);
|
||||
return this.fetchSnippetsByIds(topIds, options.repositoryId, options.type);
|
||||
return {
|
||||
results: this.fetchSnippetsByIds(topIds, options.repositoryId, options.type),
|
||||
searchModeUsed: 'keyword_fallback'
|
||||
};
|
||||
}
|
||||
|
||||
// FTS has results: use RRF to blend with vector search (if alpha < 1).
|
||||
@@ -195,7 +204,7 @@ export class HybridSearchService {
|
||||
|
||||
// Provider may be a Noop (returns empty array) — fall back to FTS gracefully.
|
||||
if (embeddings.length === 0) {
|
||||
return ftsResults.slice(0, limit);
|
||||
return { results: ftsResults.slice(0, limit), searchModeUsed: 'keyword' };
|
||||
}
|
||||
|
||||
const queryEmbedding = embeddings[0].values;
|
||||
@@ -210,7 +219,10 @@ export class HybridSearchService {
|
||||
// Pure vector mode: skip RRF and return vector results directly.
|
||||
if (alpha === 1) {
|
||||
const topIds = vectorResults.slice(0, limit).map((r) => r.snippetId);
|
||||
return this.fetchSnippetsByIds(topIds, options.repositoryId, options.type);
|
||||
return {
|
||||
results: this.fetchSnippetsByIds(topIds, options.repositoryId, options.type),
|
||||
searchModeUsed: 'semantic'
|
||||
};
|
||||
}
|
||||
|
||||
// Build ranked lists for RRF. Score field is unused by RRF — only
|
||||
@@ -221,7 +233,10 @@ export class HybridSearchService {
|
||||
const fused = reciprocalRankFusion(ftsRanked, vecRanked);
|
||||
|
||||
const topIds = fused.slice(0, limit).map((r) => r.id);
|
||||
return this.fetchSnippetsByIds(topIds, options.repositoryId, options.type);
|
||||
return {
|
||||
results: this.fetchSnippetsByIds(topIds, options.repositoryId, options.type),
|
||||
searchModeUsed: 'hybrid'
|
||||
};
|
||||
}
|
||||
|
||||
// -------------------------------------------------------------------------
|
||||
|
||||
Reference in New Issue
Block a user