diff --git a/src/lib/server/api/formatters.ts b/src/lib/server/api/formatters.ts index b344e3d..c8700d8 100644 --- a/src/lib/server/api/formatters.ts +++ b/src/lib/server/api/formatters.ts @@ -143,6 +143,9 @@ export function formatContextTxt( } noResults.push(`Result count: ${metadata?.resultCount ?? 0}`); + if (metadata?.searchModeUsed) { + noResults.push(`Search mode: ${metadata.searchModeUsed}`); + } parts.push(noResults.join('\n')); return parts.join('\n\n'); diff --git a/src/lib/server/mappers/context-response.mapper.ts b/src/lib/server/mappers/context-response.mapper.ts index 99a6419..ae03b6b 100644 --- a/src/lib/server/mappers/context-response.mapper.ts +++ b/src/lib/server/mappers/context-response.mapper.ts @@ -15,6 +15,7 @@ import { LibrarySearchResult, SnippetSearchResult } from '$lib/server/models/sea export interface ContextResponseMetadata { localSource: boolean; resultCount: number; + searchModeUsed: string; repository: { id: string; title: string; @@ -130,7 +131,8 @@ export class ContextResponseMapper { id: metadata.version.id }) : null, - resultCount: metadata?.resultCount ?? snippets.length + resultCount: metadata?.resultCount ?? snippets.length, + searchModeUsed: metadata?.searchModeUsed ?? 'keyword' }); } } diff --git a/src/lib/server/models/context-response.ts b/src/lib/server/models/context-response.ts index 43d5129..4354fb7 100644 --- a/src/lib/server/models/context-response.ts +++ b/src/lib/server/models/context-response.ts @@ -173,6 +173,7 @@ export class ContextJsonResponseDto { repository: ContextRepositoryJsonDto | null; version: ContextVersionJsonDto | null; resultCount: number; + searchModeUsed: string; constructor(props: ContextJsonResponseDto) { this.snippets = props.snippets; @@ -182,5 +183,6 @@ export class ContextJsonResponseDto { this.repository = props.repository; this.version = props.version; this.resultCount = props.resultCount; + this.searchModeUsed = props.searchModeUsed; } } diff --git a/src/lib/server/pipeline/indexing.pipeline.test.ts b/src/lib/server/pipeline/indexing.pipeline.test.ts index a37b756..3a86dbb 100644 --- a/src/lib/server/pipeline/indexing.pipeline.test.ts +++ b/src/lib/server/pipeline/indexing.pipeline.test.ts @@ -729,4 +729,46 @@ describe('IndexingPipeline', () => { // State should remain 'pending' — pipeline with no versionId must not touch it expect(version.state).toBe('pending'); }); + + it('calls LocalCrawler with ref=v1.2.0 when job has a versionId with tag v1.2.0', async () => { + const versionId = insertVersion(db, { tag: 'v1.2.0', state: 'pending' }); + + const crawl = vi.fn().mockResolvedValue({ + files: [], + totalFiles: 0, + skippedFiles: 0, + branch: 'main', + commitSha: 'abc' + }); + + const pipeline = new IndexingPipeline(db, vi.fn() as never, { crawl } as never, null); + const job = makeJob('/test/repo', versionId); + + await pipeline.run(job as never); + + expect(crawl).toHaveBeenCalledWith({ + rootPath: '/tmp/test-repo', + ref: 'v1.2.0' + }); + }); + + it('calls LocalCrawler with ref=undefined when job has no versionId (main-branch)', async () => { + const crawl = vi.fn().mockResolvedValue({ + files: [], + totalFiles: 0, + skippedFiles: 0, + branch: 'main', + commitSha: 'abc' + }); + + const pipeline = new IndexingPipeline(db, vi.fn() as never, { crawl } as never, null); + const job = makeJob('/test/repo'); // no versionId + + await pipeline.run(job as never); + + expect(crawl).toHaveBeenCalledWith({ + rootPath: '/tmp/test-repo', + ref: undefined + }); + }); }); diff --git a/src/lib/server/pipeline/indexing.pipeline.ts b/src/lib/server/pipeline/indexing.pipeline.ts index 61eb00d..6a9c752 100644 --- a/src/lib/server/pipeline/indexing.pipeline.ts +++ b/src/lib/server/pipeline/indexing.pipeline.ts @@ -95,7 +95,10 @@ export class IndexingPipeline { } // ---- Stage 1: Crawl ------------------------------------------------- - const crawlResult = await this.crawl(repo); + const versionTag = normJob.versionId + ? this.getVersionTag(normJob.versionId) + : undefined; + const crawlResult = await this.crawl(repo, versionTag); const totalFiles = crawlResult.totalFiles; this.updateJob(job.id, { totalFiles }); @@ -270,7 +273,7 @@ export class IndexingPipeline { // Private — crawl // ------------------------------------------------------------------------- - private async crawl(repo: Repository): Promise<{ + private async crawl(repo: Repository, ref?: string): Promise<{ files: Array<{ path: string; content: string; sha: string; size: number; language: string }>; totalFiles: number; }> { @@ -287,7 +290,7 @@ export class IndexingPipeline { const result = await this.githubCrawl({ owner, repo: repoName, - ref: repo.branch ?? undefined, + ref: ref ?? repo.branch ?? undefined, token: repo.githubToken ?? undefined }); @@ -296,13 +299,20 @@ export class IndexingPipeline { // Local filesystem crawl. const result = await this.localCrawler.crawl({ rootPath: repo.sourceUrl, - ref: repo.branch !== 'main' ? (repo.branch ?? undefined) : undefined + ref: ref ?? (repo.branch !== 'main' ? (repo.branch ?? undefined) : undefined) }); return { files: result.files, totalFiles: result.totalFiles }; } } + private getVersionTag(versionId: string): string | undefined { + const row = this.db + .prepare<[string], { tag: string }>(`SELECT tag FROM repository_versions WHERE id = ?`) + .get(versionId); + return row?.tag; + } + // ------------------------------------------------------------------------- // Private — atomic snippet replacement // ------------------------------------------------------------------------- diff --git a/src/lib/server/search/hybrid.search.service.test.ts b/src/lib/server/search/hybrid.search.service.test.ts index 072c70a..db194fe 100644 --- a/src/lib/server/search/hybrid.search.service.test.ts +++ b/src/lib/server/search/hybrid.search.service.test.ts @@ -395,7 +395,7 @@ describe('HybridSearchService', () => { seedSnippet(client, { repositoryId: repoId, documentId: docId, content: 'hello world' }); const svc = new HybridSearchService(client, searchService, null); - const results = await svc.search('hello', { repositoryId: repoId }); + const { results } = await svc.search('hello', { repositoryId: repoId }); expect(results.length).toBeGreaterThan(0); expect(results[0].snippet.content).toBe('hello world'); @@ -406,14 +406,14 @@ describe('HybridSearchService', () => { const provider = makeMockProvider([[1, 0]]); const svc = new HybridSearchService(client, searchService, provider); - const results = await svc.search('alpha zero', { repositoryId: repoId, alpha: 0 }); + const { results } = await svc.search('alpha zero', { repositoryId: repoId, alpha: 0 }); expect(results.length).toBeGreaterThan(0); }); it('returns empty array when FTS5 query is blank and no provider', async () => { const svc = new HybridSearchService(client, searchService, null); - const results = await svc.search(' ', { repositoryId: repoId }); + const { results } = await svc.search(' ', { repositoryId: repoId }); expect(results).toHaveLength(0); }); @@ -425,7 +425,7 @@ describe('HybridSearchService', () => { }); const svc = new HybridSearchService(client, searchService, makeNoopProvider()); - const results = await svc.search('noop fallback', { repositoryId: repoId }); + const { results } = await svc.search('noop fallback', { repositoryId: repoId }); expect(results.length).toBeGreaterThan(0); }); @@ -445,7 +445,7 @@ describe('HybridSearchService', () => { const provider = makeMockProvider([[1, 0, 0, 0]]); const svc = new HybridSearchService(client, searchService, provider); - const results = await svc.search('hybrid search', { + const { results } = await svc.search('hybrid search', { repositoryId: repoId, alpha: 0.5 }); @@ -464,7 +464,7 @@ describe('HybridSearchService', () => { const provider = makeMockProvider([[1, 0]]); const svc = new HybridSearchService(client, searchService, provider); - const results = await svc.search('deduplicate snippet', { + const { results } = await svc.search('deduplicate snippet', { repositoryId: repoId, alpha: 0.5 }); @@ -487,7 +487,7 @@ describe('HybridSearchService', () => { const provider = makeMockProvider([[1, 0]]); const svc = new HybridSearchService(client, searchService, provider); - const results = await svc.search('pagination test', { + const { results } = await svc.search('pagination test', { repositoryId: repoId, limit: 3, alpha: 0.5 @@ -519,7 +519,7 @@ describe('HybridSearchService', () => { const provider = makeMockProvider([[1, 0]]); const svc = new HybridSearchService(client, searchService, provider); - const results = await svc.search('anything', { + const { results } = await svc.search('anything', { repositoryId: repoId, alpha: 1 }); @@ -543,7 +543,7 @@ describe('HybridSearchService', () => { const provider = makeMockProvider([[1, 0]]); const svc = new HybridSearchService(client, searchService, provider); - const results = await svc.search('metadata check', { + const { results } = await svc.search('metadata check', { repositoryId: repoId, alpha: 0.5 }); @@ -580,7 +580,7 @@ describe('HybridSearchService', () => { const provider = makeMockProvider([[1, 0]]); const svc = new HybridSearchService(client, searchService, provider); - const results = await svc.search('repository keyword', { + const { results } = await svc.search('repository keyword', { repositoryId: repoId, alpha: 0.5 }); @@ -607,7 +607,7 @@ describe('HybridSearchService', () => { const provider = makeMockProvider([[1, 0]]); const svc = new HybridSearchService(client, searchService, provider); - const codeResults = await svc.search('function example', { + const { results: codeResults } = await svc.search('function example', { repositoryId: repoId, type: 'code', alpha: 0.5 @@ -632,7 +632,7 @@ describe('HybridSearchService', () => { const svc = new HybridSearchService(client, searchService, provider); // Should not throw and should return results. - const results = await svc.search('default alpha hybrid', { repositoryId: repoId }); + const { results } = await svc.search('default alpha hybrid', { repositoryId: repoId }); expect(Array.isArray(results)).toBe(true); }); @@ -761,7 +761,7 @@ describe('HybridSearchService', () => { const searchService = new SearchService(client); const hybridService = new HybridSearchService(client, searchService, mockProvider); - const results = await hybridService.search('keyword', { + const { results } = await hybridService.search('keyword', { repositoryId: repoId, searchMode: 'keyword' }); @@ -820,7 +820,7 @@ describe('HybridSearchService', () => { const searchService = new SearchService(client); const hybridService = new HybridSearchService(client, searchService, mockProvider); - const results = await hybridService.search('semantic', { + const { results } = await hybridService.search('semantic', { repositoryId: repoId, searchMode: 'semantic', profileId: 'test-profile' @@ -848,7 +848,7 @@ describe('HybridSearchService', () => { const searchService = new SearchService(client); const hybridService = new HybridSearchService(client, searchService, null); - const results = await hybridService.search('test query', { + const { results } = await hybridService.search('test query', { repositoryId: repoId, searchMode: 'semantic' }); @@ -867,7 +867,7 @@ describe('HybridSearchService', () => { const searchService = new SearchService(client); const hybridService = new HybridSearchService(client, searchService, mockProvider); - const results = await hybridService.search(' ', { + const { results } = await hybridService.search(' ', { repositoryId: repoId, searchMode: 'semantic' }); @@ -885,7 +885,7 @@ describe('HybridSearchService', () => { const searchService = new SearchService(client); const hybridService = new HybridSearchService(client, searchService, noopProvider); - const results = await hybridService.search('test query', { + const { results } = await hybridService.search('test query', { repositoryId: repoId, searchMode: 'semantic' }); @@ -951,7 +951,7 @@ describe('HybridSearchService', () => { const hybridService = new HybridSearchService(client, searchService, mockProvider); // Query with heavy punctuation that preprocesses to nothing. - const results = await hybridService.search('!!!@@@###', { + const { results } = await hybridService.search('!!!@@@###', { repositoryId: repoId, searchMode: 'auto', profileId: 'test-profile' @@ -978,7 +978,7 @@ describe('HybridSearchService', () => { const searchService = new SearchService(client); const hybridService = new HybridSearchService(client, searchService, mockProvider); - const results = await hybridService.search('hello', { + const { results } = await hybridService.search('hello', { repositoryId: repoId, searchMode: 'auto' }); @@ -1038,7 +1038,7 @@ describe('HybridSearchService', () => { const hybridService = new HybridSearchService(client, searchService, mockProvider); // Query that won't match through FTS after punctuation normalization. - const results = await hybridService.search('%%%vector%%%', { + const { results } = await hybridService.search('%%%vector%%%', { repositoryId: repoId, searchMode: 'hybrid', alpha: 0.5, @@ -1064,7 +1064,7 @@ describe('HybridSearchService', () => { const searchService = new SearchService(client); const hybridService = new HybridSearchService(client, searchService, null); - const results = await hybridService.search('!!!@@@###$$$', { + const { results } = await hybridService.search('!!!@@@###$$$', { repositoryId: repoId }); diff --git a/src/lib/server/search/hybrid.search.service.ts b/src/lib/server/search/hybrid.search.service.ts index 0d34377..79c7c05 100644 --- a/src/lib/server/search/hybrid.search.service.ts +++ b/src/lib/server/search/hybrid.search.service.ts @@ -101,9 +101,12 @@ export class HybridSearchService { * * @param query - Raw search string (preprocessing handled by SearchService). * @param options - Search parameters including repositoryId and alpha blend. - * @returns Ranked array of SnippetSearchResult, deduplicated by snippet ID. + * @returns Object with ranked results array and the search mode actually used. */ - async search(query: string, options: HybridSearchOptions): Promise { + async search( + query: string, + options: HybridSearchOptions + ): Promise<{ results: SnippetSearchResult[]; searchModeUsed: string }> { const limit = options.limit ?? 20; const mode = options.searchMode ?? 'auto'; @@ -127,12 +130,12 @@ export class HybridSearchService { // Semantic mode: skip FTS entirely and use vector search only. if (mode === 'semantic') { if (!this.embeddingProvider || !query.trim()) { - return []; + return { results: [], searchModeUsed: 'semantic' }; } const embeddings = await this.embeddingProvider.embed([query]); if (embeddings.length === 0) { - return []; + return { results: [], searchModeUsed: 'semantic' }; } const queryEmbedding = embeddings[0].values; @@ -144,7 +147,10 @@ export class HybridSearchService { }); const topIds = vectorResults.slice(0, limit).map((r) => r.snippetId); - return this.fetchSnippetsByIds(topIds, options.repositoryId, options.type); + return { + results: this.fetchSnippetsByIds(topIds, options.repositoryId, options.type), + searchModeUsed: 'semantic' + }; } // FTS5 mode (keyword) or hybrid/auto modes: try FTS first. @@ -157,7 +163,7 @@ export class HybridSearchService { // Degenerate cases: no provider or pure FTS5 mode. if (!this.embeddingProvider || alpha === 0) { - return ftsResults.slice(0, limit); + return { results: ftsResults.slice(0, limit), searchModeUsed: 'keyword' }; } // For auto/hybrid modes: if FTS yielded results, use them; otherwise try vector. @@ -168,14 +174,14 @@ export class HybridSearchService { // No FTS results: try vector search as a fallback in auto/hybrid modes. if (!query.trim()) { // Query is empty; no point embedding it. - return []; + return { results: [], searchModeUsed: 'keyword_fallback' }; } const embeddings = await this.embeddingProvider.embed([query]); // If provider fails (Noop returns empty array), we're done. if (embeddings.length === 0) { - return []; + return { results: [], searchModeUsed: 'keyword_fallback' }; } const queryEmbedding = embeddings[0].values; @@ -187,7 +193,10 @@ export class HybridSearchService { }); const topIds = vectorResults.slice(0, limit).map((r) => r.snippetId); - return this.fetchSnippetsByIds(topIds, options.repositoryId, options.type); + return { + results: this.fetchSnippetsByIds(topIds, options.repositoryId, options.type), + searchModeUsed: 'keyword_fallback' + }; } // FTS has results: use RRF to blend with vector search (if alpha < 1). @@ -195,7 +204,7 @@ export class HybridSearchService { // Provider may be a Noop (returns empty array) — fall back to FTS gracefully. if (embeddings.length === 0) { - return ftsResults.slice(0, limit); + return { results: ftsResults.slice(0, limit), searchModeUsed: 'keyword' }; } const queryEmbedding = embeddings[0].values; @@ -210,7 +219,10 @@ export class HybridSearchService { // Pure vector mode: skip RRF and return vector results directly. if (alpha === 1) { const topIds = vectorResults.slice(0, limit).map((r) => r.snippetId); - return this.fetchSnippetsByIds(topIds, options.repositoryId, options.type); + return { + results: this.fetchSnippetsByIds(topIds, options.repositoryId, options.type), + searchModeUsed: 'semantic' + }; } // Build ranked lists for RRF. Score field is unused by RRF — only @@ -221,7 +233,10 @@ export class HybridSearchService { const fused = reciprocalRankFusion(ftsRanked, vecRanked); const topIds = fused.slice(0, limit).map((r) => r.id); - return this.fetchSnippetsByIds(topIds, options.repositoryId, options.type); + return { + results: this.fetchSnippetsByIds(topIds, options.repositoryId, options.type), + searchModeUsed: 'hybrid' + }; } // ------------------------------------------------------------------------- diff --git a/src/routes/api/v1/api-contract.integration.test.ts b/src/routes/api/v1/api-contract.integration.test.ts index e509f6c..0131bcc 100644 --- a/src/routes/api/v1/api-contract.integration.test.ts +++ b/src/routes/api/v1/api-contract.integration.test.ts @@ -486,4 +486,83 @@ describe('API contract integration', () => { isLocal: false }); }); + + it('GET /api/v1/context returns 404 with VERSION_NOT_FOUND when version does not exist', async () => { + const repositoryId = seedRepo(db); + + const response = await getContext({ + url: new URL( + `http://test/api/v1/context?libraryId=${encodeURIComponent(`${repositoryId}/v99.0.0`)}&query=${encodeURIComponent('foo')}` + ) + } as never); + + expect(response.status).toBe(404); + const body = await response.json(); + expect(body.code).toBe('VERSION_NOT_FOUND'); + }); + + it('GET /api/v1/context resolves a version by full commit SHA', async () => { + const repositoryId = seedRepo(db); + const fullSha = 'a'.repeat(40); + + // Insert version with a commit_hash + db.prepare( + `INSERT INTO repository_versions + (id, repository_id, tag, commit_hash, state, total_snippets, indexed_at, created_at) + VALUES (?, ?, ?, ?, 'indexed', 0, ?, ?)` + ).run(`${repositoryId}/v2.0.0`, repositoryId, 'v2.0.0', fullSha, NOW_S, NOW_S); + + const response = await getContext({ + url: new URL( + `http://test/api/v1/context?libraryId=${encodeURIComponent(`${repositoryId}/${fullSha}`)}&query=${encodeURIComponent('anything')}` + ) + } as never); + + expect(response.status).toBe(200); + const body = await response.json(); + expect(body.version?.resolved).toBe('v2.0.0'); + }); + + it('GET /api/v1/context resolves a version by short SHA prefix (8 chars)', async () => { + const repositoryId = seedRepo(db); + const fullSha = 'b1c2d3e4f5a6b7c8d9e0f1a2b3c4d5e6f7a8b9c0'; + const shortSha = fullSha.slice(0, 8); + + db.prepare( + `INSERT INTO repository_versions + (id, repository_id, tag, commit_hash, state, total_snippets, indexed_at, created_at) + VALUES (?, ?, ?, ?, 'indexed', 0, ?, ?)` + ).run(`${repositoryId}/v3.0.0`, repositoryId, 'v3.0.0', fullSha, NOW_S, NOW_S); + + const response = await getContext({ + url: new URL( + `http://test/api/v1/context?libraryId=${encodeURIComponent(`${repositoryId}/${shortSha}`)}&query=${encodeURIComponent('anything')}` + ) + } as never); + + expect(response.status).toBe(200); + const body = await response.json(); + expect(body.version?.resolved).toBe('v3.0.0'); + }); + + it('GET /api/v1/context includes searchModeUsed in JSON response', async () => { + const repositoryId = seedRepo(db); + const documentId = seedDocument(db, repositoryId); + seedSnippet(db, { + documentId, + repositoryId, + content: 'search mode used test snippet' + }); + + const response = await getContext({ + url: new URL( + `http://test/api/v1/context?libraryId=${encodeURIComponent(repositoryId)}&query=${encodeURIComponent('search mode used')}` + ) + } as never); + + expect(response.status).toBe(200); + const body = await response.json(); + expect(body.searchModeUsed).toBeDefined(); + expect(['keyword', 'semantic', 'hybrid', 'keyword_fallback']).toContain(body.searchModeUsed); + }); }); diff --git a/src/routes/api/v1/context/+server.ts b/src/routes/api/v1/context/+server.ts index 1f26375..c42f8de 100644 --- a/src/routes/api/v1/context/+server.ts +++ b/src/routes/api/v1/context/+server.ts @@ -198,6 +198,7 @@ export const GET: RequestHandler = async ({ url }) => { let versionId: string | undefined; let resolvedVersion: RawVersionRow | undefined; if (parsed.version) { + // Try exact tag match first. resolvedVersion = db .prepare< [string, string], @@ -205,12 +206,33 @@ export const GET: RequestHandler = async ({ url }) => { >(`SELECT id, tag FROM repository_versions WHERE repository_id = ? AND tag = ?`) .get(parsed.repositoryId, parsed.version); - // Version not found is not fatal — fall back to default branch. - versionId = resolvedVersion?.id; + // Fall back to commit hash prefix match (min 7 chars). + if (!resolvedVersion && parsed.version.length >= 7) { + resolvedVersion = db + .prepare< + [string, string], + RawVersionRow + >( + `SELECT id, tag FROM repository_versions + WHERE repository_id = ? AND commit_hash LIKE ?` + ) + .get(parsed.repositoryId, `${parsed.version}%`); + } + + if (!resolvedVersion) { + return new Response( + JSON.stringify({ + error: `Version ${parsed.version} not found for library ${parsed.repositoryId}`, + code: 'VERSION_NOT_FOUND' + }), + { status: 404, headers: { 'Content-Type': 'application/json', ...CORS_HEADERS } } + ); + } + versionId = resolvedVersion.id; } // Execute hybrid search (falls back to FTS5 when no embedding provider is set). - const searchResults = await hybridService.search(query, { + const { results: searchResults, searchModeUsed } = await hybridService.search(query, { repositoryId: parsed.repositoryId, versionId, limit: 50, // fetch more than needed; token budget will trim @@ -242,6 +264,7 @@ export const GET: RequestHandler = async ({ url }) => { const metadata: ContextResponseMetadata = { localSource: repo.source === 'local', resultCount: selectedResults.length, + searchModeUsed, repository: { id: repo.id, title: repo.title,