fix(FEEDBACK-0001): complete iteration 0 - harden context search

2026-03-27 01:25:46 +01:00
parent e7a2a83cdb
commit 16436bfab2
15 changed files with 1469 additions and 44 deletions
--- a/src/lib/server/search/hybrid.search.service.ts
+++ b/src/lib/server/search/hybrid.search.service.ts
@@ -88,8 +88,16 @@ export class HybridSearchService {
 	/**
 	 * Execute a hybrid search combining FTS5 and (optionally) vector search.
 	 *
-	 * When `embeddingProvider` is null or `alpha` is 0, the method returns
-	 * FTS5 results directly without embedding the query.
+	 * Search modes:
+	 * - 'keyword'  : FTS5-only (alpha = 0)
+	 * - 'semantic' : Vector-only (alpha = 1), skips FTS entirely
+	 * - 'hybrid'   : Balanced RRF fusion (alpha = 0.5 by default)
+	 * - 'auto'     : Runs FTS first. If an embedding provider is available and FTS
+	 *                yields no results (e.g. a punctuation-heavy query that is empty
+	 *                after preprocessing), falls back to vector search.
+	 *
+	 * When embeddingProvider is null, 'semantic' mode returns an empty array; in other
+	 * modes (or when alpha is 0) FTS5 results are returned without embedding the query.
 	 *
 	 * @param query   - Raw search string (preprocessing handled by SearchService).
 	 * @param options - Search parameters including repositoryId and alpha blend.
@@ -119,7 +127,30 @@ export class HybridSearchService {
 				alpha = options.alpha ?? 0.5;
 		}

-		// Always run FTS5 — it is synchronous and fast.
+		// Semantic mode: skip FTS entirely and use vector search only.
+		if (mode === 'semantic') {
+			if (!this.embeddingProvider || !query.trim()) {
+				return [];
+			}
+
+			const embeddings = await this.embeddingProvider.embed([query]);
+			if (embeddings.length === 0) {
+				return [];
+			}
+
+			const queryEmbedding = embeddings[0].values;
+			const vectorResults = this.vectorSearch.vectorSearch(queryEmbedding, {
+				repositoryId: options.repositoryId,
+				versionId: options.versionId,
+				profileId: options.profileId,
+				limit
+			});
+
+			const topIds = vectorResults.slice(0, limit).map((r) => r.snippetId);
+			return this.fetchSnippetsByIds(topIds, options.repositoryId, options.type);
+		}
+
+		// FTS5 mode (keyword) or hybrid/auto modes: try FTS first.
 		const ftsResults = this.searchService.searchSnippets(query, {
 			repositoryId: options.repositoryId,
 			versionId: options.versionId,
@@ -132,10 +163,40 @@ export class HybridSearchService {
 			return ftsResults.slice(0, limit);
 		}

-		// Embed query and run vector search.
+		// Auto/hybrid: FTS hits are blended with vector results below; with no FTS hits
+		// (e.g. punctuation-heavy queries that preprocess to empty) fall back to vector-only.
+		const hasFtsResults = ftsResults.length > 0;
+
+		if (!hasFtsResults) {
+			// No FTS results: try vector search as a fallback in auto/hybrid modes.
+			if (!query.trim()) {
+				// Query is empty; no point embedding it.
+				return [];
+			}
+
+			const embeddings = await this.embeddingProvider.embed([query]);
+
+			// If provider fails (Noop returns empty array), we're done.
+			if (embeddings.length === 0) {
+				return [];
+			}
+
+			const queryEmbedding = embeddings[0].values;
+			const vectorResults = this.vectorSearch.vectorSearch(queryEmbedding, {
+				repositoryId: options.repositoryId,
+				versionId: options.versionId,
+				profileId: options.profileId,
+				limit
+			});
+
+			const topIds = vectorResults.slice(0, limit).map((r) => r.snippetId);
+			return this.fetchSnippetsByIds(topIds, options.repositoryId, options.type);
+		}
+
+		// FTS has results: use RRF to blend with vector search (if alpha < 1).
 		const embeddings = await this.embeddingProvider.embed([query]);

-		// Provider may be a Noop (returns empty array) — fall back gracefully.
+		// Provider may be a Noop (returns empty array) — fall back to FTS gracefully.
 		if (embeddings.length === 0) {
 			return ftsResults.slice(0, limit);
 		}