/**
 * HybridSearchService — combines FTS5 keyword search with vector similarity
 * search using Reciprocal Rank Fusion (RRF) to produce a hybrid ranking.
 *
 * When no embedding provider is configured (or alpha = 0), the service
 * falls back to FTS5-only mode and never embeds the query.
 *
 * Configuration model:
 *   alpha = 0.0  → FTS5 only
 *   alpha = 0.5  → balanced hybrid (default)
 *   alpha = 1.0  → vector only
 *
 * NOTE(review): `alpha` is not forwarded to `reciprocalRankFusion`, so every
 * value strictly between 0 and 1 currently produces the same unweighted
 * fusion. Only the two endpoints change behavior.
 */

import type Database from 'better-sqlite3';
import type { EmbeddingProvider } from '../embeddings/provider.js';
import { SnippetSearchResult, SnippetRepositoryRef } from '$lib/server/models/search-result.js';
import { SnippetEntity } from '$lib/server/models/snippet.js';
import { SearchResultMapper } from '$lib/server/mappers/search-result.mapper.js';
import { SearchService } from './search.service.js';
import { VectorSearch } from './vector.search.js';
import { reciprocalRankFusion } from './rrf.js';

// ---------------------------------------------------------------------------
// Module constants
// ---------------------------------------------------------------------------

/** Result count used when the caller does not supply `limit`. */
const DEFAULT_LIMIT = 20;

/** Blend weight used when the caller does not supply `alpha` (or supplies NaN). */
const DEFAULT_ALPHA = 0.5;

/** Each backend is asked for `limit * CANDIDATE_MULTIPLIER` rows to widen the fusion pool. */
const CANDIDATE_MULTIPLIER = 3;

/**
 * Upper bound on IDs bound into a single `IN (...)` list. Kept well below
 * 999, the historical SQLite default for SQLITE_MAX_VARIABLE_NUMBER, leaving
 * room for the repository/type/version parameters.
 */
const MAX_IDS_PER_QUERY = 500;

// ---------------------------------------------------------------------------
// Public interfaces
// ---------------------------------------------------------------------------

export interface HybridSearchOptions {
	repositoryId: string;
	/**
	 * When provided, restricts results to this version — applied to the FTS5
	 * query and to the rows loaded for vector/fused candidates.
	 */
	versionId?: string;
	type?: 'code' | 'info';
	/** Maximum number of results to return. Default: 20. */
	limit?: number;
	/**
	 * Blend weight between FTS5 and vector search.
	 * 0.0 = FTS5 only, 1.0 = vector only, anything in between = RRF hybrid.
	 * Values outside [0, 1] are clamped. Default: 0.5.
	 */
	alpha?: number;
}

/**
 * Global search configuration stored in the settings table under
 * `search_config`.
 */
export interface SearchConfig {
	/** Blend weight (0.0–1.0). Default: 0.5. */
	alpha: number;
	/** Maximum results per search. Default: 20. */
	maxResults: number;
	/** True when an embedding provider is configured. */
	enableHybrid: boolean;
}

// ---------------------------------------------------------------------------
// Raw DB row used when re-fetching snippets by ID
// ---------------------------------------------------------------------------

interface RawSnippetById extends SnippetEntity {
	repo_id: string;
	repo_title: string;
}

// ---------------------------------------------------------------------------
// Internal helpers
// ---------------------------------------------------------------------------

/**
 * Resolve the effective blend weight: missing/NaN becomes the default and
 * out-of-range values are clamped into [0, 1] so the endpoint checks in
 * `search` behave predictably.
 */
function normalizeAlpha(alpha: number | undefined): number {
	if (alpha == null || Number.isNaN(alpha)) return DEFAULT_ALPHA;
	return Math.min(1, Math.max(0, alpha));
}

// ---------------------------------------------------------------------------
// HybridSearchService
// ---------------------------------------------------------------------------

export class HybridSearchService {
	private readonly vectorSearch: VectorSearch;

	constructor(
		private readonly db: Database.Database,
		private readonly searchService: SearchService,
		private readonly embeddingProvider: EmbeddingProvider | null
	) {
		this.vectorSearch = new VectorSearch(db);
	}

	/**
	 * Execute a hybrid search combining FTS5 and (optionally) vector search.
	 *
	 * When `embeddingProvider` is null or `alpha` is 0, the method returns
	 * FTS5 results directly without embedding the query. The same fallback is
	 * used when the provider returns no embedding for the query.
	 *
	 * @param query   - Raw search string, passed unchanged to SearchService and
	 *                  to the embedding provider.
	 * @param options - Search parameters including repositoryId and alpha blend.
	 * @returns Ranked array of SnippetSearchResult, deduplicated by snippet ID
	 *          and at most `limit` long. Results loaded for the vector/fused
	 *          paths carry `score: 0`; only their order is meaningful.
	 */
	async search(query: string, options: HybridSearchOptions): Promise<SnippetSearchResult[]> {
		const limit = options.limit ?? DEFAULT_LIMIT;
		// A non-positive (or NaN) limit can never yield results; bail out before
		// `slice(0, limit)` would misinterpret a negative value as "drop from the end".
		if (!(limit > 0)) return [];

		const alpha = normalizeAlpha(options.alpha);
		const candidateLimit = limit * CANDIDATE_MULTIPLIER;

		// Always run FTS5 — it is a synchronous call and doubles as the fallback
		// result set whenever the vector path is unavailable.
		const ftsResults = this.searchService.searchSnippets(query, {
			repositoryId: options.repositoryId,
			versionId: options.versionId,
			type: options.type,
			limit: candidateLimit // wider candidate pool for fusion
		});

		// Degenerate cases: no provider or pure FTS5 mode.
		if (!this.embeddingProvider || alpha === 0) {
			return ftsResults.slice(0, limit);
		}

		// Embed query and run vector search.
		const embeddings = await this.embeddingProvider.embed([query]);

		// Provider may be a Noop (returns empty array) — fall back gracefully.
		const queryEmbedding = embeddings[0]?.values;
		if (!queryEmbedding) {
			return ftsResults.slice(0, limit);
		}

		const vectorResults = this.vectorSearch.vectorSearch(
			queryEmbedding,
			options.repositoryId,
			candidateLimit
		);

		let candidateIds: string[];
		if (alpha === 1) {
			// Pure vector mode: skip RRF and rank by vector order alone.
			candidateIds = vectorResults.map((r) => r.snippetId);
		} else {
			// Build ranked lists for RRF. Score field is unused by RRF — only
			// the array index (rank) matters.
			const ftsRanked = ftsResults.map((r, i) => ({ id: r.snippet.id, score: i }));
			const vecRanked = vectorResults.map((r, i) => ({ id: r.snippetId, score: i }));
			candidateIds = reciprocalRankFusion(ftsRanked, vecRanked).map((r) => r.id);
		}

		// The vector search is scoped by repository only, so type/version filters
		// are applied while loading rows. The full candidate list is handed over
		// (not just the top `limit`) so filtered-out candidates can be replaced by
		// lower-ranked ones instead of shrinking the result set.
		return this.fetchSnippetsByIds(
			candidateIds,
			options.repositoryId,
			options.type,
			options.versionId,
			limit
		);
	}

	// -------------------------------------------------------------------------
	// Private helpers
	// -------------------------------------------------------------------------

	/**
	 * Load full snippet + repository data for the given ordered snippet IDs.
	 *
	 * Results are returned in the same order as `ids` so callers receive the
	 * ranked list intact. Duplicate IDs are collapsed to their first (best)
	 * position. Snippets not found in the database, or excluded by the
	 * repository/type/version constraints, are silently omitted.
	 *
	 * @param ids          - Snippet IDs in rank order (best first).
	 * @param repositoryId - Only snippets belonging to this repository are returned.
	 * @param type         - Optional snippet type filter.
	 * @param versionId    - Optional version filter; no version constraint when omitted.
	 * @param limit        - Maximum number of results to build. Defaults to all `ids`.
	 * @returns Hydrated results in rank order, each with `score: 0`.
	 */
	private fetchSnippetsByIds(
		ids: readonly string[],
		repositoryId: string,
		type?: 'code' | 'info',
		versionId?: string,
		limit: number = ids.length
	): SnippetSearchResult[] {
		if (ids.length === 0 || !(limit > 0)) return [];

		// Set iteration preserves insertion order, so first occurrences keep their rank.
		const uniqueIds = [...new Set(ids)];

		// Filter clause and its parameters are identical for every chunk.
		let filterClause = '';
		const filterParams: unknown[] = [repositoryId];
		if (type !== undefined) {
			filterClause += ' AND s.type = ?';
			filterParams.push(type);
		}
		if (versionId !== undefined) {
			filterClause += ' AND s.version_id = ?';
			filterParams.push(versionId);
		}

		// Build a map for O(1) lookup, then reconstruct in rank order.
		const rowsById = new Map<string, RawSnippetById>();
		for (let start = 0; start < uniqueIds.length; start += MAX_IDS_PER_QUERY) {
			const chunk = uniqueIds.slice(start, start + MAX_IDS_PER_QUERY);
			const placeholders = chunk.map(() => '?').join(', ');

			const rows = this.db
				.prepare(
					`SELECT
						s.id, s.document_id, s.repository_id, s.version_id, s.type,
						s.title, s.content, s.language, s.breadcrumb, s.token_count,
						s.created_at,
						r.id AS repo_id,
						r.title AS repo_title
					FROM snippets s
					JOIN repositories r ON r.id = s.repository_id
					WHERE s.id IN (${placeholders})
					  AND s.repository_id = ?${filterClause}`
				)
				.all(...chunk, ...filterParams) as RawSnippetById[];

			for (const row of rows) {
				rowsById.set(row.id, row);
			}
		}

		const results: SnippetSearchResult[] = [];
		for (const id of uniqueIds) {
			if (results.length >= limit) break;

			const row = rowsById.get(id);
			if (!row) continue;

			const repository = new SnippetRepositoryRef({ id: row.repo_id, title: row.repo_title });
			results.push(
				new SnippetSearchResult({
					snippet: SearchResultMapper.snippetFromEntity(new SnippetEntity(row), repository, 0)
						.snippet,
					score: 0,
					repository
				})
			);
		}

		return results;
	}
}