/**
 * SearchService — FTS5-backed full-text search over snippets and repositories.
 *
 * Implements keyword search using SQLite's built-in BM25 ranking via the
 * `bm25()` function exposed by FTS5 virtual tables. Library search uses
 * LIKE-based matching on the `repositories` table with a composite relevance
 * score.
 */

import type Database from 'better-sqlite3';
import type { Repository, RepositoryVersion, Snippet } from '$lib/types';
import { preprocessQuery } from './query-preprocessor';

// ---------------------------------------------------------------------------
// Public interface types
// ---------------------------------------------------------------------------

export interface SnippetSearchOptions {
  repositoryId: string;
  versionId?: string;
  type?: 'code' | 'info';
  /** Number of results to return. Default: 20. */
  limit?: number;
  /** Number of results to skip. Default: 0. */
  offset?: number;
}

export interface SnippetSearchResult {
  snippet: Snippet;
  /** BM25 rank — negative value; lower (more negative) = more relevant. */
  score: number;
  /** Minimal repository projection (id and title) of the snippet's owner. */
  repository: Pick<Repository, 'id' | 'title'>;
}

export interface LibrarySearchOptions {
  libraryName: string;
  /** Semantic relevance hint (reserved for future hybrid use). */
  query?: string;
  /** Number of results to return. Default: 10. */
  limit?: number;
}

export interface LibrarySearchResult {
  repository: Repository;
  versions: RepositoryVersion[];
  /** Composite relevance score. Higher = more relevant. */
  score: number;
}

// ---------------------------------------------------------------------------
// Raw DB row types
// ---------------------------------------------------------------------------

/** Raw row returned by the snippet FTS query (snake_case column names). */
interface RawSnippetRow {
  id: string;
  document_id: string;
  repository_id: string;
  version_id: string | null;
  type: 'code' | 'info';
  title: string | null;
  content: string;
  language: string | null;
  breadcrumb: string | null;
  token_count: number | null;
  created_at: number;
  repo_id: string;
  repo_title: string;
  score: number;
}

/** Raw row returned by the library search query. */
interface RawRepoRow {
  id: string;
  title: string;
  description: string | null;
  source: 'github' | 'local';
  source_url: string;
  branch: string | null;
  state: 'pending' | 'indexing' | 'indexed' | 'error';
  total_snippets: number | null;
  total_tokens: number | null;
  trust_score: number | null;
  benchmark_score: number | null;
  stars: number | null;
  github_token: string | null;
  last_indexed_at: number | null;
  created_at: number;
  updated_at: number;
  exact_match: number;
  prefix_match: number;
  desc_match: number;
  snippet_score: number;
  trust_component: number;
}

/** Raw row returned by the version query. */
interface RawVersionRow {
  id: string;
  repository_id: string;
  tag: string;
  title: string | null;
  state: 'pending' | 'indexing' | 'indexed' | 'error';
  total_snippets: number | null;
  indexed_at: number | null;
  created_at: number;
}

// ---------------------------------------------------------------------------
// Internal helpers
// ---------------------------------------------------------------------------

/**
 * Convert a nullable stored timestamp to a `Date`.
 *
 * The value is multiplied by 1000 before being handed to `Date`, i.e. it is
 * treated as seconds. An explicit null check is used instead of truthiness so
 * that a stored `0` maps to the epoch rather than to `null`.
 */
function toNullableDate(seconds: number | null): Date | null {
  return seconds == null ? null : new Date(seconds * 1000);
}

/**
 * Escape the LIKE metacharacters (`%`, `_`) and the escape character itself
 * (`\`) so that user input is matched literally. Must be paired with
 * `ESCAPE '\'` on the LIKE expression.
 */
function escapeLikePattern(value: string): string {
  return value.replace(/[\\%_]/g, '\\$&');
}

// ---------------------------------------------------------------------------
// Mappers: raw DB rows → domain types
// ---------------------------------------------------------------------------

/** Map a raw snippet row (snake_case) to the `Snippet` domain type. */
function mapSnippet(row: RawSnippetRow): Snippet {
  return {
    id: row.id,
    documentId: row.document_id,
    repositoryId: row.repository_id,
    versionId: row.version_id,
    type: row.type,
    title: row.title,
    content: row.content,
    language: row.language,
    breadcrumb: row.breadcrumb,
    tokenCount: row.token_count,
    createdAt: new Date(row.created_at * 1000)
  };
}

/**
 * Map a raw repository row to the `Repository` domain type. The computed
 * scoring columns (`exact_match`, `prefix_match`, ...) are intentionally not
 * copied onto the result.
 */
function mapRepository(row: RawRepoRow): Repository {
  return {
    id: row.id,
    title: row.title,
    description: row.description,
    source: row.source,
    sourceUrl: row.source_url,
    branch: row.branch,
    state: row.state,
    totalSnippets: row.total_snippets,
    totalTokens: row.total_tokens,
    trustScore: row.trust_score,
    benchmarkScore: row.benchmark_score,
    stars: row.stars,
    githubToken: row.github_token,
    lastIndexedAt: toNullableDate(row.last_indexed_at),
    createdAt: new Date(row.created_at * 1000),
    updatedAt: new Date(row.updated_at * 1000)
  };
}

/** Map a raw version row to the `RepositoryVersion` domain type. */
function mapVersion(row: RawVersionRow): RepositoryVersion {
  return {
    id: row.id,
    repositoryId: row.repository_id,
    tag: row.tag,
    title: row.title,
    state: row.state,
    totalSnippets: row.total_snippets,
    indexedAt: toNullableDate(row.indexed_at),
    createdAt: new Date(row.created_at * 1000)
  };
}

// ---------------------------------------------------------------------------
// SearchService
// ---------------------------------------------------------------------------

export class SearchService {
  constructor(private readonly db: Database.Database) {}

  // -------------------------------------------------------------------------
  // searchSnippets
  // -------------------------------------------------------------------------

  /**
   * Search snippets within a repository using FTS5 BM25 ranking.
   *
   * The query is preprocessed (whitespace normalization + prefix wildcard)
   * before being passed to the MATCH expression. Results are ordered by BM25
   * score ascending (lower = more relevant).
   *
   * @param query - Raw user query; if preprocessing yields an empty value,
   *   no SQL is executed and an empty array is returned.
   * @param options - Repository scope plus optional version/type filters and
   *   pagination (`limit` defaults to 20, `offset` to 0).
   * @returns Matching snippets with their BM25 score and owning repository.
   */
  searchSnippets(query: string, options: SnippetSearchOptions): SnippetSearchResult[] {
    const { repositoryId, versionId, type, limit = 20, offset = 0 } = options;

    const processedQuery = preprocessQuery(query);
    if (!processedQuery) return [];

    // Build the WHERE clause dynamically based on optional filters. Only
    // fixed SQL fragments are concatenated; every value is a bound parameter.
    const conditions: string[] = ['snippets_fts MATCH ?', 's.repository_id = ?'];
    const params: unknown[] = [processedQuery, repositoryId];

    if (versionId !== undefined) {
      conditions.push('s.version_id = ?');
      params.push(versionId);
    }

    if (type !== undefined) {
      conditions.push('s.type = ?');
      params.push(type);
    }

    // LIMIT / OFFSET placeholders come last in the statement.
    params.push(limit, offset);

    const sql = `
      SELECT
        s.id, s.document_id, s.repository_id, s.version_id, s.type,
        s.title, s.content, s.language, s.breadcrumb, s.token_count,
        s.created_at,
        r.id AS repo_id,
        r.title AS repo_title,
        bm25(snippets_fts) AS score
      FROM snippets_fts
      JOIN snippets s ON s.rowid = snippets_fts.rowid
      JOIN repositories r ON r.id = s.repository_id
      WHERE ${conditions.join(' AND ')}
      ORDER BY score ASC
      LIMIT ? OFFSET ?
    `;

    const rows = this.db.prepare(sql).all(...params) as RawSnippetRow[];

    return rows.map((row) => ({
      snippet: mapSnippet(row),
      score: row.score,
      repository: { id: row.repo_id, title: row.repo_title }
    }));
  }

  // -------------------------------------------------------------------------
  // searchRepositories
  // -------------------------------------------------------------------------

  /**
   * Search repositories by library name using LIKE-based matching.
   *
   * Only repositories in state `indexed` are considered. `libraryName` is
   * matched literally: `%`, `_` and `\` in the input are escaped so they do
   * not act as LIKE wildcards.
   *
   * Applies a composite scoring model:
   * - Exact title match : 100 pts
   * - Prefix title match : 50 pts
   * - Description match : 20 pts
   * - Snippet density : total_snippets / 100
   * - Trust score : trust_score * 10
   *
   * @param options - Library name to look up and an optional `limit`
   *   (default 10). `options.query` is currently unused.
   * @returns Repositories with their versions, ordered by composite score
   *   descending.
   */
  searchRepositories(options: LibrarySearchOptions): LibrarySearchResult[] {
    const { libraryName, limit = 10 } = options;

    const escapedName = escapeLikePattern(libraryName);
    const prefixPattern = `${escapedName}%`;
    const containsPattern = `%${escapedName}%`;

    // NOTE: '\\' inside the template literal reaches SQLite as a single
    // backslash, i.e. the SQL reads: ... LIKE LOWER(?) ESCAPE '\'
    const rows = this.db
      .prepare(
        `
        SELECT r.*,
          CASE WHEN LOWER(r.title) = LOWER(?) THEN 100 ELSE 0 END AS exact_match,
          CASE WHEN LOWER(r.title) LIKE LOWER(?) ESCAPE '\\' THEN 50 ELSE 0 END AS prefix_match,
          CASE WHEN LOWER(r.description) LIKE LOWER(?) ESCAPE '\\' THEN 20 ELSE 0 END AS desc_match,
          (COALESCE(r.total_snippets, 0) / 100.0) AS snippet_score,
          COALESCE(r.trust_score, 0) * 10 AS trust_component
        FROM repositories r
        WHERE r.state = 'indexed'
          AND (
            LOWER(r.title) LIKE LOWER(?) ESCAPE '\\'
            OR LOWER(r.id) LIKE LOWER(?) ESCAPE '\\'
            OR LOWER(r.description) LIKE LOWER(?) ESCAPE '\\'
          )
        ORDER BY (exact_match + prefix_match + desc_match + snippet_score + trust_component) DESC
        LIMIT ?
        `
      )
      .all(
        libraryName, // exact_match (plain equality, no escaping needed)
        prefixPattern, // prefix_match
        containsPattern, // desc_match
        containsPattern, // WHERE title LIKE
        containsPattern, // WHERE id LIKE
        containsPattern, // WHERE description LIKE
        limit
      ) as RawRepoRow[];

    return rows.map((row) => {
      const repository = mapRepository(row);
      // Recompute in JS the same sum the ORDER BY clause uses, so the score
      // reported to the caller agrees with the ordering.
      const compositeScore =
        row.exact_match +
        row.prefix_match +
        row.desc_match +
        row.snippet_score +
        row.trust_component;

      return {
        repository,
        versions: this.getVersions(row.id),
        score: compositeScore
      };
    });
  }

  // -------------------------------------------------------------------------
  // Private helpers
  // -------------------------------------------------------------------------

  /** Load all versions of a repository, newest (`created_at`) first. */
  private getVersions(repositoryId: string): RepositoryVersion[] {
    const rows = this.db
      .prepare(
        `SELECT * FROM repository_versions WHERE repository_id = ? ORDER BY created_at DESC`
      )
      .all(repositoryId) as RawVersionRow[];

    return rows.map(mapVersion);
  }
}