Files
trueref-legacy/src/lib/server/search/hybrid.search.service.ts
Giancarmine Salucci 215cadf070 refactor: introduce domain model classes and mapper layer
Replace ad-hoc inline row casting (snake_case → camelCase) spread across
services, routes, and the indexing pipeline with explicit model classes
(Repository, IndexingJob, RepositoryVersion, Snippet, SearchResult) and
dedicated mapper classes that own the DB → domain conversion.

- Add src/lib/server/models/ with typed model classes for all domain entities
- Add src/lib/server/mappers/ with mapper classes per entity
- Remove duplicated RawRow interfaces and inline map functions from
  job-queue, repository.service, indexing.pipeline, and all API routes
- Add dtoJsonResponse helper to standardise JSON responses via SvelteKit json()
- Add api-contract.integration.test.ts as a regression baseline

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-03-25 14:29:49 +01:00

209 lines
6.6 KiB
TypeScript
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
/**
* HybridSearchService — combines FTS5 keyword search with vector similarity
* search using Reciprocal Rank Fusion (RRF) to produce a hybrid ranking.
*
* When no embedding provider is configured (or alpha = 0), the service
* transparently falls back to FTS5-only mode with zero overhead.
*
* Configuration model:
* alpha = 0.0 → FTS5 only
* alpha = 0.5 → balanced hybrid (default)
* alpha = 1.0 → vector only
*/
import type Database from 'better-sqlite3';
import type { EmbeddingProvider } from '../embeddings/provider.js';
import { SnippetSearchResult, SnippetRepositoryRef } from '$lib/server/models/search-result.js';
import { SnippetEntity } from '$lib/server/models/snippet.js';
import { SearchResultMapper } from '$lib/server/mappers/search-result.mapper.js';
import { SearchService } from './search.service.js';
import { VectorSearch } from './vector.search.js';
import { reciprocalRankFusion } from './rrf.js';
// ---------------------------------------------------------------------------
// Public interfaces
// ---------------------------------------------------------------------------
export interface HybridSearchOptions {
/** ID of the repository to search; used by the keyword search, the vector search and the final snippet lookup. */
repositoryId: string;
/** Optional version identifier; passed through to the FTS5 keyword search. */
versionId?: string;
/** Optional snippet type filter. When omitted, both 'code' and 'info' snippets are eligible. */
type?: 'code' | 'info';
/** Maximum number of results to return. Default: 20. */
limit?: number;
/**
* Blend weight between FTS5 and vector search.
* 0.0 = FTS5 only, 1.0 = vector only, 0.5 = balanced.
* Default: 0.5.
*/
alpha?: number;
}
/**
* Global search configuration stored in the settings table under
* `search_config`.
*/
export interface SearchConfig {
/** Blend weight (0.0 to 1.0). Default: 0.5. */
alpha: number;
/** Maximum results per search. Default: 20. */
maxResults: number;
/** True when an embedding provider is configured. */
enableHybrid: boolean;
}
// ---------------------------------------------------------------------------
// Raw DB row used when re-fetching snippets by ID
// ---------------------------------------------------------------------------
/**
 * Row shape produced by the snippet re-fetch query: every SnippetEntity
 * column plus two aliased columns taken from the joined repositories table.
 */
interface RawSnippetById extends SnippetEntity {
/** Selected as `r.id AS repo_id` from the joined repositories row. */
repo_id: string;
/** Selected as `r.title AS repo_title` from the joined repositories row. */
repo_title: string;
}
// ---------------------------------------------------------------------------
// HybridSearchService
// ---------------------------------------------------------------------------
/**
 * Combines FTS5 keyword search with vector similarity search and merges the
 * two rankings with Reciprocal Rank Fusion (RRF).
 *
 * Mode selection is driven by `alpha`:
 *  - no embedding provider, or `alpha <= 0`  → FTS5 results only
 *  - `alpha >= 1`                             → vector results only
 *  - anything in between                      → RRF over both rankings
 *
 * NOTE: for in-between values the fusion call receives only the two ranked
 * lists; the numeric value of `alpha` is not forwarded to it.
 */
export class HybridSearchService {
	/** Each backend is asked for `limit * CANDIDATE_POOL_FACTOR` candidates so fusion has a wider pool. */
	private static readonly CANDIDATE_POOL_FACTOR = 3;

	/**
	 * Maximum number of snippet IDs bound into one `IN (...)` list. Keeps each
	 * statement well below SQLite's bound-parameter ceiling regardless of `limit`.
	 */
	private static readonly MAX_IDS_PER_QUERY = 500;

	private readonly vectorSearch: VectorSearch;

	constructor(
		private readonly db: Database.Database,
		private readonly searchService: SearchService,
		private readonly embeddingProvider: EmbeddingProvider | null
	) {
		this.vectorSearch = new VectorSearch(db);
	}

	/**
	 * Execute a hybrid search combining FTS5 and (optionally) vector search.
	 *
	 * When `embeddingProvider` is null or `alpha` is 0 (or lower), the method
	 * returns FTS5 results directly without embedding the query. The same
	 * fallback is used when the provider returns no embeddings.
	 *
	 * @param query - Raw search string (preprocessing handled by SearchService).
	 * @param options - Search parameters including repositoryId and alpha blend.
	 * @returns Ranked array of SnippetSearchResult, deduplicated by snippet ID
	 *   and at most `limit` long. A non-positive or NaN `limit` yields `[]`.
	 */
	async search(query: string, options: HybridSearchOptions): Promise<SnippetSearchResult[]> {
		// Normalise the limit: a fractional value would otherwise reach the SQL
		// LIMIT clause, and a negative one would turn `slice` into a tail-drop.
		const limit = Math.floor(options.limit ?? 20);
		if (!(limit > 0)) return [];

		const alpha = options.alpha ?? 0.5;
		const poolSize = limit * HybridSearchService.CANDIDATE_POOL_FACTOR;

		// FTS5 always runs: it is the result in keyword-only mode and the
		// fallback whenever no query embedding can be produced.
		const ftsResults = this.searchService.searchSnippets(query, {
			repositoryId: options.repositoryId,
			versionId: options.versionId,
			type: options.type,
			limit: poolSize
		});

		// Degenerate cases: no provider or pure FTS5 mode.
		if (!this.embeddingProvider || alpha <= 0) {
			return ftsResults.slice(0, limit);
		}

		const embeddings = await this.embeddingProvider.embed([query]);

		// Provider may return an empty array (e.g. a no-op provider) — fall back gracefully.
		if (embeddings.length === 0) {
			return ftsResults.slice(0, limit);
		}

		const vectorResults = this.vectorSearch.vectorSearch(
			embeddings[0].values,
			options.repositoryId,
			poolSize
		);

		let candidateIds: string[];
		if (alpha >= 1) {
			// Pure vector mode: skip RRF and keep the vector ranking as-is.
			candidateIds = vectorResults.map((r) => r.snippetId);
		} else {
			// Build ranked lists for RRF. The score field only carries the
			// array index; the position in the list is what encodes the rank.
			const ftsRanked = ftsResults.map((r, i) => ({ id: r.snippet.id, score: i }));
			const vecRanked = vectorResults.map((r, i) => ({ id: r.snippetId, score: i }));
			candidateIds = reciprocalRankFusion(ftsRanked, vecRanked).map((r) => r.id);
		}

		// Filter the WHOLE candidate list first and truncate afterwards, so that
		// candidates rejected by the type/version constraints do not eat into
		// the `limit` slots while valid lower-ranked candidates are discarded.
		return this.fetchSnippetsByIds(
			candidateIds,
			options.repositoryId,
			options.type,
			options.versionId
		).slice(0, limit);
	}

	// -------------------------------------------------------------------------
	// Private helpers
	// -------------------------------------------------------------------------

	/**
	 * Load full snippet + repository data for the given ordered snippet IDs.
	 *
	 * Results are returned in the same order as `ids` (first occurrence wins if
	 * an ID is repeated) so callers receive the ranked list intact. Snippets not
	 * found in the database, or rejected by the repository/type/version
	 * constraints, are silently omitted.
	 *
	 * Every returned result has `score` set to 0: neither the fusion score nor
	 * the vector similarity is carried through this lookup.
	 *
	 * @param ids - Snippet IDs in rank order.
	 * @param repositoryId - Only snippets belonging to this repository are returned.
	 * @param type - Optional snippet type constraint.
	 * @param versionId - Optional version constraint matched against
	 *   `snippets.version_id`. NOTE(review): assumes this is the same column the
	 *   keyword search filters on for `versionId` — confirm against SearchService.
	 */
	private fetchSnippetsByIds(
		ids: string[],
		repositoryId: string,
		type?: 'code' | 'info',
		versionId?: string
	): SnippetSearchResult[] {
		// A Set preserves insertion order, so this dedupes without disturbing rank.
		const uniqueIds = [...new Set(ids)];
		if (uniqueIds.length === 0) return [];

		// Constraints shared by every chunked query; their parameters are bound
		// after the ID placeholders.
		const filterParams: unknown[] = [repositoryId];
		let filterClause = '';
		if (type !== undefined) {
			filterClause += ' AND s.type = ?';
			filterParams.push(type);
		}
		// `!= null` on purpose: a null coming from an untyped caller must mean
		// "no constraint" rather than `version_id = NULL` (which matches nothing).
		if (versionId != null) {
			filterClause += ' AND s.version_id = ?';
			filterParams.push(versionId);
		}

		// Collect rows into a map for O(1) lookup when rebuilding rank order.
		const byId = new Map<string, RawSnippetById>();
		const chunkSize = HybridSearchService.MAX_IDS_PER_QUERY;
		for (let start = 0; start < uniqueIds.length; start += chunkSize) {
			const chunk = uniqueIds.slice(start, start + chunkSize);
			const placeholders = chunk.map(() => '?').join(', ');
			const rows = this.db
				.prepare<unknown[], RawSnippetById>(
					`SELECT
						s.id, s.document_id, s.repository_id, s.version_id, s.type,
						s.title, s.content, s.language, s.breadcrumb, s.token_count,
						s.created_at,
						r.id AS repo_id,
						r.title AS repo_title
					FROM snippets s
					JOIN repositories r ON r.id = s.repository_id
					WHERE s.id IN (${placeholders})
						AND s.repository_id = ?${filterClause}`
				)
				.all(...chunk, ...filterParams) as RawSnippetById[];
			for (const row of rows) {
				byId.set(row.id, row);
			}
		}

		const results: SnippetSearchResult[] = [];
		for (const id of uniqueIds) {
			const row = byId.get(id);
			if (!row) continue;

			const repository = new SnippetRepositoryRef({ id: row.repo_id, title: row.repo_title });
			results.push(
				new SnippetSearchResult({
					snippet: SearchResultMapper.snippetFromEntity(new SnippetEntity(row), repository, 0)
						.snippet,
					score: 0,
					repository
				})
			);
		}
		return results;
	}
}