feat(TRUEREF-0006): implement SQLite FTS5 full-text search engine

- BM25 ranking via SQLite FTS5 bm25() function
- Query preprocessor with wildcard expansion and special char escaping
- Library search with composite scoring (name match, trust score, snippet count)
- Trust score computation from stars, coverage, and source type
- Response formatters for library and snippet results

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
Giancarmine Salucci
2026-03-23 09:06:18 +01:00
parent f6be3cfd47
commit 33bdf30709
5 changed files with 1227 additions and 0 deletions

View File

@@ -0,0 +1,34 @@
/**
* Query preprocessor for FTS5 search queries.
*
* Normalizes raw user input into an FTS5-compatible MATCH expression
* with prefix wildcard expansion on the last token.
*/
/**
 * Turn a raw search string into the text handed to an FTS5 MATCH clause.
 *
 * The transformation is:
 * - every `(` and `)` is replaced by a space (FTS5 grouping characters);
 * - each run of whitespace is collapsed to a single space and both ends
 *   are trimmed;
 * - a prefix wildcard `*` is appended to the final token when that token
 *   is at least 3 characters long and does not already end with `*`.
 *
 * @param raw - The query text exactly as entered by the user.
 * @returns The normalized query, or an empty string when nothing remains
 *   after cleanup.
 */
export function preprocessQuery(raw: string): string {
  // Parentheses become spaces first, so one whitespace pass normalizes everything.
  const cleaned = raw.replace(/[()]/g, ' ').replace(/\s+/g, ' ').trim();
  if (cleaned === '') {
    return cleaned;
  }

  // Tokens are separated by exactly one space here, so the final token is
  // whatever follows the last space (or the whole string if there is none).
  const finalToken = cleaned.slice(cleaned.lastIndexOf(' ') + 1);
  const needsWildcard = finalToken.length >= 3 && !finalToken.endsWith('*');

  // The final token sits at the end of the string, so suffixing the whole
  // string is the same as suffixing that token.
  return needsWildcard ? `${cleaned}*` : cleaned;
}