fix(FEEDBACK-0001): complete iteration 0 - harden context search
This commit is contained in:
@@ -16,6 +16,7 @@ import { LibrarySearchResult, SnippetSearchResult } from '$lib/server/models/sea
|
||||
import { Repository } from '$lib/server/models/repository';
|
||||
import { RepositoryVersion } from '$lib/server/models/repository-version';
|
||||
import { Snippet } from '$lib/server/models/snippet';
|
||||
import type { ContextResponseMetadata } from '$lib/server/mappers/context-response.mapper';
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Helpers
|
||||
@@ -82,6 +83,25 @@ function makeSnippetResult(snippet: Snippet): SnippetSearchResult {
|
||||
});
|
||||
}
|
||||
|
||||
/**
 * Build a `ContextResponseMetadata` fixture for the formatter tests.
 *
 * The defaults describe a non-local GitHub repository (`/facebook/react`,
 * branch `main`) with a single result, no version information and an empty
 * `snippetVersions` map.
 *
 * @param overrides - Top-level fields that replace the defaults. The merge is
 *   shallow: passing `repository` or `version` replaces the whole nested object.
 * @returns A freshly built metadata object on every call.
 */
function makeMetadata(
	overrides: Partial<ContextResponseMetadata> = {}
): ContextResponseMetadata {
	return {
		localSource: false,
		resultCount: 1,
		repository: {
			id: '/facebook/react',
			title: 'React',
			source: 'github',
			sourceUrl: 'https://github.com/facebook/react',
			branch: 'main'
		},
		version: null,
		snippetVersions: {},
		// Spread last so any caller-supplied field wins over the defaults above.
		...overrides
	};
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// mapState
|
||||
// ---------------------------------------------------------------------------
|
||||
@@ -220,6 +240,46 @@ describe('formatContextJson', () => {
|
||||
const response = formatContextJson(snippets, []);
|
||||
expect(response.totalTokens).toBe(0);
|
||||
});
|
||||
|
||||
// Passing metadata as the third argument must surface repository, version and
// result-count fields on the response and attach an `origin` to each snippet.
it('adds repository, version, resultCount, and origin metadata additively', () => {
	const snippet = makeSnippet({ versionId: '/facebook/react/v18.3.0' });
	const response = formatContextJson(
		[makeSnippetResult(snippet)],
		[],
		makeMetadata({
			resultCount: 1,
			version: {
				requested: 'v18.3.0',
				resolved: 'v18.3.0',
				id: '/facebook/react/v18.3.0'
			},
			// Maps the snippet's versionId to the tag expected in `origin.version`.
			snippetVersions: {
				'/facebook/react/v18.3.0': 'v18.3.0'
			}
		})
	);

	// Response-level metadata.
	expect(response.localSource).toBe(false);
	expect(response.resultCount).toBe(1);
	expect(response.repository?.id).toBe('/facebook/react');
	expect(response.repository?.sourceUrl).toBe('https://github.com/facebook/react');
	expect(response.version).toEqual({
		requested: 'v18.3.0',
		resolved: 'v18.3.0',
		id: '/facebook/react/v18.3.0'
	});

	// Per-snippet provenance.
	const resultSnippet = response.snippets[0];
	expect(resultSnippet.origin).toEqual({
		repositoryId: '/facebook/react',
		repositoryTitle: 'React',
		source: 'github',
		sourceUrl: 'https://github.com/facebook/react',
		version: 'v18.3.0',
		versionId: '/facebook/react/v18.3.0',
		isLocal: false
	});
});
|
||||
});
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
@@ -272,8 +332,40 @@ describe('formatContextTxt', () => {
|
||||
expect(txt).toContain('---');
|
||||
});
|
||||
|
||||
// With metadata present, each rendered snippet carries an "Origin: …" line
// built from the repository title, the snippet's repository id, the source
// kind and the version tag mapped from the snippet's versionId.
it('includes origin lines when metadata is provided', () => {
	const snippet = makeSnippet({ versionId: '/facebook/react/v18.3.0' });
	const txt = formatContextTxt(
		[makeSnippetResult(snippet)],
		[],
		makeMetadata({
			snippetVersions: {
				'/facebook/react/v18.3.0': 'v18.3.0'
			}
		})
	);

	expect(txt).toContain('Origin: React (/facebook/react) | github | version v18.3.0');
});
|
||||
|
||||
// An empty snippet list renders an explanatory section rather than an empty
// string, echoing the repository and requested/resolved version from metadata.
it('returns a readable no-results section for empty inputs', () => {
	const txt = formatContextTxt(
		[],
		[],
		makeMetadata({
			resultCount: 0,
			version: {
				requested: 'v18.3.0',
				resolved: 'v18.3.0',
				id: '/facebook/react/v18.3.0'
			}
		})
	);

	expect(txt).toContain('## Context Results');
	expect(txt).toContain('No matching snippets found');
	expect(txt).toContain('Repository: React (/facebook/react)');
	expect(txt).toContain('Requested version: v18.3.0');
	expect(txt).toContain('Resolved version: v18.3.0');
	expect(txt).toContain('Result count: 0');
});
|
||||
});
|
||||
|
||||
@@ -13,6 +13,7 @@
|
||||
*/
|
||||
|
||||
import { ContextResponseMapper } from '$lib/server/mappers/context-response.mapper.js';
|
||||
import type { ContextResponseMetadata } from '$lib/server/mappers/context-response.mapper.js';
|
||||
import { LibrarySearchResult, SnippetSearchResult } from '$lib/server/models/search-result.js';
|
||||
import {
|
||||
ContextJsonResponseDto,
|
||||
@@ -77,9 +78,10 @@ export function formatLibrarySearchJson(results: LibrarySearchResult[]): Library
|
||||
*/
|
||||
export function formatContextJson(
	snippets: SnippetSearchResult[],
	rules: string[],
	// Optional so existing two-argument callers keep working; when supplied it
	// is forwarded unchanged to the mapper.
	metadata?: ContextResponseMetadata
): ContextJsonResponseDto {
	// Thin delegation: all JSON shaping lives in ContextResponseMapper.
	return ContextResponseMapper.toContextJson(snippets, rules, metadata);
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
@@ -92,7 +94,27 @@ export function formatContextJson(
|
||||
* @param snippets - Ranked snippet search results (already token-budget trimmed).
|
||||
* @param rules - Rules from `trueref.json` / `repository_configs`.
|
||||
*/
|
||||
export function formatContextTxt(snippets: SnippetSearchResult[], rules: string[]): string {
|
||||
/**
 * Build the one-line provenance label for a snippet in the plain-text output.
 *
 * Shape: `Origin: <repo title> (<snippet repository id>) | <source>` with an
 * optional trailing ` | version <tag>` when the snippet has a `versionId`.
 *
 * @param result - Search result whose snippet supplies `repositoryId` and `versionId`.
 * @param metadata - Response metadata; may be omitted.
 * @returns The label, or `null` when metadata or its repository is missing.
 */
function formatOriginLine(
	result: SnippetSearchResult,
	metadata?: ContextResponseMetadata
): string | null {
	if (!metadata?.repository) return null;

	const parts = [
		`Origin: ${metadata.repository.title} (${result.snippet.repositoryId})`,
		// `localSource` takes precedence over the repository's own `source` value.
		metadata.localSource ? 'local' : metadata.repository.source
	];

	if (result.snippet.versionId) {
		// Prefer the mapped tag; fall back to the raw version id when unmapped.
		const versionTag = metadata.snippetVersions[result.snippet.versionId];
		parts.push(`version ${versionTag ?? result.snippet.versionId}`);
	}

	return parts.join(' | ');
}
|
||||
|
||||
export function formatContextTxt(
|
||||
snippets: SnippetSearchResult[],
|
||||
rules: string[],
|
||||
metadata?: ContextResponseMetadata
|
||||
): string {
|
||||
const parts: string[] = [];
|
||||
|
||||
if (rules.length > 0) {
|
||||
@@ -100,16 +122,41 @@ export function formatContextTxt(snippets: SnippetSearchResult[], rules: string[
|
||||
parts.push('---');
|
||||
}
|
||||
|
||||
for (const { snippet } of snippets) {
|
||||
if (snippets.length === 0) {
|
||||
const noResults = ['## Context Results', '_No matching snippets found for this request._'];
|
||||
|
||||
if (metadata?.repository) {
|
||||
noResults.push(`Repository: ${metadata.repository.title} (${metadata.repository.id})`);
|
||||
}
|
||||
|
||||
if (metadata?.version?.requested) {
|
||||
noResults.push(`Requested version: ${metadata.version.requested}`);
|
||||
}
|
||||
|
||||
if (metadata?.version?.resolved) {
|
||||
noResults.push(`Resolved version: ${metadata.version.resolved}`);
|
||||
}
|
||||
|
||||
noResults.push(`Result count: ${metadata?.resultCount ?? 0}`);
|
||||
parts.push(noResults.join('\n'));
|
||||
|
||||
return parts.join('\n\n');
|
||||
}
|
||||
|
||||
for (const result of snippets) {
|
||||
const { snippet } = result;
|
||||
const section: string[] = [];
|
||||
const originLine = formatOriginLine(result, metadata);
|
||||
|
||||
if (snippet.type === 'code') {
|
||||
if (snippet.title) section.push(`### ${snippet.title}`);
|
||||
if (snippet.breadcrumb) section.push(`*${snippet.breadcrumb}*`);
|
||||
if (originLine) section.push(originLine);
|
||||
section.push(`\`\`\`${snippet.language ?? ''}\n${snippet.content}\n\`\`\``);
|
||||
} else {
|
||||
if (snippet.title) section.push(`### ${snippet.title}`);
|
||||
if (snippet.breadcrumb) section.push(`*${snippet.breadcrumb}*`);
|
||||
if (originLine) section.push(originLine);
|
||||
section.push(snippet.content);
|
||||
}
|
||||
|
||||
|
||||
@@ -39,10 +39,9 @@ describe('selectSnippetsWithinBudget', () => {
|
||||
|
||||
// An over-budget snippet is skipped rather than ending the scan, so a smaller
// lower-ranked snippet can still be selected.
it('stops adding when next snippet exceeds the budget', () => {
	const snippets = [makeSnippet('a', 100), makeSnippet('b', 500), makeSnippet('c', 200)];
	// budget = 550 → a fits, b is skipped, c still fits
	const result = selectSnippetsWithinBudget(snippets, 550);
	expect(result.map((s) => s.id)).toEqual(['a', 'c']);
});
|
||||
|
||||
it('includes exactly one snippet when it fits the budget precisely', () => {
|
||||
@@ -51,10 +50,10 @@ describe('selectSnippetsWithinBudget', () => {
|
||||
expect(result.map((s) => s.id)).toEqual(['a']);
|
||||
});
|
||||
|
||||
// The first snippet alone exceeds the budget; it is passed over and the
// following snippet, which fits, is returned.
it('skips an oversized first snippet and keeps scanning later ones', () => {
	const snippets = [makeSnippet('a', 200), makeSnippet('b', 50)];
	const result = selectSnippetsWithinBudget(snippets, 100);
	expect(result.map((s) => s.id)).toEqual(['b']);
});
|
||||
|
||||
it('treats null tokenCount as 0', () => {
|
||||
|
||||
@@ -11,8 +11,8 @@ import type { Snippet } from '$lib/types';
|
||||
* Select snippets from a ranked list up to a maximum token budget.
|
||||
*
|
||||
* Snippets are evaluated in order. A snippet is included when its token count
|
||||
* does not push the running total past `maxTokens`. Oversized snippets are
* skipped so lower-ranked results can still be considered.
|
||||
*
|
||||
* @param snippets - Ranked list of snippets (best first).
|
||||
* @param maxTokens - Inclusive upper bound on total token count.
|
||||
@@ -24,7 +24,7 @@ export function selectSnippetsWithinBudget(snippets: Snippet[], maxTokens: numbe
|
||||
let usedTokens = 0;
|
||||
|
||||
for (const snippet of snippets) {
|
||||
if (usedTokens + (snippet.tokenCount ?? 0) > maxTokens) break;
|
||||
if (usedTokens + (snippet.tokenCount ?? 0) > maxTokens) continue;
|
||||
selected.push(snippet);
|
||||
usedTokens += snippet.tokenCount ?? 0;
|
||||
}
|
||||
|
||||
@@ -1,14 +1,35 @@
|
||||
import {
|
||||
CodeListItemDto,
|
||||
CodeSnippetJsonDto,
|
||||
ContextRepositoryJsonDto,
|
||||
ContextJsonResponseDto,
|
||||
ContextVersionJsonDto,
|
||||
InfoSnippetJsonDto,
|
||||
LibrarySearchJsonResponseDto,
|
||||
LibrarySearchJsonResultDto,
|
||||
SnippetOriginJsonDto,
|
||||
type SnippetJsonDto
|
||||
} from '$lib/server/models/context-response.js';
|
||||
import { LibrarySearchResult, SnippetSearchResult } from '$lib/server/models/search-result.js';
|
||||
|
||||
/**
 * Request-level metadata that the context formatters attach to a response in
 * addition to the snippets and rules.
 */
export interface ContextResponseMetadata {
	/** Reported as `isLocal` on the repository and on each snippet origin. */
	localSource: boolean;
	/** Result count to report on the response. */
	resultCount: number;
	/** Repository the results came from; `null` suppresses repository and origin output. */
	repository: {
		id: string;
		title: string;
		source: 'github' | 'local';
		sourceUrl: string;
		branch: string | null;
	} | null;
	/** Requested versus resolved version of the repository; `null` when not provided. */
	version: {
		requested: string | null;
		resolved: string | null;
		id: string | null;
	} | null;
	/** Lookup from a snippet's `versionId` to the version tag shown to the user. */
	snippetVersions: Record<string, string>;
}
|
||||
|
||||
export class ContextResponseMapper {
|
||||
static toLibrarySearchJson(results: LibrarySearchResult[]): LibrarySearchJsonResponseDto {
|
||||
return new LibrarySearchJsonResponseDto(
|
||||
@@ -35,8 +56,24 @@ export class ContextResponseMapper {
|
||||
);
|
||||
}
|
||||
|
||||
static toContextJson(snippets: SnippetSearchResult[], rules: string[]): ContextJsonResponseDto {
|
||||
static toContextJson(
|
||||
snippets: SnippetSearchResult[],
|
||||
rules: string[],
|
||||
metadata?: ContextResponseMetadata
|
||||
): ContextJsonResponseDto {
|
||||
const mapped: SnippetJsonDto[] = snippets.map(({ snippet }) => {
|
||||
const origin = metadata?.repository
|
||||
? new SnippetOriginJsonDto({
|
||||
repositoryId: snippet.repositoryId,
|
||||
repositoryTitle: metadata.repository.title,
|
||||
source: metadata.repository.source,
|
||||
sourceUrl: metadata.repository.sourceUrl,
|
||||
version: snippet.versionId ? metadata.snippetVersions[snippet.versionId] ?? null : null,
|
||||
versionId: snippet.versionId,
|
||||
isLocal: metadata.localSource
|
||||
})
|
||||
: null;
|
||||
|
||||
if (snippet.type === 'code') {
|
||||
return new CodeSnippetJsonDto({
|
||||
title: snippet.title ?? null,
|
||||
@@ -50,7 +87,8 @@ export class ContextResponseMapper {
|
||||
],
|
||||
id: snippet.id,
|
||||
tokenCount: snippet.tokenCount ?? null,
|
||||
pageTitle: snippet.breadcrumb ? snippet.breadcrumb.split('>')[0].trim() || null : null
|
||||
pageTitle: snippet.breadcrumb ? snippet.breadcrumb.split('>')[0].trim() || null : null,
|
||||
origin
|
||||
});
|
||||
}
|
||||
|
||||
@@ -58,14 +96,34 @@ export class ContextResponseMapper {
|
||||
text: snippet.content,
|
||||
breadcrumb: snippet.breadcrumb ?? null,
|
||||
pageId: snippet.id,
|
||||
tokenCount: snippet.tokenCount ?? null
|
||||
tokenCount: snippet.tokenCount ?? null,
|
||||
origin
|
||||
});
|
||||
});
|
||||
|
||||
return new ContextJsonResponseDto({
|
||||
snippets: mapped,
|
||||
rules,
|
||||
totalTokens: snippets.reduce((sum, result) => sum + (result.snippet.tokenCount ?? 0), 0)
|
||||
totalTokens: snippets.reduce((sum, result) => sum + (result.snippet.tokenCount ?? 0), 0),
|
||||
localSource: metadata?.localSource ?? false,
|
||||
repository: metadata?.repository
|
||||
? new ContextRepositoryJsonDto({
|
||||
id: metadata.repository.id,
|
||||
title: metadata.repository.title,
|
||||
source: metadata.repository.source,
|
||||
sourceUrl: metadata.repository.sourceUrl,
|
||||
branch: metadata.repository.branch,
|
||||
isLocal: metadata.localSource
|
||||
})
|
||||
: null,
|
||||
version: metadata?.version
|
||||
? new ContextVersionJsonDto({
|
||||
requested: metadata.version.requested,
|
||||
resolved: metadata.version.resolved,
|
||||
id: metadata.version.id
|
||||
})
|
||||
: null,
|
||||
resultCount: metadata?.resultCount ?? snippets.length
|
||||
});
|
||||
}
|
||||
}
|
||||
@@ -1,3 +1,78 @@
|
||||
/** Constructor input for {@link ContextRepositoryJsonDto}. */
export interface ContextRepositoryJsonDtoProps {
	id: string;
	title: string;
	source: 'github' | 'local';
	sourceUrl: string;
	branch: string | null;
	isLocal: boolean;
}

/**
 * Repository block of a context JSON response.
 *
 * Declared as implementing its props interface so the compiler flags any
 * drift between the two shapes.
 */
export class ContextRepositoryJsonDto implements ContextRepositoryJsonDtoProps {
	id: string;
	title: string;
	source: 'github' | 'local';
	sourceUrl: string;
	branch: string | null;
	isLocal: boolean;

	/** Copies each listed field from `props`; other properties on `props` are ignored. */
	constructor(props: ContextRepositoryJsonDtoProps) {
		this.id = props.id;
		this.title = props.title;
		this.source = props.source;
		this.sourceUrl = props.sourceUrl;
		this.branch = props.branch;
		this.isLocal = props.isLocal;
	}
}
|
||||
|
||||
/** Constructor input for {@link ContextVersionJsonDto}. */
export interface ContextVersionJsonDtoProps {
	requested: string | null;
	resolved: string | null;
	id: string | null;
}

/**
 * Version block of a context JSON response: requested version, resolved
 * version and the resolved version's id.
 *
 * Declared as implementing its props interface so the compiler flags any
 * drift between the two shapes.
 */
export class ContextVersionJsonDto implements ContextVersionJsonDtoProps {
	requested: string | null;
	resolved: string | null;
	id: string | null;

	/** Copies each listed field from `props`. */
	constructor(props: ContextVersionJsonDtoProps) {
		this.requested = props.requested;
		this.resolved = props.resolved;
		this.id = props.id;
	}
}
|
||||
|
||||
/** Constructor input for {@link SnippetOriginJsonDto}. */
export interface SnippetOriginJsonDtoProps {
	repositoryId: string;
	repositoryTitle: string;
	source: 'github' | 'local';
	sourceUrl: string;
	version: string | null;
	versionId: string | null;
	isLocal: boolean;
}

/**
 * Per-snippet provenance attached to each snippet in a context JSON response.
 *
 * Declared as implementing its props interface so the compiler flags any
 * drift between the two shapes.
 */
export class SnippetOriginJsonDto implements SnippetOriginJsonDtoProps {
	repositoryId: string;
	repositoryTitle: string;
	source: 'github' | 'local';
	sourceUrl: string;
	version: string | null;
	versionId: string | null;
	isLocal: boolean;

	/** Copies each listed field from `props`. */
	constructor(props: SnippetOriginJsonDtoProps) {
		this.repositoryId = props.repositoryId;
		this.repositoryTitle = props.repositoryTitle;
		this.source = props.source;
		this.sourceUrl = props.sourceUrl;
		this.version = props.version;
		this.versionId = props.versionId;
		this.isLocal = props.isLocal;
	}
}
|
||||
|
||||
export class LibrarySearchJsonResultDto {
|
||||
id: string;
|
||||
title: string;
|
||||
@@ -57,6 +132,7 @@ export class CodeSnippetJsonDto {
|
||||
id: string;
|
||||
tokenCount: number | null;
|
||||
pageTitle: string | null;
|
||||
origin: SnippetOriginJsonDto | null;
|
||||
|
||||
constructor(props: Omit<CodeSnippetJsonDto, 'type'>) {
|
||||
this.title = props.title;
|
||||
@@ -66,6 +142,7 @@ export class CodeSnippetJsonDto {
|
||||
this.id = props.id;
|
||||
this.tokenCount = props.tokenCount;
|
||||
this.pageTitle = props.pageTitle;
|
||||
this.origin = props.origin;
|
||||
}
|
||||
}
|
||||
|
||||
@@ -75,12 +152,14 @@ export class InfoSnippetJsonDto {
|
||||
breadcrumb: string | null;
|
||||
pageId: string;
|
||||
tokenCount: number | null;
|
||||
origin: SnippetOriginJsonDto | null;
|
||||
|
||||
constructor(props: Omit<InfoSnippetJsonDto, 'type'>) {
|
||||
this.text = props.text;
|
||||
this.breadcrumb = props.breadcrumb;
|
||||
this.pageId = props.pageId;
|
||||
this.tokenCount = props.tokenCount;
|
||||
this.origin = props.origin;
|
||||
}
|
||||
}
|
||||
|
||||
@@ -90,10 +169,18 @@ export class ContextJsonResponseDto {
|
||||
snippets: SnippetJsonDto[];
|
||||
rules: string[];
|
||||
totalTokens: number;
|
||||
localSource: boolean;
|
||||
repository: ContextRepositoryJsonDto | null;
|
||||
version: ContextVersionJsonDto | null;
|
||||
resultCount: number;
|
||||
|
||||
constructor(props: ContextJsonResponseDto) {
|
||||
this.snippets = props.snippets;
|
||||
this.rules = props.rules;
|
||||
this.totalTokens = props.totalTokens;
|
||||
this.localSource = props.localSource;
|
||||
this.repository = props.repository;
|
||||
this.version = props.version;
|
||||
this.resultCount = props.resultCount;
|
||||
}
|
||||
}
|
||||
@@ -818,4 +818,246 @@ describe('HybridSearchService', () => {
|
||||
// Should return results (alpha=1 pure vector mode)
|
||||
expect(results.length).toBeGreaterThan(0);
|
||||
});
|
||||
|
||||
// -------------------------------------------------------------------------
|
||||
// Semantic-only mode (searchMode=semantic)
|
||||
// -------------------------------------------------------------------------
|
||||
|
||||
// Semantic mode relies solely on embeddings, so without a provider the search
// yields nothing even though a snippet exists in the repository.
it('searchMode=semantic returns empty array when provider is null', async () => {
	const client = createTestDb();
	const repoId = seedRepo(client);
	const docId = seedDocument(client, repoId);

	seedSnippet(client, {
		repositoryId: repoId,
		documentId: docId,
		content: 'semantic null provider test'
	});

	const searchService = new SearchService(client);
	const hybridService = new HybridSearchService(client, searchService, null);

	const results = await hybridService.search('test query', {
		repositoryId: repoId,
		searchMode: 'semantic'
	});

	// No provider: semantic mode should return empty.
	expect(results).toHaveLength(0);
});
|
||||
|
||||
// A whitespace-only query is rejected before any embedding is requested.
it('searchMode=semantic returns empty array for blank query', async () => {
	const client = createTestDb();
	const repoId = seedRepo(client);
	// Seeded for parity with the sibling tests; the returned id is not needed here.
	seedDocument(client, repoId);

	const mockProvider = makeMockProvider([[1, 0, 0, 0]]);

	const searchService = new SearchService(client);
	const hybridService = new HybridSearchService(client, searchService, mockProvider);

	const results = await hybridService.search(' ', {
		repositoryId: repoId,
		searchMode: 'semantic'
	});

	// Blank query: should return empty.
	expect(results).toHaveLength(0);
});
|
||||
|
||||
// When the provider yields no embeddings, semantic mode returns an empty list
// instead of substituting keyword results.
it('searchMode=semantic falls back to empty when provider fails', async () => {
	const client = createTestDb();
	const repoId = seedRepo(client);
	// Seeded for parity with the sibling tests; the returned id is not needed here.
	seedDocument(client, repoId);

	const noopProvider = makeNoopProvider();
	const searchService = new SearchService(client);
	const hybridService = new HybridSearchService(client, searchService, noopProvider);

	const results = await hybridService.search('test query', {
		repositoryId: repoId,
		searchMode: 'semantic'
	});

	// Provider fails: should return empty (not fall back to FTS).
	expect(results).toHaveLength(0);
});
|
||||
|
||||
// -------------------------------------------------------------------------
|
||||
// Fallback behavior in auto/hybrid modes
|
||||
// -------------------------------------------------------------------------
|
||||
|
||||
// A punctuation-only query gives FTS nothing to match, so auto mode must fall
// back to vector search and return the snippet whose stored embedding matches
// the vector produced by the mock provider.
it('searchMode=auto falls back to vector when FTS has no results and provider available', async () => {
	const client = createTestDb();
	const repoId = seedRepo(client);
	const docId = seedDocument(client, repoId);

	// Create profile
	client
		.prepare(
			`INSERT INTO embedding_profiles (id, provider_kind, title, enabled, is_default, model, dimensions, config, created_at, updated_at)
			VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?)`
		)
		.run('test-profile', 'local-transformers', 'Test', 1, 1, 'test-model', 4, '{}', NOW_S, NOW_S);

	// Seed a snippet that won't match punctuation-heavy query through FTS.
	const snippetId = seedSnippet(client, {
		repositoryId: repoId,
		documentId: docId,
		content: 'example content'
	});

	// Seed embedding for the snippet.
	const embed = [0.5, 0.5, 0.5, 0.5];
	const f32 = new Float32Array(embed);
	client
		.prepare(
			`INSERT INTO snippet_embeddings (snippet_id, profile_id, model, dimensions, embedding, created_at)
			VALUES (?, ?, ?, ?, ?, ?)`
		)
		.run(snippetId, 'test-profile', 'test-model', 4, Buffer.from(f32.buffer), NOW_S);

	// Mock provider that always returns a matching embedding.
	const mockProvider: EmbeddingProvider = {
		name: 'mock',
		dimensions: 4,
		model: 'test-model',
		async embed() {
			return [
				{
					// Same values as the stored embedding above.
					values: new Float32Array(embed),
					dimensions: 4,
					model: 'test-model'
				}
			];
		},
		async isAvailable() {
			return true;
		}
	};

	const searchService = new SearchService(client);
	const hybridService = new HybridSearchService(client, searchService, mockProvider);

	// Query with heavy punctuation that preprocesses to nothing.
	const results = await hybridService.search('!!!@@@###', {
		repositoryId: repoId,
		searchMode: 'auto',
		profileId: 'test-profile'
	});

	// Should have fallen back to vector search and found the snippet.
	expect(results.length).toBeGreaterThan(0);
	expect(results[0].snippet.id).toBe(snippetId);
});
|
||||
|
||||
// When the keyword search already matches, auto mode returns results without
// depending on the vector fallback.
it('searchMode=auto continues with FTS results when available', async () => {
	const client = createTestDb();
	const repoId = seedRepo(client);
	const docId = seedDocument(client, repoId);

	// Seed FTS-matchable snippet.
	seedSnippet(client, {
		repositoryId: repoId,
		documentId: docId,
		content: 'hello world example'
	});

	const mockProvider = makeMockProvider([[1, 0]]);
	const searchService = new SearchService(client);
	const hybridService = new HybridSearchService(client, searchService, mockProvider);

	const results = await hybridService.search('hello', {
		repositoryId: repoId,
		searchMode: 'auto'
	});

	// Should find results through FTS (not fallback to vector).
	expect(results.length).toBeGreaterThan(0);
});
|
||||
|
||||
// Hybrid mode must fall back to pure vector search when the keyword search
// finds nothing. The query term is deliberately absent from the seeded
// content: punctuation is stripped during preprocessing, so a query such as
// '%%%vector%%%' would reduce to 'vector', match the snippet through FTS and
// never exercise the fallback.
it('searchMode=hybrid falls back to vector on no FTS results', async () => {
	const client = createTestDb();
	const repoId = seedRepo(client);
	const docId = seedDocument(client, repoId);

	// Create profile
	client
		.prepare(
			`INSERT INTO embedding_profiles (id, provider_kind, title, enabled, is_default, model, dimensions, config, created_at, updated_at)
			VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?)`
		)
		.run('test-profile', 'local-transformers', 'Test', 1, 1, 'test-model', 4, '{}', NOW_S, NOW_S);

	// Seed snippet with vector embedding only.
	const snippetId = seedSnippet(client, {
		repositoryId: repoId,
		documentId: docId,
		content: 'vector search test'
	});

	const embed = [0.7, 0.3, 0.2, 0.1];
	const f32 = new Float32Array(embed);
	client
		.prepare(
			`INSERT INTO snippet_embeddings (snippet_id, profile_id, model, dimensions, embedding, created_at)
			VALUES (?, ?, ?, ?, ?, ?)`
		)
		.run(snippetId, 'test-profile', 'test-model', 4, Buffer.from(f32.buffer), NOW_S);

	const mockProvider: EmbeddingProvider = {
		name: 'mock',
		dimensions: 4,
		model: 'test-model',
		async embed() {
			return [
				{
					// Same values as the stored embedding above.
					values: new Float32Array(embed),
					dimensions: 4,
					model: 'test-model'
				}
			];
		},
		async isAvailable() {
			return true;
		}
	};

	const searchService = new SearchService(client);
	const hybridService = new HybridSearchService(client, searchService, mockProvider);

	// Query whose only searchable term does not occur in the seeded content.
	const results = await hybridService.search('%%%zzqxnomatch%%%', {
		repositoryId: repoId,
		searchMode: 'hybrid',
		alpha: 0.5,
		profileId: 'test-profile'
	});

	// Should fall back to vector and find the snippet.
	expect(results.length).toBeGreaterThan(0);
	expect(results[0].snippet.id).toBe(snippetId);
});
|
||||
|
||||
// With no embedding provider there is no vector fallback, so a query made of
// punctuation only must come back empty.
it('punctuation-heavy query returns empty when no vector provider and FTS preprocesses to nothing', async () => {
	const client = createTestDb();
	const repoId = seedRepo(client);
	const docId = seedDocument(client, repoId);

	// No embeddings or provider.
	seedSnippet(client, {
		repositoryId: repoId,
		documentId: docId,
		content: 'example content'
	});

	const searchService = new SearchService(client);
	const hybridService = new HybridSearchService(client, searchService, null);

	const results = await hybridService.search('!!!@@@###$$$', {
		repositoryId: repoId
	});

	// No provider and FTS preprocesses to empty: should return empty.
	expect(results).toHaveLength(0);
});
|
||||
});
|
||||
|
||||
@@ -88,8 +88,16 @@ export class HybridSearchService {
|
||||
/**
|
||||
* Execute a hybrid search combining FTS5 and (optionally) vector search.
|
||||
*
|
||||
* Search modes:
* - 'keyword'  : FTS5-only (alpha = 0)
* - 'semantic' : Vector-only (alpha = 1), skips FTS entirely; returns an empty
*                list when no embedding provider is configured or the query is blank
* - 'hybrid'   : Balanced RRF fusion (alpha = 0.5 by default)
* - 'auto'     : Runs FTS first. When FTS yields no results (for example a
*                punctuation-heavy query that preprocesses to nothing), falls
*                back to vector search.
*
* Outside semantic mode, when `embeddingProvider` is null or `alpha` is 0, the
* method returns FTS5 results directly without embedding the query.
|
||||
*
|
||||
* @param query - Raw search string (preprocessing handled by SearchService).
|
||||
* @param options - Search parameters including repositoryId and alpha blend.
|
||||
@@ -119,7 +127,30 @@ export class HybridSearchService {
|
||||
alpha = options.alpha ?? 0.5;
|
||||
}
|
||||
|
||||
// Always run FTS5 — it is synchronous and fast.
|
||||
// Semantic mode: skip FTS entirely and use vector search only.
|
||||
if (mode === 'semantic') {
|
||||
if (!this.embeddingProvider || !query.trim()) {
|
||||
return [];
|
||||
}
|
||||
|
||||
const embeddings = await this.embeddingProvider.embed([query]);
|
||||
if (embeddings.length === 0) {
|
||||
return [];
|
||||
}
|
||||
|
||||
const queryEmbedding = embeddings[0].values;
|
||||
const vectorResults = this.vectorSearch.vectorSearch(queryEmbedding, {
|
||||
repositoryId: options.repositoryId,
|
||||
versionId: options.versionId,
|
||||
profileId: options.profileId,
|
||||
limit
|
||||
});
|
||||
|
||||
const topIds = vectorResults.slice(0, limit).map((r) => r.snippetId);
|
||||
return this.fetchSnippetsByIds(topIds, options.repositoryId, options.type);
|
||||
}
|
||||
|
||||
// FTS5 mode (keyword) or hybrid/auto modes: try FTS first.
|
||||
const ftsResults = this.searchService.searchSnippets(query, {
|
||||
repositoryId: options.repositoryId,
|
||||
versionId: options.versionId,
|
||||
@@ -132,10 +163,40 @@ export class HybridSearchService {
|
||||
return ftsResults.slice(0, limit);
|
||||
}
|
||||
|
||||
// Embed query and run vector search.
|
||||
// For auto/hybrid modes: if FTS yielded results, use them; otherwise try vector.
|
||||
// This handles punctuation-heavy queries that normalize to empty after preprocessing.
|
||||
const hasFtsResults = ftsResults.length > 0;
|
||||
|
||||
if (!hasFtsResults) {
|
||||
// No FTS results: try vector search as a fallback in auto/hybrid modes.
|
||||
if (!query.trim()) {
|
||||
// Query is empty; no point embedding it.
|
||||
return [];
|
||||
}
|
||||
|
||||
const embeddings = await this.embeddingProvider.embed([query]);
|
||||
|
||||
// If provider fails (Noop returns empty array), we're done.
|
||||
if (embeddings.length === 0) {
|
||||
return [];
|
||||
}
|
||||
|
||||
const queryEmbedding = embeddings[0].values;
|
||||
const vectorResults = this.vectorSearch.vectorSearch(queryEmbedding, {
|
||||
repositoryId: options.repositoryId,
|
||||
versionId: options.versionId,
|
||||
profileId: options.profileId,
|
||||
limit
|
||||
});
|
||||
|
||||
const topIds = vectorResults.slice(0, limit).map((r) => r.snippetId);
|
||||
return this.fetchSnippetsByIds(topIds, options.repositoryId, options.type);
|
||||
}
|
||||
|
||||
// FTS has results: use RRF to blend with vector search (if alpha < 1).
|
||||
const embeddings = await this.embeddingProvider.embed([query]);
|
||||
|
||||
// Provider may be a Noop (returns empty array) — fall back gracefully.
|
||||
// Provider may be a Noop (returns empty array) — fall back to FTS gracefully.
|
||||
if (embeddings.length === 0) {
|
||||
return ftsResults.slice(0, limit);
|
||||
}
|
||||
|
||||
@@ -2,7 +2,8 @@
|
||||
* Query preprocessor for FTS5 search queries.
|
||||
*
|
||||
* Normalizes raw user input into an FTS5-compatible MATCH expression
|
||||
* with prefix wildcard expansion on the last token. Handles punctuation-heavy
* and code-like queries by extracting searchable alphanumeric/underscore terms.
|
||||
*/
|
||||
|
||||
/**
|
||||
@@ -10,25 +11,104 @@
|
||||
*
|
||||
* Steps:
|
||||
* 1. Trim and normalize internal whitespace.
|
||||
* 2. Preserve FTS5 operators (AND, OR, NOT) and extract alphanumeric/underscore terms.
* 3. Strip punctuation that breaks FTS5 parsing (parentheses, brackets, special chars).
* 4. Preserve searchable code-like patterns (snake_case, dot notation parts, etc.).
* 5. Return empty string if no searchable terms remain; otherwise, append a prefix
*    wildcard `*` to the last token when it is >= 3 characters and does not already
*    end with `*`.
|
||||
*/
|
||||
export function preprocessQuery(raw: string): string {
	// Contract:
	//   - Input is arbitrary user text (may be code-like or punctuation-heavy).
	//   - Output is a space-separated list of FTS5 barewords, optionally joined by
	//     the binary operators AND / OR / NOT, with a trailing `*` prefix wildcard
	//     on the last term when that term is at least 3 characters long.
	//   - Returns '' when no searchable term remains, so callers can skip MATCH.
	//
	// FTS5 barewords may only contain letters, digits, underscores and non-ASCII
	// characters. Instead of black-listing individual punctuation characters
	// (which let `=`, `'` and `"` slip through inside a term), every maximal run
	// of letters/marks/digits/underscores is extracted and everything else acts
	// as a separator.
	const operators = new Set(['AND', 'OR', 'NOT']);
	const termPattern = /[\p{L}\p{M}\p{N}_]+/gu;

	const tokens: string[] = [];
	const seenTerms = new Set<string>();

	for (const piece of raw.match(termPattern) ?? []) {
		if (operators.has(piece)) {
			const previous = tokens[tokens.length - 1];
			// A leading operator has no left operand: drop it.
			if (previous === undefined) continue;
			if (operators.has(previous)) {
				// FTS5 operators are binary, so two in a row is a syntax error.
				// Keep the most recent one ("a AND NOT b" becomes "a NOT b").
				tokens[tokens.length - 1] = piece;
			} else {
				tokens.push(piece);
			}
			continue;
		}

		// Each distinct term is emitted once, at its first position.
		if (!seenTerms.has(piece)) {
			seenTerms.add(piece);
			tokens.push(piece);
		}
	}

	// A trailing operator has no right operand. Runs were collapsed above, so at
	// most one can be left.
	const tail = tokens[tokens.length - 1];
	if (tail !== undefined && operators.has(tail)) {
		tokens.pop();
	}

	// After the pop, the last element (if any) is guaranteed to be a term.
	const lastTerm = tokens[tokens.length - 1];
	if (lastTerm === undefined) return '';

	// Prefix wildcard gives a "typing as you go" feel; very short terms are left
	// exact to avoid overly broad prefix matches.
	if (lastTerm.length >= 3) {
		tokens[tokens.length - 1] = `${lastTerm}*`;
	}

	return tokens.join(' ');
}
|
||||
|
||||
@@ -177,6 +177,85 @@ describe('preprocessQuery', () => {
|
||||
it('handles single short token without wildcard', () => {
|
||||
expect(preprocessQuery('ab')).toBe('ab');
|
||||
});
|
||||
|
||||
// Punctuation-heavy and code-like queries
|
||||
it('normalizes code-like queries with slashes', () => {
|
||||
// "foo/bar/baz" should extract searchable terms
|
||||
const result = preprocessQuery('foo/bar/baz');
|
||||
expect(result).toContain('foo');
|
||||
expect(result).toContain('bar');
|
||||
expect(result).toContain('baz');
|
||||
});
|
||||
|
||||
it('extracts terms from dot-notation queries', () => {
|
||||
// "object.method.name" should extract searchable parts
|
||||
const result = preprocessQuery('object.method.name');
|
||||
expect(result).toContain('object');
|
||||
expect(result).toContain('method');
|
||||
expect(result).toContain('name');
|
||||
});
|
||||
|
||||
it('handles snake_case identifiers', () => {
|
||||
// "my_function_name" should be preserved
|
||||
const result = preprocessQuery('my_function_name');
|
||||
expect(result).toContain('my_function_name');
|
||||
});
|
||||
|
||||
it('removes punctuation from parenthesized expressions', () => {
|
||||
// "(hello world)" → "hello world*"
|
||||
const result = preprocessQuery('(hello world)');
|
||||
expect(result).toContain('hello');
|
||||
expect(result).toContain('world');
|
||||
});
|
||||
|
||||
it('handles bracket-enclosed content', () => {
|
||||
// "[foo bar]" → "foo bar*"
|
||||
const result = preprocessQuery('[foo bar]');
|
||||
expect(result).toContain('foo');
|
||||
expect(result).toContain('bar');
|
||||
});
|
||||
|
||||
it('returns empty string for pure punctuation', () => {
|
||||
expect(preprocessQuery('!@#$%^&*()')).toBe('');
|
||||
});
|
||||
|
||||
it('returns empty string for punctuation with operators only', () => {
|
||||
expect(preprocessQuery('!!! AND *** OR ((()))')).toBe('');
|
||||
});
|
||||
|
||||
it('normalizes C++ style template syntax', () => {
|
||||
// "vector<int>" → "vector int*"
|
||||
const result = preprocessQuery('vector<int>');
|
||||
expect(result).toContain('vector');
|
||||
expect(result).toContain('int');
|
||||
});
|
||||
|
||||
it('handles colons and semicolons in code snippets', () => {
|
||||
// "http://example.com; function()" → extracts searchable terms
|
||||
const result = preprocessQuery('http://example.com; function()');
|
||||
expect(result).toContain('http');
|
||||
expect(result).toContain('example');
|
||||
expect(result).toContain('com');
|
||||
expect(result).toContain('function');
|
||||
});
|
||||
|
||||
it('normalizes arithmetic operators', () => {
|
||||
// "a + b * c" → "a b c*"
|
||||
const result = preprocessQuery('a + b * c');
|
||||
// Should extract terms, but skip operators
|
||||
const terms = result.split(/\s+/).filter((t) => !['AND', 'OR', 'NOT'].includes(t));
|
||||
expect(terms.length).toBeGreaterThan(0);
|
||||
});
|
||||
|
||||
it('returns single searchable term with wildcard when >=3 chars', () => {
|
||||
const result = preprocessQuery('!!!hello!!!');
|
||||
expect(result).toBe('hello*');
|
||||
});
|
||||
|
||||
it('returns single short term without wildcard', () => {
|
||||
const result = preprocessQuery('!!!ab!!!');
|
||||
expect(result).toBe('ab');
|
||||
});
|
||||
});
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
@@ -2,6 +2,7 @@ import { beforeEach, describe, expect, it, vi } from 'vitest';
|
||||
import Database from 'better-sqlite3';
|
||||
import { readFileSync } from 'node:fs';
|
||||
import { join } from 'node:path';
|
||||
import crypto from 'node:crypto';
|
||||
import { RepositoryService } from '$lib/server/services/repository.service';
|
||||
import { VersionService } from '$lib/server/services/version.service';
|
||||
|
||||
@@ -24,21 +25,34 @@ vi.mock('$lib/server/pipeline/startup.js', () => ({
|
||||
getQueue: () => queue
|
||||
}));
|
||||
|
||||
vi.mock('$lib/server/embeddings/registry', () => ({
|
||||
createProviderFromProfile: () => null
|
||||
}));
|
||||
|
||||
vi.mock('$lib/server/embeddings/registry.js', () => ({
|
||||
createProviderFromProfile: () => null
|
||||
}));
|
||||
|
||||
import { POST as postLibraries } from './libs/+server.js';
|
||||
import { GET as getLibrary } from './libs/[id]/+server.js';
|
||||
import { GET as getJobs } from './jobs/+server.js';
|
||||
import { GET as getJob } from './jobs/[id]/+server.js';
|
||||
import { GET as getVersions, POST as postVersions } from './libs/[id]/versions/+server.js';
|
||||
import { GET as getContext } from './context/+server.js';
|
||||
|
||||
const NOW_S = Math.floor(Date.now() / 1000);
|
||||
|
||||
function createTestDb(): Database.Database {
|
||||
const client = new Database(':memory:');
|
||||
client.pragma('foreign_keys = ON');
|
||||
|
||||
const migrationsFolder = join(import.meta.dirname, '../../../lib/server/db/migrations');
|
||||
const ftsFile = join(import.meta.dirname, '../../../lib/server/db/fts.sql');
|
||||
|
||||
// Apply all migration files in order
|
||||
const migration0 = readFileSync(join(migrationsFolder, '0000_large_master_chief.sql'), 'utf-8');
|
||||
const migration1 = readFileSync(join(migrationsFolder, '0001_quick_nighthawk.sql'), 'utf-8');
|
||||
const migration2 = readFileSync(join(migrationsFolder, '0002_silky_stellaris.sql'), 'utf-8');
|
||||
|
||||
// Apply first migration
|
||||
const statements0 = migration0
|
||||
@@ -60,9 +74,126 @@ function createTestDb(): Database.Database {
|
||||
client.exec(statement);
|
||||
}
|
||||
|
||||
const statements2 = migration2
|
||||
.split('--> statement-breakpoint')
|
||||
.map((statement) => statement.trim())
|
||||
.filter(Boolean);
|
||||
|
||||
for (const statement of statements2) {
|
||||
client.exec(statement);
|
||||
}
|
||||
|
||||
client.exec(readFileSync(ftsFile, 'utf-8'));
|
||||
|
||||
return client;
|
||||
}
|
||||
|
||||
/**
 * Inserts one row into `repositories` for a test and returns its id.
 *
 * Any field not supplied in `overrides` falls back to the canonical
 * "/facebook/react" GitHub fixture in the `indexed` state. Both timestamps
 * are set to `NOW_S`.
 */
function seedRepo(
	client: Database.Database,
	overrides: {
		id?: string;
		title?: string;
		source?: 'github' | 'local';
		sourceUrl?: string;
		state?: 'pending' | 'indexing' | 'indexed' | 'error';
	} = {}
): string {
	const repositoryId = overrides.id ?? '/facebook/react';

	const insertRepository = client.prepare(
		`INSERT INTO repositories
			(id, title, source, source_url, state, created_at, updated_at)
		 VALUES (?, ?, ?, ?, ?, ?, ?)`
	);

	// Column order must match the INSERT column list above.
	const row = [
		repositoryId,
		overrides.title ?? 'React',
		overrides.source ?? 'github',
		overrides.sourceUrl ?? 'https://github.com/facebook/react',
		overrides.state ?? 'indexed',
		NOW_S,
		NOW_S
	];
	insertRepository.run(...row);

	return repositoryId;
}
|
||||
|
||||
/**
 * Inserts an already-indexed version row (zero snippets) for `repositoryId`
 * and returns the generated version id, which is `<repositoryId>/<tag>`.
 */
function seedVersion(client: Database.Database, repositoryId: string, tag: string): string {
	const id = `${repositoryId}/${tag}`;

	const insertVersion = client.prepare(
		`INSERT INTO repository_versions
			(id, repository_id, tag, state, total_snippets, indexed_at, created_at)
		 VALUES (?, ?, ?, 'indexed', 0, ?, ?)`
	);
	// indexed_at and created_at both use the shared test timestamp.
	insertVersion.run(id, repositoryId, tag, NOW_S, NOW_S);

	return id;
}
|
||||
|
||||
/**
 * Inserts a `documents` row with a fixed file path ('README.md') and
 * checksum, keyed by a fresh random UUID, and returns that UUID.
 *
 * `versionId` defaults to `null`, in which case NULL is stored in the
 * `version_id` column.
 */
function seedDocument(
	client: Database.Database,
	repositoryId: string,
	versionId: string | null = null
): string {
	const id = crypto.randomUUID();

	const insertDocument = client.prepare(
		`INSERT INTO documents (id, repository_id, version_id, file_path, checksum, indexed_at)
		 VALUES (?, ?, ?, ?, ?, ?)`
	);
	insertDocument.run(id, repositoryId, versionId, 'README.md', 'checksum', NOW_S);

	return id;
}
|
||||
|
||||
/**
 * Inserts a `snippets` row under a fresh random UUID and returns that UUID.
 *
 * Only `documentId`, `repositoryId` and `content` are required. Omitted
 * optional fields are stored as NULL, except `type` (defaults to 'info')
 * and `tokenCount` (defaults to 0).
 */
function seedSnippet(
	client: Database.Database,
	options: {
		documentId: string;
		repositoryId: string;
		versionId?: string | null;
		type?: 'code' | 'info';
		title?: string | null;
		content: string;
		language?: string | null;
		breadcrumb?: string | null;
		tokenCount?: number;
	}
): string {
	const id = crypto.randomUUID();

	const insertSnippet = client.prepare(
		`INSERT INTO snippets
			(id, document_id, repository_id, version_id, type, title, content, language, breadcrumb, token_count, created_at)
		 VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)`
	);

	// Column order must match the INSERT column list above.
	const row = [
		id,
		options.documentId,
		options.repositoryId,
		options.versionId ?? null,
		options.type ?? 'info',
		options.title ?? null,
		options.content,
		options.language ?? null,
		options.breadcrumb ?? null,
		options.tokenCount ?? 0,
		NOW_S
	];
	insertSnippet.run(...row);

	return id;
}
|
||||
|
||||
/**
 * Stores `rules` for a repository in `repository_configs`.
 * The list is persisted as a JSON-encoded string in the `rules` column.
 */
function seedRules(client: Database.Database, repositoryId: string, rules: string[]) {
	const serializedRules = JSON.stringify(rules);

	const insertConfig = client.prepare(
		`INSERT INTO repository_configs (repository_id, rules, updated_at)
		 VALUES (?, ?, ?)`
	);
	insertConfig.run(repositoryId, serializedRules, NOW_S);
}
|
||||
|
||||
describe('API contract integration', () => {
|
||||
beforeEach(() => {
|
||||
db = createTestDb();
|
||||
@@ -174,4 +305,78 @@ describe('API contract integration', () => {
|
||||
expect(getBody.versions[0]).not.toHaveProperty('repository_id');
|
||||
expect(getBody.versions[0]).not.toHaveProperty('total_snippets');
|
||||
});
|
||||
|
||||
it('GET /api/v1/context returns informative txt output for empty results', async () => {
|
||||
const repositoryId = seedRepo(db);
|
||||
|
||||
const response = await getContext({
|
||||
url: new URL(
|
||||
`http://test/api/v1/context?libraryId=${encodeURIComponent(repositoryId)}&query=${encodeURIComponent('no matches here')}&type=txt`
|
||||
)
|
||||
} as never);
|
||||
|
||||
expect(response.status).toBe(200);
|
||||
expect(response.headers.get('content-type')).toContain('text/plain');
|
||||
|
||||
const body = await response.text();
|
||||
expect(body).toContain('## Context Results');
|
||||
expect(body).toContain('No matching snippets found');
|
||||
expect(body).toContain('Repository: React (/facebook/react)');
|
||||
expect(body).toContain('Result count: 0');
|
||||
});
|
||||
|
||||
it('GET /api/v1/context returns additive repository and version metadata for versioned results', async () => {
|
||||
const repositoryId = seedRepo(db);
|
||||
const versionId = seedVersion(db, repositoryId, 'v18.3.0');
|
||||
const documentId = seedDocument(db, repositoryId, versionId);
|
||||
seedRules(db, repositoryId, ['Prefer hooks over classes']);
|
||||
seedSnippet(db, {
|
||||
documentId,
|
||||
repositoryId,
|
||||
versionId,
|
||||
type: 'code',
|
||||
title: 'useThing',
|
||||
content: 'export function useThing() { return true; }',
|
||||
language: 'ts',
|
||||
breadcrumb: 'Hooks > useThing',
|
||||
tokenCount: 42
|
||||
});
|
||||
|
||||
const response = await getContext({
|
||||
url: new URL(
|
||||
`http://test/api/v1/context?libraryId=${encodeURIComponent(`${repositoryId}/v18.3.0`)}&query=${encodeURIComponent('useThing')}`
|
||||
)
|
||||
} as never);
|
||||
|
||||
expect(response.status).toBe(200);
|
||||
const body = await response.json();
|
||||
|
||||
expect(body.snippets).toHaveLength(1);
|
||||
expect(body.rules).toEqual(['Prefer hooks over classes']);
|
||||
expect(body.totalTokens).toBe(42);
|
||||
expect(body.localSource).toBe(false);
|
||||
expect(body.resultCount).toBe(1);
|
||||
expect(body.repository).toEqual({
|
||||
id: '/facebook/react',
|
||||
title: 'React',
|
||||
source: 'github',
|
||||
sourceUrl: 'https://github.com/facebook/react',
|
||||
branch: 'main',
|
||||
isLocal: false
|
||||
});
|
||||
expect(body.version).toEqual({
|
||||
requested: 'v18.3.0',
|
||||
resolved: 'v18.3.0',
|
||||
id: '/facebook/react/v18.3.0'
|
||||
});
|
||||
expect(body.snippets[0].origin).toEqual({
|
||||
repositoryId: '/facebook/react',
|
||||
repositoryTitle: 'React',
|
||||
source: 'github',
|
||||
sourceUrl: 'https://github.com/facebook/react',
|
||||
version: 'v18.3.0',
|
||||
versionId: '/facebook/react/v18.3.0',
|
||||
isLocal: false
|
||||
});
|
||||
});
|
||||
});
|
||||
@@ -25,6 +25,7 @@ import {
|
||||
formatContextTxt,
|
||||
CORS_HEADERS
|
||||
} from '$lib/server/api/formatters';
|
||||
import type { ContextResponseMetadata } from '$lib/server/mappers/context-response.mapper';
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Helpers
|
||||
@@ -67,7 +68,32 @@ function getRules(db: ReturnType<typeof getClient>, repositoryId: string): strin
|
||||
|
||||
/**
 * Row shape read from the `repositories` table by the context endpoint.
 * Field names mirror the selected column names (snake_case where the
 * column is snake_case).
 */
interface RawRepoState {
	// Identity and display fields.
	id: string;
	title: string;

	// Where the repository content comes from.
	source: 'github' | 'local';
	source_url: string;
	branch: string | null;

	// Indexing lifecycle state.
	state: 'pending' | 'indexing' | 'indexed' | 'error';
}
|
||||
|
||||
/** Row shape for `id` / `tag` lookups against `repository_versions`. */
interface RawVersionRow {
	/** Primary key of the version row. */
	id: string;
	/** Version tag stored alongside the id. */
	tag: string;
}
|
||||
|
||||
/**
 * Looks up the tag for each given version id.
 *
 * @param db - Database client used to run the lookup.
 * @param versionIds - Version ids to resolve; an empty list skips the query.
 * @returns A map from version id to tag. Ids with no matching row are absent.
 */
function getSnippetVersionTags(
	db: ReturnType<typeof getClient>,
	versionIds: string[]
): Record<string, string> {
	// Nothing to resolve: avoid preparing an invalid `IN ()` clause.
	if (versionIds.length === 0) {
		return {};
	}

	// One positional placeholder per id; the ids themselves are bound, not inlined.
	const placeholders = new Array<string>(versionIds.length).fill('?').join(', ');
	const lookup = db.prepare<string[], RawVersionRow>(
		`SELECT id, tag FROM repository_versions WHERE id IN (${placeholders})`
	);
	const versionRows = lookup.all(...versionIds);

	const idTagPairs = versionRows.map(({ id, tag }) => [id, tag]);
	return Object.fromEntries(idTagPairs);
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
@@ -131,7 +157,9 @@ export const GET: RequestHandler = async ({ url }) => {
|
||||
|
||||
// Verify the repository exists and check its state.
|
||||
const repo = db
|
||||
.prepare<[string], RawRepoState>(`SELECT state, title FROM repositories WHERE id = ?`)
|
||||
.prepare<[string], RawRepoState>(
|
||||
`SELECT id, state, title, source, source_url, branch FROM repositories WHERE id = ?`
|
||||
)
|
||||
.get(parsed.repositoryId);
|
||||
|
||||
if (!repo) {
|
||||
@@ -162,15 +190,16 @@ export const GET: RequestHandler = async ({ url }) => {
|
||||
|
||||
// Resolve version ID if a specific version was requested.
|
||||
let versionId: string | undefined;
|
||||
let resolvedVersion: RawVersionRow | undefined;
|
||||
if (parsed.version) {
|
||||
const versionRow = db
|
||||
.prepare<[string, string], { id: string }>(
|
||||
`SELECT id FROM repository_versions WHERE repository_id = ? AND tag = ?`
|
||||
resolvedVersion = db
|
||||
.prepare<[string, string], RawVersionRow>(
|
||||
`SELECT id, tag FROM repository_versions WHERE repository_id = ? AND tag = ?`
|
||||
)
|
||||
.get(parsed.repositoryId, parsed.version);
|
||||
|
||||
// Version not found is not fatal — fall back to default branch.
|
||||
versionId = versionRow?.id;
|
||||
versionId = resolvedVersion?.id;
|
||||
}
|
||||
|
||||
// Execute hybrid search (falls back to FTS5 when no embedding provider is set).
|
||||
@@ -193,11 +222,39 @@ export const GET: RequestHandler = async ({ url }) => {
|
||||
return found;
|
||||
});
|
||||
|
||||
const snippetVersionIds = Array.from(
|
||||
new Set(
|
||||
selectedResults
|
||||
.map((result) => result.snippet.versionId)
|
||||
.filter((value): value is string => Boolean(value))
|
||||
)
|
||||
);
|
||||
const snippetVersions = getSnippetVersionTags(db, snippetVersionIds);
|
||||
const metadata: ContextResponseMetadata = {
|
||||
localSource: repo.source === 'local',
|
||||
resultCount: selectedResults.length,
|
||||
repository: {
|
||||
id: repo.id,
|
||||
title: repo.title,
|
||||
source: repo.source,
|
||||
sourceUrl: repo.source_url,
|
||||
branch: repo.branch
|
||||
},
|
||||
version: parsed.version || resolvedVersion
|
||||
? {
|
||||
requested: parsed.version ?? null,
|
||||
resolved: resolvedVersion?.tag ?? null,
|
||||
id: resolvedVersion?.id ?? null
|
||||
}
|
||||
: null,
|
||||
snippetVersions
|
||||
};
|
||||
|
||||
// Load rules from repository_configs.
|
||||
const rules = getRules(db, parsed.repositoryId);
|
||||
|
||||
if (responseType === 'txt') {
|
||||
const text = formatContextTxt(selectedResults, rules);
|
||||
const text = formatContextTxt(selectedResults, rules, metadata);
|
||||
return new Response(text, {
|
||||
status: 200,
|
||||
headers: {
|
||||
@@ -208,7 +265,7 @@ export const GET: RequestHandler = async ({ url }) => {
|
||||
}
|
||||
|
||||
// Default: JSON
|
||||
const body = formatContextJson(selectedResults, rules);
|
||||
const body = formatContextJson(selectedResults, rules, metadata);
|
||||
return dtoJsonResponse(body, {
|
||||
status: 200,
|
||||
headers: CORS_HEADERS
|
||||
|
||||
Reference in New Issue
Block a user