chore(FEEDBACK-0001): linting
This commit is contained in:
@@ -2,4 +2,4 @@ import { json } from '@sveltejs/kit';
|
||||
|
||||
export function dtoJsonResponse<T>(payload: T, init?: ResponseInit) {
|
||||
return json(payload, init);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -83,9 +83,7 @@ function makeSnippetResult(snippet: Snippet): SnippetSearchResult {
|
||||
});
|
||||
}
|
||||
|
||||
function makeMetadata(
|
||||
overrides: Partial<ContextResponseMetadata> = {}
|
||||
): ContextResponseMetadata {
|
||||
function makeMetadata(overrides: Partial<ContextResponseMetadata> = {}): ContextResponseMetadata {
|
||||
return {
|
||||
localSource: false,
|
||||
resultCount: 1,
|
||||
@@ -160,7 +158,11 @@ describe('formatLibrarySearchJson', () => {
|
||||
|
||||
it('maps non-indexed state to initial', () => {
|
||||
const results: LibrarySearchResult[] = [
|
||||
new LibrarySearchResult({ repository: makeRepo({ state: 'pending' }), versions: [], score: 0 })
|
||||
new LibrarySearchResult({
|
||||
repository: makeRepo({ state: 'pending' }),
|
||||
versions: [],
|
||||
score: 0
|
||||
})
|
||||
];
|
||||
const response = formatLibrarySearchJson(results);
|
||||
expect(response.results[0].state).toBe('initial');
|
||||
@@ -168,7 +170,11 @@ describe('formatLibrarySearchJson', () => {
|
||||
|
||||
it('handles null lastIndexedAt', () => {
|
||||
const results: LibrarySearchResult[] = [
|
||||
new LibrarySearchResult({ repository: makeRepo({ lastIndexedAt: null }), versions: [], score: 0 })
|
||||
new LibrarySearchResult({
|
||||
repository: makeRepo({ lastIndexedAt: null }),
|
||||
versions: [],
|
||||
score: 0
|
||||
})
|
||||
];
|
||||
const response = formatLibrarySearchJson(results);
|
||||
expect(response.results[0].lastUpdateDate).toBeNull();
|
||||
|
||||
@@ -66,7 +66,9 @@ export const CORS_HEADERS = {
|
||||
/**
|
||||
* Convert internal LibrarySearchResult[] to the context7-compatible JSON body.
|
||||
*/
|
||||
export function formatLibrarySearchJson(results: LibrarySearchResult[]): LibrarySearchJsonResponseDto {
|
||||
export function formatLibrarySearchJson(
|
||||
results: LibrarySearchResult[]
|
||||
): LibrarySearchJsonResponseDto {
|
||||
return ContextResponseMapper.toLibrarySearchJson(results);
|
||||
}
|
||||
|
||||
@@ -80,7 +82,7 @@ export function formatContextJson(
|
||||
snippets: SnippetSearchResult[],
|
||||
rules: string[],
|
||||
metadata?: ContextResponseMetadata
|
||||
): ContextJsonResponseDto {
|
||||
): ContextJsonResponseDto {
|
||||
return ContextResponseMapper.toContextJson(snippets, rules, metadata);
|
||||
}
|
||||
|
||||
@@ -94,7 +96,10 @@ export function formatContextJson(
|
||||
* @param snippets - Ranked snippet search results (already token-budget trimmed).
|
||||
* @param rules - Rules from `trueref.json` / `repository_configs`.
|
||||
*/
|
||||
function formatOriginLine(result: SnippetSearchResult, metadata?: ContextResponseMetadata): string | null {
|
||||
function formatOriginLine(
|
||||
result: SnippetSearchResult,
|
||||
metadata?: ContextResponseMetadata
|
||||
): string | null {
|
||||
if (!metadata?.repository) return null;
|
||||
|
||||
const parts = [
|
||||
|
||||
@@ -115,10 +115,7 @@ describe('parseConfigFile — description', () => {
|
||||
|
||||
describe('parseConfigFile — array path fields', () => {
|
||||
it('accepts valid folders', () => {
|
||||
const result = parseConfigFile(
|
||||
JSON.stringify({ folders: ['src/', 'docs/'] }),
|
||||
'trueref.json'
|
||||
);
|
||||
const result = parseConfigFile(JSON.stringify({ folders: ['src/', 'docs/'] }), 'trueref.json');
|
||||
expect(result.config.folders).toEqual(['src/', 'docs/']);
|
||||
expect(result.warnings).toHaveLength(0);
|
||||
});
|
||||
@@ -130,10 +127,7 @@ describe('parseConfigFile — array path fields', () => {
|
||||
});
|
||||
|
||||
it('skips non-string entries in folders with a warning', () => {
|
||||
const result = parseConfigFile(
|
||||
JSON.stringify({ folders: ['src/', 42, true] }),
|
||||
'trueref.json'
|
||||
);
|
||||
const result = parseConfigFile(JSON.stringify({ folders: ['src/', 42, true] }), 'trueref.json');
|
||||
expect(result.config.folders).toEqual(['src/']);
|
||||
expect(result.warnings.length).toBeGreaterThan(0);
|
||||
});
|
||||
@@ -174,7 +168,9 @@ describe('parseConfigFile — array path fields', () => {
|
||||
describe('parseConfigFile — rules', () => {
|
||||
it('accepts valid rules', () => {
|
||||
const result = parseConfigFile(
|
||||
JSON.stringify({ rules: ['Always use named imports.', 'Prefer async/await over callbacks.'] }),
|
||||
JSON.stringify({
|
||||
rules: ['Always use named imports.', 'Prefer async/await over callbacks.']
|
||||
}),
|
||||
'trueref.json'
|
||||
);
|
||||
expect(result.config.rules).toHaveLength(2);
|
||||
@@ -204,10 +200,7 @@ describe('parseConfigFile — rules', () => {
|
||||
});
|
||||
|
||||
it('ignores non-array rules with a warning', () => {
|
||||
const result = parseConfigFile(
|
||||
JSON.stringify({ rules: 'use named imports' }),
|
||||
'trueref.json'
|
||||
);
|
||||
const result = parseConfigFile(JSON.stringify({ rules: 'use named imports' }), 'trueref.json');
|
||||
expect(result.config.rules).toBeUndefined();
|
||||
expect(result.warnings.some((w) => /rules must be an array/.test(w))).toBe(true);
|
||||
});
|
||||
@@ -243,10 +236,7 @@ describe('parseConfigFile — previousVersions', () => {
|
||||
it('skips entries missing tag', () => {
|
||||
const result = parseConfigFile(
|
||||
JSON.stringify({
|
||||
previousVersions: [
|
||||
{ title: 'No tag here' },
|
||||
{ tag: 'v1.0.0', title: 'Valid' }
|
||||
]
|
||||
previousVersions: [{ title: 'No tag here' }, { tag: 'v1.0.0', title: 'Valid' }]
|
||||
}),
|
||||
'trueref.json'
|
||||
);
|
||||
@@ -275,10 +265,7 @@ describe('parseConfigFile — previousVersions', () => {
|
||||
});
|
||||
|
||||
it('ignores non-array previousVersions with a warning', () => {
|
||||
const result = parseConfigFile(
|
||||
JSON.stringify({ previousVersions: 'v1.0.0' }),
|
||||
'trueref.json'
|
||||
);
|
||||
const result = parseConfigFile(JSON.stringify({ previousVersions: 'v1.0.0' }), 'trueref.json');
|
||||
expect(result.config.previousVersions).toBeUndefined();
|
||||
expect(result.warnings.some((w) => /previousVersions must be an array/.test(w))).toBe(true);
|
||||
});
|
||||
@@ -294,9 +281,7 @@ describe('resolveConfig', () => {
|
||||
});
|
||||
|
||||
it('returns null when no matching filenames', () => {
|
||||
expect(
|
||||
resolveConfig([{ filename: 'package.json', content: '{"name":"x"}' }])
|
||||
).toBeNull();
|
||||
expect(resolveConfig([{ filename: 'package.json', content: '{"name":"x"}' }])).toBeNull();
|
||||
});
|
||||
|
||||
it('prefers trueref.json over context7.json', () => {
|
||||
|
||||
@@ -65,7 +65,9 @@ export function parseConfigFile(content: string, filename: string): ParsedConfig
|
||||
|
||||
// ---- 2. Root must be an object ------------------------------------------
|
||||
if (typeof raw !== 'object' || raw === null || Array.isArray(raw)) {
|
||||
throw new ConfigParseError(`${filename} must be a JSON object, got ${Array.isArray(raw) ? 'array' : typeof raw}`);
|
||||
throw new ConfigParseError(
|
||||
`${filename} must be a JSON object, got ${Array.isArray(raw) ? 'array' : typeof raw}`
|
||||
);
|
||||
}
|
||||
|
||||
const input = raw as Record<string, unknown>;
|
||||
@@ -131,7 +133,9 @@ export function parseConfigFile(content: string, filename: string): ParsedConfig
|
||||
})
|
||||
.map((item) => {
|
||||
if (item.length > maxLength) {
|
||||
warnings.push(`${field} entry truncated to ${maxLength} characters: "${item.slice(0, 40)}..."`);
|
||||
warnings.push(
|
||||
`${field} entry truncated to ${maxLength} characters: "${item.slice(0, 40)}..."`
|
||||
);
|
||||
return item.slice(0, maxLength);
|
||||
}
|
||||
return item;
|
||||
@@ -160,9 +164,7 @@ export function parseConfigFile(content: string, filename: string): ParsedConfig
|
||||
return false;
|
||||
}
|
||||
if (r.length < minLength) {
|
||||
warnings.push(
|
||||
`rules entry too short (< ${minLength} chars) — skipping: "${r}"`
|
||||
);
|
||||
warnings.push(`rules entry too short (< ${minLength} chars) — skipping: "${r}"`);
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
|
||||
@@ -1,85 +1,85 @@
|
||||
{
|
||||
"$schema": "http://json-schema.org/draft-07/schema#",
|
||||
"$id": "https://trueref.dev/schema/trueref-config.json",
|
||||
"title": "TrueRef Repository Configuration",
|
||||
"description": "Configuration file for controlling how a repository is indexed and presented by TrueRef. Place as trueref.json (or context7.json for backward compatibility) at the root of your repository.",
|
||||
"type": "object",
|
||||
"additionalProperties": false,
|
||||
"properties": {
|
||||
"projectTitle": {
|
||||
"type": "string",
|
||||
"minLength": 1,
|
||||
"maxLength": 100,
|
||||
"description": "Override the display name for this library. When set, this replaces the repository name in search results and UI."
|
||||
},
|
||||
"description": {
|
||||
"type": "string",
|
||||
"minLength": 10,
|
||||
"maxLength": 500,
|
||||
"description": "A short description of the library used for search ranking and display. Should accurately describe the library's purpose."
|
||||
},
|
||||
"folders": {
|
||||
"type": "array",
|
||||
"maxItems": 50,
|
||||
"description": "Allowlist of folder path prefixes or regex strings to include in indexing. If empty or absent, all folders are included. Examples: [\"src/\", \"docs/\", \"^packages/core\"]",
|
||||
"items": {
|
||||
"type": "string",
|
||||
"maxLength": 200,
|
||||
"description": "A path prefix or regex string. Paths are matched against the full relative file path within the repository."
|
||||
}
|
||||
},
|
||||
"excludeFolders": {
|
||||
"type": "array",
|
||||
"maxItems": 50,
|
||||
"description": "Folders to exclude from indexing. Applied after the 'folders' allowlist. Examples: [\"test/\", \"fixtures/\", \"__mocks__\"]",
|
||||
"items": {
|
||||
"type": "string",
|
||||
"maxLength": 200,
|
||||
"description": "A path prefix or regex string for folders to exclude."
|
||||
}
|
||||
},
|
||||
"excludeFiles": {
|
||||
"type": "array",
|
||||
"maxItems": 100,
|
||||
"description": "Exact filenames to exclude (no path, no regex). Examples: [\"README.md\", \"CHANGELOG.md\", \"jest.config.ts\"]",
|
||||
"items": {
|
||||
"type": "string",
|
||||
"maxLength": 200,
|
||||
"description": "An exact filename (not a path). Must not contain path separators."
|
||||
}
|
||||
},
|
||||
"rules": {
|
||||
"type": "array",
|
||||
"maxItems": 20,
|
||||
"description": "Best practices and rules to inject at the top of every query-docs response. These are shown to AI coding assistants to guide correct library usage.",
|
||||
"items": {
|
||||
"type": "string",
|
||||
"minLength": 5,
|
||||
"maxLength": 500,
|
||||
"description": "A single best-practice rule or guideline for using this library."
|
||||
}
|
||||
},
|
||||
"previousVersions": {
|
||||
"type": "array",
|
||||
"maxItems": 50,
|
||||
"description": "Previously released versions to make available for versioned documentation queries.",
|
||||
"items": {
|
||||
"type": "object",
|
||||
"required": ["tag", "title"],
|
||||
"additionalProperties": false,
|
||||
"properties": {
|
||||
"tag": {
|
||||
"type": "string",
|
||||
"pattern": "^v?\\d+\\.\\d+(\\.\\d+)?(-.*)?$",
|
||||
"description": "Git tag name for this version (e.g. \"v1.2.3\", \"2.0.0-beta.1\")."
|
||||
},
|
||||
"title": {
|
||||
"type": "string",
|
||||
"minLength": 1,
|
||||
"description": "Human-readable version label (e.g. \"Version 1.2.3\", \"v2 Legacy\")."
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
"$schema": "http://json-schema.org/draft-07/schema#",
|
||||
"$id": "https://trueref.dev/schema/trueref-config.json",
|
||||
"title": "TrueRef Repository Configuration",
|
||||
"description": "Configuration file for controlling how a repository is indexed and presented by TrueRef. Place as trueref.json (or context7.json for backward compatibility) at the root of your repository.",
|
||||
"type": "object",
|
||||
"additionalProperties": false,
|
||||
"properties": {
|
||||
"projectTitle": {
|
||||
"type": "string",
|
||||
"minLength": 1,
|
||||
"maxLength": 100,
|
||||
"description": "Override the display name for this library. When set, this replaces the repository name in search results and UI."
|
||||
},
|
||||
"description": {
|
||||
"type": "string",
|
||||
"minLength": 10,
|
||||
"maxLength": 500,
|
||||
"description": "A short description of the library used for search ranking and display. Should accurately describe the library's purpose."
|
||||
},
|
||||
"folders": {
|
||||
"type": "array",
|
||||
"maxItems": 50,
|
||||
"description": "Allowlist of folder path prefixes or regex strings to include in indexing. If empty or absent, all folders are included. Examples: [\"src/\", \"docs/\", \"^packages/core\"]",
|
||||
"items": {
|
||||
"type": "string",
|
||||
"maxLength": 200,
|
||||
"description": "A path prefix or regex string. Paths are matched against the full relative file path within the repository."
|
||||
}
|
||||
},
|
||||
"excludeFolders": {
|
||||
"type": "array",
|
||||
"maxItems": 50,
|
||||
"description": "Folders to exclude from indexing. Applied after the 'folders' allowlist. Examples: [\"test/\", \"fixtures/\", \"__mocks__\"]",
|
||||
"items": {
|
||||
"type": "string",
|
||||
"maxLength": 200,
|
||||
"description": "A path prefix or regex string for folders to exclude."
|
||||
}
|
||||
},
|
||||
"excludeFiles": {
|
||||
"type": "array",
|
||||
"maxItems": 100,
|
||||
"description": "Exact filenames to exclude (no path, no regex). Examples: [\"README.md\", \"CHANGELOG.md\", \"jest.config.ts\"]",
|
||||
"items": {
|
||||
"type": "string",
|
||||
"maxLength": 200,
|
||||
"description": "An exact filename (not a path). Must not contain path separators."
|
||||
}
|
||||
},
|
||||
"rules": {
|
||||
"type": "array",
|
||||
"maxItems": 20,
|
||||
"description": "Best practices and rules to inject at the top of every query-docs response. These are shown to AI coding assistants to guide correct library usage.",
|
||||
"items": {
|
||||
"type": "string",
|
||||
"minLength": 5,
|
||||
"maxLength": 500,
|
||||
"description": "A single best-practice rule or guideline for using this library."
|
||||
}
|
||||
},
|
||||
"previousVersions": {
|
||||
"type": "array",
|
||||
"maxItems": 50,
|
||||
"description": "Previously released versions to make available for versioned documentation queries.",
|
||||
"items": {
|
||||
"type": "object",
|
||||
"required": ["tag", "title"],
|
||||
"additionalProperties": false,
|
||||
"properties": {
|
||||
"tag": {
|
||||
"type": "string",
|
||||
"pattern": "^v?\\d+\\.\\d+(\\.\\d+)?(-.*)?$",
|
||||
"description": "Git tag name for this version (e.g. \"v1.2.3\", \"2.0.0-beta.1\")."
|
||||
},
|
||||
"title": {
|
||||
"type": "string",
|
||||
"minLength": 1,
|
||||
"description": "Human-readable version label (e.g. \"Version 1.2.3\", \"v2 Legacy\")."
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -74,27 +74,46 @@ export const MAX_FILE_SIZE_BYTES = 500_000;
|
||||
*/
|
||||
export const IGNORED_DIR_NAMES = new Set([
|
||||
// ── Version control ────────────────────────────────────────────────────
|
||||
'.git', '.hg', '.svn',
|
||||
'.git',
|
||||
'.hg',
|
||||
'.svn',
|
||||
|
||||
// ── JavaScript / TypeScript ─────────────────────────────────────────────
|
||||
'node_modules',
|
||||
'.npm', '.yarn', '.pnpm-store', '.pnp',
|
||||
'.npm',
|
||||
'.yarn',
|
||||
'.pnpm-store',
|
||||
'.pnp',
|
||||
// Build outputs and framework caches
|
||||
'dist', 'build', 'out',
|
||||
'.next', '.nuxt', '.svelte-kit', '.vite',
|
||||
'.turbo', '.parcel-cache', '.webpack',
|
||||
'dist',
|
||||
'build',
|
||||
'out',
|
||||
'.next',
|
||||
'.nuxt',
|
||||
'.svelte-kit',
|
||||
'.vite',
|
||||
'.turbo',
|
||||
'.parcel-cache',
|
||||
'.webpack',
|
||||
|
||||
// ── Python ──────────────────────────────────────────────────────────────
|
||||
'__pycache__',
|
||||
'.venv', 'venv', 'env',
|
||||
'site-packages', '.eggs',
|
||||
'.pytest_cache', '.mypy_cache', '.ruff_cache',
|
||||
'.tox', '.nox',
|
||||
'.venv',
|
||||
'venv',
|
||||
'env',
|
||||
'site-packages',
|
||||
'.eggs',
|
||||
'.pytest_cache',
|
||||
'.mypy_cache',
|
||||
'.ruff_cache',
|
||||
'.tox',
|
||||
'.nox',
|
||||
'htmlcov',
|
||||
|
||||
// ── Java / Kotlin / Scala ───────────────────────────────────────────────
|
||||
'target', // Maven + sbt
|
||||
'.gradle', '.mvn',
|
||||
'target', // Maven + sbt
|
||||
'.gradle',
|
||||
'.mvn',
|
||||
|
||||
// ── Ruby ────────────────────────────────────────────────────────────────
|
||||
'.bundle',
|
||||
@@ -103,19 +122,24 @@ export const IGNORED_DIR_NAMES = new Set([
|
||||
// 'vendor' below covers PHP Composer
|
||||
|
||||
// ── .NET ────────────────────────────────────────────────────────────────
|
||||
'bin', 'obj', 'packages',
|
||||
'bin',
|
||||
'obj',
|
||||
'packages',
|
||||
|
||||
// ── Haskell ─────────────────────────────────────────────────────────────
|
||||
'.stack-work', 'dist-newstyle',
|
||||
'.stack-work',
|
||||
'dist-newstyle',
|
||||
|
||||
// ── Dart / Flutter ──────────────────────────────────────────────────────
|
||||
'.dart_tool',
|
||||
|
||||
// ── Swift / iOS ─────────────────────────────────────────────────────────
|
||||
'Pods', 'DerivedData',
|
||||
'Pods',
|
||||
'DerivedData',
|
||||
|
||||
// ── Elixir / Erlang ─────────────────────────────────────────────────────
|
||||
'_build', 'deps',
|
||||
'_build',
|
||||
'deps',
|
||||
|
||||
// ── Clojure ─────────────────────────────────────────────────────────────
|
||||
'.cpcache',
|
||||
@@ -125,16 +149,25 @@ export const IGNORED_DIR_NAMES = new Set([
|
||||
'vendor',
|
||||
|
||||
// ── Generic caches / temp ───────────────────────────────────────────────
|
||||
'.cache', '.tmp', 'tmp', 'temp', '.temp', '.sass-cache',
|
||||
'.cache',
|
||||
'.tmp',
|
||||
'tmp',
|
||||
'temp',
|
||||
'.temp',
|
||||
'.sass-cache',
|
||||
|
||||
// ── Test coverage ───────────────────────────────────────────────────────
|
||||
'coverage', '.nyc_output',
|
||||
'coverage',
|
||||
'.nyc_output',
|
||||
|
||||
// ── IDE / editor artefacts ──────────────────────────────────────────────
|
||||
'.idea', '.vs',
|
||||
'.idea',
|
||||
'.vs',
|
||||
|
||||
// ── Generated code ──────────────────────────────────────────────────────
|
||||
'generated', '__generated__', '_generated',
|
||||
'generated',
|
||||
'__generated__',
|
||||
'_generated',
|
||||
|
||||
// ── Logs ────────────────────────────────────────────────────────────────
|
||||
'logs'
|
||||
@@ -264,11 +297,7 @@ export function detectLanguage(filePath: string): string {
|
||||
* 7. Must not be under a config.excludeFolders path / regex.
|
||||
* 8. Must be under a config.folders allowlist path / regex (if specified).
|
||||
*/
|
||||
export function shouldIndexFile(
|
||||
filePath: string,
|
||||
fileSize: number,
|
||||
config?: RepoConfig
|
||||
): boolean {
|
||||
export function shouldIndexFile(filePath: string, fileSize: number, config?: RepoConfig): boolean {
|
||||
const ext = extname(filePath).toLowerCase();
|
||||
const base = basename(filePath);
|
||||
|
||||
|
||||
@@ -35,10 +35,9 @@ export async function listGitHubTags(
|
||||
};
|
||||
if (token) headers['Authorization'] = `Bearer ${token}`;
|
||||
|
||||
const response = await fetch(
|
||||
`https://api.github.com/repos/${owner}/${repo}/tags?per_page=100`,
|
||||
{ headers }
|
||||
);
|
||||
const response = await fetch(`https://api.github.com/repos/${owner}/${repo}/tags?per_page=100`, {
|
||||
headers
|
||||
});
|
||||
|
||||
if (!response.ok) throw new GitHubApiError(response.status);
|
||||
return response.json() as Promise<GitHubTag[]>;
|
||||
|
||||
@@ -8,13 +8,14 @@
|
||||
import { describe, it, expect, vi, beforeEach, afterEach } from 'vitest';
|
||||
|
||||
import { crawl } from './github.crawler.js';
|
||||
import { shouldIndexFile, detectLanguage, INDEXABLE_EXTENSIONS, MAX_FILE_SIZE_BYTES } from './file-filter.js';
|
||||
import { GitHubRateLimiter, Semaphore, withRetry } from './rate-limiter.js';
|
||||
import {
|
||||
AuthenticationError,
|
||||
PermissionError,
|
||||
RepositoryNotFoundError
|
||||
} from './types.js';
|
||||
shouldIndexFile,
|
||||
detectLanguage,
|
||||
INDEXABLE_EXTENSIONS,
|
||||
MAX_FILE_SIZE_BYTES
|
||||
} from './file-filter.js';
|
||||
import { GitHubRateLimiter, Semaphore, withRetry } from './rate-limiter.js';
|
||||
import { AuthenticationError, PermissionError, RepositoryNotFoundError } from './types.js';
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Mock fetch helpers
|
||||
@@ -112,7 +113,9 @@ describe('shouldIndexFile()', () => {
|
||||
});
|
||||
|
||||
it('respects config.excludeFolders prefix', () => {
|
||||
expect(shouldIndexFile('internal/config.ts', 100, { excludeFolders: ['internal/'] })).toBe(false);
|
||||
expect(shouldIndexFile('internal/config.ts', 100, { excludeFolders: ['internal/'] })).toBe(
|
||||
false
|
||||
);
|
||||
});
|
||||
|
||||
it('allows files outside of config.excludeFolders', () => {
|
||||
@@ -169,8 +172,10 @@ describe('detectLanguage()', () => {
|
||||
it('detects markdown', () => expect(detectLanguage('README.md')).toBe('markdown'));
|
||||
it('detects svelte', () => expect(detectLanguage('App.svelte')).toBe('svelte'));
|
||||
it('detects yaml', () => expect(detectLanguage('config.yaml')).toBe('yaml'));
|
||||
it('returns empty string for unknown extension', () => expect(detectLanguage('file.xyz')).toBe(''));
|
||||
it('is case-insensitive for extensions', () => expect(detectLanguage('FILE.TS')).toBe('typescript'));
|
||||
it('returns empty string for unknown extension', () =>
|
||||
expect(detectLanguage('file.xyz')).toBe(''));
|
||||
it('is case-insensitive for extensions', () =>
|
||||
expect(detectLanguage('FILE.TS')).toBe('typescript'));
|
||||
});
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
@@ -267,9 +272,9 @@ describe('withRetry()', () => {
|
||||
});
|
||||
|
||||
it('throws after exhausting all attempts', async () => {
|
||||
await expect(
|
||||
withRetry(() => Promise.reject(new Error('always fails')), 3)
|
||||
).rejects.toThrow('always fails');
|
||||
await expect(withRetry(() => Promise.reject(new Error('always fails')), 3)).rejects.toThrow(
|
||||
'always fails'
|
||||
);
|
||||
});
|
||||
});
|
||||
|
||||
@@ -425,14 +430,15 @@ describe('crawl()', () => {
|
||||
});
|
||||
|
||||
it('throws AuthenticationError on 401', async () => {
|
||||
stubFetch(() =>
|
||||
new Response('Unauthorized', {
|
||||
status: 401,
|
||||
headers: {
|
||||
'X-RateLimit-Remaining': '0',
|
||||
'X-RateLimit-Reset': String(Math.floor(Date.now() / 1000) + 3600)
|
||||
}
|
||||
})
|
||||
stubFetch(
|
||||
() =>
|
||||
new Response('Unauthorized', {
|
||||
status: 401,
|
||||
headers: {
|
||||
'X-RateLimit-Remaining': '0',
|
||||
'X-RateLimit-Reset': String(Math.floor(Date.now() / 1000) + 3600)
|
||||
}
|
||||
})
|
||||
);
|
||||
|
||||
await expect(crawl({ owner: 'owner', repo: 'repo', token: 'bad-token' })).rejects.toThrow(
|
||||
@@ -441,14 +447,15 @@ describe('crawl()', () => {
|
||||
});
|
||||
|
||||
it('throws PermissionError on 403 without rate-limit exhaustion', async () => {
|
||||
stubFetch(() =>
|
||||
new Response('Forbidden', {
|
||||
status: 403,
|
||||
headers: {
|
||||
'X-RateLimit-Remaining': '100',
|
||||
'X-RateLimit-Reset': String(Math.floor(Date.now() / 1000) + 3600)
|
||||
}
|
||||
})
|
||||
stubFetch(
|
||||
() =>
|
||||
new Response('Forbidden', {
|
||||
status: 403,
|
||||
headers: {
|
||||
'X-RateLimit-Remaining': '100',
|
||||
'X-RateLimit-Reset': String(Math.floor(Date.now() / 1000) + 3600)
|
||||
}
|
||||
})
|
||||
);
|
||||
|
||||
await expect(crawl({ owner: 'owner', repo: 'repo' })).rejects.toThrow(PermissionError);
|
||||
|
||||
@@ -106,9 +106,7 @@ async function throwForStatus(response: Response, rateLimiter: GitHubRateLimiter
|
||||
);
|
||||
}
|
||||
case 404:
|
||||
throw new RepositoryNotFoundError(
|
||||
`Repository not found or not accessible: ${response.url}`
|
||||
);
|
||||
throw new RepositoryNotFoundError(`Repository not found or not accessible: ${response.url}`);
|
||||
default: {
|
||||
const body = await response.text().catch(() => '');
|
||||
throw new Error(`GitHub API error ${response.status}: ${body}`);
|
||||
@@ -129,18 +127,22 @@ async function fetchRepoInfo(
|
||||
token: string | undefined,
|
||||
rateLimiter: GitHubRateLimiter
|
||||
): Promise<GitHubRepoResponse> {
|
||||
return withRetry(async () => {
|
||||
await rateLimiter.waitIfNeeded();
|
||||
return withRetry(
|
||||
async () => {
|
||||
await rateLimiter.waitIfNeeded();
|
||||
|
||||
const response = await fetch(`${GITHUB_API}/repos/${owner}/${repo}`, {
|
||||
headers: buildHeaders(token)
|
||||
});
|
||||
const response = await fetch(`${GITHUB_API}/repos/${owner}/${repo}`, {
|
||||
headers: buildHeaders(token)
|
||||
});
|
||||
|
||||
rateLimiter.updateFromHeaders(response.headers);
|
||||
await throwForStatus(response, rateLimiter);
|
||||
rateLimiter.updateFromHeaders(response.headers);
|
||||
await throwForStatus(response, rateLimiter);
|
||||
|
||||
return (await response.json()) as GitHubRepoResponse;
|
||||
}, 3, isRetryable);
|
||||
return (await response.json()) as GitHubRepoResponse;
|
||||
},
|
||||
3,
|
||||
isRetryable
|
||||
);
|
||||
}
|
||||
|
||||
/**
|
||||
@@ -155,21 +157,25 @@ async function fetchTree(
|
||||
token: string | undefined,
|
||||
rateLimiter: GitHubRateLimiter
|
||||
): Promise<GitHubTreeResponse | null> {
|
||||
return withRetry(async () => {
|
||||
await rateLimiter.waitIfNeeded();
|
||||
return withRetry(
|
||||
async () => {
|
||||
await rateLimiter.waitIfNeeded();
|
||||
|
||||
const url = `${GITHUB_API}/repos/${owner}/${repo}/git/trees/${ref}?recursive=1`;
|
||||
const response = await fetch(url, { headers: buildHeaders(token) });
|
||||
const url = `${GITHUB_API}/repos/${owner}/${repo}/git/trees/${ref}?recursive=1`;
|
||||
const response = await fetch(url, { headers: buildHeaders(token) });
|
||||
|
||||
rateLimiter.updateFromHeaders(response.headers);
|
||||
rateLimiter.updateFromHeaders(response.headers);
|
||||
|
||||
// 422 means the tree is too large for a single recursive call.
|
||||
if (response.status === 422) return null;
|
||||
// 422 means the tree is too large for a single recursive call.
|
||||
if (response.status === 422) return null;
|
||||
|
||||
await throwForStatus(response, rateLimiter);
|
||||
await throwForStatus(response, rateLimiter);
|
||||
|
||||
return (await response.json()) as GitHubTreeResponse;
|
||||
}, 3, isRetryable);
|
||||
return (await response.json()) as GitHubTreeResponse;
|
||||
},
|
||||
3,
|
||||
isRetryable
|
||||
);
|
||||
}
|
||||
|
||||
/**
|
||||
@@ -184,17 +190,21 @@ async function fetchSubTree(
|
||||
token: string | undefined,
|
||||
rateLimiter: GitHubRateLimiter
|
||||
): Promise<GitHubTreeResponse> {
|
||||
return withRetry(async () => {
|
||||
await rateLimiter.waitIfNeeded();
|
||||
return withRetry(
|
||||
async () => {
|
||||
await rateLimiter.waitIfNeeded();
|
||||
|
||||
const url = `${GITHUB_API}/repos/${owner}/${repo}/git/trees/${treeSha}`;
|
||||
const response = await fetch(url, { headers: buildHeaders(token) });
|
||||
const url = `${GITHUB_API}/repos/${owner}/${repo}/git/trees/${treeSha}`;
|
||||
const response = await fetch(url, { headers: buildHeaders(token) });
|
||||
|
||||
rateLimiter.updateFromHeaders(response.headers);
|
||||
await throwForStatus(response, rateLimiter);
|
||||
rateLimiter.updateFromHeaders(response.headers);
|
||||
await throwForStatus(response, rateLimiter);
|
||||
|
||||
return (await response.json()) as GitHubTreeResponse;
|
||||
}, 3, isRetryable);
|
||||
return (await response.json()) as GitHubTreeResponse;
|
||||
},
|
||||
3,
|
||||
isRetryable
|
||||
);
|
||||
}
|
||||
|
||||
/**
|
||||
@@ -208,21 +218,25 @@ async function fetchCommitSha(
|
||||
token: string | undefined,
|
||||
rateLimiter: GitHubRateLimiter
|
||||
): Promise<string> {
|
||||
return withRetry(async () => {
|
||||
await rateLimiter.waitIfNeeded();
|
||||
return withRetry(
|
||||
async () => {
|
||||
await rateLimiter.waitIfNeeded();
|
||||
|
||||
const url = `${GITHUB_API}/repos/${owner}/${repo}/commits/${ref}`;
|
||||
const response = await fetch(url, {
|
||||
headers: { ...buildHeaders(token), Accept: 'application/vnd.github.sha' }
|
||||
});
|
||||
const url = `${GITHUB_API}/repos/${owner}/${repo}/commits/${ref}`;
|
||||
const response = await fetch(url, {
|
||||
headers: { ...buildHeaders(token), Accept: 'application/vnd.github.sha' }
|
||||
});
|
||||
|
||||
rateLimiter.updateFromHeaders(response.headers);
|
||||
await throwForStatus(response, rateLimiter);
|
||||
rateLimiter.updateFromHeaders(response.headers);
|
||||
await throwForStatus(response, rateLimiter);
|
||||
|
||||
// When Accept is 'application/vnd.github.sha', the response body is the
|
||||
// bare SHA string.
|
||||
return (await response.text()).trim();
|
||||
}, 3, isRetryable);
|
||||
// When Accept is 'application/vnd.github.sha', the response body is the
|
||||
// bare SHA string.
|
||||
return (await response.text()).trim();
|
||||
},
|
||||
3,
|
||||
isRetryable
|
||||
);
|
||||
}
|
||||
|
||||
/**
|
||||
@@ -347,14 +361,7 @@ async function fetchRepoConfig(
|
||||
|
||||
const content =
|
||||
(await downloadRawFile(owner, repo, ref, configItem.path, token)) ??
|
||||
(await downloadViaContentsApi(
|
||||
owner,
|
||||
repo,
|
||||
ref,
|
||||
configItem.path,
|
||||
token,
|
||||
rateLimiter
|
||||
));
|
||||
(await downloadViaContentsApi(owner, repo, ref, configItem.path, token, rateLimiter));
|
||||
|
||||
if (!content) return undefined;
|
||||
|
||||
@@ -435,14 +442,7 @@ export async function crawl(options: CrawlOptions): Promise<CrawlResult> {
|
||||
// Prefer raw download (cheaper on rate limit); fall back to API.
|
||||
const content =
|
||||
(await downloadRawFile(owner, repo, ref!, item.path, token)) ??
|
||||
(await downloadViaContentsApi(
|
||||
owner,
|
||||
repo,
|
||||
ref!,
|
||||
item.path,
|
||||
token,
|
||||
rateLimiter
|
||||
));
|
||||
(await downloadViaContentsApi(owner, repo, ref!, item.path, token, rateLimiter));
|
||||
|
||||
if (content === null) {
|
||||
console.warn(`[GitHubCrawler] Could not download: ${item.path} — skipping.`);
|
||||
|
||||
@@ -52,7 +52,9 @@ async function cleanupTempRepo(root: string): Promise<void> {
|
||||
let root: string = '';
|
||||
const crawler = new LocalCrawler();
|
||||
|
||||
async function crawlRoot(opts: Partial<LocalCrawlOptions> = {}): Promise<ReturnType<LocalCrawler['crawl']>> {
|
||||
async function crawlRoot(
|
||||
opts: Partial<LocalCrawlOptions> = {}
|
||||
): Promise<ReturnType<LocalCrawler['crawl']>> {
|
||||
return crawler.crawl({ rootPath: root, ...opts });
|
||||
}
|
||||
|
||||
|
||||
@@ -141,7 +141,12 @@ export class LocalCrawler {
|
||||
});
|
||||
|
||||
// Crawl the worktree and stamp the result with the git-resolved metadata.
|
||||
const result = await this.crawlDirectory(worktreePath, options.config, options.onProgress, ref);
|
||||
const result = await this.crawlDirectory(
|
||||
worktreePath,
|
||||
options.config,
|
||||
options.onProgress,
|
||||
ref
|
||||
);
|
||||
|
||||
return { ...result, commitSha };
|
||||
} finally {
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
@@ -1,27 +1,27 @@
|
||||
{
|
||||
"version": "7",
|
||||
"dialect": "sqlite",
|
||||
"entries": [
|
||||
{
|
||||
"idx": 0,
|
||||
"version": "6",
|
||||
"when": 1774196053634,
|
||||
"tag": "0000_large_master_chief",
|
||||
"breakpoints": true
|
||||
},
|
||||
{
|
||||
"idx": 1,
|
||||
"version": "6",
|
||||
"when": 1774448049161,
|
||||
"tag": "0001_quick_nighthawk",
|
||||
"breakpoints": true
|
||||
},
|
||||
{
|
||||
"idx": 2,
|
||||
"version": "6",
|
||||
"when": 1774461897742,
|
||||
"tag": "0002_silky_stellaris",
|
||||
"breakpoints": true
|
||||
}
|
||||
]
|
||||
}
|
||||
"version": "7",
|
||||
"dialect": "sqlite",
|
||||
"entries": [
|
||||
{
|
||||
"idx": 0,
|
||||
"version": "6",
|
||||
"when": 1774196053634,
|
||||
"tag": "0000_large_master_chief",
|
||||
"breakpoints": true
|
||||
},
|
||||
{
|
||||
"idx": 1,
|
||||
"version": "6",
|
||||
"when": 1774448049161,
|
||||
"tag": "0001_quick_nighthawk",
|
||||
"breakpoints": true
|
||||
},
|
||||
{
|
||||
"idx": 2,
|
||||
"version": "6",
|
||||
"when": 1774461897742,
|
||||
"tag": "0002_silky_stellaris",
|
||||
"breakpoints": true
|
||||
}
|
||||
]
|
||||
}
|
||||
|
||||
@@ -381,7 +381,12 @@ describe('EmbeddingService', () => {
|
||||
.all(snippetId, 'local-default');
|
||||
expect(rows).toHaveLength(1);
|
||||
|
||||
const row = rows[0] as { model: string; dimensions: number; embedding: Buffer; profile_id: string };
|
||||
const row = rows[0] as {
|
||||
model: string;
|
||||
dimensions: number;
|
||||
embedding: Buffer;
|
||||
profile_id: string;
|
||||
};
|
||||
expect(row.model).toBe('test-model');
|
||||
expect(row.dimensions).toBe(4);
|
||||
expect(row.profile_id).toBe('local-default');
|
||||
@@ -494,9 +499,7 @@ describe('createProviderFromConfig', () => {
|
||||
});
|
||||
|
||||
it('throws when openai provider is selected without config', () => {
|
||||
expect(() =>
|
||||
createProviderFromConfig({ provider: 'openai' } as EmbeddingConfig)
|
||||
).toThrow();
|
||||
expect(() => createProviderFromConfig({ provider: 'openai' } as EmbeddingConfig)).toThrow();
|
||||
});
|
||||
|
||||
it('defaultEmbeddingConfig returns provider=none', () => {
|
||||
|
||||
@@ -41,18 +41,16 @@ export class EmbeddingService {
|
||||
|
||||
const placeholders = snippetIds.map(() => '?').join(',');
|
||||
const snippets = this.db
|
||||
.prepare<string[], SnippetRow>(
|
||||
`SELECT id, title, breadcrumb, content FROM snippets WHERE id IN (${placeholders})`
|
||||
)
|
||||
.prepare<
|
||||
string[],
|
||||
SnippetRow
|
||||
>(`SELECT id, title, breadcrumb, content FROM snippets WHERE id IN (${placeholders})`)
|
||||
.all(...snippetIds);
|
||||
|
||||
if (snippets.length === 0) return;
|
||||
|
||||
const texts = snippets.map((s) =>
|
||||
[s.title, s.breadcrumb, s.content]
|
||||
.filter(Boolean)
|
||||
.join('\n')
|
||||
.slice(0, TEXT_MAX_CHARS)
|
||||
[s.title, s.breadcrumb, s.content].filter(Boolean).join('\n').slice(0, TEXT_MAX_CHARS)
|
||||
);
|
||||
|
||||
const insert = this.db.prepare<[string, string, string, number, Buffer]>(`
|
||||
@@ -94,9 +92,10 @@ export class EmbeddingService {
|
||||
*/
|
||||
getEmbedding(snippetId: string, profileId: string = 'local-default'): Float32Array | null {
|
||||
const row = this.db
|
||||
.prepare<[string, string], { embedding: Buffer; dimensions: number }>(
|
||||
`SELECT embedding, dimensions FROM snippet_embeddings WHERE snippet_id = ? AND profile_id = ?`
|
||||
)
|
||||
.prepare<
|
||||
[string, string],
|
||||
{ embedding: Buffer; dimensions: number }
|
||||
>(`SELECT embedding, dimensions FROM snippet_embeddings WHERE snippet_id = ? AND profile_id = ?`)
|
||||
.get(snippetId, profileId);
|
||||
|
||||
if (!row) return null;
|
||||
|
||||
@@ -12,7 +12,11 @@ import { OpenAIEmbeddingProvider } from './openai.provider.js';
|
||||
import { LocalEmbeddingProvider } from './local.provider.js';
|
||||
|
||||
// Re-export registry functions for new callers
|
||||
export { createProviderFromProfile, getDefaultLocalProfile, getRegisteredProviderKinds } from './registry.js';
|
||||
export {
|
||||
createProviderFromProfile,
|
||||
getDefaultLocalProfile,
|
||||
getRegisteredProviderKinds
|
||||
} from './registry.js';
|
||||
|
||||
export interface EmbeddingConfig {
|
||||
provider: 'openai' | 'local' | 'none';
|
||||
|
||||
@@ -43,7 +43,12 @@ export class ContextResponseMapper {
|
||||
lastUpdateDate: repository.lastIndexedAt
|
||||
? repository.lastIndexedAt.toISOString()
|
||||
: null,
|
||||
state: repository.state === 'indexed' ? 'finalized' : repository.state === 'error' ? 'error' : 'initial',
|
||||
state:
|
||||
repository.state === 'indexed'
|
||||
? 'finalized'
|
||||
: repository.state === 'error'
|
||||
? 'error'
|
||||
: 'initial',
|
||||
totalTokens: repository.totalTokens ?? null,
|
||||
totalSnippets: repository.totalSnippets ?? null,
|
||||
stars: repository.stars ?? null,
|
||||
@@ -64,14 +69,16 @@ export class ContextResponseMapper {
|
||||
const mapped: SnippetJsonDto[] = snippets.map(({ snippet }) => {
|
||||
const origin = metadata?.repository
|
||||
? new SnippetOriginJsonDto({
|
||||
repositoryId: snippet.repositoryId,
|
||||
repositoryTitle: metadata.repository.title,
|
||||
source: metadata.repository.source,
|
||||
sourceUrl: metadata.repository.sourceUrl,
|
||||
version: snippet.versionId ? metadata.snippetVersions[snippet.versionId] ?? null : null,
|
||||
versionId: snippet.versionId,
|
||||
isLocal: metadata.localSource
|
||||
})
|
||||
repositoryId: snippet.repositoryId,
|
||||
repositoryTitle: metadata.repository.title,
|
||||
source: metadata.repository.source,
|
||||
sourceUrl: metadata.repository.sourceUrl,
|
||||
version: snippet.versionId
|
||||
? (metadata.snippetVersions[snippet.versionId] ?? null)
|
||||
: null,
|
||||
versionId: snippet.versionId,
|
||||
isLocal: metadata.localSource
|
||||
})
|
||||
: null;
|
||||
|
||||
if (snippet.type === 'code') {
|
||||
@@ -108,22 +115,22 @@ export class ContextResponseMapper {
|
||||
localSource: metadata?.localSource ?? false,
|
||||
repository: metadata?.repository
|
||||
? new ContextRepositoryJsonDto({
|
||||
id: metadata.repository.id,
|
||||
title: metadata.repository.title,
|
||||
source: metadata.repository.source,
|
||||
sourceUrl: metadata.repository.sourceUrl,
|
||||
branch: metadata.repository.branch,
|
||||
isLocal: metadata.localSource
|
||||
})
|
||||
id: metadata.repository.id,
|
||||
title: metadata.repository.title,
|
||||
source: metadata.repository.source,
|
||||
sourceUrl: metadata.repository.sourceUrl,
|
||||
branch: metadata.repository.branch,
|
||||
isLocal: metadata.localSource
|
||||
})
|
||||
: null,
|
||||
version: metadata?.version
|
||||
? new ContextVersionJsonDto({
|
||||
requested: metadata.version.requested,
|
||||
resolved: metadata.version.resolved,
|
||||
id: metadata.version.id
|
||||
})
|
||||
requested: metadata.version.requested,
|
||||
resolved: metadata.version.resolved,
|
||||
id: metadata.version.id
|
||||
})
|
||||
: null,
|
||||
resultCount: metadata?.resultCount ?? snippets.length
|
||||
});
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -12,8 +12,7 @@ export class IndexingJobMapper {
|
||||
processedFiles: entity.processed_files,
|
||||
error: entity.error,
|
||||
startedAt: entity.started_at != null ? new Date(entity.started_at * 1000) : null,
|
||||
completedAt:
|
||||
entity.completed_at != null ? new Date(entity.completed_at * 1000) : null,
|
||||
completedAt: entity.completed_at != null ? new Date(entity.completed_at * 1000) : null,
|
||||
createdAt: new Date(entity.created_at * 1000)
|
||||
});
|
||||
}
|
||||
@@ -33,4 +32,4 @@ export class IndexingJobMapper {
|
||||
createdAt: domain.createdAt
|
||||
});
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -32,4 +32,4 @@ export class RepositoryVersionMapper {
|
||||
createdAt: domain.createdAt
|
||||
});
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -64,4 +64,4 @@ export class RepositoryMapper {
|
||||
updatedAt: domain.updatedAt
|
||||
});
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1,4 +1,8 @@
|
||||
import { LibrarySearchResult, SnippetRepositoryRef, SnippetSearchResult } from '$lib/server/models/search-result.js';
|
||||
import {
|
||||
LibrarySearchResult,
|
||||
SnippetRepositoryRef,
|
||||
SnippetSearchResult
|
||||
} from '$lib/server/models/search-result.js';
|
||||
import { RepositoryMapper } from '$lib/server/mappers/repository.mapper.js';
|
||||
import { RepositoryVersionMapper } from '$lib/server/mappers/repository-version.mapper.js';
|
||||
import { SnippetMapper } from '$lib/server/mappers/snippet.mapper.js';
|
||||
@@ -26,10 +30,8 @@ export class SearchResultMapper {
|
||||
): LibrarySearchResult {
|
||||
return new LibrarySearchResult({
|
||||
repository: RepositoryMapper.fromEntity(repositoryEntity),
|
||||
versions: versionEntities.map((version) =>
|
||||
RepositoryVersionMapper.fromEntity(version)
|
||||
),
|
||||
versions: versionEntities.map((version) => RepositoryVersionMapper.fromEntity(version)),
|
||||
score
|
||||
});
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -16,4 +16,4 @@ export class SnippetMapper {
|
||||
createdAt: new Date(entity.created_at * 1000)
|
||||
});
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -71,7 +71,7 @@ export class SnippetOriginJsonDto {
|
||||
this.versionId = props.versionId;
|
||||
this.isLocal = props.isLocal;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
export class LibrarySearchJsonResultDto {
|
||||
id: string;
|
||||
@@ -183,4 +183,4 @@ export class ContextJsonResponseDto {
|
||||
this.version = props.version;
|
||||
this.resultCount = props.resultCount;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -122,4 +122,4 @@ export class IndexingJobDto {
|
||||
this.completedAt = props.completedAt;
|
||||
this.createdAt = props.createdAt;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -104,4 +104,4 @@ export class RepositoryVersionDto {
|
||||
this.indexedAt = props.indexedAt;
|
||||
this.createdAt = props.createdAt;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -164,4 +164,4 @@ export class RepositoryDto {
|
||||
this.createdAt = props.createdAt;
|
||||
this.updatedAt = props.updatedAt;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -51,4 +51,4 @@ export class LibrarySearchResult {
|
||||
this.versions = props.versions;
|
||||
this.score = props.score;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -80,4 +80,4 @@ export class Snippet {
|
||||
this.tokenCount = props.tokenCount;
|
||||
this.createdAt = props.createdAt;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -286,7 +286,8 @@ This is the second paragraph that also has enough content to be included here.
|
||||
});
|
||||
|
||||
it('skips paragraphs shorter than 20 characters', () => {
|
||||
const content = 'Short.\n\nThis is a much longer paragraph that definitely passes the minimum length filter.';
|
||||
const content =
|
||||
'Short.\n\nThis is a much longer paragraph that definitely passes the minimum length filter.';
|
||||
const snippets = parseCodeFile(content, 'notes.txt', 'text');
|
||||
expect(snippets.length).toBe(1);
|
||||
});
|
||||
@@ -331,7 +332,10 @@ export function realFunction(): string {
|
||||
|
||||
describe('parseCodeFile — token count', () => {
|
||||
it('all snippets have tokenCount within MAX_TOKENS', () => {
|
||||
const lines = Array.from({ length: 300 }, (_, i) => `// comment line number ${i} here\nconst x${i} = ${i};`);
|
||||
const lines = Array.from(
|
||||
{ length: 300 },
|
||||
(_, i) => `// comment line number ${i} here\nconst x${i} = ${i};`
|
||||
);
|
||||
const content = lines.join('\n');
|
||||
|
||||
const snippets = parseCodeFile(content, 'large.ts', 'typescript');
|
||||
|
||||
@@ -26,15 +26,19 @@ import {
|
||||
* The regex is tested line-by-line (multiline flag not needed).
|
||||
*/
|
||||
export const BOUNDARY_PATTERNS: Record<string, RegExp> = {
|
||||
typescript: /^(export\s+)?(declare\s+)?(async\s+)?(function|class|interface|type|enum|const|let|var)\s+\w+/,
|
||||
typescript:
|
||||
/^(export\s+)?(declare\s+)?(async\s+)?(function|class|interface|type|enum|const|let|var)\s+\w+/,
|
||||
javascript: /^(export\s+)?(async\s+)?(function|class|const|let|var)\s+\w+/,
|
||||
python: /^(async\s+)?(def|class)\s+\w+/,
|
||||
go: /^(func|type|var|const)\s+\w+/,
|
||||
rust: /^(pub(\s*\(crate\))?\s+)?(async\s+)?(fn|impl|struct|enum|trait|type|const|static)\s+\w+/,
|
||||
java: /^(\s*(public|private|protected|static|final|abstract|synchronized)\s+)+[\w<>\[\]]+\s+\w+\s*[({]/,
|
||||
csharp: /^(\s*(public|private|protected|internal|static|override|virtual|abstract|sealed)\s+)+[\w<>\[\]]+\s+\w+\s*[({]/,
|
||||
kotlin: /^(\s*(public|private|protected|internal|override|suspend|inline|open|abstract|sealed)\s+)*(fun|class|object|interface|data class|sealed class|enum class)\s+\w+/,
|
||||
swift: /^(\s*(public|private|internal|fileprivate|open|override|static|final|class)\s+)*(func|class|struct|enum|protocol|extension)\s+\w+/,
|
||||
csharp:
|
||||
/^(\s*(public|private|protected|internal|static|override|virtual|abstract|sealed)\s+)+[\w<>\[\]]+\s+\w+\s*[({]/,
|
||||
kotlin:
|
||||
/^(\s*(public|private|protected|internal|override|suspend|inline|open|abstract|sealed)\s+)*(fun|class|object|interface|data class|sealed class|enum class)\s+\w+/,
|
||||
swift:
|
||||
/^(\s*(public|private|internal|fileprivate|open|override|static|final|class)\s+)*(func|class|struct|enum|protocol|extension)\s+\w+/,
|
||||
ruby: /^(def|class|module)\s+\w+/
|
||||
};
|
||||
|
||||
@@ -42,7 +46,10 @@ export const BOUNDARY_PATTERNS: Record<string, RegExp> = {
|
||||
// Internal types
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
type RawSnippet = Omit<NewSnippet, 'id' | 'repositoryId' | 'documentId' | 'versionId' | 'createdAt'>;
|
||||
type RawSnippet = Omit<
|
||||
NewSnippet,
|
||||
'id' | 'repositoryId' | 'documentId' | 'versionId' | 'createdAt'
|
||||
>;
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Helpers
|
||||
@@ -161,7 +168,10 @@ function parseHtmlLikeFile(content: string, filePath: string, language: string):
|
||||
|
||||
while ((match = scriptPattern.exec(content)) !== null) {
|
||||
// Strip the outer tags, keep just the code
|
||||
const inner = match[0].replace(/^<script[^>]*>/, '').replace(/<\/script>$/, '').trim();
|
||||
const inner = match[0]
|
||||
.replace(/^<script[^>]*>/, '')
|
||||
.replace(/<\/script>$/, '')
|
||||
.trim();
|
||||
if (inner.length >= MIN_CONTENT_LENGTH) {
|
||||
scriptBlocks.push(inner);
|
||||
}
|
||||
|
||||
@@ -48,6 +48,13 @@ export function parseFile(file: CrawledFile, options: ParseOptions): NewSnippet[
|
||||
|
||||
// Re-export helpers for consumers that need them individually
|
||||
export { detectLanguage } from './language.js';
|
||||
export { estimateTokens, chunkText, chunkLines, MAX_TOKENS, OVERLAP_TOKENS, MIN_CONTENT_LENGTH } from './chunker.js';
|
||||
export {
|
||||
estimateTokens,
|
||||
chunkText,
|
||||
chunkLines,
|
||||
MAX_TOKENS,
|
||||
OVERLAP_TOKENS,
|
||||
MIN_CONTENT_LENGTH
|
||||
} from './chunker.js';
|
||||
export { parseMarkdown } from './markdown.parser.js';
|
||||
export { parseCodeFile, BOUNDARY_PATTERNS } from './code.parser.js';
|
||||
|
||||
@@ -99,7 +99,10 @@ describe('parseMarkdown — section splitting', () => {
|
||||
|
||||
describe('parseMarkdown — code block extraction', () => {
|
||||
it('extracts a fenced code block as a code snippet', () => {
|
||||
const codeBlock = fence('typescript', 'function hello(name: string): string {\n return `Hello, ${name}!`;\n}');
|
||||
const codeBlock = fence(
|
||||
'typescript',
|
||||
'function hello(name: string): string {\n return `Hello, ${name}!`;\n}'
|
||||
);
|
||||
const source = [
|
||||
'# Example',
|
||||
'',
|
||||
@@ -232,7 +235,10 @@ describe('parseMarkdown — large content chunking', () => {
|
||||
describe('parseMarkdown — real-world sample', () => {
|
||||
it('correctly parses a realistic README excerpt', () => {
|
||||
const bashInstall = fence('bash', 'npm install my-library');
|
||||
const tsUsage = fence('typescript', "import { doTheThing } from 'my-library';\n\ndoTheThing({ verbose: true });");
|
||||
const tsUsage = fence(
|
||||
'typescript',
|
||||
"import { doTheThing } from 'my-library';\n\ndoTheThing({ verbose: true });"
|
||||
);
|
||||
|
||||
const source = [
|
||||
'# My Library',
|
||||
|
||||
@@ -7,7 +7,13 @@
|
||||
|
||||
import { basename } from 'node:path';
|
||||
import type { NewSnippet } from '$lib/server/db/schema.js';
|
||||
import { estimateTokens, chunkText, MAX_TOKENS, OVERLAP_TOKENS, MIN_CONTENT_LENGTH } from './chunker.js';
|
||||
import {
|
||||
estimateTokens,
|
||||
chunkText,
|
||||
MAX_TOKENS,
|
||||
OVERLAP_TOKENS,
|
||||
MIN_CONTENT_LENGTH
|
||||
} from './chunker.js';
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Internal types
|
||||
@@ -121,7 +127,10 @@ function splitIntoSections(source: string): MarkdownSection[] {
|
||||
// Public parser
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
type RawSnippet = Omit<NewSnippet, 'id' | 'repositoryId' | 'documentId' | 'versionId' | 'createdAt'>;
|
||||
type RawSnippet = Omit<
|
||||
NewSnippet,
|
||||
'id' | 'repositoryId' | 'documentId' | 'versionId' | 'createdAt'
|
||||
>;
|
||||
|
||||
/**
|
||||
* Parse a Markdown/MDX file into raw snippets (before IDs and DB fields are
|
||||
|
||||
@@ -86,16 +86,16 @@ describe('computeDiff', () => {
|
||||
|
||||
it('handles a mixed scenario: added, modified, deleted, and unchanged', () => {
|
||||
const crawledFiles = [
|
||||
makeCrawledFile('unchanged.md', 'sha-same'), // unchanged
|
||||
makeCrawledFile('modified.md', 'sha-new'), // modified (different sha)
|
||||
makeCrawledFile('added.md', 'sha-added') // added (not in DB)
|
||||
makeCrawledFile('unchanged.md', 'sha-same'), // unchanged
|
||||
makeCrawledFile('modified.md', 'sha-new'), // modified (different sha)
|
||||
makeCrawledFile('added.md', 'sha-added') // added (not in DB)
|
||||
// 'deleted.md' is absent from crawl → deleted
|
||||
];
|
||||
|
||||
const existingDocs = [
|
||||
makeDocument('unchanged.md', 'sha-same'), // unchanged
|
||||
makeDocument('modified.md', 'sha-old'), // modified
|
||||
makeDocument('deleted.md', 'sha-deleted') // deleted
|
||||
makeDocument('unchanged.md', 'sha-same'), // unchanged
|
||||
makeDocument('modified.md', 'sha-old'), // modified
|
||||
makeDocument('deleted.md', 'sha-deleted') // deleted
|
||||
];
|
||||
|
||||
const diff = computeDiff(crawledFiles, existingDocs);
|
||||
@@ -114,9 +114,9 @@ describe('computeDiff', () => {
|
||||
];
|
||||
|
||||
const existingDocs = [
|
||||
makeDocument('a.md', 'sha-a'), // unchanged
|
||||
makeDocument('a.md', 'sha-a'), // unchanged
|
||||
makeDocument('b.md', 'sha-b-old'), // modified
|
||||
makeDocument('d.md', 'sha-d') // deleted
|
||||
makeDocument('d.md', 'sha-d') // deleted
|
||||
// 'c.md' is not in DB → added
|
||||
];
|
||||
|
||||
|
||||
@@ -22,10 +22,7 @@ function createTestDb(): Database.Database {
|
||||
client.pragma('foreign_keys = ON');
|
||||
|
||||
const migrationsFolder = join(import.meta.dirname, '../db/migrations');
|
||||
const migrationSql = readFileSync(
|
||||
join(migrationsFolder, '0000_large_master_chief.sql'),
|
||||
'utf-8'
|
||||
);
|
||||
const migrationSql = readFileSync(join(migrationsFolder, '0000_large_master_chief.sql'), 'utf-8');
|
||||
|
||||
const statements = migrationSql
|
||||
.split('--> statement-breakpoint')
|
||||
@@ -45,10 +42,7 @@ function createTestDb(): Database.Database {
|
||||
|
||||
const now = Math.floor(Date.now() / 1000);
|
||||
|
||||
function insertRepo(
|
||||
db: Database.Database,
|
||||
overrides: Partial<Record<string, unknown>> = {}
|
||||
): void {
|
||||
function insertRepo(db: Database.Database, overrides: Partial<Record<string, unknown>> = {}): void {
|
||||
db.prepare(
|
||||
`INSERT INTO repositories
|
||||
(id, title, source, source_url, branch, state,
|
||||
@@ -62,7 +56,15 @@ function insertRepo(
|
||||
overrides.source_url ?? '/tmp/test-repo',
|
||||
overrides.branch ?? 'main',
|
||||
overrides.state ?? 'pending',
|
||||
0, 0, 0, 0, null, null, null, now, now
|
||||
0,
|
||||
0,
|
||||
0,
|
||||
0,
|
||||
null,
|
||||
null,
|
||||
null,
|
||||
now,
|
||||
now
|
||||
);
|
||||
}
|
||||
|
||||
@@ -108,9 +110,10 @@ describe('recoverStaleJobs', () => {
|
||||
insertJob(db, { status: 'running' });
|
||||
recoverStaleJobs(db);
|
||||
|
||||
const row = db
|
||||
.prepare(`SELECT status, error FROM indexing_jobs LIMIT 1`)
|
||||
.get() as { status: string; error: string };
|
||||
const row = db.prepare(`SELECT status, error FROM indexing_jobs LIMIT 1`).get() as {
|
||||
status: string;
|
||||
error: string;
|
||||
};
|
||||
expect(row.status).toBe('failed');
|
||||
expect(row.error).toMatch(/restarted/i);
|
||||
});
|
||||
@@ -119,9 +122,9 @@ describe('recoverStaleJobs', () => {
|
||||
db.prepare(`UPDATE repositories SET state = 'indexing' WHERE id = '/test/repo'`).run();
|
||||
recoverStaleJobs(db);
|
||||
|
||||
const row = db
|
||||
.prepare(`SELECT state FROM repositories WHERE id = '/test/repo'`)
|
||||
.get() as { state: string };
|
||||
const row = db.prepare(`SELECT state FROM repositories WHERE id = '/test/repo'`).get() as {
|
||||
state: string;
|
||||
};
|
||||
expect(row.state).toBe('error');
|
||||
});
|
||||
|
||||
@@ -164,9 +167,7 @@ describe('JobQueue', () => {
|
||||
const job2 = queue.enqueue('/test/repo');
|
||||
expect(job1.id).toBe(job2.id);
|
||||
|
||||
const count = (
|
||||
db.prepare(`SELECT COUNT(*) as n FROM indexing_jobs`).get() as { n: number }
|
||||
).n;
|
||||
const count = (db.prepare(`SELECT COUNT(*) as n FROM indexing_jobs`).get() as { n: number }).n;
|
||||
expect(count).toBe(1);
|
||||
});
|
||||
|
||||
@@ -255,19 +256,19 @@ describe('IndexingPipeline', () => {
|
||||
})
|
||||
};
|
||||
|
||||
return new IndexingPipeline(
|
||||
db,
|
||||
mockGithubCrawl as never,
|
||||
mockLocalCrawler as never,
|
||||
null
|
||||
);
|
||||
return new IndexingPipeline(db, mockGithubCrawl as never, mockLocalCrawler as never, null);
|
||||
}
|
||||
|
||||
function makeJob(repositoryId = '/test/repo') {
|
||||
const jobId = insertJob(db, { repository_id: repositoryId, status: 'queued' });
|
||||
return db
|
||||
.prepare(`SELECT * FROM indexing_jobs WHERE id = ?`)
|
||||
.get(jobId) as { id: string; repositoryId?: string; repository_id?: string; status: string; versionId?: string; version_id?: string };
|
||||
return db.prepare(`SELECT * FROM indexing_jobs WHERE id = ?`).get(jobId) as {
|
||||
id: string;
|
||||
repositoryId?: string;
|
||||
repository_id?: string;
|
||||
status: string;
|
||||
versionId?: string;
|
||||
version_id?: string;
|
||||
};
|
||||
}
|
||||
|
||||
it('marks job as done when there are no files to index', async () => {
|
||||
@@ -289,9 +290,9 @@ describe('IndexingPipeline', () => {
|
||||
|
||||
await pipeline.run(job as never);
|
||||
|
||||
const updated = db
|
||||
.prepare(`SELECT status FROM indexing_jobs WHERE id = ?`)
|
||||
.get(job.id) as { status: string };
|
||||
const updated = db.prepare(`SELECT status FROM indexing_jobs WHERE id = ?`).get(job.id) as {
|
||||
status: string;
|
||||
};
|
||||
// The job should end in 'done' — the running→done transition is covered
|
||||
// by the pipeline's internal updateJob calls.
|
||||
expect(updated.status).toBe('done');
|
||||
@@ -363,27 +364,24 @@ describe('IndexingPipeline', () => {
|
||||
const job1 = makeJob();
|
||||
await pipeline.run(job1 as never);
|
||||
|
||||
const firstDocCount = (
|
||||
db.prepare(`SELECT COUNT(*) as n FROM documents`).get() as { n: number }
|
||||
).n;
|
||||
const firstSnippetIds = (
|
||||
db.prepare(`SELECT id FROM snippets`).all() as { id: string }[]
|
||||
).map((r) => r.id);
|
||||
const firstDocCount = (db.prepare(`SELECT COUNT(*) as n FROM documents`).get() as { n: number })
|
||||
.n;
|
||||
const firstSnippetIds = (db.prepare(`SELECT id FROM snippets`).all() as { id: string }[]).map(
|
||||
(r) => r.id
|
||||
);
|
||||
|
||||
// Second run with identical files.
|
||||
const job2Id = insertJob(db, { repository_id: '/test/repo', status: 'queued' });
|
||||
const job2 = db
|
||||
.prepare(`SELECT * FROM indexing_jobs WHERE id = ?`)
|
||||
.get(job2Id) as never;
|
||||
const job2 = db.prepare(`SELECT * FROM indexing_jobs WHERE id = ?`).get(job2Id) as never;
|
||||
|
||||
await pipeline.run(job2);
|
||||
|
||||
const secondDocCount = (
|
||||
db.prepare(`SELECT COUNT(*) as n FROM documents`).get() as { n: number }
|
||||
).n;
|
||||
const secondSnippetIds = (
|
||||
db.prepare(`SELECT id FROM snippets`).all() as { id: string }[]
|
||||
).map((r) => r.id);
|
||||
const secondSnippetIds = (db.prepare(`SELECT id FROM snippets`).all() as { id: string }[]).map(
|
||||
(r) => r.id
|
||||
);
|
||||
|
||||
// Document count stays the same and snippet IDs are unchanged.
|
||||
expect(secondDocCount).toBe(firstDocCount);
|
||||
@@ -395,7 +393,8 @@ describe('IndexingPipeline', () => {
|
||||
files: [
|
||||
{
|
||||
path: 'README.md',
|
||||
content: '# Original\n\nThis is the original version of the documentation with sufficient content.',
|
||||
content:
|
||||
'# Original\n\nThis is the original version of the documentation with sufficient content.',
|
||||
sha: 'sha-v1',
|
||||
language: 'markdown'
|
||||
}
|
||||
@@ -415,7 +414,8 @@ describe('IndexingPipeline', () => {
|
||||
files: [
|
||||
{
|
||||
path: 'README.md',
|
||||
content: '# Updated\n\nThis is a completely different version of the documentation with new content.',
|
||||
content:
|
||||
'# Updated\n\nThis is a completely different version of the documentation with new content.',
|
||||
sha: 'sha-v2',
|
||||
language: 'markdown'
|
||||
}
|
||||
@@ -423,14 +423,11 @@ describe('IndexingPipeline', () => {
|
||||
totalFiles: 1
|
||||
});
|
||||
const job2Id = insertJob(db, { repository_id: '/test/repo', status: 'queued' });
|
||||
const job2 = db
|
||||
.prepare(`SELECT * FROM indexing_jobs WHERE id = ?`)
|
||||
.get(job2Id) as never;
|
||||
const job2 = db.prepare(`SELECT * FROM indexing_jobs WHERE id = ?`).get(job2Id) as never;
|
||||
await pipeline2.run(job2);
|
||||
|
||||
const finalDocCount = (
|
||||
db.prepare(`SELECT COUNT(*) as n FROM documents`).get() as { n: number }
|
||||
).n;
|
||||
const finalDocCount = (db.prepare(`SELECT COUNT(*) as n FROM documents`).get() as { n: number })
|
||||
.n;
|
||||
// Only one document should exist (the updated one).
|
||||
expect(finalDocCount).toBe(1);
|
||||
|
||||
@@ -452,9 +449,9 @@ describe('IndexingPipeline', () => {
|
||||
const job = makeJob();
|
||||
await pipeline.run(job as never);
|
||||
|
||||
const updated = db
|
||||
.prepare(`SELECT progress FROM indexing_jobs WHERE id = ?`)
|
||||
.get(job.id) as { progress: number };
|
||||
const updated = db.prepare(`SELECT progress FROM indexing_jobs WHERE id = ?`).get(job.id) as {
|
||||
progress: number;
|
||||
};
|
||||
expect(updated.progress).toBe(100);
|
||||
});
|
||||
|
||||
@@ -467,12 +464,7 @@ describe('IndexingPipeline', () => {
|
||||
commitSha: 'abc'
|
||||
});
|
||||
|
||||
const pipeline = new IndexingPipeline(
|
||||
db,
|
||||
vi.fn() as never,
|
||||
{ crawl } as never,
|
||||
null
|
||||
);
|
||||
const pipeline = new IndexingPipeline(db, vi.fn() as never, { crawl } as never, null);
|
||||
|
||||
const job = makeJob();
|
||||
await pipeline.run(job as never);
|
||||
@@ -511,7 +503,10 @@ describe('IndexingPipeline', () => {
|
||||
await pipeline1.run(job1 as never);
|
||||
|
||||
const afterFirstRun = {
|
||||
docs: db.prepare(`SELECT file_path, checksum FROM documents ORDER BY file_path`).all() as { file_path: string; checksum: string }[],
|
||||
docs: db.prepare(`SELECT file_path, checksum FROM documents ORDER BY file_path`).all() as {
|
||||
file_path: string;
|
||||
checksum: string;
|
||||
}[],
|
||||
snippetCount: (db.prepare(`SELECT COUNT(*) as n FROM snippets`).get() as { n: number }).n
|
||||
};
|
||||
expect(afterFirstRun.docs).toHaveLength(3);
|
||||
|
||||
@@ -250,7 +250,10 @@ export class IndexingPipeline {
|
||||
private async crawl(
|
||||
repo: Repository,
|
||||
job: IndexingJob
|
||||
): Promise<{ files: Array<{ path: string; content: string; sha: string; size: number; language: string }>; totalFiles: number }> {
|
||||
): Promise<{
|
||||
files: Array<{ path: string; content: string; sha: string; size: number; language: string }>;
|
||||
totalFiles: number;
|
||||
}> {
|
||||
if (repo.source === 'github') {
|
||||
// Parse owner/repo from the canonical ID: "/owner/repo"
|
||||
const parts = repo.id.replace(/^\//, '').split('/');
|
||||
|
||||
@@ -133,9 +133,7 @@ export class JobQueue {
|
||||
|
||||
// Check whether another job was queued while this one ran.
|
||||
const next = this.db
|
||||
.prepare<[], { id: string }>(
|
||||
`SELECT id FROM indexing_jobs WHERE status = 'queued' LIMIT 1`
|
||||
)
|
||||
.prepare<[], { id: string }>(`SELECT id FROM indexing_jobs WHERE status = 'queued' LIMIT 1`)
|
||||
.get();
|
||||
if (next) {
|
||||
setImmediate(() => this.processNext());
|
||||
@@ -147,9 +145,7 @@ export class JobQueue {
|
||||
* Retrieve a single job by ID.
|
||||
*/
|
||||
getJob(id: string): IndexingJob | null {
|
||||
const raw = this.db
|
||||
.prepare<[string], IndexingJobEntity>(`${JOB_SELECT} WHERE id = ?`)
|
||||
.get(id);
|
||||
const raw = this.db.prepare<[string], IndexingJobEntity>(`${JOB_SELECT} WHERE id = ?`).get(id);
|
||||
return raw ? IndexingJobMapper.fromEntity(new IndexingJobEntity(raw)) : null;
|
||||
}
|
||||
|
||||
@@ -178,9 +174,9 @@ export class JobQueue {
|
||||
const sql = `${JOB_SELECT} ${where} ORDER BY created_at DESC LIMIT ?`;
|
||||
params.push(limit);
|
||||
|
||||
return (this.db.prepare<unknown[], IndexingJobEntity>(sql).all(...params) as IndexingJobEntity[]).map(
|
||||
(row) => IndexingJobMapper.fromEntity(new IndexingJobEntity(row))
|
||||
);
|
||||
return (
|
||||
this.db.prepare<unknown[], IndexingJobEntity>(sql).all(...params) as IndexingJobEntity[]
|
||||
).map((row) => IndexingJobMapper.fromEntity(new IndexingJobEntity(row)));
|
||||
}
|
||||
|
||||
/**
|
||||
@@ -228,9 +224,7 @@ export class JobQueue {
|
||||
return false;
|
||||
}
|
||||
|
||||
this.db
|
||||
.prepare(`UPDATE indexing_jobs SET status = 'paused' WHERE id = ?`)
|
||||
.run(id);
|
||||
this.db.prepare(`UPDATE indexing_jobs SET status = 'paused' WHERE id = ?`).run(id);
|
||||
|
||||
return true;
|
||||
}
|
||||
@@ -249,9 +243,7 @@ export class JobQueue {
|
||||
return false;
|
||||
}
|
||||
|
||||
this.db
|
||||
.prepare(`UPDATE indexing_jobs SET status = 'queued' WHERE id = ?`)
|
||||
.run(id);
|
||||
this.db.prepare(`UPDATE indexing_jobs SET status = 'queued' WHERE id = ?`).run(id);
|
||||
|
||||
// Trigger queue processing in case the queue was idle
|
||||
this.drainQueued();
|
||||
|
||||
@@ -25,9 +25,13 @@ function createTestDb(): Database.Database {
|
||||
client.pragma('foreign_keys = ON');
|
||||
|
||||
const migrationsFolder = join(import.meta.dirname, '../db/migrations');
|
||||
|
||||
|
||||
// Run all migrations in order
|
||||
const migrations = ['0000_large_master_chief.sql', '0001_quick_nighthawk.sql', '0002_silky_stellaris.sql'];
|
||||
const migrations = [
|
||||
'0000_large_master_chief.sql',
|
||||
'0001_quick_nighthawk.sql',
|
||||
'0002_silky_stellaris.sql'
|
||||
];
|
||||
for (const migrationFile of migrations) {
|
||||
const migrationSql = readFileSync(join(migrationsFolder, migrationFile), 'utf-8');
|
||||
const statements = migrationSql
|
||||
@@ -123,9 +127,7 @@ function seedEmbedding(
|
||||
// Mock EmbeddingProvider
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
function makeMockProvider(
|
||||
returnValues: number[][] = [[1, 0, 0, 0]]
|
||||
): EmbeddingProvider {
|
||||
function makeMockProvider(returnValues: number[][] = [[1, 0, 0, 0]]): EmbeddingProvider {
|
||||
return {
|
||||
name: 'mock',
|
||||
dimensions: returnValues[0]?.length ?? 4,
|
||||
@@ -254,9 +256,18 @@ describe('reciprocalRankFusion', () => {
|
||||
});
|
||||
|
||||
it('handles three lists correctly', () => {
|
||||
const r1 = [{ id: 'a', score: 1 }, { id: 'b', score: 0 }];
|
||||
const r2 = [{ id: 'b', score: 1 }, { id: 'c', score: 0 }];
|
||||
const r3 = [{ id: 'a', score: 1 }, { id: 'c', score: 0 }];
|
||||
const r1 = [
|
||||
{ id: 'a', score: 1 },
|
||||
{ id: 'b', score: 0 }
|
||||
];
|
||||
const r2 = [
|
||||
{ id: 'b', score: 1 },
|
||||
{ id: 'c', score: 0 }
|
||||
];
|
||||
const r3 = [
|
||||
{ id: 'a', score: 1 },
|
||||
{ id: 'c', score: 0 }
|
||||
];
|
||||
const result = reciprocalRankFusion(r1, r2, r3);
|
||||
// 'a' appears first in r1 and r3 → higher combined score than 'b' or 'c'.
|
||||
expect(result[0].id).toBe('a');
|
||||
|
||||
@@ -103,10 +103,7 @@ export class HybridSearchService {
|
||||
* @param options - Search parameters including repositoryId and alpha blend.
|
||||
* @returns Ranked array of SnippetSearchResult, deduplicated by snippet ID.
|
||||
*/
|
||||
async search(
|
||||
query: string,
|
||||
options: HybridSearchOptions
|
||||
): Promise<SnippetSearchResult[]> {
|
||||
async search(query: string, options: HybridSearchOptions): Promise<SnippetSearchResult[]> {
|
||||
const limit = options.limit ?? 20;
|
||||
const mode = options.searchMode ?? 'auto';
|
||||
|
||||
|
||||
@@ -20,7 +20,7 @@
|
||||
*/
|
||||
export function preprocessQuery(raw: string): string {
|
||||
// 1. Trim and collapse whitespace.
|
||||
let q = raw.trim().replace(/\s+/g, ' ');
|
||||
const q = raw.trim().replace(/\s+/g, ' ');
|
||||
|
||||
if (!q) return q;
|
||||
|
||||
@@ -73,7 +73,7 @@ export function preprocessQuery(raw: string): string {
|
||||
|
||||
// 3. Separate operators from searchable terms.
|
||||
const searchableTerms = processedTokens.filter((t) => !['AND', 'OR', 'NOT'].includes(t));
|
||||
|
||||
|
||||
if (searchableTerms.length === 0) return '';
|
||||
|
||||
// 4. Reconstruct final tokens keeping operators between searchable terms.
|
||||
@@ -91,7 +91,10 @@ export function preprocessQuery(raw: string): string {
|
||||
}
|
||||
|
||||
// Remove trailing operators
|
||||
while (finalTokens.length > 0 && ['AND', 'OR', 'NOT'].includes(finalTokens[finalTokens.length - 1])) {
|
||||
while (
|
||||
finalTokens.length > 0 &&
|
||||
['AND', 'OR', 'NOT'].includes(finalTokens[finalTokens.length - 1])
|
||||
) {
|
||||
finalTokens.pop();
|
||||
}
|
||||
|
||||
|
||||
@@ -32,9 +32,7 @@ export interface FusedItem {
|
||||
* descending relevance (index 0 = most relevant).
|
||||
* @returns Fused array sorted by descending rrfScore, deduplicated by id.
|
||||
*/
|
||||
export function reciprocalRankFusion(
|
||||
...rankings: Array<Array<RankedItem>>
|
||||
): Array<FusedItem> {
|
||||
export function reciprocalRankFusion(...rankings: Array<Array<RankedItem>>): Array<FusedItem> {
|
||||
const K = 60; // Standard RRF constant.
|
||||
const scores = new Map<string, number>();
|
||||
|
||||
|
||||
@@ -674,7 +674,9 @@ describe('formatLibraryResults', () => {
|
||||
id: '/facebook/react/v18',
|
||||
repositoryId: '/facebook/react',
|
||||
tag: 'v18',
|
||||
title: 'React 18', commitHash: null, state: 'indexed',
|
||||
title: 'React 18',
|
||||
commitHash: null,
|
||||
state: 'indexed',
|
||||
totalSnippets: 1000,
|
||||
indexedAt: null,
|
||||
createdAt: now
|
||||
@@ -731,7 +733,9 @@ describe('formatLibraryResults', () => {
|
||||
describe('formatSnippetResults', () => {
|
||||
const now = new Date();
|
||||
|
||||
function makeSnippetResult(overrides: Partial<Parameters<typeof formatSnippetResults>[0][number]> = {}): Parameters<typeof formatSnippetResults>[0][number] {
|
||||
function makeSnippetResult(
|
||||
overrides: Partial<Parameters<typeof formatSnippetResults>[0][number]> = {}
|
||||
): Parameters<typeof formatSnippetResults>[0][number] {
|
||||
return {
|
||||
snippet: {
|
||||
id: crypto.randomUUID(),
|
||||
|
||||
@@ -87,10 +87,7 @@ export class SearchService {
|
||||
if (!processedQuery) return [];
|
||||
|
||||
// Build the WHERE clause dynamically based on optional filters.
|
||||
const conditions: string[] = [
|
||||
'snippets_fts MATCH ?',
|
||||
's.repository_id = ?'
|
||||
];
|
||||
const conditions: string[] = ['snippets_fts MATCH ?', 's.repository_id = ?'];
|
||||
const params: unknown[] = [processedQuery, repositoryId];
|
||||
|
||||
if (versionId !== undefined) {
|
||||
@@ -132,10 +129,14 @@ export class SearchService {
|
||||
const rows = this.db.prepare(sql).all(...params) as RawSnippetRow[];
|
||||
|
||||
return rows.map((row) =>
|
||||
SearchResultMapper.snippetFromEntity(new SnippetEntity(row), {
|
||||
id: row.repo_id,
|
||||
title: row.repo_title
|
||||
}, row.score)
|
||||
SearchResultMapper.snippetFromEntity(
|
||||
new SnippetEntity(row),
|
||||
{
|
||||
id: row.repo_id,
|
||||
title: row.repo_title
|
||||
},
|
||||
row.score
|
||||
)
|
||||
);
|
||||
}
|
||||
|
||||
@@ -188,7 +189,11 @@ export class SearchService {
|
||||
|
||||
return rows.map((row) => {
|
||||
const compositeScore =
|
||||
row.exact_match + row.prefix_match + row.desc_match + row.snippet_score + row.trust_component;
|
||||
row.exact_match +
|
||||
row.prefix_match +
|
||||
row.desc_match +
|
||||
row.snippet_score +
|
||||
row.trust_component;
|
||||
return SearchResultMapper.libraryFromEntity(
|
||||
new RepositoryEntity(row),
|
||||
this.getVersionEntities(row.id),
|
||||
@@ -203,9 +208,7 @@ export class SearchService {
|
||||
|
||||
private getVersionEntities(repositoryId: string): RepositoryVersionEntity[] {
|
||||
return this.db
|
||||
.prepare(
|
||||
`SELECT * FROM repository_versions WHERE repository_id = ? ORDER BY created_at DESC`
|
||||
)
|
||||
.prepare(`SELECT * FROM repository_versions WHERE repository_id = ? ORDER BY created_at DESC`)
|
||||
.all(repositoryId) as RawVersionRow[];
|
||||
}
|
||||
}
|
||||
|
||||
@@ -46,9 +46,7 @@ interface RawEmbeddingRow {
|
||||
*/
|
||||
export function cosineSimilarity(a: Float32Array, b: Float32Array): number {
|
||||
if (a.length !== b.length) {
|
||||
throw new Error(
|
||||
`Embedding dimension mismatch: ${a.length} vs ${b.length}`
|
||||
);
|
||||
throw new Error(`Embedding dimension mismatch: ${a.length} vs ${b.length}`);
|
||||
}
|
||||
|
||||
let dot = 0;
|
||||
|
||||
@@ -27,10 +27,7 @@ function createTestDb(): Database.Database {
|
||||
client.pragma('foreign_keys = ON');
|
||||
|
||||
const migrationsFolder = join(import.meta.dirname, '../db/migrations');
|
||||
const migrationSql = readFileSync(
|
||||
join(migrationsFolder, '0000_large_master_chief.sql'),
|
||||
'utf-8'
|
||||
);
|
||||
const migrationSql = readFileSync(join(migrationsFolder, '0000_large_master_chief.sql'), 'utf-8');
|
||||
|
||||
// Drizzle migration files use `--> statement-breakpoint` as separator.
|
||||
const statements = migrationSql
|
||||
@@ -261,9 +258,7 @@ describe('RepositoryService.add()', () => {
|
||||
});
|
||||
|
||||
it('throws InvalidInputError when sourceUrl is empty', () => {
|
||||
expect(() =>
|
||||
service.add({ source: 'github', sourceUrl: '' })
|
||||
).toThrow(InvalidInputError);
|
||||
expect(() => service.add({ source: 'github', sourceUrl: '' })).toThrow(InvalidInputError);
|
||||
});
|
||||
|
||||
it('stores description and branch when provided', () => {
|
||||
@@ -321,9 +316,7 @@ describe('RepositoryService.update()', () => {
|
||||
});
|
||||
|
||||
it('throws NotFoundError for a non-existent repository', () => {
|
||||
expect(() =>
|
||||
service.update('/not/found', { title: 'New Title' })
|
||||
).toThrow(NotFoundError);
|
||||
expect(() => service.update('/not/found', { title: 'New Title' })).toThrow(NotFoundError);
|
||||
});
|
||||
});
|
||||
|
||||
|
||||
@@ -74,9 +74,7 @@ export class RepositoryService {
|
||||
.get(state) as { n: number };
|
||||
return row.n;
|
||||
}
|
||||
const row = this.db
|
||||
.prepare(`SELECT COUNT(*) as n FROM repositories`)
|
||||
.get() as { n: number };
|
||||
const row = this.db.prepare(`SELECT COUNT(*) as n FROM repositories`).get() as { n: number };
|
||||
return row.n;
|
||||
}
|
||||
|
||||
@@ -115,13 +113,13 @@ export class RepositoryService {
|
||||
}
|
||||
// Default title from owner/repo
|
||||
const parts = id.split('/').filter(Boolean);
|
||||
title = input.title ?? (parts[1] ?? id);
|
||||
title = input.title ?? parts[1] ?? id;
|
||||
} else {
|
||||
// local
|
||||
const existing = this.list({ limit: 9999 }).map((r) => r.id);
|
||||
id = resolveLocalId(input.sourceUrl, existing);
|
||||
const parts = input.sourceUrl.split('/');
|
||||
title = input.title ?? (parts.at(-1) ?? 'local-repo');
|
||||
title = input.title ?? parts.at(-1) ?? 'local-repo';
|
||||
}
|
||||
|
||||
// Check for collision
|
||||
|
||||
@@ -23,16 +23,10 @@ function createTestDb(): Database.Database {
|
||||
client.pragma('foreign_keys = ON');
|
||||
|
||||
const migrationsFolder = join(import.meta.dirname, '../db/migrations');
|
||||
|
||||
|
||||
// Apply all migration files in order
|
||||
const migration0 = readFileSync(
|
||||
join(migrationsFolder, '0000_large_master_chief.sql'),
|
||||
'utf-8'
|
||||
);
|
||||
const migration1 = readFileSync(
|
||||
join(migrationsFolder, '0001_quick_nighthawk.sql'),
|
||||
'utf-8'
|
||||
);
|
||||
const migration0 = readFileSync(join(migrationsFolder, '0000_large_master_chief.sql'), 'utf-8');
|
||||
const migration1 = readFileSync(join(migrationsFolder, '0001_quick_nighthawk.sql'), 'utf-8');
|
||||
|
||||
// Apply first migration
|
||||
const statements0 = migration0
|
||||
@@ -201,9 +195,7 @@ describe('VersionService.remove()', () => {
|
||||
|
||||
versionService.remove('/facebook/react', 'v18.3.0');
|
||||
|
||||
const doc = client
|
||||
.prepare(`SELECT id FROM documents WHERE id = ?`)
|
||||
.get(docId);
|
||||
const doc = client.prepare(`SELECT id FROM documents WHERE id = ?`).get(docId);
|
||||
expect(doc).toBeUndefined();
|
||||
});
|
||||
});
|
||||
|
||||
@@ -40,12 +40,7 @@ export class VersionService {
|
||||
* @throws NotFoundError when the parent repository does not exist
|
||||
* @throws AlreadyExistsError when the tag is already registered
|
||||
*/
|
||||
add(
|
||||
repositoryId: string,
|
||||
tag: string,
|
||||
title?: string,
|
||||
commitHash?: string
|
||||
): RepositoryVersion {
|
||||
add(repositoryId: string, tag: string, title?: string, commitHash?: string): RepositoryVersion {
|
||||
// Verify parent repository exists.
|
||||
const repo = this.db
|
||||
.prepare(`SELECT id, source, source_url FROM repositories WHERE id = ?`)
|
||||
@@ -115,9 +110,7 @@ export class VersionService {
|
||||
*/
|
||||
getByTag(repositoryId: string, tag: string): RepositoryVersion | null {
|
||||
const row = this.db
|
||||
.prepare(
|
||||
`SELECT * FROM repository_versions WHERE repository_id = ? AND tag = ?`
|
||||
)
|
||||
.prepare(`SELECT * FROM repository_versions WHERE repository_id = ? AND tag = ?`)
|
||||
.get(repositoryId, tag) as RepositoryVersionEntity | undefined;
|
||||
return row ? RepositoryVersionMapper.fromEntity(new RepositoryVersionEntity(row)) : null;
|
||||
}
|
||||
@@ -137,9 +130,9 @@ export class VersionService {
|
||||
previousVersions: { tag: string; title: string; commitHash?: string }[]
|
||||
): RepositoryVersion[] {
|
||||
// Verify parent repository exists.
|
||||
const repo = this.db
|
||||
.prepare(`SELECT id FROM repositories WHERE id = ?`)
|
||||
.get(repositoryId) as { id: string } | undefined;
|
||||
const repo = this.db.prepare(`SELECT id FROM repositories WHERE id = ?`).get(repositoryId) as
|
||||
| { id: string }
|
||||
| undefined;
|
||||
|
||||
if (!repo) {
|
||||
throw new NotFoundError(`Repository ${repositoryId} not found`);
|
||||
|
||||
@@ -65,13 +65,10 @@ export function discoverVersionTags(options: DiscoverTagsOptions): string[] {
|
||||
|
||||
try {
|
||||
// List all tags, sorted by commit date (newest first)
|
||||
const output = execSync(
|
||||
`git -C "${repoPath}" tag -l --sort=-creatordate`,
|
||||
{
|
||||
encoding: 'utf-8',
|
||||
stdio: ['ignore', 'pipe', 'pipe']
|
||||
}
|
||||
).trim();
|
||||
const output = execSync(`git -C "${repoPath}" tag -l --sort=-creatordate`, {
|
||||
encoding: 'utf-8',
|
||||
stdio: ['ignore', 'pipe', 'pipe']
|
||||
}).trim();
|
||||
|
||||
if (!output) return [];
|
||||
|
||||
|
||||
@@ -33,10 +33,11 @@ export function resolveLocalId(path: string, existingIds: string[]): string {
|
||||
* Slugify a string to be safe for use in IDs.
|
||||
*/
|
||||
function slugify(str: string): string {
|
||||
return str
|
||||
.toLowerCase()
|
||||
.replace(/[^a-z0-9-_]/g, '-')
|
||||
.replace(/-+/g, '-')
|
||||
.replace(/^-|-$/g, '')
|
||||
|| 'repo';
|
||||
return (
|
||||
str
|
||||
.toLowerCase()
|
||||
.replace(/[^a-z0-9-_]/g, '-')
|
||||
.replace(/-+/g, '-')
|
||||
.replace(/^-|-$/g, '') || 'repo'
|
||||
);
|
||||
}
|
||||
|
||||
@@ -59,13 +59,10 @@ export function errorResponse(
|
||||
status: number,
|
||||
details?: Record<string, unknown>
|
||||
): Response {
|
||||
return new Response(
|
||||
JSON.stringify({ error, code, ...(details ? { details } : {}) }),
|
||||
{
|
||||
status,
|
||||
headers: { 'Content-Type': 'application/json' }
|
||||
}
|
||||
);
|
||||
return new Response(JSON.stringify({ error, code, ...(details ? { details } : {}) }), {
|
||||
status,
|
||||
headers: { 'Content-Type': 'application/json' }
|
||||
});
|
||||
}
|
||||
|
||||
/**
|
||||
|
||||
Reference in New Issue
Block a user