From 5a3c27224def5daec53d20309189982b42a0f378 Mon Sep 17 00:00:00 2001 From: Giancarmine Salucci Date: Fri, 27 Mar 2026 02:23:01 +0100 Subject: [PATCH] chore(FEEDBACK-0001): linting --- README.md | 78 +- docker-compose.yml | 6 +- docs/FINDINGS.md | 132 +- docs/PRD.md | 105 +- docs/features/TRUEREF-0001.md | 188 +- docs/features/TRUEREF-0002.md | 157 +- docs/features/TRUEREF-0003.md | 192 +- docs/features/TRUEREF-0004.md | 132 +- docs/features/TRUEREF-0005.md | 302 +-- docs/features/TRUEREF-0006.md | 178 +- docs/features/TRUEREF-0007.md | 261 ++- docs/features/TRUEREF-0008.md | 125 +- docs/features/TRUEREF-0009.md | 338 ++-- docs/features/TRUEREF-0010.md | 152 +- docs/features/TRUEREF-0011.md | 361 ++-- docs/features/TRUEREF-0012.md | 108 +- docs/features/TRUEREF-0013.md | 272 ++- docs/features/TRUEREF-0014.md | 108 +- docs/features/TRUEREF-0015.md | 372 ++-- docs/features/TRUEREF-0016.md | 164 +- docs/features/TRUEREF-0017.md | 81 +- docs/features/TRUEREF-0018.md | 284 +-- docs/features/TRUEREF-0019.md | 57 +- docs/features/TRUEREF-0020.md | 22 +- src/hooks.server.ts | 6 +- src/lib/components/AddRepositoryModal.svelte | 10 +- src/lib/components/FolderPicker.svelte | 23 +- src/lib/components/RepositoryCard.svelte | 5 +- src/lib/components/StatBadge.svelte | 5 +- .../components/admin/JobStatusBadge.svelte | 4 +- src/lib/components/search/SearchInput.svelte | 2 +- src/lib/components/search/SnippetCard.svelte | 22 +- src/lib/server/api/dto-response.ts | 2 +- src/lib/server/api/formatters.test.ts | 16 +- src/lib/server/api/formatters.ts | 11 +- src/lib/server/config/config-parser.test.ts | 33 +- src/lib/server/config/config-parser.ts | 12 +- src/lib/server/config/trueref-config.json | 166 +- src/lib/server/crawler/file-filter.ts | 77 +- src/lib/server/crawler/github-tags.ts | 7 +- src/lib/server/crawler/github.crawler.test.ts | 63 +- src/lib/server/crawler/github.crawler.ts | 116 +- src/lib/server/crawler/local.crawler.test.ts | 4 +- 
src/lib/server/crawler/local.crawler.ts | 7 +- .../db/migrations/meta/0000_snapshot.json | 1440 +++++++------- .../db/migrations/meta/0001_snapshot.json | 1454 +++++++------- .../db/migrations/meta/0002_snapshot.json | 1667 ++++++++--------- .../server/db/migrations/meta/_journal.json | 52 +- .../embeddings/embedding.service.test.ts | 11 +- .../server/embeddings/embedding.service.ts | 19 +- src/lib/server/embeddings/factory.ts | 6 +- .../server/mappers/context-response.mapper.ts | 49 +- src/lib/server/mappers/indexing-job.mapper.ts | 5 +- .../mappers/repository-version.mapper.ts | 2 +- src/lib/server/mappers/repository.mapper.ts | 2 +- .../server/mappers/search-result.mapper.ts | 12 +- src/lib/server/mappers/snippet.mapper.ts | 2 +- src/lib/server/models/context-response.ts | 4 +- src/lib/server/models/indexing-job.ts | 2 +- src/lib/server/models/repository-version.ts | 2 +- src/lib/server/models/repository.ts | 2 +- src/lib/server/models/search-result.ts | 2 +- src/lib/server/models/snippet.ts | 2 +- src/lib/server/parser/code.parser.test.ts | 8 +- src/lib/server/parser/code.parser.ts | 22 +- src/lib/server/parser/index.ts | 9 +- src/lib/server/parser/markdown.parser.test.ts | 10 +- src/lib/server/parser/markdown.parser.ts | 13 +- src/lib/server/pipeline/diff.test.ts | 16 +- .../server/pipeline/indexing.pipeline.test.ts | 115 +- src/lib/server/pipeline/indexing.pipeline.ts | 5 +- src/lib/server/pipeline/job-queue.ts | 22 +- .../search/hybrid.search.service.test.ts | 27 +- .../server/search/hybrid.search.service.ts | 5 +- src/lib/server/search/query-preprocessor.ts | 9 +- src/lib/server/search/rrf.ts | 4 +- src/lib/server/search/search.service.test.ts | 8 +- src/lib/server/search/search.service.ts | 27 +- src/lib/server/search/vector.search.ts | 4 +- .../services/repository.service.test.ts | 13 +- src/lib/server/services/repository.service.ts | 8 +- .../server/services/version.service.test.ts | 16 +- src/lib/server/services/version.service.ts | 17 +- 
src/lib/server/utils/git.ts | 11 +- src/lib/server/utils/id-resolver.ts | 13 +- src/lib/server/utils/validation.ts | 11 +- src/mcp/index.ts | 10 +- src/mcp/tools/resolve-library-id.ts | 4 +- src/routes/admin/jobs/+page.svelte | 42 +- .../api/v1/api-contract.integration.test.ts | 4 +- src/routes/api/v1/context/+server.ts | 65 +- src/routes/api/v1/fs/browse/+server.ts | 2 +- src/routes/api/v1/jobs/[id]/cancel/+server.ts | 10 +- src/routes/api/v1/jobs/[id]/pause/+server.ts | 6 +- src/routes/api/v1/jobs/[id]/resume/+server.ts | 10 +- src/routes/api/v1/libs/+server.ts | 4 +- src/routes/api/v1/libs/[id]/index/+server.ts | 4 +- .../api/v1/settings/embedding/+server.ts | 1 - .../api/v1/settings/embedding/test/+server.ts | 8 +- src/routes/repos/[id]/+page.svelte | 6 +- src/routes/search/+page.svelte | 8 +- src/routes/settings/+page.svelte | 6 +- 102 files changed, 5108 insertions(+), 4976 deletions(-) diff --git a/README.md b/README.md index 0ab22fc..e855d93 100644 --- a/README.md +++ b/README.md @@ -38,13 +38,13 @@ TrueRef is under active development. The current codebase already includes: TrueRef is organized into four main layers: 1. Web UI - SvelteKit application for adding repositories, monitoring indexing, searching content, and configuring embeddings. + SvelteKit application for adding repositories, monitoring indexing, searching content, and configuring embeddings. 2. REST API - Endpoints under `/api/v1/*` for repository management, search, schema discovery, job status, and settings. + Endpoints under `/api/v1/*` for repository management, search, schema discovery, job status, and settings. 3. Indexing pipeline - Crawlers, parsers, chunking logic, snippet storage, and optional embedding generation. + Crawlers, parsers, chunking logic, snippet storage, and optional embedding generation. 4. MCP server - A thin compatibility layer that forwards `resolve-library-id` and `query-docs` requests to the TrueRef REST API. 
+ A thin compatibility layer that forwards `resolve-library-id` and `query-docs` requests to the TrueRef REST API. At runtime, the app uses SQLite via `better-sqlite3` and Drizzle, plus optional embedding providers for semantic retrieval. @@ -367,9 +367,9 @@ The tool names and argument shapes intentionally mirror context7 so existing wor The MCP server uses: - `TRUEREF_API_URL` - Base URL of the TrueRef web app. Default: `http://localhost:5173` + Base URL of the TrueRef web app. Default: `http://localhost:5173` - `PORT` - Used only for HTTP transport. Default: `3001` + Used only for HTTP transport. Default: `3001` ### Start MCP over stdio @@ -602,6 +602,7 @@ alwaysApply: true --- When answering questions about indexed libraries, always use the TrueRef MCP tools: + 1. Call `resolve-library-id` with the library name and the user's question to get the library ID. 2. Call `query-docs` with the library ID and question to retrieve relevant documentation. 3. Use the returned documentation to answer accurately. @@ -614,9 +615,9 @@ Never rely on training data alone for library APIs that may have changed. Whether you are using VS Code, IntelliJ, or Claude Code, the expected retrieval flow is: 1. `resolve-library-id` - Find the correct repository or version identifier. + Find the correct repository or version identifier. 2. `query-docs` - Retrieve the actual documentation and code snippets for the user question. + Retrieve the actual documentation and code snippets for the user question. 
Example: @@ -638,10 +639,10 @@ docker compose up --build This builds the image and starts two services: -| Service | Default port | Purpose | -|---------|-------------|---------| -| `web` | `3000` | SvelteKit web UI and REST API | -| `mcp` | `3001` | MCP HTTP server | +| Service | Default port | Purpose | +| ------- | ------------ | ----------------------------- | +| `web` | `3000` | SvelteKit web UI and REST API | +| `mcp` | `3001` | MCP HTTP server | The SQLite database is stored in a named Docker volume (`trueref-data`) and persists across restarts. @@ -687,10 +688,10 @@ services: - ${USERPROFILE:-$HOME}/.gitconfig:/root/.gitconfig:ro - ${CORP_CA_CERT}:/certs/corp-ca.crt:ro environment: - BITBUCKET_HOST: "${BITBUCKET_HOST}" - GITLAB_HOST: "${GITLAB_HOST}" - GIT_TOKEN_BITBUCKET: "${GIT_TOKEN_BITBUCKET}" - GIT_TOKEN_GITLAB: "${GIT_TOKEN_GITLAB}" + BITBUCKET_HOST: '${BITBUCKET_HOST}' + GITLAB_HOST: '${GITLAB_HOST}' + GIT_TOKEN_BITBUCKET: '${GIT_TOKEN_BITBUCKET}' + GIT_TOKEN_GITLAB: '${GIT_TOKEN_GITLAB}' ``` 5. **Start the services**: @@ -708,6 +709,7 @@ The Docker entrypoint script (`docker-entrypoint.sh`) runs these steps in order: 3. **Configure git credentials**: Sets up per-host credential helpers that provide the correct username and token for each remote. This setup works for: + - HTTPS cloning with personal access tokens - SSH cloning with mounted SSH keys - On-premise servers with custom CA certificates @@ -718,6 +720,7 @@ This setup works for: For long-lived deployments, SSH authentication is recommended: 1. Generate an SSH key pair if you don't have one: + ```sh ssh-keygen -t ed25519 -C "trueref@your-company.com" ``` @@ -727,6 +730,7 @@ For long-lived deployments, SSH authentication is recommended: - GitLab: User Settings → SSH Keys 3. 
Ensure your `~/.ssh/config` has the correct host entries: + ``` Host bitbucket.corp.example.com IdentityFile ~/.ssh/id_ed25519 @@ -737,13 +741,13 @@ For long-lived deployments, SSH authentication is recommended: ### Environment variables -| Variable | Default | Description | -|----------|---------|-------------| -| `DATABASE_URL` | `/data/trueref.db` | Path to the SQLite database inside the container | -| `PORT` | `3000` | Port the web app listens on | -| `HOST` | `0.0.0.0` | Bind address for the web app | +| Variable | Default | Description | +| ----------------- | ----------------------- | -------------------------------------------------- | +| `DATABASE_URL` | `/data/trueref.db` | Path to the SQLite database inside the container | +| `PORT` | `3000` | Port the web app listens on | +| `HOST` | `0.0.0.0` | Bind address for the web app | | `TRUEREF_API_URL` | `http://localhost:3000` | Base URL the MCP server uses to reach the REST API | -| `MCP_PORT` | `3001` | Port the MCP HTTP server listens on | +| `MCP_PORT` | `3001` | Port the MCP HTTP server listens on | Override them in `docker-compose.yml` or pass them with `-e` flags. 
@@ -770,12 +774,12 @@ Once both containers are running, point VS Code at the MCP HTTP endpoint: ```json { - "servers": { - "trueref": { - "type": "http", - "url": "http://localhost:3001/mcp" - } - } + "servers": { + "trueref": { + "type": "http", + "url": "http://localhost:3001/mcp" + } + } } ``` @@ -783,12 +787,12 @@ Once both containers are running, point VS Code at the MCP HTTP endpoint: ```json { - "mcpServers": { - "trueref": { - "type": "http", - "url": "http://localhost:3001/mcp" - } - } + "mcpServers": { + "trueref": { + "type": "http", + "url": "http://localhost:3001/mcp" + } + } } ``` @@ -806,10 +810,10 @@ Verify the connection inside Claude Code: ### Health checks -| Endpoint | Expected response | -|----------|------------------| +| Endpoint | Expected response | +| ----------------------------------- | ------------------------------- | | `http://localhost:3000/api/v1/libs` | JSON array of indexed libraries | -| `http://localhost:3001/ping` | `{"ok":true}` | +| `http://localhost:3001/ping` | `{"ok":true}` | ### Mounting a local repository diff --git a/docker-compose.yml b/docker-compose.yml index 7e88f73..3d145e9 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -2,7 +2,7 @@ services: web: build: . ports: - - "3000:3000" + - '3000:3000' volumes: - trueref-data:/data # Corporate deployment support (TRUEREF-0019) @@ -24,10 +24,10 @@ services: build: . command: mcp ports: - - "3001:3001" + - '3001:3001' environment: TRUEREF_API_URL: http://web:3000 - MCP_PORT: "3001" + MCP_PORT: '3001' depends_on: - web restart: unless-stopped diff --git a/docs/FINDINGS.md b/docs/FINDINGS.md index d491c28..71460d2 100644 --- a/docs/FINDINGS.md +++ b/docs/FINDINGS.md @@ -37,85 +37,85 @@ Add subsequent research below this section. - Task: Refresh only stale documentation after changes to retrieval, formatters, token budgeting, and parser behavior. 
- Files inspected: - - `docs/docs_cache_state.yaml` - - `docs/ARCHITECTURE.md` - - `docs/CODE_STYLE.md` - - `docs/FINDINGS.md` - - `package.json` - - `src/routes/api/v1/context/+server.ts` - - `src/lib/server/api/formatters.ts` - - `src/lib/server/api/token-budget.ts` - - `src/lib/server/search/query-preprocessor.ts` - - `src/lib/server/search/search.service.ts` - - `src/lib/server/search/hybrid.search.service.ts` - - `src/lib/server/mappers/context-response.mapper.ts` - - `src/lib/server/models/context-response.ts` - - `src/lib/server/models/search-result.ts` - - `src/lib/server/parser/index.ts` - - `src/lib/server/parser/code.parser.ts` - - `src/lib/server/parser/markdown.parser.ts` + - `docs/docs_cache_state.yaml` + - `docs/ARCHITECTURE.md` + - `docs/CODE_STYLE.md` + - `docs/FINDINGS.md` + - `package.json` + - `src/routes/api/v1/context/+server.ts` + - `src/lib/server/api/formatters.ts` + - `src/lib/server/api/token-budget.ts` + - `src/lib/server/search/query-preprocessor.ts` + - `src/lib/server/search/search.service.ts` + - `src/lib/server/search/hybrid.search.service.ts` + - `src/lib/server/mappers/context-response.mapper.ts` + - `src/lib/server/models/context-response.ts` + - `src/lib/server/models/search-result.ts` + - `src/lib/server/parser/index.ts` + - `src/lib/server/parser/code.parser.ts` + - `src/lib/server/parser/markdown.parser.ts` - Findings: - - The documentation cache was trusted, but the architecture summary no longer captured current retrieval behavior: query preprocessing now sanitizes punctuation-heavy input for FTS5, semantic mode can bypass FTS entirely, and auto or hybrid retrieval can fall back to vector search when keyword search returns no candidates. - - Plain-text and JSON context formatting now carry repository and version metadata, and the text formatter emits an explicit no-results section instead of an empty body. 
- - Token budgeting now skips individual over-budget snippets and continues evaluating lower-ranked candidates, which changes the response-selection behavior described at the architecture level. - - Parser coverage now explicitly includes Markdown, code, config, HTML-like, and plain-text inputs, so the architecture summary needed to reflect that broader file-type handling. - - The conventions documented in CODE_STYLE.md still match the current repository: strict TypeScript, tab indentation, ESM imports, Prettier and ESLint flat config, and pragmatic service-oriented server modules. + - The documentation cache was trusted, but the architecture summary no longer captured current retrieval behavior: query preprocessing now sanitizes punctuation-heavy input for FTS5, semantic mode can bypass FTS entirely, and auto or hybrid retrieval can fall back to vector search when keyword search returns no candidates. + - Plain-text and JSON context formatting now carry repository and version metadata, and the text formatter emits an explicit no-results section instead of an empty body. + - Token budgeting now skips individual over-budget snippets and continues evaluating lower-ranked candidates, which changes the response-selection behavior described at the architecture level. + - Parser coverage now explicitly includes Markdown, code, config, HTML-like, and plain-text inputs, so the architecture summary needed to reflect that broader file-type handling. + - The conventions documented in CODE_STYLE.md still match the current repository: strict TypeScript, tab indentation, ESM imports, Prettier and ESLint flat config, and pragmatic service-oriented server modules. - Risks / follow-ups: - - Future cache invalidation should continue to distinguish between behavioral changes that affect architecture docs and localized implementation changes that do not affect the style guide. 
- - If the public API contract becomes externally versioned, the new context metadata fields likely deserve a dedicated API document instead of only architecture-level coverage. + - Future cache invalidation should continue to distinguish between behavioral changes that affect architecture docs and localized implementation changes that do not affect the style guide. + - If the public API contract becomes externally versioned, the new context metadata fields likely deserve a dedicated API document instead of only architecture-level coverage. ### 2026-03-27 — FEEDBACK-0001 planning research - Task: Plan the retrieval-fix iteration covering FTS query safety, hybrid fallback, empty-result behavior, result metadata, token budgeting, and parser chunking. - Files inspected: - - `package.json` - - `src/routes/api/v1/context/+server.ts` - - `src/lib/server/search/query-preprocessor.ts` - - `src/lib/server/search/search.service.ts` - - `src/lib/server/search/hybrid.search.service.ts` - - `src/lib/server/search/vector.search.ts` - - `src/lib/server/api/token-budget.ts` - - `src/lib/server/api/formatters.ts` - - `src/lib/server/mappers/context-response.mapper.ts` - - `src/lib/server/models/context-response.ts` - - `src/lib/server/models/search-result.ts` - - `src/lib/server/parser/code.parser.ts` - - `src/lib/server/search/search.service.test.ts` - - `src/lib/server/search/hybrid.search.service.test.ts` - - `src/lib/server/api/formatters.test.ts` - - `src/lib/server/parser/code.parser.test.ts` - - `src/routes/api/v1/api-contract.integration.test.ts` - - `src/mcp/tools/query-docs.ts` - - `src/mcp/client.ts` + - `package.json` + - `src/routes/api/v1/context/+server.ts` + - `src/lib/server/search/query-preprocessor.ts` + - `src/lib/server/search/search.service.ts` + - `src/lib/server/search/hybrid.search.service.ts` + - `src/lib/server/search/vector.search.ts` + - `src/lib/server/api/token-budget.ts` + - `src/lib/server/api/formatters.ts` + - 
`src/lib/server/mappers/context-response.mapper.ts` + - `src/lib/server/models/context-response.ts` + - `src/lib/server/models/search-result.ts` + - `src/lib/server/parser/code.parser.ts` + - `src/lib/server/search/search.service.test.ts` + - `src/lib/server/search/hybrid.search.service.test.ts` + - `src/lib/server/api/formatters.test.ts` + - `src/lib/server/parser/code.parser.test.ts` + - `src/routes/api/v1/api-contract.integration.test.ts` + - `src/mcp/tools/query-docs.ts` + - `src/mcp/client.ts` - Findings: - - `better-sqlite3` `^12.6.2` backs the affected search path; the code already uses bound parameters for `MATCH`, so the practical fix belongs in query normalization and fallback handling rather than SQL string construction. - - `query-preprocessor.ts` only strips parentheses and appends a trailing wildcard. Other code-like punctuation currently reaches the FTS execution path unsanitized. - - `search.service.ts` sends the preprocessed text directly to `snippets_fts MATCH ?` and already returns `[]` for blank processed queries. - - `hybrid.search.service.ts` always executes keyword search before semantic branching. In the current flow, an FTS parse failure can abort `auto`, `hybrid`, and `semantic` requests before vector retrieval runs. - - `vector.search.ts` already preserves `repositoryId`, `versionId`, and `profileId` filtering and does not need architectural changes for this iteration. - - `token-budget.ts` stops at the first over-budget snippet instead of skipping that item and continuing through later ranked results. - - `formatContextTxt([], [])` returns an empty string, so `/api/v1/context?type=txt` can emit an empty `200 OK` body today. - - `context-response.mapper.ts` and `context-response.ts` expose snippet content and breadcrumb/page title but do not identify local TrueRef origin, repository source metadata, or normalized snippet origin labels. 
- - `code.parser.ts` splits primarily at top-level declarations; class/object member functions remain in coarse chunks, which limits method-level recall for camelCase API queries. - - Existing relevant automated coverage is concentrated in the search, formatter, and parser unit tests; `/api/v1/context` contract coverage currently omits the context endpoint entirely. + - `better-sqlite3` `^12.6.2` backs the affected search path; the code already uses bound parameters for `MATCH`, so the practical fix belongs in query normalization and fallback handling rather than SQL string construction. + - `query-preprocessor.ts` only strips parentheses and appends a trailing wildcard. Other code-like punctuation currently reaches the FTS execution path unsanitized. + - `search.service.ts` sends the preprocessed text directly to `snippets_fts MATCH ?` and already returns `[]` for blank processed queries. + - `hybrid.search.service.ts` always executes keyword search before semantic branching. In the current flow, an FTS parse failure can abort `auto`, `hybrid`, and `semantic` requests before vector retrieval runs. + - `vector.search.ts` already preserves `repositoryId`, `versionId`, and `profileId` filtering and does not need architectural changes for this iteration. + - `token-budget.ts` stops at the first over-budget snippet instead of skipping that item and continuing through later ranked results. + - `formatContextTxt([], [])` returns an empty string, so `/api/v1/context?type=txt` can emit an empty `200 OK` body today. + - `context-response.mapper.ts` and `context-response.ts` expose snippet content and breadcrumb/page title but do not identify local TrueRef origin, repository source metadata, or normalized snippet origin labels. + - `code.parser.ts` splits primarily at top-level declarations; class/object member functions remain in coarse chunks, which limits method-level recall for camelCase API queries. 
+ - Existing relevant automated coverage is concentrated in the search, formatter, and parser unit tests; `/api/v1/context` contract coverage currently omits the context endpoint entirely. - Risks / follow-ups: - - Response-shape changes must be additive because `src/mcp/client.ts`, `src/mcp/tools/query-docs.ts`, and UI consumers expect the current top-level keys to remain present. - - Parser improvements should stay inside `parseCodeFile()` and existing chunking helpers to avoid turning this fix iteration into a schema or pipeline redesign. + - Response-shape changes must be additive because `src/mcp/client.ts`, `src/mcp/tools/query-docs.ts`, and UI consumers expect the current top-level keys to remain present. + - Parser improvements should stay inside `parseCodeFile()` and existing chunking helpers to avoid turning this fix iteration into a schema or pipeline redesign. ### 2026-03-27 — FEEDBACK-0001 SQLite FTS5 syntax research - Task: Verify the FTS5 query-grammar constraints that affect punctuation-heavy local search queries. - Files inspected: - - `package.json` - - `src/lib/server/search/query-preprocessor.ts` - - `src/lib/server/search/search.service.ts` - - `src/lib/server/search/hybrid.search.service.ts` + - `package.json` + - `src/lib/server/search/query-preprocessor.ts` + - `src/lib/server/search/search.service.ts` + - `src/lib/server/search/hybrid.search.service.ts` - Findings: - - `better-sqlite3` is pinned at `^12.6.2` in `package.json`, and the application binds the `MATCH` string as a parameter instead of interpolating SQL directly. - - The canonical SQLite FTS5 docs state that barewords may contain letters, digits, underscore, non-ASCII characters, and the substitute character; strings containing other punctuation must be quoted or they become syntax errors in `MATCH` expressions. 
- - The same docs state that prefix search is expressed by placing `*` after the token or phrase, not inside quotes, which matches the current trailing-wildcard strategy in `query-preprocessor.ts`. - - SQLite documents that FTS5 is stricter than FTS3/4 about unrecognized punctuation in query strings, which confirms that code-like user input should be normalized before it reaches `snippets_fts MATCH ?`. - - Based on the current code path, the practical fix remains application-side sanitization and fallback behavior in `query-preprocessor.ts` and `hybrid.search.service.ts`, not SQL construction changes. + - `better-sqlite3` is pinned at `^12.6.2` in `package.json`, and the application binds the `MATCH` string as a parameter instead of interpolating SQL directly. + - The canonical SQLite FTS5 docs state that barewords may contain letters, digits, underscore, non-ASCII characters, and the substitute character; strings containing other punctuation must be quoted or they become syntax errors in `MATCH` expressions. + - The same docs state that prefix search is expressed by placing `*` after the token or phrase, not inside quotes, which matches the current trailing-wildcard strategy in `query-preprocessor.ts`. + - SQLite documents that FTS5 is stricter than FTS3/4 about unrecognized punctuation in query strings, which confirms that code-like user input should be normalized before it reaches `snippets_fts MATCH ?`. + - Based on the current code path, the practical fix remains application-side sanitization and fallback behavior in `query-preprocessor.ts` and `hybrid.search.service.ts`, not SQL construction changes. - Risks / follow-ups: - - Over-sanitizing punctuation-heavy inputs could erase useful identifiers, so the implementation should preserve searchable alphanumeric and underscore tokens while discarding grammar-breaking punctuation. 
- - Prefix expansion should remain on the final searchable token only so the fix preserves current query-cost expectations and test semantics. + - Over-sanitizing punctuation-heavy inputs could erase useful identifiers, so the implementation should preserve searchable alphanumeric and underscore tokens while discarding grammar-breaking punctuation. + - Prefix expansion should remain on the final searchable token only so the fix preserves current query-cost expectations and test semantics. diff --git a/docs/PRD.md b/docs/PRD.md index ca668c2..7c07988 100644 --- a/docs/PRD.md +++ b/docs/PRD.md @@ -17,6 +17,7 @@ The core use case is enabling AI coding assistants (Claude Code, Cursor, Zed, et ## 2. Problem Statement ### 2.1 Context7's Limitations + - The indexing and crawling backend is entirely private and closed-source. - Only public libraries already in the context7.com catalog are available. - Private, internal, or niche repositories cannot be added. @@ -24,6 +25,7 @@ The core use case is enabling AI coding assistants (Claude Code, Cursor, Zed, et - No way to self-host for air-gapped or compliance-constrained environments. ### 2.2 The Gap + Teams with internal SDKs, private libraries, proprietary documentation, or a need for data sovereignty have no tooling that provides context7-equivalent LLM documentation retrieval. --- @@ -31,6 +33,7 @@ Teams with internal SDKs, private libraries, proprietary documentation, or a nee ## 3. Goals & Non-Goals ### Goals + - Replicate all context7 capabilities: library search, documentation retrieval, MCP tools (`resolve-library-id`, `query-docs`). - Support both GitHub-hosted and local filesystem repositories. - Provide a full indexing pipeline: crawl → parse → chunk → embed → store → query. @@ -42,6 +45,7 @@ Teams with internal SDKs, private libraries, proprietary documentation, or a nee - Self-hostable with minimal dependencies (SQLite-first, no external vector DB required). 
### Non-Goals (v1) + - Authentication & authorization (deferred to a future version). - Skill generation (context7 CLI skill feature). - Multi-tenant SaaS mode. @@ -54,9 +58,11 @@ Teams with internal SDKs, private libraries, proprietary documentation, or a nee ## 4. Users & Personas ### Primary: The Developer / Tech Lead + Configures TrueRef, adds repositories, integrates the MCP server with their AI coding assistant. Technical, comfortable with CLI and config files. ### Secondary: The AI Coding Assistant + The "user" at query time. Calls `resolve-library-id` and `query-docs` via MCP to retrieve documentation snippets for code generation. --- @@ -100,25 +106,27 @@ The "user" at query time. Calls `resolve-library-id` and `query-docs` via MCP to ``` ### Technology Stack -| Layer | Technology | -|-------|-----------| -| Framework | SvelteKit (Node adapter) | -| Language | TypeScript | -| Database | SQLite via better-sqlite3 + drizzle-orm | -| Full-Text Search | SQLite FTS5 | -| Vector Search | SQLite `sqlite-vec` extension (cosine similarity) | -| Embeddings | Pluggable: local (transformers.js / ONNX) or OpenAI-compatible API | -| MCP Protocol | `@modelcontextprotocol/sdk` | -| HTTP | SvelteKit API routes + optional standalone MCP HTTP server | -| CSS | TailwindCSS v4 | -| Testing | Vitest | -| Linting | ESLint + Prettier | + +| Layer | Technology | +| ---------------- | ------------------------------------------------------------------ | +| Framework | SvelteKit (Node adapter) | +| Language | TypeScript | +| Database | SQLite via better-sqlite3 + drizzle-orm | +| Full-Text Search | SQLite FTS5 | +| Vector Search | SQLite `sqlite-vec` extension (cosine similarity) | +| Embeddings | Pluggable: local (transformers.js / ONNX) or OpenAI-compatible API | +| MCP Protocol | `@modelcontextprotocol/sdk` | +| HTTP | SvelteKit API routes + optional standalone MCP HTTP server | +| CSS | TailwindCSS v4 | +| Testing | Vitest | +| Linting | ESLint + Prettier | --- ## 6. 
Data Model ### 6.1 Repositories + A `Repository` is the top-level entity. It maps to a GitHub repo or local directory. ``` @@ -141,6 +149,7 @@ Repository { ``` ### 6.2 Repository Versions + ``` RepositoryVersion { id TEXT PRIMARY KEY @@ -153,6 +162,7 @@ RepositoryVersion { ``` ### 6.3 Documents (parsed files) + ``` Document { id TEXT PRIMARY KEY @@ -169,6 +179,7 @@ Document { ``` ### 6.4 Snippets (indexed chunks) + ``` Snippet { id TEXT PRIMARY KEY @@ -186,6 +197,7 @@ Snippet { ``` ### 6.5 Indexing Jobs + ``` IndexingJob { id TEXT PRIMARY KEY @@ -203,6 +215,7 @@ IndexingJob { ``` ### 6.6 Repository Configuration (`trueref.json`) + ``` RepositoryConfig { repositoryId TEXT FK → Repository @@ -221,15 +234,19 @@ RepositoryConfig { ## 7. Core Features ### F1: Repository Management + Add, remove, update, and list repositories. Support GitHub (public/private via token) and local filesystem sources. Trigger indexing on demand or on schedule. ### F2: GitHub Crawler + Fetch repository file trees via GitHub Trees API. Download file contents. Respect `trueref.json` include/exclude rules. Support rate limiting and incremental re-indexing (checksum-based). ### F3: Local Filesystem Crawler + Walk directory trees. Apply include/exclude rules from `trueref.json`. Watch for file changes (optional). ### F4: Document Parser & Chunker + - Parse Markdown files into sections (heading-based splitting). - Extract code blocks from Markdown. - Parse standalone code files into function/class-level chunks. @@ -237,16 +254,19 @@ Walk directory trees. Apply include/exclude rules from `trueref.json`. Watch for - Produce structured `Snippet` records (type: "code" or "info"). ### F5: Embedding & Vector Storage + - Generate embeddings for each snippet using a pluggable embeddings backend. - Store embeddings as binary blobs in SQLite (sqlite-vec). - Support fallback to FTS5-only search when no embedding provider is configured. 
### F6: Semantic Search Engine + - Hybrid search: vector similarity + FTS5 keyword matching (BM25) with reciprocal rank fusion. - Query-time retrieval: given `libraryId + query`, return ranked snippets. - Library search: given `libraryName + query`, return matching repositories. ### F7: REST API (`/api/v1/*`) + - `GET /api/v1/libs/search?query=&libraryName=` — search libraries (context7-compatible) - `GET /api/v1/context?query=&libraryId=&type=json|txt` — fetch documentation - `GET /api/v1/libs` — list all indexed libraries @@ -256,12 +276,14 @@ Walk directory trees. Apply include/exclude rules from `trueref.json`. Watch for - `GET /api/v1/jobs/:id` — get indexing job status ### F8: MCP Server + - Tool: `resolve-library-id` — search for libraries by name - Tool: `query-docs` — fetch documentation by libraryId + query - Transport: stdio (primary), HTTP (optional) - Compatible with Claude Code, Cursor, and other MCP-aware tools ### F9: Web UI — Repository Dashboard + - List all repositories with status, snippet count, last indexed date - Add/remove repositories (GitHub URL or local path) - Trigger re-indexing @@ -269,23 +291,27 @@ Walk directory trees. Apply include/exclude rules from `trueref.json`. 
Watch for - View repository config (`trueref.json`) ### F10: Web UI — Search Explorer + - Interactive search interface (resolve library → query docs) - Preview snippets with syntax highlighting - View raw document content ### F11: `trueref.json` Config Support + - Parse `trueref.json` from repo root (or `context7.json` for compatibility) - Apply `folders`, `excludeFolders`, `excludeFiles` during crawling - Inject `rules` into LLM context alongside snippets - Support `previousVersions` for versioned documentation ### F12: Indexing Pipeline & Job Queue + - SQLite-backed job queue (no external message broker required) - Sequential processing with progress tracking - Error recovery and retry logic - Incremental re-indexing using file checksums ### F13: Version Support + - Index specific git tags/branches per repository - Serve version-specific context when libraryId includes version (`/owner/repo/v1.2.3`) - UI for managing available versions @@ -296,12 +322,13 @@ Walk directory trees. Apply include/exclude rules from `trueref.json`. Watch for TrueRef's REST API mirrors context7's `/api/v2/*` interface to allow drop-in compatibility: -| context7 Endpoint | TrueRef Endpoint | Notes | -|-------------------|-----------------|-------| -| `GET /api/v2/libs/search` | `GET /api/v1/libs/search` | Same query params | -| `GET /api/v2/context` | `GET /api/v1/context` | Same query params, same response shape | +| context7 Endpoint | TrueRef Endpoint | Notes | +| ------------------------- | ------------------------- | -------------------------------------- | +| `GET /api/v2/libs/search` | `GET /api/v1/libs/search` | Same query params | +| `GET /api/v2/context` | `GET /api/v1/context` | Same query params, same response shape | The MCP tool names and input schemas are identical: + - `resolve-library-id` with `libraryName` + `query` - `query-docs` with `libraryId` + `query` @@ -312,20 +339,24 @@ Library IDs follow the same convention: `/owner/repo` or `/owner/repo/version`. ## 9. 
Non-Functional Requirements ### Performance + - Library search: < 200ms p99 - Documentation retrieval: < 500ms p99 for 20 snippets - Indexing throughput: > 1,000 files/minute (GitHub API rate-limited) ### Reliability + - Failed indexing jobs must not corrupt existing indexed data - Atomic snippet replacement during re-indexing ### Portability + - Single SQLite file for all data - Runs on Linux, macOS, Windows (Node.js 20+) - No required external services beyond optional embedding API ### Scalability (v1 constraints) + - Designed for single-node deployment - SQLite suitable for up to ~500 repositories, ~500k snippets @@ -333,26 +364,26 @@ Library IDs follow the same convention: `/owner/repo` or `/owner/repo/version`. ## 10. Milestones & Feature Order -| ID | Feature | Priority | Depends On | -|----|---------|----------|-----------| -| TRUEREF-0001 | Database schema & core data models | P0 | — | -| TRUEREF-0002 | Repository management service & REST API | P0 | TRUEREF-0001 | -| TRUEREF-0003 | GitHub repository crawler | P0 | TRUEREF-0001 | -| TRUEREF-0004 | Local filesystem crawler | P1 | TRUEREF-0001 | -| TRUEREF-0005 | Document parser & chunker | P0 | TRUEREF-0001 | -| TRUEREF-0006 | SQLite FTS5 full-text search | P0 | TRUEREF-0005 | -| TRUEREF-0007 | Embedding generation & vector storage | P1 | TRUEREF-0005 | -| TRUEREF-0008 | Hybrid semantic search engine | P1 | TRUEREF-0006, TRUEREF-0007 | -| TRUEREF-0009 | Indexing pipeline & job queue | P0 | TRUEREF-0003, TRUEREF-0005 | -| TRUEREF-0010 | REST API (search + context endpoints) | P0 | TRUEREF-0006, TRUEREF-0009 | -| TRUEREF-0011 | MCP server (stdio transport) | P0 | TRUEREF-0010 | -| TRUEREF-0012 | MCP server (HTTP transport) | P1 | TRUEREF-0011 | -| TRUEREF-0013 | `trueref.json` config file support | P0 | TRUEREF-0003 | -| TRUEREF-0014 | Repository version management | P1 | TRUEREF-0003 | -| TRUEREF-0015 | Web UI — repository dashboard | P1 | TRUEREF-0002, TRUEREF-0009 | -| TRUEREF-0016 | Web UI — search 
explorer | P2 | TRUEREF-0010, TRUEREF-0015 | -| TRUEREF-0017 | Incremental re-indexing (checksum diff) | P1 | TRUEREF-0009 | -| TRUEREF-0018 | Embedding provider configuration UI | P2 | TRUEREF-0007, TRUEREF-0015 | +| ID | Feature | Priority | Depends On | +| ------------ | ---------------------------------------- | -------- | -------------------------- | +| TRUEREF-0001 | Database schema & core data models | P0 | — | +| TRUEREF-0002 | Repository management service & REST API | P0 | TRUEREF-0001 | +| TRUEREF-0003 | GitHub repository crawler | P0 | TRUEREF-0001 | +| TRUEREF-0004 | Local filesystem crawler | P1 | TRUEREF-0001 | +| TRUEREF-0005 | Document parser & chunker | P0 | TRUEREF-0001 | +| TRUEREF-0006 | SQLite FTS5 full-text search | P0 | TRUEREF-0005 | +| TRUEREF-0007 | Embedding generation & vector storage | P1 | TRUEREF-0005 | +| TRUEREF-0008 | Hybrid semantic search engine | P1 | TRUEREF-0006, TRUEREF-0007 | +| TRUEREF-0009 | Indexing pipeline & job queue | P0 | TRUEREF-0003, TRUEREF-0005 | +| TRUEREF-0010 | REST API (search + context endpoints) | P0 | TRUEREF-0006, TRUEREF-0009 | +| TRUEREF-0011 | MCP server (stdio transport) | P0 | TRUEREF-0010 | +| TRUEREF-0012 | MCP server (HTTP transport) | P1 | TRUEREF-0011 | +| TRUEREF-0013 | `trueref.json` config file support | P0 | TRUEREF-0003 | +| TRUEREF-0014 | Repository version management | P1 | TRUEREF-0003 | +| TRUEREF-0015 | Web UI — repository dashboard | P1 | TRUEREF-0002, TRUEREF-0009 | +| TRUEREF-0016 | Web UI — search explorer | P2 | TRUEREF-0010, TRUEREF-0015 | +| TRUEREF-0017 | Incremental re-indexing (checksum diff) | P1 | TRUEREF-0009 | +| TRUEREF-0018 | Embedding provider configuration UI | P2 | TRUEREF-0007, TRUEREF-0015 | --- diff --git a/docs/features/TRUEREF-0001.md b/docs/features/TRUEREF-0001.md index eae550b..112b163 100644 --- a/docs/features/TRUEREF-0001.md +++ b/docs/features/TRUEREF-0001.md @@ -31,24 +31,26 @@ Represents an indexed library source (GitHub repo or local directory). 
```typescript export const repositories = sqliteTable('repositories', { - id: text('id').primaryKey(), // e.g. "/facebook/react" or "/local/my-sdk" - title: text('title').notNull(), - description: text('description'), - source: text('source', { enum: ['github', 'local'] }).notNull(), - sourceUrl: text('source_url').notNull(), // GitHub URL or absolute local path - branch: text('branch').default('main'), - state: text('state', { - enum: ['pending', 'indexing', 'indexed', 'error'] - }).notNull().default('pending'), - totalSnippets: integer('total_snippets').default(0), - totalTokens: integer('total_tokens').default(0), - trustScore: real('trust_score').default(0), // 0.0–10.0 - benchmarkScore: real('benchmark_score').default(0), // 0.0–100.0 - stars: integer('stars'), - githubToken: text('github_token'), // encrypted PAT for private repos - lastIndexedAt: integer('last_indexed_at', { mode: 'timestamp' }), - createdAt: integer('created_at', { mode: 'timestamp' }).notNull(), - updatedAt: integer('updated_at', { mode: 'timestamp' }).notNull(), + id: text('id').primaryKey(), // e.g. 
"/facebook/react" or "/local/my-sdk" + title: text('title').notNull(), + description: text('description'), + source: text('source', { enum: ['github', 'local'] }).notNull(), + sourceUrl: text('source_url').notNull(), // GitHub URL or absolute local path + branch: text('branch').default('main'), + state: text('state', { + enum: ['pending', 'indexing', 'indexed', 'error'] + }) + .notNull() + .default('pending'), + totalSnippets: integer('total_snippets').default(0), + totalTokens: integer('total_tokens').default(0), + trustScore: real('trust_score').default(0), // 0.0–10.0 + benchmarkScore: real('benchmark_score').default(0), // 0.0–100.0 + stars: integer('stars'), + githubToken: text('github_token'), // encrypted PAT for private repos + lastIndexedAt: integer('last_indexed_at', { mode: 'timestamp' }), + createdAt: integer('created_at', { mode: 'timestamp' }).notNull(), + updatedAt: integer('updated_at', { mode: 'timestamp' }).notNull() }); ``` @@ -58,17 +60,20 @@ Tracks indexed git tags/branches beyond the default branch. ```typescript export const repositoryVersions = sqliteTable('repository_versions', { - id: text('id').primaryKey(), // e.g. "/facebook/react/v18.3.0" - repositoryId: text('repository_id').notNull() - .references(() => repositories.id, { onDelete: 'cascade' }), - tag: text('tag').notNull(), // git tag or branch name - title: text('title'), - state: text('state', { - enum: ['pending', 'indexing', 'indexed', 'error'] - }).notNull().default('pending'), - totalSnippets: integer('total_snippets').default(0), - indexedAt: integer('indexed_at', { mode: 'timestamp' }), - createdAt: integer('created_at', { mode: 'timestamp' }).notNull(), + id: text('id').primaryKey(), // e.g. 
"/facebook/react/v18.3.0" + repositoryId: text('repository_id') + .notNull() + .references(() => repositories.id, { onDelete: 'cascade' }), + tag: text('tag').notNull(), // git tag or branch name + title: text('title'), + state: text('state', { + enum: ['pending', 'indexing', 'indexed', 'error'] + }) + .notNull() + .default('pending'), + totalSnippets: integer('total_snippets').default(0), + indexedAt: integer('indexed_at', { mode: 'timestamp' }), + createdAt: integer('created_at', { mode: 'timestamp' }).notNull() }); ``` @@ -78,17 +83,17 @@ A parsed source file within a repository. ```typescript export const documents = sqliteTable('documents', { - id: text('id').primaryKey(), // UUID - repositoryId: text('repository_id').notNull() - .references(() => repositories.id, { onDelete: 'cascade' }), - versionId: text('version_id') - .references(() => repositoryVersions.id, { onDelete: 'cascade' }), - filePath: text('file_path').notNull(), // relative path within repo - title: text('title'), - language: text('language'), // e.g. "typescript", "markdown" - tokenCount: integer('token_count').default(0), - checksum: text('checksum').notNull(), // SHA-256 of file content - indexedAt: integer('indexed_at', { mode: 'timestamp' }).notNull(), + id: text('id').primaryKey(), // UUID + repositoryId: text('repository_id') + .notNull() + .references(() => repositories.id, { onDelete: 'cascade' }), + versionId: text('version_id').references(() => repositoryVersions.id, { onDelete: 'cascade' }), + filePath: text('file_path').notNull(), // relative path within repo + title: text('title'), + language: text('language'), // e.g. "typescript", "markdown" + tokenCount: integer('token_count').default(0), + checksum: text('checksum').notNull(), // SHA-256 of file content + indexedAt: integer('indexed_at', { mode: 'timestamp' }).notNull() }); ``` @@ -98,20 +103,21 @@ An indexed chunk of content, the atomic unit of search. 
```typescript export const snippets = sqliteTable('snippets', { - id: text('id').primaryKey(), // UUID - documentId: text('document_id').notNull() - .references(() => documents.id, { onDelete: 'cascade' }), - repositoryId: text('repository_id').notNull() - .references(() => repositories.id, { onDelete: 'cascade' }), - versionId: text('version_id') - .references(() => repositoryVersions.id, { onDelete: 'cascade' }), - type: text('type', { enum: ['code', 'info'] }).notNull(), - title: text('title'), - content: text('content').notNull(), // searchable text / code - language: text('language'), - breadcrumb: text('breadcrumb'), // e.g. "Installation > Getting Started" - tokenCount: integer('token_count').default(0), - createdAt: integer('created_at', { mode: 'timestamp' }).notNull(), + id: text('id').primaryKey(), // UUID + documentId: text('document_id') + .notNull() + .references(() => documents.id, { onDelete: 'cascade' }), + repositoryId: text('repository_id') + .notNull() + .references(() => repositories.id, { onDelete: 'cascade' }), + versionId: text('version_id').references(() => repositoryVersions.id, { onDelete: 'cascade' }), + type: text('type', { enum: ['code', 'info'] }).notNull(), + title: text('title'), + content: text('content').notNull(), // searchable text / code + language: text('language'), + breadcrumb: text('breadcrumb'), // e.g. "Installation > Getting Started" + tokenCount: integer('token_count').default(0), + createdAt: integer('created_at', { mode: 'timestamp' }).notNull() }); ``` @@ -121,12 +127,13 @@ Stores vector embeddings separately to keep snippets table lean. 
```typescript export const snippetEmbeddings = sqliteTable('snippet_embeddings', { - snippetId: text('snippet_id').primaryKey() - .references(() => snippets.id, { onDelete: 'cascade' }), - model: text('model').notNull(), // embedding model identifier - dimensions: integer('dimensions').notNull(), - embedding: blob('embedding').notNull(), // Float32Array as binary blob - createdAt: integer('created_at', { mode: 'timestamp' }).notNull(), + snippetId: text('snippet_id') + .primaryKey() + .references(() => snippets.id, { onDelete: 'cascade' }), + model: text('model').notNull(), // embedding model identifier + dimensions: integer('dimensions').notNull(), + embedding: blob('embedding').notNull(), // Float32Array as binary blob + createdAt: integer('created_at', { mode: 'timestamp' }).notNull() }); ``` @@ -136,20 +143,23 @@ Tracks asynchronous indexing operations. ```typescript export const indexingJobs = sqliteTable('indexing_jobs', { - id: text('id').primaryKey(), // UUID - repositoryId: text('repository_id').notNull() - .references(() => repositories.id, { onDelete: 'cascade' }), - versionId: text('version_id'), - status: text('status', { - enum: ['queued', 'running', 'done', 'failed'] - }).notNull().default('queued'), - progress: integer('progress').default(0), // 0–100 - totalFiles: integer('total_files').default(0), - processedFiles: integer('processed_files').default(0), - error: text('error'), - startedAt: integer('started_at', { mode: 'timestamp' }), - completedAt: integer('completed_at', { mode: 'timestamp' }), - createdAt: integer('created_at', { mode: 'timestamp' }).notNull(), + id: text('id').primaryKey(), // UUID + repositoryId: text('repository_id') + .notNull() + .references(() => repositories.id, { onDelete: 'cascade' }), + versionId: text('version_id'), + status: text('status', { + enum: ['queued', 'running', 'done', 'failed'] + }) + .notNull() + .default('queued'), + progress: integer('progress').default(0), // 0–100 + totalFiles: 
integer('total_files').default(0), + processedFiles: integer('processed_files').default(0), + error: text('error'), + startedAt: integer('started_at', { mode: 'timestamp' }), + completedAt: integer('completed_at', { mode: 'timestamp' }), + createdAt: integer('created_at', { mode: 'timestamp' }).notNull() }); ``` @@ -159,17 +169,19 @@ Stores parsed `trueref.json` / `context7.json` configuration. ```typescript export const repositoryConfigs = sqliteTable('repository_configs', { - repositoryId: text('repository_id').primaryKey() - .references(() => repositories.id, { onDelete: 'cascade' }), - projectTitle: text('project_title'), - description: text('description'), - folders: text('folders', { mode: 'json' }).$type(), - excludeFolders: text('exclude_folders', { mode: 'json' }).$type(), - excludeFiles: text('exclude_files', { mode: 'json' }).$type(), - rules: text('rules', { mode: 'json' }).$type(), - previousVersions: text('previous_versions', { mode: 'json' }) - .$type<{ tag: string; title: string }[]>(), - updatedAt: integer('updated_at', { mode: 'timestamp' }).notNull(), + repositoryId: text('repository_id') + .primaryKey() + .references(() => repositories.id, { onDelete: 'cascade' }), + projectTitle: text('project_title'), + description: text('description'), + folders: text('folders', { mode: 'json' }).$type(), + excludeFolders: text('exclude_folders', { mode: 'json' }).$type(), + excludeFiles: text('exclude_files', { mode: 'json' }).$type(), + rules: text('rules', { mode: 'json' }).$type(), + previousVersions: text('previous_versions', { mode: 'json' }).$type< + { tag: string; title: string }[] + >(), + updatedAt: integer('updated_at', { mode: 'timestamp' }).notNull() }); ``` @@ -179,9 +191,9 @@ Key-value store for global application settings. 
```typescript export const settings = sqliteTable('settings', { - key: text('key').primaryKey(), - value: text('value', { mode: 'json' }), - updatedAt: integer('updated_at', { mode: 'timestamp' }).notNull(), + key: text('key').primaryKey(), + value: text('value', { mode: 'json' }), + updatedAt: integer('updated_at', { mode: 'timestamp' }).notNull() }); ``` diff --git a/docs/features/TRUEREF-0002.md b/docs/features/TRUEREF-0002.md index ca4c11d..0cd5533 100644 --- a/docs/features/TRUEREF-0002.md +++ b/docs/features/TRUEREF-0002.md @@ -31,10 +31,12 @@ Implement the core `RepositoryService` that handles CRUD operations for reposito ## Repository ID Generation GitHub repositories: + - Input URL: `https://github.com/facebook/react` or `github.com/facebook/react` - Generated ID: `/facebook/react` Local repositories: + - Input path: `/home/user/projects/my-sdk` - Generated ID: `/local/my-sdk` (basename of path, slugified) - Collision resolution: append `-2`, `-3`, etc. @@ -49,44 +51,44 @@ Version-specific IDs: `/facebook/react/v18.3.0` // src/lib/server/services/repository.service.ts export interface AddRepositoryInput { - source: 'github' | 'local'; - sourceUrl: string; // GitHub URL or absolute local path - title?: string; // override auto-detected title - description?: string; - branch?: string; // GitHub: default branch; Local: n/a - githubToken?: string; // for private GitHub repos + source: 'github' | 'local'; + sourceUrl: string; // GitHub URL or absolute local path + title?: string; // override auto-detected title + description?: string; + branch?: string; // GitHub: default branch; Local: n/a + githubToken?: string; // for private GitHub repos } export interface UpdateRepositoryInput { - title?: string; - description?: string; - branch?: string; - githubToken?: string; + title?: string; + description?: string; + branch?: string; + githubToken?: string; } export class RepositoryService { - constructor(private db: BetterSQLite3.Database) {} + constructor(private 
db: BetterSQLite3.Database) {} - async list(options?: { - state?: Repository['state']; - limit?: number; - offset?: number; - }): Promise + async list(options?: { + state?: Repository['state']; + limit?: number; + offset?: number; + }): Promise; - async get(id: string): Promise + async get(id: string): Promise; - async add(input: AddRepositoryInput): Promise + async add(input: AddRepositoryInput): Promise; - async update(id: string, input: UpdateRepositoryInput): Promise + async update(id: string, input: UpdateRepositoryInput): Promise; - async remove(id: string): Promise + async remove(id: string): Promise; - async getStats(id: string): Promise<{ - totalSnippets: number; - totalTokens: number; - totalDocuments: number; - lastIndexedAt: Date | null; - }> + async getStats(id: string): Promise<{ + totalSnippets: number; + totalTokens: number; + totalDocuments: number; + lastIndexedAt: Date | null; + }>; } ``` @@ -97,48 +99,52 @@ export class RepositoryService { ### `GET /api/v1/libs` Query parameters: + - `state` (optional): filter by state (`pending`, `indexed`, `error`, etc.) 
- `limit` (optional, default 50): max results - `offset` (optional, default 0): pagination offset Response `200`: + ```json { - "libraries": [ - { - "id": "/facebook/react", - "title": "React", - "description": "...", - "source": "github", - "state": "indexed", - "totalSnippets": 1234, - "totalTokens": 98000, - "trustScore": 8.5, - "stars": 228000, - "lastIndexedAt": "2026-03-22T10:00:00Z", - "versions": ["v18.3.0", "v17.0.2"] - } - ], - "total": 12, - "limit": 50, - "offset": 0 + "libraries": [ + { + "id": "/facebook/react", + "title": "React", + "description": "...", + "source": "github", + "state": "indexed", + "totalSnippets": 1234, + "totalTokens": 98000, + "trustScore": 8.5, + "stars": 228000, + "lastIndexedAt": "2026-03-22T10:00:00Z", + "versions": ["v18.3.0", "v17.0.2"] + } + ], + "total": 12, + "limit": 50, + "offset": 0 } ``` ### `POST /api/v1/libs` Request body: + ```json { - "source": "github", - "sourceUrl": "https://github.com/facebook/react", - "branch": "main", - "githubToken": "ghp_...", - "autoIndex": true + "source": "github", + "sourceUrl": "https://github.com/facebook/react", + "branch": "main", + "githubToken": "ghp_...", + "autoIndex": true } ``` Response `201`: + ```json { "library": { ...Repository }, @@ -149,6 +155,7 @@ Response `201`: `autoIndex: true` (default) immediately queues an indexing job. Response `409` if repository already exists: + ```json { "error": "Repository /facebook/react already exists" } ``` @@ -176,20 +183,22 @@ Response `404`: not found. Triggers a new indexing job. If a job is already running for this repo, returns the existing job. 
Request body (optional): + ```json { "version": "v18.3.0" } ``` Response `202`: + ```json { - "job": { - "id": "uuid", - "repositoryId": "/facebook/react", - "status": "queued", - "progress": 0, - "createdAt": "2026-03-22T10:00:00Z" - } + "job": { + "id": "uuid", + "repositoryId": "/facebook/react", + "status": "queued", + "progress": 0, + "createdAt": "2026-03-22T10:00:00Z" + } } ``` @@ -198,15 +207,17 @@ Response `202`: ## Error Response Shape All error responses follow: + ```json { - "error": "Human-readable message", - "code": "MACHINE_READABLE_CODE", - "details": {} + "error": "Human-readable message", + "code": "MACHINE_READABLE_CODE", + "details": {} } ``` Error codes: + - `NOT_FOUND` - `ALREADY_EXISTS` - `INVALID_INPUT` @@ -219,23 +230,23 @@ Error codes: ```typescript function resolveGitHubId(url: string): string { - // Parse owner/repo from URL variants: - // https://github.com/facebook/react - // https://github.com/facebook/react.git - // github.com/facebook/react - const match = url.match(/github\.com\/([^/]+)\/([^/\s.]+)/); - if (!match) throw new Error('Invalid GitHub URL'); - return `/${match[1]}/${match[2]}`; + // Parse owner/repo from URL variants: + // https://github.com/facebook/react + // https://github.com/facebook/react.git + // github.com/facebook/react + const match = url.match(/github\.com\/([^/]+)\/([^/\s.]+)/); + if (!match) throw new Error('Invalid GitHub URL'); + return `/${match[1]}/${match[2]}`; } function resolveLocalId(path: string, existingIds: string[]): string { - const base = slugify(path.split('/').at(-1)!); - let id = `/local/${base}`; - let counter = 2; - while (existingIds.includes(id)) { - id = `/local/${base}-${counter++}`; - } - return id; + const base = slugify(path.split('/').at(-1)!); + let id = `/local/${base}`; + let counter = 2; + while (existingIds.includes(id)) { + id = `/local/${base}-${counter++}`; + } + return id; } ``` diff --git a/docs/features/TRUEREF-0003.md b/docs/features/TRUEREF-0003.md index 
39aad51..fdf778b 100644 --- a/docs/features/TRUEREF-0003.md +++ b/docs/features/TRUEREF-0003.md @@ -37,17 +37,46 @@ The crawler only downloads files with these extensions: ```typescript const INDEXABLE_EXTENSIONS = new Set([ - // Documentation - '.md', '.mdx', '.txt', '.rst', - // Code - '.ts', '.tsx', '.js', '.jsx', - '.py', '.rb', '.go', '.rs', '.java', '.cs', '.cpp', '.c', '.h', - '.swift', '.kt', '.php', '.scala', '.clj', '.ex', '.exs', - '.sh', '.bash', '.zsh', '.fish', - // Config / data - '.json', '.yaml', '.yml', '.toml', - // Web - '.html', '.css', '.svelte', '.vue', + // Documentation + '.md', + '.mdx', + '.txt', + '.rst', + // Code + '.ts', + '.tsx', + '.js', + '.jsx', + '.py', + '.rb', + '.go', + '.rs', + '.java', + '.cs', + '.cpp', + '.c', + '.h', + '.swift', + '.kt', + '.php', + '.scala', + '.clj', + '.ex', + '.exs', + '.sh', + '.bash', + '.zsh', + '.fish', + // Config / data + '.json', + '.yaml', + '.yml', + '.toml', + // Web + '.html', + '.css', + '.svelte', + '.vue' ]); const MAX_FILE_SIZE_BYTES = 500_000; // 500 KB — skip large generated files @@ -59,28 +88,28 @@ const MAX_FILE_SIZE_BYTES = 500_000; // 500 KB — skip large generated files ```typescript export interface CrawledFile { - path: string; // relative path within repo, e.g. "src/index.ts" - content: string; // UTF-8 file content - size: number; // bytes - sha: string; // GitHub blob SHA (used as checksum) - language: string; // detected from extension + path: string; // relative path within repo, e.g. 
"src/index.ts" + content: string; // UTF-8 file content + size: number; // bytes + sha: string; // GitHub blob SHA (used as checksum) + language: string; // detected from extension } export interface CrawlResult { - files: CrawledFile[]; - totalFiles: number; // files matching filters - skippedFiles: number; // filtered out or too large - branch: string; // branch/tag that was crawled - commitSha: string; // HEAD commit SHA + files: CrawledFile[]; + totalFiles: number; // files matching filters + skippedFiles: number; // filtered out or too large + branch: string; // branch/tag that was crawled + commitSha: string; // HEAD commit SHA } export interface CrawlOptions { - owner: string; - repo: string; - ref?: string; // branch, tag, or commit SHA; defaults to repo default branch - token?: string; // GitHub PAT for private repos - config?: RepoConfig; // parsed trueref.json - onProgress?: (processed: number, total: number) => void; + owner: string; + repo: string; + ref?: string; // branch, tag, or commit SHA; defaults to repo default branch + token?: string; // GitHub PAT for private repos + config?: RepoConfig; // parsed trueref.json + onProgress?: (processed: number, total: number) => void; } ``` @@ -89,12 +118,14 @@ export interface CrawlOptions { ## GitHub API Usage ### Step 1: Get default branch (if ref not specified) + ``` GET https://api.github.com/repos/{owner}/{repo} → { default_branch: "main", stargazers_count: 12345 } ``` ### Step 2: Fetch file tree (recursive) + ``` GET https://api.github.com/repos/{owner}/{repo}/git/trees/{ref}?recursive=1 → { @@ -109,12 +140,14 @@ GET https://api.github.com/repos/{owner}/{repo}/git/trees/{ref}?recursive=1 If `truncated: true`, the tree has >100k items. Use `--depth` pagination or filter top-level directories first. 
### Step 3: Download file contents (parallel) + ``` GET https://api.github.com/repos/{owner}/{repo}/contents/{path}?ref={ref} → { content: "", encoding: "base64", size: 1234, sha: "abc123" } ``` Alternative for large repos: use raw content URL: + ``` GET https://raw.githubusercontent.com/{owner}/{repo}/{ref}/{path} ``` @@ -124,48 +157,47 @@ GET https://raw.githubusercontent.com/{owner}/{repo}/{ref}/{path} ## Filtering Logic ```typescript -function shouldIndexFile( - filePath: string, - fileSize: number, - config?: RepoConfig -): boolean { - const ext = path.extname(filePath).toLowerCase(); - const base = path.basename(filePath); +function shouldIndexFile(filePath: string, fileSize: number, config?: RepoConfig): boolean { + const ext = path.extname(filePath).toLowerCase(); + const base = path.basename(filePath); - // 1. Must have indexable extension - if (!INDEXABLE_EXTENSIONS.has(ext)) return false; + // 1. Must have indexable extension + if (!INDEXABLE_EXTENSIONS.has(ext)) return false; - // 2. Must not exceed size limit - if (fileSize > MAX_FILE_SIZE_BYTES) return false; + // 2. Must not exceed size limit + if (fileSize > MAX_FILE_SIZE_BYTES) return false; - // 3. Exclude lockfiles and other non-source artifacts - if (IGNORED_FILE_NAMES.has(base)) return false; + // 3. Exclude lockfiles and other non-source artifacts + if (IGNORED_FILE_NAMES.has(base)) return false; - // 4. Exclude minified and bundled assets - if (base.includes('.min.') || base.endsWith('.bundle.js') || base.endsWith('.bundle.css')) { - return false; - } + // 4. Exclude minified and bundled assets + if (base.includes('.min.') || base.endsWith('.bundle.js') || base.endsWith('.bundle.css')) { + return false; + } - // 5. Apply config excludeFiles (exact filename match) - if (config?.excludeFiles?.includes(base)) return false; + // 5. Apply config excludeFiles (exact filename match) + if (config?.excludeFiles?.includes(base)) return false; - // 6. 
Exclude common dependency/build/cache directories at any depth - if (isInIgnoredDirectory(filePath)) return false; + // 6. Exclude common dependency/build/cache directories at any depth + if (isInIgnoredDirectory(filePath)) return false; - // 7. Apply config excludeFolders (regex or prefix match) - if (config?.excludeFolders?.some(folder => - filePath.startsWith(folder) || new RegExp(folder).test(filePath) - )) return false; + // 7. Apply config excludeFolders (regex or prefix match) + if ( + config?.excludeFolders?.some( + (folder) => filePath.startsWith(folder) || new RegExp(folder).test(filePath) + ) + ) + return false; - // 8. Apply config folders allowlist (if specified, only index those paths) - if (config?.folders?.length) { - const inAllowedFolder = config.folders.some(folder => - filePath.startsWith(folder) || new RegExp(folder).test(filePath) - ); - if (!inAllowedFolder) return false; - } + // 8. Apply config folders allowlist (if specified, only index those paths) + if (config?.folders?.length) { + const inAllowedFolder = config.folders.some( + (folder) => filePath.startsWith(folder) || new RegExp(folder).test(filePath) + ); + if (!inAllowedFolder) return false; + } - return true; + return true; } ``` @@ -177,20 +209,20 @@ The shared ignored-directory list is intentionally broader than the original bas ```typescript class GitHubRateLimiter { - private remaining = 5000; - private resetAt = Date.now(); + private remaining = 5000; + private resetAt = Date.now(); - updateFromHeaders(headers: Headers): void { - this.remaining = parseInt(headers.get('X-RateLimit-Remaining') ?? '5000'); - this.resetAt = parseInt(headers.get('X-RateLimit-Reset') ?? '0') * 1000; - } + updateFromHeaders(headers: Headers): void { + this.remaining = parseInt(headers.get('X-RateLimit-Remaining') ?? '5000'); + this.resetAt = parseInt(headers.get('X-RateLimit-Reset') ?? 
'0') * 1000; + } - async waitIfNeeded(): Promise { - if (this.remaining <= 10) { - const waitMs = Math.max(0, this.resetAt - Date.now()) + 1000; - await sleep(waitMs); - } - } + async waitIfNeeded(): Promise { + if (this.remaining <= 10) { + const waitMs = Math.max(0, this.resetAt - Date.now()) + 1000; + await sleep(waitMs); + } + } } ``` @@ -200,14 +232,14 @@ Requests are made with a concurrency limit of 10 parallel downloads using a sema ## Error Handling -| Scenario | Behavior | -|----------|---------| -| 404 Not Found | Throw `RepositoryNotFoundError` | -| 401 Unauthorized | Throw `AuthenticationError` (invalid or missing token) | -| 403 Forbidden | If `X-RateLimit-Remaining: 0`, wait and retry; else throw `PermissionError` | -| 422 Unprocessable | Tree too large; switch to directory-by-directory traversal | -| Network error | Retry up to 3 times with exponential backoff | -| File content decode error | Skip file, log warning | +| Scenario | Behavior | +| ------------------------- | --------------------------------------------------------------------------- | +| 404 Not Found | Throw `RepositoryNotFoundError` | +| 401 Unauthorized | Throw `AuthenticationError` (invalid or missing token) | +| 403 Forbidden | If `X-RateLimit-Remaining: 0`, wait and retry; else throw `PermissionError` | +| 422 Unprocessable | Tree too large; switch to directory-by-directory traversal | +| Network error | Retry up to 3 times with exponential backoff | +| File content decode error | Skip file, log warning | --- diff --git a/docs/features/TRUEREF-0004.md b/docs/features/TRUEREF-0004.md index a63dada..993f41d 100644 --- a/docs/features/TRUEREF-0004.md +++ b/docs/features/TRUEREF-0004.md @@ -38,9 +38,9 @@ Reuses `CrawledFile` and `CrawlResult` from TRUEREF-0003 crawler types: ```typescript export interface LocalCrawlOptions { - rootPath: string; // absolute path to repository root - config?: RepoConfig; // parsed trueref.json - onProgress?: (processed: number, total: number) => void; + 
rootPath: string; // absolute path to repository root + config?: RepoConfig; // parsed trueref.json + onProgress?: (processed: number, total: number) => void; } ``` @@ -50,75 +50,73 @@ export interface LocalCrawlOptions { ```typescript export class LocalCrawler { - async crawl(options: LocalCrawlOptions): Promise { - // 1. Load root .gitignore if present - const gitignore = await this.loadGitignore(options.rootPath); + async crawl(options: LocalCrawlOptions): Promise { + // 1. Load root .gitignore if present + const gitignore = await this.loadGitignore(options.rootPath); - // 2. Enumerate files recursively, pruning ignored directories early - const allFiles = await this.walkDirectory(options.rootPath, '', gitignore); + // 2. Enumerate files recursively, pruning ignored directories early + const allFiles = await this.walkDirectory(options.rootPath, '', gitignore); - // 3. Look for trueref.json / context7.json first - const configFile = allFiles.find(f => - f === 'trueref.json' || f === 'context7.json' - ); - let config = options.config; - if (configFile && !config) { - config = await this.parseConfigFile( - path.join(options.rootPath, configFile) - ); - } + // 3. Look for trueref.json / context7.json first + const configFile = allFiles.find((f) => f === 'trueref.json' || f === 'context7.json'); + let config = options.config; + if (configFile && !config) { + config = await this.parseConfigFile(path.join(options.rootPath, configFile)); + } - // 4. Filter files - const filteredFiles = allFiles.filter(relPath => { - const stat = statCache.get(relPath); - return shouldIndexFile(relPath, stat.size, config); - }); + // 4. Filter files + const filteredFiles = allFiles.filter((relPath) => { + const stat = statCache.get(relPath); + return shouldIndexFile(relPath, stat.size, config); + }); - // 5. 
Read and return file contents - const crawledFiles: CrawledFile[] = []; - for (const [i, relPath] of filteredFiles.entries()) { - const absPath = path.join(options.rootPath, relPath); - const content = await fs.readFile(absPath, 'utf-8'); - const sha = computeSHA256(content); - crawledFiles.push({ - path: relPath, - content, - size: Buffer.byteLength(content, 'utf-8'), - sha, - language: detectLanguage(relPath), - }); - options.onProgress?.(i + 1, filteredFiles.length); - } + // 5. Read and return file contents + const crawledFiles: CrawledFile[] = []; + for (const [i, relPath] of filteredFiles.entries()) { + const absPath = path.join(options.rootPath, relPath); + const content = await fs.readFile(absPath, 'utf-8'); + const sha = computeSHA256(content); + crawledFiles.push({ + path: relPath, + content, + size: Buffer.byteLength(content, 'utf-8'), + sha, + language: detectLanguage(relPath) + }); + options.onProgress?.(i + 1, filteredFiles.length); + } - return { - files: crawledFiles, - totalFiles: filteredFiles.length, - skippedFiles: allFiles.length - filteredFiles.length, - branch: 'local', - commitSha: computeSHA256(crawledFiles.map(f => f.sha).join('')), - }; - } + return { + files: crawledFiles, + totalFiles: filteredFiles.length, + skippedFiles: allFiles.length - filteredFiles.length, + branch: 'local', + commitSha: computeSHA256(crawledFiles.map((f) => f.sha).join('')) + }; + } - private async walkDirectory(dir: string, rel = '', gitignore?: GitignoreFilter): Promise { - const entries = await fs.readdir(dir, { withFileTypes: true }); - const files: string[] = []; - for (const entry of entries) { - if (!entry.isFile() && !entry.isDirectory()) continue; // skip symlinks, devices - const relPath = rel ? 
`${rel}/${entry.name}` : entry.name; - if (entry.isDirectory()) { - if (shouldPruneDirectory(relPath) || gitignore?.isIgnored(relPath, true)) { - continue; - } - files.push(...await this.walkDirectory( - path.join(dir, entry.name), relPath, gitignore - )); - } else { - if (gitignore?.isIgnored(relPath, false)) continue; - files.push(relPath); - } - } - return files; - } + private async walkDirectory( + dir: string, + rel = '', + gitignore?: GitignoreFilter + ): Promise { + const entries = await fs.readdir(dir, { withFileTypes: true }); + const files: string[] = []; + for (const entry of entries) { + if (!entry.isFile() && !entry.isDirectory()) continue; // skip symlinks, devices + const relPath = rel ? `${rel}/${entry.name}` : entry.name; + if (entry.isDirectory()) { + if (shouldPruneDirectory(relPath) || gitignore?.isIgnored(relPath, true)) { + continue; + } + files.push(...(await this.walkDirectory(path.join(dir, entry.name), relPath, gitignore))); + } else { + if (gitignore?.isIgnored(relPath, false)) continue; + files.push(relPath); + } + } + return files; + } } ``` @@ -142,7 +140,7 @@ Directory pruning should happen during the walk so large dependency trees are ne import { createHash } from 'crypto'; function computeSHA256(content: string): string { - return createHash('sha256').update(content, 'utf-8').digest('hex'); + return createHash('sha256').update(content, 'utf-8').digest('hex'); } ``` diff --git a/docs/features/TRUEREF-0005.md b/docs/features/TRUEREF-0005.md index 438836f..fd981a2 100644 --- a/docs/features/TRUEREF-0005.md +++ b/docs/features/TRUEREF-0005.md @@ -30,19 +30,19 @@ Implement the document parsing and chunking pipeline that transforms raw file co ## Supported File Types -| Extension | Parser Strategy | -|-----------|----------------| -| `.md`, `.mdx` | Heading-based section splitting + code block extraction | -| `.txt`, `.rst` | Paragraph-based splitting | -| `.ts`, `.tsx`, `.js`, `.jsx` | AST-free: function/class boundary detection via 
regex | -| `.py` | `def`/`class` boundary detection | -| `.go` | `func`/`type` boundary detection | -| `.rs` | `fn`/`impl`/`struct` boundary detection | -| `.java`, `.cs`, `.kt`, `.swift` | Class/method boundary detection | -| `.rb` | `def`/`class` boundary detection | -| `.json`, `.yaml`, `.yml`, `.toml` | Structural chunking (top-level keys) | -| `.html`, `.svelte`, `.vue` | Text content extraction + script block splitting | -| Other code | Line-count-based sliding window (200 lines per chunk) | +| Extension | Parser Strategy | +| --------------------------------- | ------------------------------------------------------- | +| `.md`, `.mdx` | Heading-based section splitting + code block extraction | +| `.txt`, `.rst` | Paragraph-based splitting | +| `.ts`, `.tsx`, `.js`, `.jsx` | AST-free: function/class boundary detection via regex | +| `.py` | `def`/`class` boundary detection | +| `.go` | `func`/`type` boundary detection | +| `.rs` | `fn`/`impl`/`struct` boundary detection | +| `.java`, `.cs`, `.kt`, `.swift` | Class/method boundary detection | +| `.rb` | `def`/`class` boundary detection | +| `.json`, `.yaml`, `.yml`, `.toml` | Structural chunking (top-level keys) | +| `.html`, `.svelte`, `.vue` | Text content extraction + script block splitting | +| Other code | Line-count-based sliding window (200 lines per chunk) | --- @@ -52,9 +52,9 @@ Use a simple character-based approximation (no tokenizer library needed for v1): ```typescript function estimateTokens(text: string): number { - // Empirically: ~4 chars per token for English prose - // ~3 chars per token for code (more symbols) - return Math.ceil(text.length / 3.5); + // Empirically: ~4 chars per token for English prose + // ~3 chars per token for code (more symbols) + return Math.ceil(text.length / 3.5); } ``` @@ -74,49 +74,49 @@ The Markdown parser is the most important parser as most documentation is Markdo ```typescript interface MarkdownSection { - headings: string[]; // heading stack at this point - 
content: string; // text content (sans code blocks) - codeBlocks: { language: string; code: string }[]; + headings: string[]; // heading stack at this point + content: string; // text content (sans code blocks) + codeBlocks: { language: string; code: string }[]; } function parseMarkdown(content: string, filePath: string): Snippet[] { - const sections = splitIntoSections(content); - const snippets: Snippet[] = []; + const sections = splitIntoSections(content); + const snippets: Snippet[] = []; - for (const section of sections) { - const breadcrumb = section.headings.join(' > '); - const title = section.headings.at(-1) ?? path.basename(filePath); + for (const section of sections) { + const breadcrumb = section.headings.join(' > '); + const title = section.headings.at(-1) ?? path.basename(filePath); - // Emit info snippet for text content - if (section.content.trim().length >= 20) { - const chunks = chunkText(section.content, MAX_TOKENS, OVERLAP_TOKENS); - for (const chunk of chunks) { - snippets.push({ - type: 'info', - title, - content: chunk, - breadcrumb, - tokenCount: estimateTokens(chunk), - }); - } - } + // Emit info snippet for text content + if (section.content.trim().length >= 20) { + const chunks = chunkText(section.content, MAX_TOKENS, OVERLAP_TOKENS); + for (const chunk of chunks) { + snippets.push({ + type: 'info', + title, + content: chunk, + breadcrumb, + tokenCount: estimateTokens(chunk) + }); + } + } - // Emit code snippets for each code block - for (const block of section.codeBlocks) { - if (block.code.trim().length >= 20) { - snippets.push({ - type: 'code', - title, - content: block.code, - language: block.language || detectLanguage('.' 
+ block.language), - breadcrumb, - tokenCount: estimateTokens(block.code), - }); - } - } - } + // Emit code snippets for each code block + for (const block of section.codeBlocks) { + if (block.code.trim().length >= 20) { + snippets.push({ + type: 'code', + title, + content: block.code, + language: block.language || detectLanguage('.' + block.language), + breadcrumb, + tokenCount: estimateTokens(block.code) + }); + } + } + } - return snippets; + return snippets; } ``` @@ -135,43 +135,41 @@ For non-Markdown code files, use regex-based function/class boundary detection. ```typescript const BOUNDARY_PATTERNS: Record = { - typescript: /^(export\s+)?(async\s+)?(function|class|interface|type|const|let|var)\s+\w+/m, - python: /^(async\s+)?(def|class)\s+\w+/m, - go: /^(func|type|var|const)\s+\w+/m, - rust: /^(pub\s+)?(fn|impl|struct|enum|trait)\s+\w+/m, - java: /^(public|private|protected|static).*?(class|interface|enum|void|\w+)\s+\w+\s*[({]/m, + typescript: /^(export\s+)?(async\s+)?(function|class|interface|type|const|let|var)\s+\w+/m, + python: /^(async\s+)?(def|class)\s+\w+/m, + go: /^(func|type|var|const)\s+\w+/m, + rust: /^(pub\s+)?(fn|impl|struct|enum|trait)\s+\w+/m, + java: /^(public|private|protected|static).*?(class|interface|enum|void|\w+)\s+\w+\s*[({]/m }; -function parseCodeFile( - content: string, - filePath: string, - language: string -): Snippet[] { - const pattern = BOUNDARY_PATTERNS[language]; - const breadcrumb = filePath; - const title = path.basename(filePath); +function parseCodeFile(content: string, filePath: string, language: string): Snippet[] { + const pattern = BOUNDARY_PATTERNS[language]; + const breadcrumb = filePath; + const title = path.basename(filePath); - if (!pattern) { - // Fallback: sliding window - return slidingWindowChunks(content, filePath, language); - } + if (!pattern) { + // Fallback: sliding window + return slidingWindowChunks(content, filePath, language); + } - const chunks = splitAtBoundaries(content, pattern); - return chunks 
- .filter(chunk => chunk.trim().length >= 20) - .flatMap(chunk => { - if (estimateTokens(chunk) <= MAX_TOKENS) { - return [{ - type: 'code' as const, - title, - content: chunk, - language, - breadcrumb, - tokenCount: estimateTokens(chunk), - }]; - } - return slidingWindowChunks(chunk, filePath, language); - }); + const chunks = splitAtBoundaries(content, pattern); + return chunks + .filter((chunk) => chunk.trim().length >= 20) + .flatMap((chunk) => { + if (estimateTokens(chunk) <= MAX_TOKENS) { + return [ + { + type: 'code' as const, + title, + content: chunk, + language, + breadcrumb, + tokenCount: estimateTokens(chunk) + } + ]; + } + return slidingWindowChunks(chunk, filePath, language); + }); } ``` @@ -188,27 +186,23 @@ const MIN_CONTENT_LENGTH = 20; // characters ### Sliding Window Chunker ```typescript -function chunkText( - text: string, - maxTokens: number, - overlapTokens: number -): string[] { - const words = text.split(/\s+/); - const wordsPerToken = 0.75; // ~0.75 words per token - const maxWords = Math.floor(maxTokens * wordsPerToken); - const overlapWords = Math.floor(overlapTokens * wordsPerToken); +function chunkText(text: string, maxTokens: number, overlapTokens: number): string[] { + const words = text.split(/\s+/); + const wordsPerToken = 0.75; // ~0.75 words per token + const maxWords = Math.floor(maxTokens * wordsPerToken); + const overlapWords = Math.floor(overlapTokens * wordsPerToken); - const chunks: string[] = []; - let start = 0; + const chunks: string[] = []; + let start = 0; - while (start < words.length) { - const end = Math.min(start + maxWords, words.length); - chunks.push(words.slice(start, end).join(' ')); - if (end === words.length) break; - start = end - overlapWords; - } + while (start < words.length) { + const end = Math.min(start + maxWords, words.length); + chunks.push(words.slice(start, end).join(' ')); + if (end === words.length) break; + start = end - overlapWords; + } - return chunks; + return chunks; } ``` @@ -218,34 
+212,42 @@ function chunkText( ```typescript const LANGUAGE_MAP: Record = { - '.ts': 'typescript', '.tsx': 'typescript', - '.js': 'javascript', '.jsx': 'javascript', - '.py': 'python', - '.rb': 'ruby', - '.go': 'go', - '.rs': 'rust', - '.java': 'java', - '.cs': 'csharp', - '.cpp': 'cpp', '.c': 'c', '.h': 'c', - '.swift': 'swift', - '.kt': 'kotlin', - '.php': 'php', - '.scala': 'scala', - '.sh': 'bash', '.bash': 'bash', '.zsh': 'bash', - '.md': 'markdown', '.mdx': 'markdown', - '.json': 'json', - '.yaml': 'yaml', '.yml': 'yaml', - '.toml': 'toml', - '.html': 'html', - '.css': 'css', - '.svelte': 'svelte', - '.vue': 'vue', - '.sql': 'sql', + '.ts': 'typescript', + '.tsx': 'typescript', + '.js': 'javascript', + '.jsx': 'javascript', + '.py': 'python', + '.rb': 'ruby', + '.go': 'go', + '.rs': 'rust', + '.java': 'java', + '.cs': 'csharp', + '.cpp': 'cpp', + '.c': 'c', + '.h': 'c', + '.swift': 'swift', + '.kt': 'kotlin', + '.php': 'php', + '.scala': 'scala', + '.sh': 'bash', + '.bash': 'bash', + '.zsh': 'bash', + '.md': 'markdown', + '.mdx': 'markdown', + '.json': 'json', + '.yaml': 'yaml', + '.yml': 'yaml', + '.toml': 'toml', + '.html': 'html', + '.css': 'css', + '.svelte': 'svelte', + '.vue': 'vue', + '.sql': 'sql' }; function detectLanguage(filePath: string): string { - const ext = path.extname(filePath).toLowerCase(); - return LANGUAGE_MAP[ext] ?? 'text'; + const ext = path.extname(filePath).toLowerCase(); + return LANGUAGE_MAP[ext] ?? 
'text'; } ``` @@ -255,32 +257,32 @@ function detectLanguage(filePath: string): string { ```typescript export interface ParseOptions { - repositoryId: string; - documentId: string; - versionId?: string; + repositoryId: string; + documentId: string; + versionId?: string; } -export function parseFile( - file: CrawledFile, - options: ParseOptions -): NewSnippet[] { - const language = detectLanguage(file.path); - let rawSnippets: Omit[]; +export function parseFile(file: CrawledFile, options: ParseOptions): NewSnippet[] { + const language = detectLanguage(file.path); + let rawSnippets: Omit< + NewSnippet, + 'id' | 'repositoryId' | 'documentId' | 'versionId' | 'createdAt' + >[]; - if (language === 'markdown') { - rawSnippets = parseMarkdown(file.content, file.path); - } else { - rawSnippets = parseCodeFile(file.content, file.path, language); - } + if (language === 'markdown') { + rawSnippets = parseMarkdown(file.content, file.path); + } else { + rawSnippets = parseCodeFile(file.content, file.path, language); + } - return rawSnippets.map(s => ({ - ...s, - id: crypto.randomUUID(), - repositoryId: options.repositoryId, - documentId: options.documentId, - versionId: options.versionId ?? null, - createdAt: new Date(), - })); + return rawSnippets.map((s) => ({ + ...s, + id: crypto.randomUUID(), + repositoryId: options.repositoryId, + documentId: options.documentId, + versionId: options.versionId ?? null, + createdAt: new Date() + })); } ``` diff --git a/docs/features/TRUEREF-0006.md b/docs/features/TRUEREF-0006.md index cf32a1f..779602d 100644 --- a/docs/features/TRUEREF-0006.md +++ b/docs/features/TRUEREF-0006.md @@ -33,42 +33,37 @@ Implement the full-text search engine using SQLite's built-in FTS5 extension. 
Th // src/lib/server/search/search.service.ts export interface SnippetSearchOptions { - repositoryId: string; - versionId?: string; - type?: 'code' | 'info'; - limit?: number; // default: 20 - offset?: number; // default: 0 + repositoryId: string; + versionId?: string; + type?: 'code' | 'info'; + limit?: number; // default: 20 + offset?: number; // default: 0 } export interface SnippetSearchResult { - snippet: Snippet; - score: number; // BM25 rank (negative, lower = better) - repository: Pick; + snippet: Snippet; + score: number; // BM25 rank (negative, lower = better) + repository: Pick; } export interface LibrarySearchOptions { - libraryName: string; - query?: string; // semantic relevance hint - limit?: number; // default: 10 + libraryName: string; + query?: string; // semantic relevance hint + limit?: number; // default: 10 } export interface LibrarySearchResult { - repository: Repository; - versions: RepositoryVersion[]; - score: number; // composite relevance score + repository: Repository; + versions: RepositoryVersion[]; + score: number; // composite relevance score } export class SearchService { - constructor(private db: BetterSQLite3.Database) {} + constructor(private db: BetterSQLite3.Database) {} - searchSnippets( - query: string, - options: SnippetSearchOptions - ): SnippetSearchResult[] + searchSnippets(query: string, options: SnippetSearchOptions): SnippetSearchResult[]; - searchRepositories( - options: LibrarySearchOptions - ): LibrarySearchResult[] + searchRepositories(options: LibrarySearchOptions): LibrarySearchResult[]; } ``` @@ -101,21 +96,21 @@ The FTS5 MATCH query uses the porter stemmer and unicode61 tokenizer (configured ```typescript function preprocessQuery(raw: string): string { - // 1. Trim and normalize whitespace - let q = raw.trim().replace(/\s+/g, ' '); + // 1. Trim and normalize whitespace + let q = raw.trim().replace(/\s+/g, ' '); - // 2. 
Escape FTS5 special characters that aren't intended as operators - // Keep: * (prefix), " " (phrase), AND, OR, NOT - q = q.replace(/[()]/g, ' '); + // 2. Escape FTS5 special characters that aren't intended as operators + // Keep: * (prefix), " " (phrase), AND, OR, NOT + q = q.replace(/[()]/g, ' '); - // 3. Add prefix wildcard to last token for "typing as you go" feel - const tokens = q.split(' '); - const lastToken = tokens.at(-1) ?? ''; - if (lastToken.length >= 3 && !lastToken.endsWith('*')) { - tokens[tokens.length - 1] = lastToken + '*'; - } + // 3. Add prefix wildcard to last token for "typing as you go" feel + const tokens = q.split(' '); + const lastToken = tokens.at(-1) ?? ''; + if (lastToken.length >= 3 && !lastToken.endsWith('*')) { + tokens[tokens.length - 1] = lastToken + '*'; + } - return tokens.join(' '); + return tokens.join(' '); } ``` @@ -174,56 +169,65 @@ searchRepositories(options: LibrarySearchOptions): LibrarySearchResult[] { The search results must be formatted for the REST API and MCP tool responses: ### Library search response (for `resolve-library-id`): + ```typescript function formatLibraryResults(results: LibrarySearchResult[]): string { - if (results.length === 0) { - return 'No libraries found matching your search.'; - } + if (results.length === 0) { + return 'No libraries found matching your search.'; + } - return results.map((r, i) => { - const repo = r.repository; - const versions = r.versions.map(v => v.tag).join(', ') || 'default branch'; - return [ - `${i + 1}. ${repo.title}`, - ` Library ID: ${repo.id}`, - ` Description: ${repo.description ?? 'No description'}`, - ` Snippets: ${repo.totalSnippets} | Trust Score: ${repo.trustScore.toFixed(1)}/10`, - ` Available Versions: ${versions}`, - ].join('\n'); - }).join('\n\n'); + return results + .map((r, i) => { + const repo = r.repository; + const versions = r.versions.map((v) => v.tag).join(', ') || 'default branch'; + return [ + `${i + 1}. 
${repo.title}`, + ` Library ID: ${repo.id}`, + ` Description: ${repo.description ?? 'No description'}`, + ` Snippets: ${repo.totalSnippets} | Trust Score: ${repo.trustScore.toFixed(1)}/10`, + ` Available Versions: ${versions}` + ].join('\n'); + }) + .join('\n\n'); } ``` ### Snippet search response (for `query-docs`): + ```typescript -function formatSnippetResults( - results: SnippetSearchResult[], - rules?: string[] -): string { - const parts: string[] = []; +function formatSnippetResults(results: SnippetSearchResult[], rules?: string[]): string { + const parts: string[] = []; - // Prepend repository rules if present - if (rules?.length) { - parts.push('## Library Rules\n' + rules.map(r => `- ${r}`).join('\n')); - } + // Prepend repository rules if present + if (rules?.length) { + parts.push('## Library Rules\n' + rules.map((r) => `- ${r}`).join('\n')); + } - for (const { snippet } of results) { - if (snippet.type === 'code') { - parts.push([ - snippet.title ? `### ${snippet.title}` : '', - snippet.breadcrumb ? `*${snippet.breadcrumb}*` : '', - `\`\`\`${snippet.language ?? ''}\n${snippet.content}\n\`\`\``, - ].filter(Boolean).join('\n')); - } else { - parts.push([ - snippet.title ? `### ${snippet.title}` : '', - snippet.breadcrumb ? `*${snippet.breadcrumb}*` : '', - snippet.content, - ].filter(Boolean).join('\n')); - } - } + for (const { snippet } of results) { + if (snippet.type === 'code') { + parts.push( + [ + snippet.title ? `### ${snippet.title}` : '', + snippet.breadcrumb ? `*${snippet.breadcrumb}*` : '', + `\`\`\`${snippet.language ?? ''}\n${snippet.content}\n\`\`\`` + ] + .filter(Boolean) + .join('\n') + ); + } else { + parts.push( + [ + snippet.title ? `### ${snippet.title}` : '', + snippet.breadcrumb ? 
`*${snippet.breadcrumb}*` : '', + snippet.content + ] + .filter(Boolean) + .join('\n') + ); + } + } - return parts.join('\n\n---\n\n'); + return parts.join('\n\n---\n\n'); } ``` @@ -235,26 +239,26 @@ Compute `trustScore` (0–10) when a repository is first indexed: ```typescript function computeTrustScore(repo: Repository): number { - let score = 0; + let score = 0; - // Stars (up to 4 points): log scale, 10k stars = 4 pts - if (repo.stars) { - score += Math.min(4, Math.log10(repo.stars + 1)); - } + // Stars (up to 4 points): log scale, 10k stars = 4 pts + if (repo.stars) { + score += Math.min(4, Math.log10(repo.stars + 1)); + } - // Documentation coverage (up to 3 points) - score += Math.min(3, repo.totalSnippets / 500); + // Documentation coverage (up to 3 points) + score += Math.min(3, repo.totalSnippets / 500); - // Source type (1 point for GitHub, 0 for local) - if (repo.source === 'github') score += 1; + // Source type (1 point for GitHub, 0 for local) + if (repo.source === 'github') score += 1; - // Successful indexing (1 point) - if (repo.state === 'indexed') score += 1; + // Successful indexing (1 point) + if (repo.state === 'indexed') score += 1; - // Has description (1 point) - if (repo.description) score += 1; + // Has description (1 point) + if (repo.description) score += 1; - return Math.min(10, parseFloat(score.toFixed(1))); + return Math.min(10, parseFloat(score.toFixed(1))); } ``` diff --git a/docs/features/TRUEREF-0007.md b/docs/features/TRUEREF-0007.md index 0304e09..4ca2f20 100644 --- a/docs/features/TRUEREF-0007.md +++ b/docs/features/TRUEREF-0007.md @@ -34,18 +34,18 @@ Implement a pluggable embedding generation system that produces vector represent // src/lib/server/embeddings/provider.ts export interface EmbeddingVector { - values: Float32Array; - dimensions: number; - model: string; + values: Float32Array; + dimensions: number; + model: string; } export interface EmbeddingProvider { - readonly name: string; - readonly dimensions: number; - 
readonly model: string; + readonly name: string; + readonly dimensions: number; + readonly model: string; - embed(texts: string[]): Promise; - isAvailable(): Promise; + embed(texts: string[]): Promise; + isAvailable(): Promise; } ``` @@ -55,51 +55,51 @@ export interface EmbeddingProvider { ```typescript export interface OpenAIProviderConfig { - baseUrl: string; // e.g. "https://api.openai.com/v1" or "http://localhost:11434/v1" - apiKey: string; - model: string; // e.g. "text-embedding-3-small", "nomic-embed-text" - dimensions?: number; // override for models that support it (e.g. text-embedding-3-small) - maxBatchSize?: number; // default: 100 + baseUrl: string; // e.g. "https://api.openai.com/v1" or "http://localhost:11434/v1" + apiKey: string; + model: string; // e.g. "text-embedding-3-small", "nomic-embed-text" + dimensions?: number; // override for models that support it (e.g. text-embedding-3-small) + maxBatchSize?: number; // default: 100 } export class OpenAIEmbeddingProvider implements EmbeddingProvider { - constructor(private config: OpenAIProviderConfig) {} + constructor(private config: OpenAIProviderConfig) {} - async embed(texts: string[]): Promise { - // Batch into groups of maxBatchSize - const batches = chunk(texts, this.config.maxBatchSize ?? 100); - const allEmbeddings: EmbeddingVector[] = []; + async embed(texts: string[]): Promise { + // Batch into groups of maxBatchSize + const batches = chunk(texts, this.config.maxBatchSize ?? 
100); + const allEmbeddings: EmbeddingVector[] = []; - for (const batch of batches) { - const response = await fetch(`${this.config.baseUrl}/embeddings`, { - method: 'POST', - headers: { - 'Authorization': `Bearer ${this.config.apiKey}`, - 'Content-Type': 'application/json', - }, - body: JSON.stringify({ - model: this.config.model, - input: batch, - dimensions: this.config.dimensions, - }), - }); + for (const batch of batches) { + const response = await fetch(`${this.config.baseUrl}/embeddings`, { + method: 'POST', + headers: { + Authorization: `Bearer ${this.config.apiKey}`, + 'Content-Type': 'application/json' + }, + body: JSON.stringify({ + model: this.config.model, + input: batch, + dimensions: this.config.dimensions + }) + }); - if (!response.ok) { - throw new EmbeddingError(`API error: ${response.status}`); - } + if (!response.ok) { + throw new EmbeddingError(`API error: ${response.status}`); + } - const data = await response.json(); - for (const item of data.data) { - allEmbeddings.push({ - values: new Float32Array(item.embedding), - dimensions: item.embedding.length, - model: this.config.model, - }); - } - } + const data = await response.json(); + for (const item of data.data) { + allEmbeddings.push({ + values: new Float32Array(item.embedding), + dimensions: item.embedding.length, + model: this.config.model + }); + } + } - return allEmbeddings; - } + return allEmbeddings; + } } ``` @@ -110,41 +110,41 @@ export class OpenAIEmbeddingProvider implements EmbeddingProvider { ```typescript // Uses @xenova/transformers — only loaded if installed export class LocalEmbeddingProvider implements EmbeddingProvider { - private pipeline: unknown = null; + private pipeline: unknown = null; - readonly name = 'local'; - readonly model = 'Xenova/all-MiniLM-L6-v2'; // 384-dim, fast, small - readonly dimensions = 384; + readonly name = 'local'; + readonly model = 'Xenova/all-MiniLM-L6-v2'; // 384-dim, fast, small + readonly dimensions = 384; - async embed(texts: string[]): 
Promise { - if (!this.pipeline) { - const { pipeline } = await import('@xenova/transformers'); - this.pipeline = await pipeline('feature-extraction', this.model); - } + async embed(texts: string[]): Promise { + if (!this.pipeline) { + const { pipeline } = await import('@xenova/transformers'); + this.pipeline = await pipeline('feature-extraction', this.model); + } - const results: EmbeddingVector[] = []; - for (const text of texts) { - const output = await (this.pipeline as Function)(text, { - pooling: 'mean', - normalize: true, - }); - results.push({ - values: new Float32Array(output.data), - dimensions: this.dimensions, - model: this.model, - }); - } - return results; - } + const results: EmbeddingVector[] = []; + for (const text of texts) { + const output = await (this.pipeline as Function)(text, { + pooling: 'mean', + normalize: true + }); + results.push({ + values: new Float32Array(output.data), + dimensions: this.dimensions, + model: this.model + }); + } + return results; + } - async isAvailable(): Promise { - try { - await import('@xenova/transformers'); - return true; - } catch { - return false; - } - } + async isAvailable(): Promise { + try { + await import('@xenova/transformers'); + return true; + } catch { + return false; + } + } } ``` @@ -154,53 +154,55 @@ export class LocalEmbeddingProvider implements EmbeddingProvider { ```typescript export class EmbeddingService { - constructor( - private db: BetterSQLite3.Database, - private provider: EmbeddingProvider - ) {} + constructor( + private db: BetterSQLite3.Database, + private provider: EmbeddingProvider + ) {} - async embedSnippets( - snippetIds: string[], - onProgress?: (done: number, total: number) => void - ): Promise { - const snippets = this.db.prepare( - `SELECT id, content, type FROM snippets WHERE id IN (${snippetIds.map(() => '?').join(',')})` - ).all(...snippetIds) as Snippet[]; + async embedSnippets( + snippetIds: string[], + onProgress?: (done: number, total: number) => void + ): Promise { + 
const snippets = this.db + .prepare( + `SELECT id, content, type FROM snippets WHERE id IN (${snippetIds.map(() => '?').join(',')})` + ) + .all(...snippetIds) as Snippet[]; - // Prepare text for embedding: combine title + content - const texts = snippets.map(s => - [s.title, s.breadcrumb, s.content].filter(Boolean).join('\n').slice(0, 2048) - ); + // Prepare text for embedding: combine title + content + const texts = snippets.map((s) => + [s.title, s.breadcrumb, s.content].filter(Boolean).join('\n').slice(0, 2048) + ); - const BATCH_SIZE = 50; - const insert = this.db.prepare(` + const BATCH_SIZE = 50; + const insert = this.db.prepare(` INSERT OR REPLACE INTO snippet_embeddings (snippet_id, model, dimensions, embedding, created_at) VALUES (?, ?, ?, ?, unixepoch()) `); - for (let i = 0; i < snippets.length; i += BATCH_SIZE) { - const batch = snippets.slice(i, i + BATCH_SIZE); - const batchTexts = texts.slice(i, i + BATCH_SIZE); + for (let i = 0; i < snippets.length; i += BATCH_SIZE) { + const batch = snippets.slice(i, i + BATCH_SIZE); + const batchTexts = texts.slice(i, i + BATCH_SIZE); - const embeddings = await this.provider.embed(batchTexts); + const embeddings = await this.provider.embed(batchTexts); - const insertMany = this.db.transaction(() => { - for (let j = 0; j < batch.length; j++) { - const snippet = batch[j]; - const embedding = embeddings[j]; - insert.run( - snippet.id, - embedding.model, - embedding.dimensions, - Buffer.from(embedding.values.buffer) - ); - } - }); - insertMany(); + const insertMany = this.db.transaction(() => { + for (let j = 0; j < batch.length; j++) { + const snippet = batch[j]; + const embedding = embeddings[j]; + insert.run( + snippet.id, + embedding.model, + embedding.dimensions, + Buffer.from(embedding.values.buffer) + ); + } + }); + insertMany(); - onProgress?.(Math.min(i + BATCH_SIZE, snippets.length), snippets.length); - } - } + onProgress?.(Math.min(i + BATCH_SIZE, snippets.length), snippets.length); + } + } } ``` @@ -212,13 
+214,13 @@ Stored in the `settings` table as JSON: ```typescript export interface EmbeddingConfig { - provider: 'openai' | 'local' | 'none'; - openai?: { - baseUrl: string; - apiKey: string; - model: string; - dimensions?: number; - }; + provider: 'openai' | 'local' | 'none'; + openai?: { + baseUrl: string; + apiKey: string; + model: string; + dimensions?: number; + }; } // Settings key: 'embedding_config' @@ -227,14 +229,15 @@ export interface EmbeddingConfig { ### API Endpoints `GET /api/v1/settings/embedding` + ```json { - "provider": "openai", - "openai": { - "baseUrl": "https://api.openai.com/v1", - "model": "text-embedding-3-small", - "dimensions": 1536 - } + "provider": "openai", + "openai": { + "baseUrl": "https://api.openai.com/v1", + "model": "text-embedding-3-small", + "dimensions": 1536 + } } ``` @@ -251,11 +254,7 @@ Embeddings are stored as raw `Float32Array` binary blobs: const buffer = Buffer.from(float32Array.buffer); // Retrieve -const float32Array = new Float32Array( - buffer.buffer, - buffer.byteOffset, - buffer.byteLength / 4 -); +const float32Array = new Float32Array(buffer.buffer, buffer.byteOffset, buffer.byteLength / 4); ``` --- diff --git a/docs/features/TRUEREF-0008.md b/docs/features/TRUEREF-0008.md index 3288802..1566f5b 100644 --- a/docs/features/TRUEREF-0008.md +++ b/docs/features/TRUEREF-0008.md @@ -102,21 +102,21 @@ async vectorSearch( ```typescript function reciprocalRankFusion( - ...rankings: Array> + ...rankings: Array> ): Array<{ id: string; rrfScore: number }> { - const K = 60; // RRF constant (standard value) - const scores = new Map(); + const K = 60; // RRF constant (standard value) + const scores = new Map(); - for (const ranking of rankings) { - ranking.forEach(({ id }, rank) => { - const current = scores.get(id) ?? 0; - scores.set(id, current + 1 / (K + rank + 1)); - }); - } + for (const ranking of rankings) { + ranking.forEach(({ id }, rank) => { + const current = scores.get(id) ?? 
0; + scores.set(id, current + 1 / (K + rank + 1)); + }); + } - return Array.from(scores.entries()) - .map(([id, rrfScore]) => ({ id, rrfScore })) - .sort((a, b) => b.rrfScore - a.rrfScore); + return Array.from(scores.entries()) + .map(([id, rrfScore]) => ({ id, rrfScore })) + .sort((a, b) => b.rrfScore - a.rrfScore); } ``` @@ -126,65 +126,62 @@ function reciprocalRankFusion( ```typescript export interface HybridSearchOptions { - repositoryId: string; - versionId?: string; - type?: 'code' | 'info'; - limit?: number; - alpha?: number; // 0.0 = FTS5 only, 1.0 = vector only, 0.5 = balanced + repositoryId: string; + versionId?: string; + type?: 'code' | 'info'; + limit?: number; + alpha?: number; // 0.0 = FTS5 only, 1.0 = vector only, 0.5 = balanced } export class HybridSearchService { - constructor( - private db: BetterSQLite3.Database, - private searchService: SearchService, - private embeddingProvider: EmbeddingProvider | null, - ) {} + constructor( + private db: BetterSQLite3.Database, + private searchService: SearchService, + private embeddingProvider: EmbeddingProvider | null + ) {} - async search( - query: string, - options: HybridSearchOptions - ): Promise { - const limit = options.limit ?? 20; - const alpha = options.alpha ?? 0.5; + async search(query: string, options: HybridSearchOptions): Promise { + const limit = options.limit ?? 20; + const alpha = options.alpha ?? 
0.5; - // Always run FTS5 search - const ftsResults = this.searchService.searchSnippets(query, { - repositoryId: options.repositoryId, - versionId: options.versionId, - type: options.type, - limit: limit * 3, // get more candidates for fusion - }); + // Always run FTS5 search + const ftsResults = this.searchService.searchSnippets(query, { + repositoryId: options.repositoryId, + versionId: options.versionId, + type: options.type, + limit: limit * 3 // get more candidates for fusion + }); - // If no embedding provider or alpha = 0, return FTS5 results directly - if (!this.embeddingProvider || alpha === 0) { - return ftsResults.slice(0, limit); - } + // If no embedding provider or alpha = 0, return FTS5 results directly + if (!this.embeddingProvider || alpha === 0) { + return ftsResults.slice(0, limit); + } - // Embed the query and run vector search - const [queryEmbedding] = await this.embeddingProvider.embed([query]); - const vectorResults = await this.vectorSearch( - queryEmbedding.values, - options.repositoryId, - limit * 3 - ); + // Embed the query and run vector search + const [queryEmbedding] = await this.embeddingProvider.embed([query]); + const vectorResults = await this.vectorSearch( + queryEmbedding.values, + options.repositoryId, + limit * 3 + ); - // Normalize result lists for RRF - const ftsRanked = ftsResults.map((r, i) => ({ - id: r.snippet.id, - score: i, - })); - const vecRanked = vectorResults.map((r, i) => ({ - id: r.snippetId, - score: i, - })); + // Normalize result lists for RRF + const ftsRanked = ftsResults.map((r, i) => ({ + id: r.snippet.id, + score: i + })); + const vecRanked = vectorResults.map((r, i) => ({ + id: r.snippetId, + score: i + })); - // Apply RRF - const fused = reciprocalRankFusion(ftsRanked, vecRanked); + // Apply RRF + const fused = reciprocalRankFusion(ftsRanked, vecRanked); - // Fetch full snippet data for top results - const topIds = fused.slice(0, limit).map(r => r.id); - return this.fetchSnippetsByIds(topIds, 
options.repositoryId); - } + // Fetch full snippet data for top results + const topIds = fused.slice(0, limit).map((r) => r.id); + return this.fetchSnippetsByIds(topIds, options.repositoryId); + } } ``` @@ -197,9 +194,9 @@ The hybrid search alpha value can be set per-request or globally via settings: ```typescript // Default config stored in settings table under key 'search_config' export interface SearchConfig { - alpha: number; // 0.5 default - maxResults: number; // 20 default - enableHybrid: boolean; // true if embedding provider is configured + alpha: number; // 0.5 default + maxResults: number; // 20 default + enableHybrid: boolean; // true if embedding provider is configured } ``` diff --git a/docs/features/TRUEREF-0009.md b/docs/features/TRUEREF-0009.md index a9a2dc0..45e02e8 100644 --- a/docs/features/TRUEREF-0009.md +++ b/docs/features/TRUEREF-0009.md @@ -56,75 +56,83 @@ Implement the end-to-end indexing pipeline that orchestrates crawling, parsing, // src/lib/server/pipeline/job-queue.ts export class JobQueue { - private isRunning = false; + private isRunning = false; - constructor(private db: BetterSQLite3.Database) {} + constructor(private db: BetterSQLite3.Database) {} - enqueue(repositoryId: string, versionId?: string): IndexingJob { - const job: NewIndexingJob = { - id: crypto.randomUUID(), - repositoryId, - versionId: versionId ?? null, - status: 'queued', - progress: 0, - totalFiles: 0, - processedFiles: 0, - error: null, - startedAt: null, - completedAt: null, - createdAt: new Date(), - }; + enqueue(repositoryId: string, versionId?: string): IndexingJob { + const job: NewIndexingJob = { + id: crypto.randomUUID(), + repositoryId, + versionId: versionId ?? null, + status: 'queued', + progress: 0, + totalFiles: 0, + processedFiles: 0, + error: null, + startedAt: null, + completedAt: null, + createdAt: new Date() + }; - this.db.prepare(` + this.db + .prepare( + ` INSERT INTO indexing_jobs VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?) 
- `).run(...Object.values(job)); + ` + ) + .run(...Object.values(job)); - // Kick off processing if not already running - if (!this.isRunning) { - setImmediate(() => this.processNext()); - } + // Kick off processing if not already running + if (!this.isRunning) { + setImmediate(() => this.processNext()); + } - return job; - } + return job; + } - private async processNext(): Promise { - if (this.isRunning) return; + private async processNext(): Promise { + if (this.isRunning) return; - const job = this.db.prepare(` + const job = this.db + .prepare( + ` SELECT * FROM indexing_jobs WHERE status = 'queued' ORDER BY created_at ASC LIMIT 1 - `).get() as IndexingJob | undefined; + ` + ) + .get() as IndexingJob | undefined; - if (!job) return; + if (!job) return; - this.isRunning = true; - try { - await this.pipeline.run(job); - } finally { - this.isRunning = false; - // Check for next queued job - const nextJob = this.db.prepare( - `SELECT id FROM indexing_jobs WHERE status = 'queued' LIMIT 1` - ).get(); - if (nextJob) setImmediate(() => this.processNext()); - } - } + this.isRunning = true; + try { + await this.pipeline.run(job); + } finally { + this.isRunning = false; + // Check for next queued job + const nextJob = this.db + .prepare(`SELECT id FROM indexing_jobs WHERE status = 'queued' LIMIT 1`) + .get(); + if (nextJob) setImmediate(() => this.processNext()); + } + } - getJob(id: string): IndexingJob | null { - return this.db.prepare( - `SELECT * FROM indexing_jobs WHERE id = ?` - ).get(id) as IndexingJob | null; - } + getJob(id: string): IndexingJob | null { + return this.db + .prepare(`SELECT * FROM indexing_jobs WHERE id = ?`) + .get(id) as IndexingJob | null; + } - listJobs(repositoryId?: string, limit = 20): IndexingJob[] { - const query = repositoryId - ? `SELECT * FROM indexing_jobs WHERE repository_id = ? ORDER BY created_at DESC LIMIT ?` - : `SELECT * FROM indexing_jobs ORDER BY created_at DESC LIMIT ?`; - const params = repositoryId ? 
[repositoryId, limit] : [limit]; - return this.db.prepare(query).all(...params) as IndexingJob[]; - } + listJobs(repositoryId?: string, limit = 20): IndexingJob[] { + const query = repositoryId + ? `SELECT * FROM indexing_jobs WHERE repository_id = ? ORDER BY created_at DESC LIMIT ?` + : `SELECT * FROM indexing_jobs ORDER BY created_at DESC LIMIT ?`; + const params = repositoryId ? [repositoryId, limit] : [limit]; + return this.db.prepare(query).all(...params) as IndexingJob[]; + } } ``` @@ -136,94 +144,96 @@ export class JobQueue { // src/lib/server/pipeline/indexing.pipeline.ts export class IndexingPipeline { - constructor( - private db: BetterSQLite3.Database, - private githubCrawler: GitHubCrawler, - private localCrawler: LocalCrawler, - private embeddingService: EmbeddingService | null, - ) {} + constructor( + private db: BetterSQLite3.Database, + private githubCrawler: GitHubCrawler, + private localCrawler: LocalCrawler, + private embeddingService: EmbeddingService | null + ) {} - async run(job: IndexingJob): Promise { - this.updateJob(job.id, { status: 'running', startedAt: new Date() }); + async run(job: IndexingJob): Promise { + this.updateJob(job.id, { status: 'running', startedAt: new Date() }); - try { - const repo = this.getRepository(job.repositoryId); - if (!repo) throw new Error(`Repository ${job.repositoryId} not found`); + try { + const repo = this.getRepository(job.repositoryId); + if (!repo) throw new Error(`Repository ${job.repositoryId} not found`); - // Update repo state - this.updateRepo(repo.id, { state: 'indexing' }); + // Update repo state + this.updateRepo(repo.id, { state: 'indexing' }); - // Step 1: Crawl - const crawlResult = await this.crawl(repo, job); + // Step 1: Crawl + const crawlResult = await this.crawl(repo, job); - // Step 2: Parse and diff - const { newSnippets, changedDocIds, newDocuments } = - await this.parseAndDiff(crawlResult, repo, job); + // Step 2: Parse and diff + const { newSnippets, changedDocIds, newDocuments } 
= await this.parseAndDiff( + crawlResult, + repo, + job + ); - // Step 3: Atomic replacement - this.replaceSnippets(repo.id, changedDocIds, newDocuments, newSnippets); + // Step 3: Atomic replacement + this.replaceSnippets(repo.id, changedDocIds, newDocuments, newSnippets); - // Step 4: Embeddings (async, non-blocking for job completion) - if (this.embeddingService && newSnippets.length > 0) { - await this.embeddingService.embedSnippets( - newSnippets.map(s => s.id), - (done, total) => { - // Update job progress for embedding phase - } - ); - } + // Step 4: Embeddings (async, non-blocking for job completion) + if (this.embeddingService && newSnippets.length > 0) { + await this.embeddingService.embedSnippets( + newSnippets.map((s) => s.id), + (done, total) => { + // Update job progress for embedding phase + } + ); + } - // Step 5: Update repo stats - const stats = this.computeStats(repo.id); - this.updateRepo(repo.id, { - state: 'indexed', - totalSnippets: stats.totalSnippets, - totalTokens: stats.totalTokens, - trustScore: computeTrustScore({ ...repo, ...stats }), - lastIndexedAt: new Date(), - }); + // Step 5: Update repo stats + const stats = this.computeStats(repo.id); + this.updateRepo(repo.id, { + state: 'indexed', + totalSnippets: stats.totalSnippets, + totalTokens: stats.totalTokens, + trustScore: computeTrustScore({ ...repo, ...stats }), + lastIndexedAt: new Date() + }); - this.updateJob(job.id, { - status: 'done', - progress: 100, - completedAt: new Date(), - }); + this.updateJob(job.id, { + status: 'done', + progress: 100, + completedAt: new Date() + }); + } catch (error) { + this.updateJob(job.id, { + status: 'failed', + error: (error as Error).message, + completedAt: new Date() + }); + this.updateRepo(job.repositoryId, { state: 'error' }); + throw error; + } + } - } catch (error) { - this.updateJob(job.id, { - status: 'failed', - error: (error as Error).message, - completedAt: new Date(), - }); - this.updateRepo(job.repositoryId, { state: 'error' }); - 
throw error; - } - } + private replaceSnippets( + repositoryId: string, + changedDocIds: string[], + newDocuments: NewDocument[], + newSnippets: NewSnippet[] + ): void { + // Single transaction: delete old → insert new + this.db.transaction(() => { + if (changedDocIds.length > 0) { + // Cascade deletes snippets via FK constraint + this.db + .prepare(`DELETE FROM documents WHERE id IN (${changedDocIds.map(() => '?').join(',')})`) + .run(...changedDocIds); + } - private replaceSnippets( - repositoryId: string, - changedDocIds: string[], - newDocuments: NewDocument[], - newSnippets: NewSnippet[] - ): void { - // Single transaction: delete old → insert new - this.db.transaction(() => { - if (changedDocIds.length > 0) { - // Cascade deletes snippets via FK constraint - this.db.prepare( - `DELETE FROM documents WHERE id IN (${changedDocIds.map(() => '?').join(',')})` - ).run(...changedDocIds); - } + for (const doc of newDocuments) { + this.insertDocument(doc); + } - for (const doc of newDocuments) { - this.insertDocument(doc); - } - - for (const snippet of newSnippets) { - this.insertSnippet(snippet); - } - })(); - } + for (const snippet of newSnippets) { + this.insertSnippet(snippet); + } + })(); + } } ``` @@ -233,26 +243,24 @@ export class IndexingPipeline { ```typescript function calculateProgress( - processedFiles: number, - totalFiles: number, - embeddingsDone: number, - embeddingsTotal: number, - hasEmbeddings: boolean + processedFiles: number, + totalFiles: number, + embeddingsDone: number, + embeddingsTotal: number, + hasEmbeddings: boolean ): number { - if (totalFiles === 0) return 0; + if (totalFiles === 0) return 0; - if (!hasEmbeddings) { - // Crawl + parse = 100% - return Math.round((processedFiles / totalFiles) * 100); - } + if (!hasEmbeddings) { + // Crawl + parse = 100% + return Math.round((processedFiles / totalFiles) * 100); + } - // Crawl+parse = 80%, embeddings = 20% - const parseProgress = (processedFiles / totalFiles) * 80; - const embedProgress = 
embeddingsTotal > 0 - ? (embeddingsDone / embeddingsTotal) * 20 - : 0; + // Crawl+parse = 80%, embeddings = 20% + const parseProgress = (processedFiles / totalFiles) * 80; + const embedProgress = embeddingsTotal > 0 ? (embeddingsDone / embeddingsTotal) * 20 : 0; - return Math.round(parseProgress + embedProgress); + return Math.round(parseProgress + embedProgress); } ``` @@ -263,20 +271,21 @@ function calculateProgress( ### `GET /api/v1/jobs/:id` Response `200`: + ```json { - "job": { - "id": "uuid", - "repositoryId": "/facebook/react", - "status": "running", - "progress": 47, - "totalFiles": 342, - "processedFiles": 162, - "error": null, - "startedAt": "2026-03-22T10:00:00Z", - "completedAt": null, - "createdAt": "2026-03-22T09:59:55Z" - } + "job": { + "id": "uuid", + "repositoryId": "/facebook/react", + "status": "running", + "progress": 47, + "totalFiles": 342, + "processedFiles": 162, + "error": null, + "startedAt": "2026-03-22T10:00:00Z", + "completedAt": null, + "createdAt": "2026-03-22T09:59:55Z" + } } ``` @@ -285,6 +294,7 @@ Response `200`: Query params: `repositoryId` (optional), `status` (optional), `limit` (default 20). 
Response `200`: + ```json { "jobs": [...], @@ -300,20 +310,24 @@ On application start, mark any jobs in `running` state as `failed` (they were in ```typescript function recoverStaleJobs(db: BetterSQLite3.Database): void { - db.prepare(` + db.prepare( + ` UPDATE indexing_jobs SET status = 'failed', error = 'Server restarted while job was running', completed_at = unixepoch() WHERE status = 'running' - `).run(); + ` + ).run(); - // Also reset any repositories stuck in 'indexing' state - db.prepare(` + // Also reset any repositories stuck in 'indexing' state + db.prepare( + ` UPDATE repositories SET state = 'error' WHERE state = 'indexing' - `).run(); + ` + ).run(); } ``` diff --git a/docs/features/TRUEREF-0010.md b/docs/features/TRUEREF-0010.md index 9e2d5b8..6c75bf5 100644 --- a/docs/features/TRUEREF-0010.md +++ b/docs/features/TRUEREF-0010.md @@ -32,33 +32,33 @@ Implement the public-facing REST API endpoints that replicate context7's `/api/v ### Query Parameters -| Parameter | Type | Required | Description | -|-----------|------|----------|-------------| -| `libraryName` | string | Yes | Library name to search for | -| `query` | string | No | User's question for relevance ranking | -| `limit` | integer | No | Max results (default: 10, max: 50) | +| Parameter | Type | Required | Description | +| ------------- | ------- | -------- | ------------------------------------- | +| `libraryName` | string | Yes | Library name to search for | +| `query` | string | No | User's question for relevance ranking | +| `limit` | integer | No | Max results (default: 10, max: 50) | ### Response `200` (`type=json`, default): ```json { - "results": [ - { - "id": "/facebook/react", - "title": "React", - "description": "A JavaScript library for building user interfaces", - "branch": "main", - "lastUpdateDate": "2026-03-22T10:00:00Z", - "state": "finalized", - "totalTokens": 142000, - "totalSnippets": 1247, - "stars": 228000, - "trustScore": 9.2, - "benchmarkScore": 87, - "versions": 
["v18.3.0", "v17.0.2"], - "source": "https://github.com/facebook/react" - } - ] + "results": [ + { + "id": "/facebook/react", + "title": "React", + "description": "A JavaScript library for building user interfaces", + "branch": "main", + "lastUpdateDate": "2026-03-22T10:00:00Z", + "state": "finalized", + "totalTokens": 142000, + "totalSnippets": 1247, + "stars": 228000, + "trustScore": 9.2, + "benchmarkScore": 87, + "versions": ["v18.3.0", "v17.0.2"], + "source": "https://github.com/facebook/react" + } + ] } ``` @@ -67,11 +67,11 @@ Note: `state: "finalized"` maps from TrueRef's `state: "indexed"` for compatibil ### State Mapping | TrueRef state | context7 state | -|---------------|---------------| -| `pending` | `initial` | -| `indexing` | `initial` | -| `indexed` | `finalized` | -| `error` | `error` | +| ------------- | -------------- | +| `pending` | `initial` | +| `indexing` | `initial` | +| `indexed` | `finalized` | +| `error` | `error` | --- @@ -81,43 +81,43 @@ Note: `state: "finalized"` maps from TrueRef's `state: "indexed"` for compatibil ### Query Parameters -| Parameter | Type | Required | Description | -|-----------|------|----------|-------------| -| `libraryId` | string | Yes | Library ID, e.g. `/facebook/react` or `/facebook/react/v18.3.0` | -| `query` | string | Yes | Specific question about the library | -| `type` | string | No | `json` (default) or `txt` (plain text for LLM injection) | -| `tokens` | integer | No | Approximate max token count for response (default: 10000) | +| Parameter | Type | Required | Description | +| ----------- | ------- | -------- | --------------------------------------------------------------- | +| `libraryId` | string | Yes | Library ID, e.g. 
`/facebook/react` or `/facebook/react/v18.3.0` | +| `query` | string | Yes | Specific question about the library | +| `type` | string | No | `json` (default) or `txt` (plain text for LLM injection) | +| `tokens` | integer | No | Approximate max token count for response (default: 10000) | ### Response `200` (`type=json`): ```json { - "snippets": [ - { - "type": "code", - "title": "Basic Component", - "description": "Getting Started > Components", - "language": "tsx", - "codeList": [ - { - "language": "tsx", - "code": "function MyComponent() {\n return
<div>Hello</div>
;\n}" - } - ], - "id": "uuid", - "tokenCount": 45, - "pageTitle": "Getting Started" - }, - { - "type": "info", - "text": "React components let you split the UI into independent...", - "breadcrumb": "Core Concepts > Components", - "pageId": "uuid", - "tokenCount": 120 - } - ], - "rules": ["Always use functional components", "..."], - "totalTokens": 2840 + "snippets": [ + { + "type": "code", + "title": "Basic Component", + "description": "Getting Started > Components", + "language": "tsx", + "codeList": [ + { + "language": "tsx", + "code": "function MyComponent() {\n return
<div>Hello</div>
;\n}" + } + ], + "id": "uuid", + "tokenCount": 45, + "pageTitle": "Getting Started" + }, + { + "type": "info", + "text": "React components let you split the UI into independent...", + "breadcrumb": "Core Concepts > Components", + "pageId": "uuid", + "tokenCount": 120 + } + ], + "rules": ["Always use functional components", "..."], + "totalTokens": 2840 } ``` @@ -125,7 +125,7 @@ Note: `state: "finalized"` maps from TrueRef's `state: "indexed"` for compatibil Plain text formatted for direct LLM context injection: -``` +```` ## Library Rules - Always use functional components - Use hooks for state management @@ -139,15 +139,17 @@ Plain text formatted for direct LLM context injection: function MyComponent() { return
<div>Hello</div>
; } -``` +```` --- ### React components let you split the UI... -*Core Concepts > Components* + +_Core Concepts > Components_ React components let you split the UI into independent, reusable pieces... -``` + +```` --- @@ -167,7 +169,7 @@ function parseLibraryId(libraryId: string): { version: match[3], }; } -``` +```` --- @@ -176,20 +178,17 @@ function parseLibraryId(libraryId: string): { The `tokens` parameter limits the total response size. Snippets are added greedily until the budget is exhausted: ```typescript -function selectSnippetsWithinBudget( - snippets: Snippet[], - maxTokens: number -): Snippet[] { - const selected: Snippet[] = []; - let usedTokens = 0; +function selectSnippetsWithinBudget(snippets: Snippet[], maxTokens: number): Snippet[] { + const selected: Snippet[] = []; + let usedTokens = 0; - for (const snippet of snippets) { - if (usedTokens + (snippet.tokenCount ?? 0) > maxTokens) break; - selected.push(snippet); - usedTokens += snippet.tokenCount ?? 0; - } + for (const snippet of snippets) { + if (usedTokens + (snippet.tokenCount ?? 0) > maxTokens) break; + selected.push(snippet); + usedTokens += snippet.tokenCount ?? 
0; + } - return selected; + return selected; } ``` @@ -215,6 +214,7 @@ Default token budget: 10,000 tokens (~7,500 words) — enough for ~20 medium sni ## CORS Configuration All API routes include: + ``` Access-Control-Allow-Origin: * Access-Control-Allow-Methods: GET, POST, PATCH, DELETE, OPTIONS diff --git a/docs/features/TRUEREF-0011.md b/docs/features/TRUEREF-0011.md index b35e059..3b84156 100644 --- a/docs/features/TRUEREF-0011.md +++ b/docs/features/TRUEREF-0011.md @@ -32,8 +32,8 @@ Implement a Model Context Protocol (MCP) server that exposes `resolve-library-id ```json { - "@modelcontextprotocol/sdk": "^1.25.1", - "zod": "^4.3.4" + "@modelcontextprotocol/sdk": "^1.25.1", + "zod": "^4.3.4" } ``` @@ -46,189 +46,190 @@ Implement a Model Context Protocol (MCP) server that exposes `resolve-library-id import { Server } from '@modelcontextprotocol/sdk/server/index.js'; import { StdioServerTransport } from '@modelcontextprotocol/sdk/server/stdio.js'; -import { - CallToolRequestSchema, - ListToolsRequestSchema, -} from '@modelcontextprotocol/sdk/types.js'; +import { CallToolRequestSchema, ListToolsRequestSchema } from '@modelcontextprotocol/sdk/types.js'; import { z } from 'zod'; const API_BASE = process.env.TRUEREF_API_URL ?? 
'http://localhost:5173'; const server = new Server( - { - name: 'io.github.trueref/trueref', - version: '1.0.0', - }, - { - capabilities: { tools: {} }, - } + { + name: 'io.github.trueref/trueref', + version: '1.0.0' + }, + { + capabilities: { tools: {} } + } ); // Tool schemas — identical to context7 for drop-in compatibility const ResolveLibraryIdSchema = z.object({ - libraryName: z.string().describe( - 'Library name to search for and resolve to a TrueRef library ID' - ), - query: z.string().describe( - "The user's question or context to help rank results" - ), + libraryName: z + .string() + .describe('Library name to search for and resolve to a TrueRef library ID'), + query: z.string().describe("The user's question or context to help rank results") }); const QueryDocsSchema = z.object({ - libraryId: z.string().describe( - 'The TrueRef library ID obtained from resolve-library-id, e.g. /facebook/react' - ), - query: z.string().describe( - 'Specific question about the library to retrieve relevant documentation' - ), - tokens: z.number().optional().describe( - 'Maximum token budget for the response (default: 10000)' - ), + libraryId: z + .string() + .describe('The TrueRef library ID obtained from resolve-library-id, e.g. 
/facebook/react'), + query: z + .string() + .describe('Specific question about the library to retrieve relevant documentation'), + tokens: z.number().optional().describe('Maximum token budget for the response (default: 10000)') }); server.setRequestHandler(ListToolsRequestSchema, async () => ({ - tools: [ - { - name: 'resolve-library-id', - description: [ - 'Searches TrueRef to find a library matching the given name.', - 'Returns a list of matching libraries with their IDs.', - 'ALWAYS call this tool before query-docs to get the correct library ID.', - 'Call at most 3 times per user question.', - ].join(' '), - inputSchema: { - type: 'object', - properties: { - libraryName: { - type: 'string', - description: 'Library name to search for', - }, - query: { - type: 'string', - description: "User's question for relevance ranking", - }, - }, - required: ['libraryName', 'query'], - }, - }, - { - name: 'query-docs', - description: [ - 'Fetches documentation and code examples from TrueRef for a specific library.', - 'Requires a library ID obtained from resolve-library-id.', - 'Returns relevant snippets formatted for LLM consumption.', - 'Call at most 3 times per user question.', - ].join(' '), - inputSchema: { - type: 'object', - properties: { - libraryId: { - type: 'string', - description: 'TrueRef library ID, e.g. /facebook/react', - }, - query: { - type: 'string', - description: 'Specific question about the library', - }, - tokens: { - type: 'number', - description: 'Max token budget (default: 10000)', - }, - }, - required: ['libraryId', 'query'], - }, - }, - ], + tools: [ + { + name: 'resolve-library-id', + description: [ + 'Searches TrueRef to find a library matching the given name.', + 'Returns a list of matching libraries with their IDs.', + 'ALWAYS call this tool before query-docs to get the correct library ID.', + 'Call at most 3 times per user question.' 
+ ].join(' '), + inputSchema: { + type: 'object', + properties: { + libraryName: { + type: 'string', + description: 'Library name to search for' + }, + query: { + type: 'string', + description: "User's question for relevance ranking" + } + }, + required: ['libraryName', 'query'] + } + }, + { + name: 'query-docs', + description: [ + 'Fetches documentation and code examples from TrueRef for a specific library.', + 'Requires a library ID obtained from resolve-library-id.', + 'Returns relevant snippets formatted for LLM consumption.', + 'Call at most 3 times per user question.' + ].join(' '), + inputSchema: { + type: 'object', + properties: { + libraryId: { + type: 'string', + description: 'TrueRef library ID, e.g. /facebook/react' + }, + query: { + type: 'string', + description: 'Specific question about the library' + }, + tokens: { + type: 'number', + description: 'Max token budget (default: 10000)' + } + }, + required: ['libraryId', 'query'] + } + } + ] })); server.setRequestHandler(CallToolRequestSchema, async (request) => { - const { name, arguments: args } = request.params; + const { name, arguments: args } = request.params; - if (name === 'resolve-library-id') { - const { libraryName, query } = ResolveLibraryIdSchema.parse(args); + if (name === 'resolve-library-id') { + const { libraryName, query } = ResolveLibraryIdSchema.parse(args); - const url = new URL(`${API_BASE}/api/v1/libs/search`); - url.searchParams.set('libraryName', libraryName); - url.searchParams.set('query', query); - url.searchParams.set('type', 'txt'); + const url = new URL(`${API_BASE}/api/v1/libs/search`); + url.searchParams.set('libraryName', libraryName); + url.searchParams.set('query', query); + url.searchParams.set('type', 'txt'); - const response = await fetch(url.toString()); - if (!response.ok) { - return { - content: [{ - type: 'text', - text: `Error searching libraries: ${response.status} ${response.statusText}`, - }], - isError: true, - }; - } + const response = await 
fetch(url.toString()); + if (!response.ok) { + return { + content: [ + { + type: 'text', + text: `Error searching libraries: ${response.status} ${response.statusText}` + } + ], + isError: true + }; + } - const text = await response.text(); - return { - content: [{ type: 'text', text }], - }; - } + const text = await response.text(); + return { + content: [{ type: 'text', text }] + }; + } - if (name === 'query-docs') { - const { libraryId, query, tokens } = QueryDocsSchema.parse(args); + if (name === 'query-docs') { + const { libraryId, query, tokens } = QueryDocsSchema.parse(args); - const url = new URL(`${API_BASE}/api/v1/context`); - url.searchParams.set('libraryId', libraryId); - url.searchParams.set('query', query); - url.searchParams.set('type', 'txt'); - if (tokens) url.searchParams.set('tokens', String(tokens)); + const url = new URL(`${API_BASE}/api/v1/context`); + url.searchParams.set('libraryId', libraryId); + url.searchParams.set('query', query); + url.searchParams.set('type', 'txt'); + if (tokens) url.searchParams.set('tokens', String(tokens)); - const response = await fetch(url.toString()); - if (!response.ok) { - const status = response.status; - if (status === 404) { - return { - content: [{ - type: 'text', - text: `Library "${libraryId}" not found. Please run resolve-library-id first.`, - }], - isError: true, - }; - } - if (status === 503) { - return { - content: [{ - type: 'text', - text: `Library "${libraryId}" is currently being indexed. Please try again in a moment.`, - }], - isError: true, - }; - } - return { - content: [{ - type: 'text', - text: `Error fetching documentation: ${response.status} ${response.statusText}`, - }], - isError: true, - }; - } + const response = await fetch(url.toString()); + if (!response.ok) { + const status = response.status; + if (status === 404) { + return { + content: [ + { + type: 'text', + text: `Library "${libraryId}" not found. 
Please run resolve-library-id first.` + } + ], + isError: true + }; + } + if (status === 503) { + return { + content: [ + { + type: 'text', + text: `Library "${libraryId}" is currently being indexed. Please try again in a moment.` + } + ], + isError: true + }; + } + return { + content: [ + { + type: 'text', + text: `Error fetching documentation: ${response.status} ${response.statusText}` + } + ], + isError: true + }; + } - const text = await response.text(); - return { - content: [{ type: 'text', text }], - }; - } + const text = await response.text(); + return { + content: [{ type: 'text', text }] + }; + } - return { - content: [{ type: 'text', text: `Unknown tool: ${name}` }], - isError: true, - }; + return { + content: [{ type: 'text', text: `Unknown tool: ${name}` }], + isError: true + }; }); async function main() { - const transport = new StdioServerTransport(); - await server.connect(transport); - // Server runs until process exits + const transport = new StdioServerTransport(); + await server.connect(transport); + // Server runs until process exits } main().catch((err) => { - process.stderr.write(`MCP server error: ${err.message}\n`); - process.exit(1); + process.stderr.write(`MCP server error: ${err.message}\n`); + process.exit(1); }); ``` @@ -238,18 +239,19 @@ main().catch((err) => { ```json { - "scripts": { - "mcp:start": "node --experimental-vm-modules src/mcp/index.ts" - } + "scripts": { + "mcp:start": "node --experimental-vm-modules src/mcp/index.ts" + } } ``` Or with `tsx` for TypeScript-direct execution: + ```json { - "scripts": { - "mcp:start": "tsx src/mcp/index.ts" - } + "scripts": { + "mcp:start": "tsx src/mcp/index.ts" + } } ``` @@ -261,30 +263,31 @@ Users add to `.mcp.json`: ```json { - "mcpServers": { - "trueref": { - "command": "node", - "args": ["/path/to/trueref/dist/mcp/index.js"], - "env": { - "TRUEREF_API_URL": "http://localhost:5173" - } - } - } + "mcpServers": { + "trueref": { + "command": "node", + "args": 
["/path/to/trueref/dist/mcp/index.js"], + "env": { + "TRUEREF_API_URL": "http://localhost:5173" + } + } + } } ``` Or with tsx for development: + ```json { - "mcpServers": { - "trueref": { - "command": "npx", - "args": ["tsx", "/path/to/trueref/src/mcp/index.ts"], - "env": { - "TRUEREF_API_URL": "http://localhost:5173" - } - } - } + "mcpServers": { + "trueref": { + "command": "npx", + "args": ["tsx", "/path/to/trueref/src/mcp/index.ts"], + "env": { + "TRUEREF_API_URL": "http://localhost:5173" + } + } + } } ``` @@ -295,13 +298,15 @@ Or with tsx for development: The MCP server should include a `resources` list item (optional) or the library responses themselves prepend rules. Additionally, users should add a Claude rule file: ```markdown - ---- +## + description: Use TrueRef to retrieve documentation for indexed libraries alwaysApply: true + --- When answering questions about indexed libraries, always use the TrueRef MCP tools: + 1. Call `resolve-library-id` with the library name and the user's question to get the library ID 2. Call `query-docs` with the library ID and question to retrieve relevant documentation 3. Use the returned documentation to answer the question accurately diff --git a/docs/features/TRUEREF-0012.md b/docs/features/TRUEREF-0012.md index bd97792..991ed9c 100644 --- a/docs/features/TRUEREF-0012.md +++ b/docs/features/TRUEREF-0012.md @@ -50,64 +50,64 @@ import { StreamableHTTPServerTransport } from '@modelcontextprotocol/sdk/server/ import { StdioServerTransport } from '@modelcontextprotocol/sdk/server/stdio.js'; const { values: args } = parseArgs({ - options: { - transport: { type: 'string', default: 'stdio' }, - port: { type: 'string', default: process.env.PORT ?? '3001' }, - }, + options: { + transport: { type: 'string', default: 'stdio' }, + port: { type: 'string', default: process.env.PORT ?? 
'3001' } + } }); async function startHttp(server: Server, port: number): Promise { - const httpServer = createServer(async (req, res) => { - const url = new URL(req.url!, `http://localhost:${port}`); + const httpServer = createServer(async (req, res) => { + const url = new URL(req.url!, `http://localhost:${port}`); - // Health check - if (url.pathname === '/ping') { - res.writeHead(200, { 'Content-Type': 'application/json' }); - res.end(JSON.stringify({ ok: true })); - return; - } + // Health check + if (url.pathname === '/ping') { + res.writeHead(200, { 'Content-Type': 'application/json' }); + res.end(JSON.stringify({ ok: true })); + return; + } - // MCP endpoint - if (url.pathname === '/mcp') { - // CORS preflight - res.setHeader('Access-Control-Allow-Origin', '*'); - res.setHeader('Access-Control-Allow-Methods', 'POST, GET, OPTIONS'); - res.setHeader('Access-Control-Allow-Headers', 'Content-Type, Accept'); + // MCP endpoint + if (url.pathname === '/mcp') { + // CORS preflight + res.setHeader('Access-Control-Allow-Origin', '*'); + res.setHeader('Access-Control-Allow-Methods', 'POST, GET, OPTIONS'); + res.setHeader('Access-Control-Allow-Headers', 'Content-Type, Accept'); - if (req.method === 'OPTIONS') { - res.writeHead(204); - res.end(); - return; - } + if (req.method === 'OPTIONS') { + res.writeHead(204); + res.end(); + return; + } - const transport = new StreamableHTTPServerTransport({ - sessionIdGenerator: () => crypto.randomUUID(), - }); + const transport = new StreamableHTTPServerTransport({ + sessionIdGenerator: () => crypto.randomUUID() + }); - await server.connect(transport); - await transport.handleRequest(req, res); - return; - } + await server.connect(transport); + await transport.handleRequest(req, res); + return; + } - res.writeHead(404); - res.end('Not Found'); - }); + res.writeHead(404); + res.end('Not Found'); + }); - httpServer.listen(port, () => { - process.stderr.write(`TrueRef MCP server listening on http://localhost:${port}/mcp\n`); - }); + 
httpServer.listen(port, () => { + process.stderr.write(`TrueRef MCP server listening on http://localhost:${port}/mcp\n`); + }); } async function main() { - const mcpServer = createMcpServer(); // shared server creation + const mcpServer = createMcpServer(); // shared server creation - if (args.transport === 'http') { - const port = parseInt(args.port!, 10); - await startHttp(mcpServer, port); - } else { - const transport = new StdioServerTransport(); - await mcpServer.connect(transport); - } + if (args.transport === 'http') { + const port = parseInt(args.port!, 10); + await startHttp(mcpServer, port); + } else { + const transport = new StdioServerTransport(); + await mcpServer.connect(transport); + } } ``` @@ -117,10 +117,10 @@ async function main() { ```json { - "scripts": { - "mcp:start": "tsx src/mcp/index.ts", - "mcp:http": "tsx src/mcp/index.ts --transport http --port 3001" - } + "scripts": { + "mcp:start": "tsx src/mcp/index.ts", + "mcp:http": "tsx src/mcp/index.ts --transport http --port 3001" + } } ``` @@ -132,12 +132,12 @@ For HTTP transport, users configure Claude Code with the remote URL: ```json { - "mcpServers": { - "trueref": { - "type": "http", - "url": "http://localhost:3001/mcp" - } - } + "mcpServers": { + "trueref": { + "type": "http", + "url": "http://localhost:3001/mcp" + } + } } ``` diff --git a/docs/features/TRUEREF-0013.md b/docs/features/TRUEREF-0013.md index 2cdb8eb..a120148 100644 --- a/docs/features/TRUEREF-0013.md +++ b/docs/features/TRUEREF-0013.md @@ -32,53 +32,53 @@ Support `trueref.json` configuration files placed in the root of a repository. T // src/lib/server/config/trueref-config.schema.ts export interface TrueRefConfig { - /** - * Override the display name for this library. - * 1–100 characters. - */ - projectTitle?: string; + /** + * Override the display name for this library. + * 1–100 characters. + */ + projectTitle?: string; - /** - * Description of the library for search ranking. - * 10–500 characters. 
- */ - description?: string; + /** + * Description of the library for search ranking. + * 10–500 characters. + */ + description?: string; - /** - * Folders to include in indexing (allowlist). - * Each entry is a path prefix or regex string. - * If empty/absent, all folders are included. - * Examples: ["src/", "docs/", "^packages/core"] - */ - folders?: string[]; + /** + * Folders to include in indexing (allowlist). + * Each entry is a path prefix or regex string. + * If empty/absent, all folders are included. + * Examples: ["src/", "docs/", "^packages/core"] + */ + folders?: string[]; - /** - * Folders to exclude from indexing. - * Applied after `folders` allowlist. - * Examples: ["test/", "fixtures/", "__mocks__"] - */ - excludeFolders?: string[]; + /** + * Folders to exclude from indexing. + * Applied after `folders` allowlist. + * Examples: ["test/", "fixtures/", "__mocks__"] + */ + excludeFolders?: string[]; - /** - * Exact filenames to exclude (no path, no regex). - * Examples: ["README.md", "CHANGELOG.md", "jest.config.ts"] - */ - excludeFiles?: string[]; + /** + * Exact filenames to exclude (no path, no regex). + * Examples: ["README.md", "CHANGELOG.md", "jest.config.ts"] + */ + excludeFiles?: string[]; - /** - * Best practices / rules to inject at the top of every query-docs response. - * Each rule: 5–500 characters. - * Maximum 20 rules. - */ - rules?: string[]; + /** + * Best practices / rules to inject at the top of every query-docs response. + * Each rule: 5–500 characters. + * Maximum 20 rules. + */ + rules?: string[]; - /** - * Previously released versions to make available for versioned queries. - */ - previousVersions?: Array<{ - tag: string; // git tag (e.g. "v1.2.3") - title: string; // human-readable (e.g. "Version 1.2.3") - }>; + /** + * Previously released versions to make available for versioned queries. + */ + previousVersions?: Array<{ + tag: string; // git tag (e.g. "v1.2.3") + title: string; // human-readable (e.g. 
"Version 1.2.3") + }>; } ``` @@ -88,14 +88,14 @@ export interface TrueRefConfig { ```typescript const CONFIG_CONSTRAINTS = { - projectTitle: { minLength: 1, maxLength: 100 }, - description: { minLength: 10, maxLength: 500 }, - folders: { maxItems: 50, maxLength: 200 }, // per entry - excludeFolders: { maxItems: 50, maxLength: 200 }, - excludeFiles: { maxItems: 100, maxLength: 200 }, - rules: { maxItems: 20, minLength: 5, maxLength: 500 }, - previousVersions: { maxItems: 50 }, - versionTag: { pattern: /^v?\d+\.\d+(\.\d+)?(-.*)?$/ }, + projectTitle: { minLength: 1, maxLength: 100 }, + description: { minLength: 10, maxLength: 500 }, + folders: { maxItems: 50, maxLength: 200 }, // per entry + excludeFolders: { maxItems: 50, maxLength: 200 }, + excludeFiles: { maxItems: 100, maxLength: 200 }, + rules: { maxItems: 20, minLength: 5, maxLength: 500 }, + previousVersions: { maxItems: 50 }, + versionTag: { pattern: /^v?\d+\.\d+(\.\d+)?(-.*)?$/ } }; ``` @@ -107,94 +107,96 @@ const CONFIG_CONSTRAINTS = { // src/lib/server/config/config-parser.ts export interface ParsedConfig { - config: TrueRefConfig; - source: 'trueref.json' | 'context7.json'; - warnings: string[]; + config: TrueRefConfig; + source: 'trueref.json' | 'context7.json'; + warnings: string[]; } export function parseConfigFile(content: string, filename: string): ParsedConfig { - let raw: unknown; + let raw: unknown; - try { - raw = JSON.parse(content); - } catch (e) { - throw new ConfigParseError(`${filename} is not valid JSON: ${(e as Error).message}`); - } + try { + raw = JSON.parse(content); + } catch (e) { + throw new ConfigParseError(`${filename} is not valid JSON: ${(e as Error).message}`); + } - if (typeof raw !== 'object' || raw === null) { - throw new ConfigParseError(`${filename} must be a JSON object`); - } + if (typeof raw !== 'object' || raw === null) { + throw new ConfigParseError(`${filename} must be a JSON object`); + } - const config = raw as Record; - const validated: TrueRefConfig = {}; - const 
warnings: string[] = []; + const config = raw as Record; + const validated: TrueRefConfig = {}; + const warnings: string[] = []; - // projectTitle - if (config.projectTitle !== undefined) { - if (typeof config.projectTitle !== 'string') { - warnings.push('projectTitle must be a string, ignoring'); - } else if (config.projectTitle.length > 100) { - validated.projectTitle = config.projectTitle.slice(0, 100); - warnings.push('projectTitle truncated to 100 characters'); - } else { - validated.projectTitle = config.projectTitle; - } - } + // projectTitle + if (config.projectTitle !== undefined) { + if (typeof config.projectTitle !== 'string') { + warnings.push('projectTitle must be a string, ignoring'); + } else if (config.projectTitle.length > 100) { + validated.projectTitle = config.projectTitle.slice(0, 100); + warnings.push('projectTitle truncated to 100 characters'); + } else { + validated.projectTitle = config.projectTitle; + } + } - // description - if (config.description !== undefined) { - if (typeof config.description === 'string') { - validated.description = config.description.slice(0, 500); - } - } + // description + if (config.description !== undefined) { + if (typeof config.description === 'string') { + validated.description = config.description.slice(0, 500); + } + } - // folders / excludeFolders / excludeFiles — validated as string arrays - for (const field of ['folders', 'excludeFolders', 'excludeFiles'] as const) { - if (config[field] !== undefined) { - if (!Array.isArray(config[field])) { - warnings.push(`${field} must be an array, ignoring`); - } else { - validated[field] = (config[field] as unknown[]) - .filter((item): item is string => { - if (typeof item !== 'string') { - warnings.push(`${field} entry must be a string, skipping: ${item}`); - return false; - } - return true; - }) - .slice(0, field === 'excludeFiles' ? 
100 : 50); - } - } - } + // folders / excludeFolders / excludeFiles — validated as string arrays + for (const field of ['folders', 'excludeFolders', 'excludeFiles'] as const) { + if (config[field] !== undefined) { + if (!Array.isArray(config[field])) { + warnings.push(`${field} must be an array, ignoring`); + } else { + validated[field] = (config[field] as unknown[]) + .filter((item): item is string => { + if (typeof item !== 'string') { + warnings.push(`${field} entry must be a string, skipping: ${item}`); + return false; + } + return true; + }) + .slice(0, field === 'excludeFiles' ? 100 : 50); + } + } + } - // rules - if (config.rules !== undefined) { - if (Array.isArray(config.rules)) { - validated.rules = (config.rules as unknown[]) - .filter((r): r is string => typeof r === 'string' && r.length >= 5) - .map(r => r.slice(0, 500)) - .slice(0, 20); - } - } + // rules + if (config.rules !== undefined) { + if (Array.isArray(config.rules)) { + validated.rules = (config.rules as unknown[]) + .filter((r): r is string => typeof r === 'string' && r.length >= 5) + .map((r) => r.slice(0, 500)) + .slice(0, 20); + } + } - // previousVersions - if (config.previousVersions !== undefined) { - if (Array.isArray(config.previousVersions)) { - validated.previousVersions = (config.previousVersions as unknown[]) - .filter((v): v is { tag: string; title: string } => - typeof v === 'object' && v !== null && - typeof (v as Record).tag === 'string' && - typeof (v as Record).title === 'string' - ) - .slice(0, 50); - } - } + // previousVersions + if (config.previousVersions !== undefined) { + if (Array.isArray(config.previousVersions)) { + validated.previousVersions = (config.previousVersions as unknown[]) + .filter( + (v): v is { tag: string; title: string } => + typeof v === 'object' && + v !== null && + typeof (v as Record).tag === 'string' && + typeof (v as Record).title === 'string' + ) + .slice(0, 50); + } + } - return { - config: validated, - source: filename.startsWith('trueref') 
? 'trueref.json' : 'context7.json', - warnings, - }; + return { + config: validated, + source: filename.startsWith('trueref') ? 'trueref.json' : 'context7.json', + warnings + }; } ``` @@ -219,21 +221,15 @@ When `query-docs` returns results, `rules` from `repository_configs` are prepend ```typescript // In formatters.ts -function buildContextResponse( - snippets: Snippet[], - config: RepositoryConfig | null -): string { - const parts: string[] = []; +function buildContextResponse(snippets: Snippet[], config: RepositoryConfig | null): string { + const parts: string[] = []; - if (config?.rules?.length) { - parts.push( - '## Library Best Practices\n' + - config.rules.map(r => `- ${r}`).join('\n') - ); - } + if (config?.rules?.length) { + parts.push('## Library Best Practices\n' + config.rules.map((r) => `- ${r}`).join('\n')); + } - // ... append snippet content - return parts.join('\n\n---\n\n'); + // ... append snippet content + return parts.join('\n\n---\n\n'); } ``` diff --git a/docs/features/TRUEREF-0014.md b/docs/features/TRUEREF-0014.md index 85be02a..1363af9 100644 --- a/docs/features/TRUEREF-0014.md +++ b/docs/features/TRUEREF-0014.md @@ -45,34 +45,37 @@ Examples: ### `GET /api/v1/libs/:id/versions` Response `200`: + ```json { - "versions": [ - { - "id": "/facebook/react/v18.3.0", - "repositoryId": "/facebook/react", - "tag": "v18.3.0", - "title": "React v18.3.0", - "state": "indexed", - "totalSnippets": 892, - "indexedAt": "2026-03-22T10:00:00Z" - } - ] + "versions": [ + { + "id": "/facebook/react/v18.3.0", + "repositoryId": "/facebook/react", + "tag": "v18.3.0", + "title": "React v18.3.0", + "state": "indexed", + "totalSnippets": 892, + "indexedAt": "2026-03-22T10:00:00Z" + } + ] } ``` ### `POST /api/v1/libs/:id/versions` Request body: + ```json { - "tag": "v18.3.0", - "title": "React v18.3.0", - "autoIndex": true + "tag": "v18.3.0", + "title": "React v18.3.0", + "autoIndex": true } ``` Response `201`: + ```json { "version": { ...RepositoryVersion }, @@ 
-96,23 +99,22 @@ Response `202` with job details. ```typescript async function listGitHubTags( - owner: string, - repo: string, - token?: string + owner: string, + repo: string, + token?: string ): Promise> { - const headers: Record = { - 'Accept': 'application/vnd.github.v3+json', - 'User-Agent': 'TrueRef/1.0', - }; - if (token) headers['Authorization'] = `Bearer ${token}`; + const headers: Record = { + Accept: 'application/vnd.github.v3+json', + 'User-Agent': 'TrueRef/1.0' + }; + if (token) headers['Authorization'] = `Bearer ${token}`; - const response = await fetch( - `https://api.github.com/repos/${owner}/${repo}/tags?per_page=100`, - { headers } - ); + const response = await fetch(`https://api.github.com/repos/${owner}/${repo}/tags?per_page=100`, { + headers + }); - if (!response.ok) throw new GitHubApiError(response.status); - return response.json(); + if (!response.ok) throw new GitHubApiError(response.status); + return response.json(); } ``` @@ -124,28 +126,26 @@ In the search/context endpoints, the `libraryId` is parsed to extract the option ```typescript function resolveSearchTarget(libraryId: string): { - repositoryId: string; - versionId?: string; + repositoryId: string; + versionId?: string; } { - const { repositoryId, version } = parseLibraryId(libraryId); + const { repositoryId, version } = parseLibraryId(libraryId); - if (!version) { - // Query default branch: versionId = NULL - return { repositoryId }; - } + if (!version) { + // Query default branch: versionId = NULL + return { repositoryId }; + } - // Look up versionId from tag - const versionRecord = db.prepare( - `SELECT id FROM repository_versions WHERE repository_id = ? AND tag = ?` - ).get(repositoryId, version) as { id: string } | undefined; + // Look up versionId from tag + const versionRecord = db + .prepare(`SELECT id FROM repository_versions WHERE repository_id = ? 
AND tag = ?`) + .get(repositoryId, version) as { id: string } | undefined; - if (!versionRecord) { - throw new NotFoundError( - `Version "${version}" not found for library "${repositoryId}"` - ); - } + if (!versionRecord) { + throw new NotFoundError(`Version "${version}" not found for library "${repositoryId}"`); + } - return { repositoryId, versionId: versionRecord.id }; + return { repositoryId, versionId: versionRecord.id }; } ``` @@ -157,20 +157,20 @@ Snippets with `version_id IS NULL` belong to the default branch; snippets with a ```typescript export class VersionService { - constructor(private db: BetterSQLite3.Database) {} + constructor(private db: BetterSQLite3.Database) {} - list(repositoryId: string): RepositoryVersion[] + list(repositoryId: string): RepositoryVersion[]; - add(repositoryId: string, tag: string, title?: string): RepositoryVersion + add(repositoryId: string, tag: string, title?: string): RepositoryVersion; - remove(repositoryId: string, tag: string): void + remove(repositoryId: string, tag: string): void; - getByTag(repositoryId: string, tag: string): RepositoryVersion | null + getByTag(repositoryId: string, tag: string): RepositoryVersion | null; - registerFromConfig( - repositoryId: string, - previousVersions: { tag: string; title: string }[] - ): RepositoryVersion[] + registerFromConfig( + repositoryId: string, + previousVersions: { tag: string; title: string }[] + ): RepositoryVersion[]; } ``` diff --git a/docs/features/TRUEREF-0015.md b/docs/features/TRUEREF-0015.md index 0a88ce5..b20944e 100644 --- a/docs/features/TRUEREF-0015.md +++ b/docs/features/TRUEREF-0015.md @@ -49,79 +49,79 @@ Implement the main web interface for managing repositories. Built with SvelteKit ```svelte
-
-
-

{repo.title}

-

{repo.id}

-
- - {stateLabels[repo.state]} - -
+
+
+

{repo.title}

+

{repo.id}

+
+ + {stateLabels[repo.state]} + +
- {#if repo.description} -

{repo.description}

- {/if} + {#if repo.description} +

{repo.description}

+ {/if} -
- {repo.totalSnippets.toLocaleString()} snippets - · - Trust: {repo.trustScore.toFixed(1)}/10 - {#if repo.stars} - · - ★ {repo.stars.toLocaleString()} - {/if} -
+
+ {repo.totalSnippets.toLocaleString()} snippets + · + Trust: {repo.trustScore.toFixed(1)}/10 + {#if repo.stars} + · + ★ {repo.stars.toLocaleString()} + {/if} +
- {#if repo.state === 'error'} -

Indexing failed. Check jobs for details.

- {/if} + {#if repo.state === 'error'} +

Indexing failed. Check jobs for details.

+ {/if} -
- - - Details - - -
+
+ + + Details + + +
``` @@ -132,98 +132,104 @@ Implement the main web interface for managing repositories. Built with SvelteKit ```svelte - ``` @@ -234,48 +240,48 @@ Implement the main web interface for managing repositories. Built with SvelteKit ```svelte {#if job} -
-
- {job.processedFiles} / {job.totalFiles} files - {job.progress}% -
-
-
-
- {#if job.status === 'failed'} -

{job.error}

- {/if} -
+
+
+ {job.processedFiles} / {job.totalFiles} files + {job.progress}% +
+
+
+
+ {#if job.status === 'failed'} +

{job.error}

+ {/if} +
{/if} ``` @@ -288,9 +294,9 @@ Implement the main web interface for managing repositories. Built with SvelteKit import type { PageServerLoad } from './$types'; export const load: PageServerLoad = async ({ fetch }) => { - const res = await fetch('/api/v1/libs'); - const data = await res.json(); - return { repositories: data.libraries }; + const res = await fetch('/api/v1/libs'); + const data = await res.json(); + return { repositories: data.libraries }; }; ``` diff --git a/docs/features/TRUEREF-0016.md b/docs/features/TRUEREF-0016.md index 5828e9a..8150c9a 100644 --- a/docs/features/TRUEREF-0016.md +++ b/docs/features/TRUEREF-0016.md @@ -57,25 +57,31 @@ An interactive search interface within the web UI that lets users test the docum ```svelte ``` @@ -86,37 +92,39 @@ An interactive search interface within the web UI that lets users test the docum ```svelte -
-
-
- {#if snippet.type === 'code'} - code - {:else} - info - {/if} - {#if snippet.title} - {snippet.title} - {/if} -
- {snippet.tokenCount} tokens -
+
+
+
+ {#if snippet.type === 'code'} + code + {:else} + info + {/if} + {#if snippet.title} + {snippet.title} + {/if} +
+ {snippet.tokenCount} tokens +
- {#if snippet.breadcrumb} -

{snippet.breadcrumb}

- {/if} + {#if snippet.breadcrumb} +

{snippet.breadcrumb}

+ {/if} -
- {#if snippet.type === 'code'} -
{snippet.content}
- {:else} -
{snippet.content}
- {/if} -
+
+ {#if snippet.type === 'code'} +
{snippet.content}
+ {:else} +
{snippet.content}
+ {/if} +
``` @@ -127,44 +135,44 @@ An interactive search interface within the web UI that lets users test the docum ```svelte ``` @@ -177,9 +185,9 @@ Use a minimal, zero-dependency approach for v1 — wrap code blocks in `
 {
-  const hljs = await import('highlight.js/lib/core');
-  // Register only needed languages
-  return hljs.highlight(code, { language }).value;
+	const hljs = await import('highlight.js/lib/core');
+	// Register only needed languages
+	return hljs.highlight(code, { language }).value;
 }
 ```
 
diff --git a/docs/features/TRUEREF-0017.md b/docs/features/TRUEREF-0017.md
index 6055c18..371e2e8 100644
--- a/docs/features/TRUEREF-0017.md
+++ b/docs/features/TRUEREF-0017.md
@@ -30,39 +30,39 @@ Optimize re-indexing by skipping files that haven't changed since the last index
 
 ```typescript
 interface FileDiff {
-  added: CrawledFile[];      // new files not in DB
-  modified: CrawledFile[];   // files with changed checksum
-  deleted: string[];         // file paths in DB but not in crawl
-  unchanged: string[];       // file paths with matching checksum
+	added: CrawledFile[]; // new files not in DB
+	modified: CrawledFile[]; // files with changed checksum
+	deleted: string[]; // file paths in DB but not in crawl
+	unchanged: string[]; // file paths with matching checksum
 }
 
 function computeDiff(
-  crawledFiles: CrawledFile[],
-  existingDocs: Document[]   // documents currently in DB for this repo
+	crawledFiles: CrawledFile[],
+	existingDocs: Document[] // documents currently in DB for this repo
 ): FileDiff {
-  const existingMap = new Map(existingDocs.map(d => [d.filePath, d]));
-  const crawledMap = new Map(crawledFiles.map(f => [f.path, f]));
+	const existingMap = new Map(existingDocs.map((d) => [d.filePath, d]));
+	const crawledMap = new Map(crawledFiles.map((f) => [f.path, f]));
 
-  const added: CrawledFile[] = [];
-  const modified: CrawledFile[] = [];
-  const unchanged: string[] = [];
+	const added: CrawledFile[] = [];
+	const modified: CrawledFile[] = [];
+	const unchanged: string[] = [];
 
-  for (const file of crawledFiles) {
-    const existing = existingMap.get(file.path);
-    if (!existing) {
-      added.push(file);
-    } else if (existing.checksum !== file.sha) {
-      modified.push(file);
-    } else {
-      unchanged.push(file.path);
-    }
-  }
+	for (const file of crawledFiles) {
+		const existing = existingMap.get(file.path);
+		if (!existing) {
+			added.push(file);
+		} else if (existing.checksum !== file.sha) {
+			modified.push(file);
+		} else {
+			unchanged.push(file.path);
+		}
+	}
 
-  const deleted = existingDocs
-    .filter(doc => !crawledMap.has(doc.filePath))
-    .map(doc => doc.filePath);
+	const deleted = existingDocs
+		.filter((doc) => !crawledMap.has(doc.filePath))
+		.map((doc) => doc.filePath);
 
-  return { added, modified, deleted, unchanged };
+	return { added, modified, deleted, unchanged };
 }
 ```
 
@@ -78,7 +78,7 @@ const diff = computeDiff(crawledResult.files, existingDocs);
 
 // Log diff summary
 this.updateJob(job.id, {
-  totalFiles: crawledResult.files.length,
+	totalFiles: crawledResult.files.length
 });
 
 // Process only changed/new files
@@ -89,29 +89,29 @@ const docIdsToDelete: string[] = [];
 
 // Map modified files to their existing document IDs for deletion
 for (const file of diff.modified) {
-  const existing = existingDocs.find(d => d.filePath === file.path);
-  if (existing) docIdsToDelete.push(existing.id);
+	const existing = existingDocs.find((d) => d.filePath === file.path);
+	if (existing) docIdsToDelete.push(existing.id);
 }
 
 // Map deleted file paths to document IDs
 for (const filePath of diff.deleted) {
-  const existing = existingDocs.find(d => d.filePath === filePath);
-  if (existing) docIdsToDelete.push(existing.id);
+	const existing = existingDocs.find((d) => d.filePath === filePath);
+	if (existing) docIdsToDelete.push(existing.id);
 }
 
 // Parse new/modified files
 for (const [i, file] of filesToProcess.entries()) {
-  const docId = crypto.randomUUID();
-  newDocuments.push({ id: docId, ...buildDocument(file, repo.id, job.versionId) });
-  newSnippets.push(...parseFile(file, { repositoryId: repo.id, documentId: docId }));
+	const docId = crypto.randomUUID();
+	newDocuments.push({ id: docId, ...buildDocument(file, repo.id, job.versionId) });
+	newSnippets.push(...parseFile(file, { repositoryId: repo.id, documentId: docId }));
 
-  // Count ALL files (including skipped) in progress
-  const totalProcessed = diff.unchanged.length + i + 1;
-  const progress = Math.round((totalProcessed / crawledResult.files.length) * 80);
-  this.updateJob(job.id, {
-    processedFiles: totalProcessed,
-    progress,
-  });
+	// Count ALL files (including skipped) in progress
+	const totalProcessed = diff.unchanged.length + i + 1;
+	const progress = Math.round((totalProcessed / crawledResult.files.length) * 80);
+	this.updateJob(job.id, {
+		processedFiles: totalProcessed,
+		progress
+	});
 }
 
 // Atomic replacement of only changed documents
@@ -123,6 +123,7 @@ this.replaceSnippets(repo.id, docIdsToDelete, newDocuments, newSnippets);
 ## Performance Impact
 
 For a typical repository with 1,000 files where 50 changed:
+
 - **Without incremental**: 1,000 files parsed + 1,000 embed batches
 - **With incremental**: 50 files parsed + 50 embed batches
 - Estimated speedup: ~20x for re-indexing
diff --git a/docs/features/TRUEREF-0018.md b/docs/features/TRUEREF-0018.md
index f370c2c..84943c1 100644
--- a/docs/features/TRUEREF-0018.md
+++ b/docs/features/TRUEREF-0018.md
@@ -32,24 +32,24 @@ A settings page within the web UI that allows users to configure the embedding p
 
 ```typescript
 const PROVIDER_PRESETS = [
-  {
-    name: 'OpenAI',
-    baseUrl: 'https://api.openai.com/v1',
-    model: 'text-embedding-3-small',
-    dimensions: 1536,
-  },
-  {
-    name: 'Ollama (local)',
-    baseUrl: 'http://localhost:11434/v1',
-    model: 'nomic-embed-text',
-    dimensions: 768,
-  },
-  {
-    name: 'Azure OpenAI',
-    baseUrl: 'https://{resource}.openai.azure.com/openai/deployments/{deployment}/v1',
-    model: 'text-embedding-3-small',
-    dimensions: 1536,
-  },
+	{
+		name: 'OpenAI',
+		baseUrl: 'https://api.openai.com/v1',
+		model: 'text-embedding-3-small',
+		dimensions: 1536
+	},
+	{
+		name: 'Ollama (local)',
+		baseUrl: 'http://localhost:11434/v1',
+		model: 'nomic-embed-text',
+		dimensions: 768
+	},
+	{
+		name: 'Azure OpenAI',
+		baseUrl: 'https://{resource}.openai.azure.com/openai/deployments/{deployment}/v1',
+		model: 'text-embedding-3-small',
+		dimensions: 1536
+	}
 ];
 ```
 
@@ -60,133 +60,157 @@ const PROVIDER_PRESETS = [
 ```svelte
 
 
 
 
-

Settings

+

Settings

-
-

Embedding Provider

-

- Embeddings enable semantic search. Without them, only keyword search (FTS5) is used. -

+
+

Embedding Provider

+

+ Embeddings enable semantic search. Without them, only keyword search (FTS5) is used. +

-
- {#each ['none', 'openai', 'local'] as p} - - {/each} -
+
+ {#each ['none', 'openai', 'local'] as p} + + {/each} +
- {#if provider === 'none'} -
- Search will use keyword matching only. Results may be less relevant for complex questions. -
- {/if} + {#if provider === 'none'} +
+ Search will use keyword matching only. Results may be less relevant for complex questions. +
+ {/if} - {#if provider === 'openai'} -
- -
- {#each PROVIDER_PRESETS as preset} - - {/each} -
+ {#if provider === 'openai'} +
+ +
+ {#each PROVIDER_PRESETS as preset} + + {/each} +
- + - + - + - + -
- - {#if testStatus === 'ok'} - ✓ Connection successful - {:else if testStatus === 'error'} - ✗ {testError} - {/if} -
-
- {/if} +
+ + {#if testStatus === 'ok'} + ✓ Connection successful + {:else if testStatus === 'error'} + ✗ {testError} + {/if} +
+
+ {/if} -
- -
-
+
+ +
+
``` diff --git a/docs/features/TRUEREF-0019.md b/docs/features/TRUEREF-0019.md index 94193f4..8c7769a 100644 --- a/docs/features/TRUEREF-0019.md +++ b/docs/features/TRUEREF-0019.md @@ -80,6 +80,7 @@ git -C /path/to/repo archive | tar -x -C /tmp/trueref-idx/-< ``` Advantages over `git checkout` or worktrees: + - Working directory is completely untouched - No `.git` directory in the output (cleaner for parsing) - Temp directory deleted after indexing with no git state to clean up @@ -102,26 +103,26 @@ Allow commit hashes to be pinned explicitly per version, overriding tag resoluti ```json { - "previousVersions": [ - { - "tag": "v2.0.0", - "title": "Version 2.0.0", - "commitHash": "a3f9c12abc..." - } - ] + "previousVersions": [ + { + "tag": "v2.0.0", + "title": "Version 2.0.0", + "commitHash": "a3f9c12abc..." + } + ] } ``` ### Edge Cases -| Case | Handling | -|------|----------| -| Annotated tags | `rev-parse ^{commit}` peels to commit automatically | -| Mutable tags (e.g. `latest`) | Re-resolve on re-index; warn in UI if hash has changed | -| Branch as version | `rev-parse origin/^{commit}` gives tip; re-resolves on re-index | -| Shallow clone | Run `git fetch --unshallow` before `git archive` if commit is unavailable | -| Submodules | `git archive --recurse-submodules` or document as a known limitation | -| Git LFS | `git lfs pull` required after archive if LFS-tracked files are needed for indexing | +| Case | Handling | +| ---------------------------- | ---------------------------------------------------------------------------------- | +| Annotated tags | `rev-parse ^{commit}` peels to commit automatically | +| Mutable tags (e.g. 
`latest`) | Re-resolve on re-index; warn in UI if hash has changed | +| Branch as version | `rev-parse origin/^{commit}` gives tip; re-resolves on re-index | +| Shallow clone | Run `git fetch --unshallow` before `git archive` if commit is unavailable | +| Submodules | `git archive --recurse-submodules` or document as a known limitation | +| Git LFS | `git lfs pull` required after archive if LFS-tracked files are needed for indexing | ### Acceptance Criteria @@ -169,12 +170,12 @@ fi Username conventions by server type: -| Server | HTTPS username | Password | -|--------|---------------|----------| -| Bitbucket Server / Data Center | `x-token-auth` | HTTP access token | -| Bitbucket Cloud | account username | App password | -| GitLab (self-hosted or cloud) | `oauth2` | Personal access token | -| GitLab deploy token | `gitlab-deploy-token` | Deploy token secret | +| Server | HTTPS username | Password | +| ------------------------------ | --------------------- | --------------------- | +| Bitbucket Server / Data Center | `x-token-auth` | HTTP access token | +| Bitbucket Cloud | account username | App password | +| GitLab (self-hosted or cloud) | `oauth2` | Personal access token | +| GitLab deploy token | `gitlab-deploy-token` | Deploy token secret | SSH authentication is also supported and preferred for long-lived deployments. The host SSH configuration (`~/.ssh/config`) handles per-host key selection and travels into the container via volume mount. @@ -220,7 +221,7 @@ services: web: build: . 
ports: - - "3000:3000" + - '3000:3000' volumes: - trueref-data:/data - ${USERPROFILE}/.ssh:/root/.ssh:ro @@ -228,20 +229,20 @@ services: - ${CORP_CA_CERT}:/certs/corp-ca.crt:ro environment: DATABASE_URL: /data/trueref.db - GIT_TOKEN_BITBUCKET: "${BITBUCKET_TOKEN}" - GIT_TOKEN_GITLAB: "${GITLAB_TOKEN}" - BITBUCKET_HOST: "${BITBUCKET_HOST}" - GITLAB_HOST: "${GITLAB_HOST}" + GIT_TOKEN_BITBUCKET: '${BITBUCKET_TOKEN}' + GIT_TOKEN_GITLAB: '${GITLAB_TOKEN}' + BITBUCKET_HOST: '${BITBUCKET_HOST}' + GITLAB_HOST: '${GITLAB_HOST}' restart: unless-stopped mcp: build: . command: mcp ports: - - "3001:3001" + - '3001:3001' environment: TRUEREF_API_URL: http://web:3000 - MCP_PORT: "3001" + MCP_PORT: '3001' depends_on: - web restart: unless-stopped diff --git a/docs/features/TRUEREF-0020.md b/docs/features/TRUEREF-0020.md index 6b3d996..9162494 100644 --- a/docs/features/TRUEREF-0020.md +++ b/docs/features/TRUEREF-0020.md @@ -107,16 +107,16 @@ An embedding profile is persisted configuration selecting one provider adapter p ```typescript interface EmbeddingProfile { - id: string; - providerKind: string; - title: string; - enabled: boolean; - isDefault: boolean; - config: Record; - model: string; - dimensions: number; - createdAt: number; - updatedAt: number; + id: string; + providerKind: string; + title: string; + enabled: boolean; + isDefault: boolean; + config: Record; + model: string; + dimensions: number; + createdAt: number; + updatedAt: number; } ``` @@ -317,4 +317,4 @@ Indexing must embed snippets against the default active profile, and profile cha ## Notes -This ticket intentionally leaves `libs/search` as keyword-only. The caller is expected to identify the target library and, when needed, pass a version-qualified library ID such as `/owner/repo/v1.2.3` before requesting semantic retrieval. \ No newline at end of file +This ticket intentionally leaves `libs/search` as keyword-only. 
The caller is expected to identify the target library and, when needed, pass a version-qualified library ID such as `/owner/repo/v1.2.3` before requesting semantic retrieval. diff --git a/src/hooks.server.ts b/src/hooks.server.ts index 2a783ef..56fb88d 100644 --- a/src/hooks.server.ts +++ b/src/hooks.server.ts @@ -9,7 +9,11 @@ import { initializeDatabase } from '$lib/server/db/index.js'; import { getClient } from '$lib/server/db/client.js'; import { initializePipeline } from '$lib/server/pipeline/startup.js'; -import { EMBEDDING_CONFIG_KEY, createProviderFromConfig, defaultEmbeddingConfig } from '$lib/server/embeddings/factory.js'; +import { + EMBEDDING_CONFIG_KEY, + createProviderFromConfig, + defaultEmbeddingConfig +} from '$lib/server/embeddings/factory.js'; import { EmbeddingService } from '$lib/server/embeddings/embedding.service.js'; import type { EmbeddingConfig } from '$lib/server/embeddings/factory.js'; import type { Handle } from '@sveltejs/kit'; diff --git a/src/lib/components/AddRepositoryModal.svelte b/src/lib/components/AddRepositoryModal.svelte index ca9fa9b..8ce3feb 100644 --- a/src/lib/components/AddRepositoryModal.svelte +++ b/src/lib/components/AddRepositoryModal.svelte @@ -115,7 +115,12 @@ /> {:else}
- { if (!title) title = p.split('/').at(-1) ?? ''; }} /> + { + if (!title) title = p.split('/').at(-1) ?? ''; + }} + />
{/if}
@@ -133,7 +138,8 @@ {#if source === 'github'}