From fef6f669303d56b9170226074b7787e69c93200c Mon Sep 17 00:00:00 2001 From: Giancarmine Salucci Date: Wed, 25 Mar 2026 19:03:22 +0100 Subject: [PATCH] wip(TRUEREF-0018): commit version-scoped indexing work --- .env.example | 20 + .github/agents | 1 + .github/schemas | 1 + .github/skills | 1 + Dockerfile | 3 + README.md | 90 +++ docker-compose.yml | 11 + docker-entrypoint.sh | 45 ++ src/lib/server/api/formatters.test.ts | 1 + .../db/migrations/0001_quick_nighthawk.sql | 1 + .../db/migrations/meta/0001_snapshot.json | 746 ++++++++++++++++++ .../server/db/migrations/meta/_journal.json | 7 + src/lib/server/db/schema.ts | 3 +- .../mappers/repository-version.mapper.ts | 2 + src/lib/server/models/repository-version.ts | 9 + src/lib/server/search/search.service.test.ts | 3 +- .../server/services/version.service.test.ts | 23 +- src/lib/server/services/version.service.ts | 75 +- src/lib/server/utils/git.ts | 163 ++++ src/lib/types.ts | 2 +- .../api/v1/api-contract.integration.test.ts | 20 +- 21 files changed, 1208 insertions(+), 19 deletions(-) create mode 120000 .github/agents create mode 120000 .github/schemas create mode 120000 .github/skills create mode 100644 src/lib/server/db/migrations/0001_quick_nighthawk.sql create mode 100644 src/lib/server/db/migrations/meta/0001_snapshot.json create mode 100644 src/lib/server/utils/git.ts diff --git a/.env.example b/.env.example index b86d245..d48f85d 100644 --- a/.env.example +++ b/.env.example @@ -1,2 +1,22 @@ # Drizzle DATABASE_URL=local.db + +# --------------------------------------------------------------------------- +# Corporate Deployment (TRUEREF-0019) +# --------------------------------------------------------------------------- +# These variables enable TrueRef to work with private git repositories +# hosted on corporate Bitbucket Server/Data Center or self-hosted GitLab. 
+ +# Corporate CA certificate (PEM or DER format, auto-detected) +# Export from Windows: certmgr.msc → Trusted Root → Export with Base64 +# CORP_CA_CERT=C:/path/to/corp-ca.crt + +# Git remote hostnames (without https://) +# BITBUCKET_HOST=bitbucket.corp.example.com +# GITLAB_HOST=gitlab.corp.example.com + +# Personal access tokens (NEVER commit these to version control) +# Bitbucket Server: Settings → HTTP access tokens +# GitLab: User Settings → Access Tokens → read_repository scope +# GIT_TOKEN_BITBUCKET= +# GIT_TOKEN_GITLAB= diff --git a/.github/agents b/.github/agents new file mode 120000 index 0000000..e9ce9b9 --- /dev/null +++ b/.github/agents @@ -0,0 +1 @@ +/home/moze/Sources/copilot-agents/.github/agents \ No newline at end of file diff --git a/.github/schemas b/.github/schemas new file mode 120000 index 0000000..f200112 --- /dev/null +++ b/.github/schemas @@ -0,0 +1 @@ +/home/moze/Sources/copilot-agents/.github/schemas \ No newline at end of file diff --git a/.github/skills b/.github/skills new file mode 120000 index 0000000..74398ae --- /dev/null +++ b/.github/skills @@ -0,0 +1 @@ +/home/moze/Sources/copilot-agents/.github/skills \ No newline at end of file diff --git a/Dockerfile b/Dockerfile index 4ce7725..7cf8215 100644 --- a/Dockerfile +++ b/Dockerfile @@ -12,6 +12,9 @@ RUN npm run build FROM node:20-alpine WORKDIR /app +# Install openssl for corporate CA certificate handling (TRUEREF-0019) +RUN apk add --no-cache openssl git + # Install all deps (includes tsx for the MCP server and drizzle-kit for migrations) COPY package*.json ./ RUN npm ci diff --git a/README.md b/README.md index d2336a8..0ab22fc 100644 --- a/README.md +++ b/README.md @@ -645,6 +645,96 @@ This builds the image and starts two services: The SQLite database is stored in a named Docker volume (`trueref-data`) and persists across restarts. 
+### Corporate deployment + +TrueRef supports deployment in corporate environments with private git repositories hosted on Bitbucket Server/Data Center and self-hosted GitLab instances (TRUEREF-0019). + +#### Required setup + +1. **Export your corporate CA certificate** (Windows): + - Open `certmgr.msc` → Trusted Root Certification Authorities + - Right-click your corporate CA → All Tasks → Export + - Choose Base64-encoded X.509 (.CER) format + - Save to a known location (e.g., `C:\certs\corp-ca.crt`) + +2. **Generate personal access tokens**: + - Bitbucket Server: Settings → HTTP access tokens (requires `REPO_READ` permission) + - GitLab: User Settings → Access Tokens (requires `read_repository` scope) + +3. **Update `.env` file**: + +```env +# Corporate CA certificate path (PEM or DER — auto-detected) +CORP_CA_CERT=C:/path/to/corp-ca.crt + +# Git remote hostnames (without https://) +BITBUCKET_HOST=bitbucket.corp.example.com +GITLAB_HOST=gitlab.corp.example.com + +# Personal access tokens (NEVER commit these) +GIT_TOKEN_BITBUCKET=your-bitbucket-token-here +GIT_TOKEN_GITLAB=your-gitlab-token-here +``` + +4. **Uncomment volume mounts in `docker-compose.yml`**: + +```yaml +services: + web: + volumes: + - trueref-data:/data + - ${USERPROFILE:-$HOME}/.ssh:/root/.ssh:ro + - ${USERPROFILE:-$HOME}/.gitconfig:/root/.gitconfig:ro + - ${CORP_CA_CERT}:/certs/corp-ca.crt:ro + environment: + BITBUCKET_HOST: "${BITBUCKET_HOST}" + GITLAB_HOST: "${GITLAB_HOST}" + GIT_TOKEN_BITBUCKET: "${GIT_TOKEN_BITBUCKET}" + GIT_TOKEN_GITLAB: "${GIT_TOKEN_GITLAB}" +``` + +5. **Start the services**: + +```sh +docker compose up --build +``` + +#### How it works + +The Docker entrypoint script (`docker-entrypoint.sh`) runs these steps in order: + +1. **Trust corporate CA**: Detects PEM/DER format and installs the certificate at the OS level so git, curl, and Node.js fetch all trust it automatically. +2. 
**Fix SSH key permissions**: Corrects world-readable permissions from Windows NTFS mounts so SSH works properly. +3. **Configure git credentials**: Sets up per-host credential helpers that provide the correct username and token for each remote. + +This setup works for: +- HTTPS cloning with personal access tokens +- SSH cloning with mounted SSH keys +- On-premise servers with custom CA certificates +- Mixed environments (multiple git remotes with different credentials) + +#### SSH authentication (alternative to HTTPS) + +For long-lived deployments, SSH authentication is recommended: + +1. Generate an SSH key pair if you don't have one: + ```sh + ssh-keygen -t ed25519 -C "trueref@your-company.com" + ``` + +2. Add the public key to your git hosting service: + - Bitbucket: Settings → SSH keys + - GitLab: User Settings → SSH Keys + +3. Ensure your `~/.ssh/config` has the correct host entries: + ``` + Host bitbucket.corp.example.com + IdentityFile ~/.ssh/id_ed25519 + User git + ``` + +4. Once the volume mounts in `docker-compose.yml` are uncommented (step 4 of the required setup), `~/.ssh` is mounted read-only — no additional changes needed. 
+ ### Environment variables | Variable | Default | Description | diff --git a/docker-compose.yml b/docker-compose.yml index 6841621..7e88f73 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -5,8 +5,19 @@ services: - "3000:3000" volumes: - trueref-data:/data + # Corporate deployment support (TRUEREF-0019) + # Uncomment these lines for corporate environments: + # - ${USERPROFILE:-$HOME}/.ssh:/root/.ssh:ro + # - ${USERPROFILE:-$HOME}/.gitconfig:/root/.gitconfig:ro + # - ${CORP_CA_CERT}:/certs/corp-ca.crt:ro environment: DATABASE_URL: /data/trueref.db + # Corporate git credentials (TRUEREF-0019) + # Uncomment and configure in .env for private repositories: + # BITBUCKET_HOST: "${BITBUCKET_HOST}" + # GITLAB_HOST: "${GITLAB_HOST}" + # GIT_TOKEN_BITBUCKET: "${GIT_TOKEN_BITBUCKET}" + # GIT_TOKEN_GITLAB: "${GIT_TOKEN_GITLAB}" restart: unless-stopped mcp: diff --git a/docker-entrypoint.sh b/docker-entrypoint.sh index 8e0725c..f156dc3 100644 --- a/docker-entrypoint.sh +++ b/docker-entrypoint.sh @@ -1,6 +1,51 @@ #!/bin/sh set -e +# --------------------------------------------------------------------------- +# 1. Trust corporate CA — must run first +# --------------------------------------------------------------------------- +if [ -f /certs/corp-ca.crt ]; then + echo "[docker-entrypoint] Installing corporate CA certificate..." + if openssl x509 -inform PEM -in /certs/corp-ca.crt -noout 2>/dev/null; then + # PEM format — copy directly + cp /certs/corp-ca.crt /usr/local/share/ca-certificates/corp-ca.crt + else + # DER format — convert to PEM + openssl x509 -inform DER -in /certs/corp-ca.crt \ + -out /usr/local/share/ca-certificates/corp-ca.crt + fi + update-ca-certificates 2>/dev/null +fi + +# --------------------------------------------------------------------------- +# 2. 
Fix SSH key permissions (Windows mounts arrive world-readable; best-effort on read-only mounts) +# --------------------------------------------------------------------------- +if [ -d /root/.ssh ]; then + echo "[docker-entrypoint] Fixing SSH key permissions..." + chmod 700 /root/.ssh 2>/dev/null || echo "[docker-entrypoint] WARNING: could not chmod /root/.ssh (read-only mount?)" + chmod 600 /root/.ssh/* 2>/dev/null || true +fi + +# --------------------------------------------------------------------------- +# 3. Per-host HTTPS credential helpers +# --------------------------------------------------------------------------- +if [ -n "$GIT_TOKEN_BITBUCKET" ] && [ -n "$BITBUCKET_HOST" ]; then + echo "[docker-entrypoint] Configuring Bitbucket credential helper for ${BITBUCKET_HOST}..." + git config --global \ + "credential.https://${BITBUCKET_HOST}.helper" \ + "!f() { echo username=x-token-auth; echo password=\$GIT_TOKEN_BITBUCKET; }; f" +fi + +if [ -n "$GIT_TOKEN_GITLAB" ] && [ -n "$GITLAB_HOST" ]; then + echo "[docker-entrypoint] Configuring GitLab credential helper for ${GITLAB_HOST}..." + git config --global \ + "credential.https://${GITLAB_HOST}.helper" \ + "!f() { echo username=oauth2; echo password=\$GIT_TOKEN_GITLAB; }; f" +fi + +# --------------------------------------------------------------------------- +# 4. Start requested service +# --------------------------------------------------------------------------- case "${1:-web}" in web) echo "Running database migrations..." 
diff --git a/src/lib/server/api/formatters.test.ts b/src/lib/server/api/formatters.test.ts index a74993f..ad0c9c2 100644 --- a/src/lib/server/api/formatters.test.ts +++ b/src/lib/server/api/formatters.test.ts @@ -49,6 +49,7 @@ function makeVersion(tag: string): RepositoryVersion { repositoryId: '/facebook/react', tag, title: null, + commitHash: null, state: 'indexed', totalSnippets: 100, indexedAt: new Date(), diff --git a/src/lib/server/db/migrations/0001_quick_nighthawk.sql b/src/lib/server/db/migrations/0001_quick_nighthawk.sql new file mode 100644 index 0000000..dbec981 --- /dev/null +++ b/src/lib/server/db/migrations/0001_quick_nighthawk.sql @@ -0,0 +1 @@ +ALTER TABLE `repository_versions` ADD `commit_hash` text; \ No newline at end of file diff --git a/src/lib/server/db/migrations/meta/0001_snapshot.json b/src/lib/server/db/migrations/meta/0001_snapshot.json new file mode 100644 index 0000000..a2035ab --- /dev/null +++ b/src/lib/server/db/migrations/meta/0001_snapshot.json @@ -0,0 +1,746 @@ +{ + "version": "6", + "dialect": "sqlite", + "id": "60c9a1b5-449f-45fd-9b2d-1ab4cca78ab6", + "prevId": "9dec55ea-0c03-4c98-99a6-dd143b336791", + "tables": { + "documents": { + "name": "documents", + "columns": { + "id": { + "name": "id", + "type": "text", + "primaryKey": true, + "notNull": true, + "autoincrement": false + }, + "repository_id": { + "name": "repository_id", + "type": "text", + "primaryKey": false, + "notNull": true, + "autoincrement": false + }, + "version_id": { + "name": "version_id", + "type": "text", + "primaryKey": false, + "notNull": false, + "autoincrement": false + }, + "file_path": { + "name": "file_path", + "type": "text", + "primaryKey": false, + "notNull": true, + "autoincrement": false + }, + "title": { + "name": "title", + "type": "text", + "primaryKey": false, + "notNull": false, + "autoincrement": false + }, + "language": { + "name": "language", + "type": "text", + "primaryKey": false, + "notNull": false, + "autoincrement": false + }, + 
"token_count": { + "name": "token_count", + "type": "integer", + "primaryKey": false, + "notNull": false, + "autoincrement": false, + "default": 0 + }, + "checksum": { + "name": "checksum", + "type": "text", + "primaryKey": false, + "notNull": true, + "autoincrement": false + }, + "indexed_at": { + "name": "indexed_at", + "type": "integer", + "primaryKey": false, + "notNull": true, + "autoincrement": false + } + }, + "indexes": {}, + "foreignKeys": { + "documents_repository_id_repositories_id_fk": { + "name": "documents_repository_id_repositories_id_fk", + "tableFrom": "documents", + "tableTo": "repositories", + "columnsFrom": [ + "repository_id" + ], + "columnsTo": [ + "id" + ], + "onDelete": "cascade", + "onUpdate": "no action" + }, + "documents_version_id_repository_versions_id_fk": { + "name": "documents_version_id_repository_versions_id_fk", + "tableFrom": "documents", + "tableTo": "repository_versions", + "columnsFrom": [ + "version_id" + ], + "columnsTo": [ + "id" + ], + "onDelete": "cascade", + "onUpdate": "no action" + } + }, + "compositePrimaryKeys": {}, + "uniqueConstraints": {}, + "checkConstraints": {} + }, + "indexing_jobs": { + "name": "indexing_jobs", + "columns": { + "id": { + "name": "id", + "type": "text", + "primaryKey": true, + "notNull": true, + "autoincrement": false + }, + "repository_id": { + "name": "repository_id", + "type": "text", + "primaryKey": false, + "notNull": true, + "autoincrement": false + }, + "version_id": { + "name": "version_id", + "type": "text", + "primaryKey": false, + "notNull": false, + "autoincrement": false + }, + "status": { + "name": "status", + "type": "text", + "primaryKey": false, + "notNull": true, + "autoincrement": false, + "default": "'queued'" + }, + "progress": { + "name": "progress", + "type": "integer", + "primaryKey": false, + "notNull": false, + "autoincrement": false, + "default": 0 + }, + "total_files": { + "name": "total_files", + "type": "integer", + "primaryKey": false, + "notNull": false, + 
"autoincrement": false, + "default": 0 + }, + "processed_files": { + "name": "processed_files", + "type": "integer", + "primaryKey": false, + "notNull": false, + "autoincrement": false, + "default": 0 + }, + "error": { + "name": "error", + "type": "text", + "primaryKey": false, + "notNull": false, + "autoincrement": false + }, + "started_at": { + "name": "started_at", + "type": "integer", + "primaryKey": false, + "notNull": false, + "autoincrement": false + }, + "completed_at": { + "name": "completed_at", + "type": "integer", + "primaryKey": false, + "notNull": false, + "autoincrement": false + }, + "created_at": { + "name": "created_at", + "type": "integer", + "primaryKey": false, + "notNull": true, + "autoincrement": false + } + }, + "indexes": {}, + "foreignKeys": { + "indexing_jobs_repository_id_repositories_id_fk": { + "name": "indexing_jobs_repository_id_repositories_id_fk", + "tableFrom": "indexing_jobs", + "tableTo": "repositories", + "columnsFrom": [ + "repository_id" + ], + "columnsTo": [ + "id" + ], + "onDelete": "cascade", + "onUpdate": "no action" + } + }, + "compositePrimaryKeys": {}, + "uniqueConstraints": {}, + "checkConstraints": {} + }, + "repositories": { + "name": "repositories", + "columns": { + "id": { + "name": "id", + "type": "text", + "primaryKey": true, + "notNull": true, + "autoincrement": false + }, + "title": { + "name": "title", + "type": "text", + "primaryKey": false, + "notNull": true, + "autoincrement": false + }, + "description": { + "name": "description", + "type": "text", + "primaryKey": false, + "notNull": false, + "autoincrement": false + }, + "source": { + "name": "source", + "type": "text", + "primaryKey": false, + "notNull": true, + "autoincrement": false + }, + "source_url": { + "name": "source_url", + "type": "text", + "primaryKey": false, + "notNull": true, + "autoincrement": false + }, + "branch": { + "name": "branch", + "type": "text", + "primaryKey": false, + "notNull": false, + "autoincrement": false, + "default": 
"'main'" + }, + "state": { + "name": "state", + "type": "text", + "primaryKey": false, + "notNull": true, + "autoincrement": false, + "default": "'pending'" + }, + "total_snippets": { + "name": "total_snippets", + "type": "integer", + "primaryKey": false, + "notNull": false, + "autoincrement": false, + "default": 0 + }, + "total_tokens": { + "name": "total_tokens", + "type": "integer", + "primaryKey": false, + "notNull": false, + "autoincrement": false, + "default": 0 + }, + "trust_score": { + "name": "trust_score", + "type": "real", + "primaryKey": false, + "notNull": false, + "autoincrement": false, + "default": 0 + }, + "benchmark_score": { + "name": "benchmark_score", + "type": "real", + "primaryKey": false, + "notNull": false, + "autoincrement": false, + "default": 0 + }, + "stars": { + "name": "stars", + "type": "integer", + "primaryKey": false, + "notNull": false, + "autoincrement": false + }, + "github_token": { + "name": "github_token", + "type": "text", + "primaryKey": false, + "notNull": false, + "autoincrement": false + }, + "last_indexed_at": { + "name": "last_indexed_at", + "type": "integer", + "primaryKey": false, + "notNull": false, + "autoincrement": false + }, + "created_at": { + "name": "created_at", + "type": "integer", + "primaryKey": false, + "notNull": true, + "autoincrement": false + }, + "updated_at": { + "name": "updated_at", + "type": "integer", + "primaryKey": false, + "notNull": true, + "autoincrement": false + } + }, + "indexes": {}, + "foreignKeys": {}, + "compositePrimaryKeys": {}, + "uniqueConstraints": {}, + "checkConstraints": {} + }, + "repository_configs": { + "name": "repository_configs", + "columns": { + "repository_id": { + "name": "repository_id", + "type": "text", + "primaryKey": true, + "notNull": true, + "autoincrement": false + }, + "project_title": { + "name": "project_title", + "type": "text", + "primaryKey": false, + "notNull": false, + "autoincrement": false + }, + "description": { + "name": "description", + "type": 
"text", + "primaryKey": false, + "notNull": false, + "autoincrement": false + }, + "folders": { + "name": "folders", + "type": "text", + "primaryKey": false, + "notNull": false, + "autoincrement": false + }, + "exclude_folders": { + "name": "exclude_folders", + "type": "text", + "primaryKey": false, + "notNull": false, + "autoincrement": false + }, + "exclude_files": { + "name": "exclude_files", + "type": "text", + "primaryKey": false, + "notNull": false, + "autoincrement": false + }, + "rules": { + "name": "rules", + "type": "text", + "primaryKey": false, + "notNull": false, + "autoincrement": false + }, + "previous_versions": { + "name": "previous_versions", + "type": "text", + "primaryKey": false, + "notNull": false, + "autoincrement": false + }, + "updated_at": { + "name": "updated_at", + "type": "integer", + "primaryKey": false, + "notNull": true, + "autoincrement": false + } + }, + "indexes": {}, + "foreignKeys": { + "repository_configs_repository_id_repositories_id_fk": { + "name": "repository_configs_repository_id_repositories_id_fk", + "tableFrom": "repository_configs", + "tableTo": "repositories", + "columnsFrom": [ + "repository_id" + ], + "columnsTo": [ + "id" + ], + "onDelete": "cascade", + "onUpdate": "no action" + } + }, + "compositePrimaryKeys": {}, + "uniqueConstraints": {}, + "checkConstraints": {} + }, + "repository_versions": { + "name": "repository_versions", + "columns": { + "id": { + "name": "id", + "type": "text", + "primaryKey": true, + "notNull": true, + "autoincrement": false + }, + "repository_id": { + "name": "repository_id", + "type": "text", + "primaryKey": false, + "notNull": true, + "autoincrement": false + }, + "tag": { + "name": "tag", + "type": "text", + "primaryKey": false, + "notNull": true, + "autoincrement": false + }, + "title": { + "name": "title", + "type": "text", + "primaryKey": false, + "notNull": false, + "autoincrement": false + }, + "commit_hash": { + "name": "commit_hash", + "type": "text", + "primaryKey": false, + 
"notNull": false, + "autoincrement": false + }, + "state": { + "name": "state", + "type": "text", + "primaryKey": false, + "notNull": true, + "autoincrement": false, + "default": "'pending'" + }, + "total_snippets": { + "name": "total_snippets", + "type": "integer", + "primaryKey": false, + "notNull": false, + "autoincrement": false, + "default": 0 + }, + "indexed_at": { + "name": "indexed_at", + "type": "integer", + "primaryKey": false, + "notNull": false, + "autoincrement": false + }, + "created_at": { + "name": "created_at", + "type": "integer", + "primaryKey": false, + "notNull": true, + "autoincrement": false + } + }, + "indexes": {}, + "foreignKeys": { + "repository_versions_repository_id_repositories_id_fk": { + "name": "repository_versions_repository_id_repositories_id_fk", + "tableFrom": "repository_versions", + "tableTo": "repositories", + "columnsFrom": [ + "repository_id" + ], + "columnsTo": [ + "id" + ], + "onDelete": "cascade", + "onUpdate": "no action" + } + }, + "compositePrimaryKeys": {}, + "uniqueConstraints": {}, + "checkConstraints": {} + }, + "settings": { + "name": "settings", + "columns": { + "key": { + "name": "key", + "type": "text", + "primaryKey": true, + "notNull": true, + "autoincrement": false + }, + "value": { + "name": "value", + "type": "text", + "primaryKey": false, + "notNull": false, + "autoincrement": false + }, + "updated_at": { + "name": "updated_at", + "type": "integer", + "primaryKey": false, + "notNull": true, + "autoincrement": false + } + }, + "indexes": {}, + "foreignKeys": {}, + "compositePrimaryKeys": {}, + "uniqueConstraints": {}, + "checkConstraints": {} + }, + "snippet_embeddings": { + "name": "snippet_embeddings", + "columns": { + "snippet_id": { + "name": "snippet_id", + "type": "text", + "primaryKey": true, + "notNull": true, + "autoincrement": false + }, + "model": { + "name": "model", + "type": "text", + "primaryKey": false, + "notNull": true, + "autoincrement": false + }, + "dimensions": { + "name": 
"dimensions", + "type": "integer", + "primaryKey": false, + "notNull": true, + "autoincrement": false + }, + "embedding": { + "name": "embedding", + "type": "blob", + "primaryKey": false, + "notNull": true, + "autoincrement": false + }, + "created_at": { + "name": "created_at", + "type": "integer", + "primaryKey": false, + "notNull": true, + "autoincrement": false + } + }, + "indexes": {}, + "foreignKeys": { + "snippet_embeddings_snippet_id_snippets_id_fk": { + "name": "snippet_embeddings_snippet_id_snippets_id_fk", + "tableFrom": "snippet_embeddings", + "tableTo": "snippets", + "columnsFrom": [ + "snippet_id" + ], + "columnsTo": [ + "id" + ], + "onDelete": "cascade", + "onUpdate": "no action" + } + }, + "compositePrimaryKeys": {}, + "uniqueConstraints": {}, + "checkConstraints": {} + }, + "snippets": { + "name": "snippets", + "columns": { + "id": { + "name": "id", + "type": "text", + "primaryKey": true, + "notNull": true, + "autoincrement": false + }, + "document_id": { + "name": "document_id", + "type": "text", + "primaryKey": false, + "notNull": true, + "autoincrement": false + }, + "repository_id": { + "name": "repository_id", + "type": "text", + "primaryKey": false, + "notNull": true, + "autoincrement": false + }, + "version_id": { + "name": "version_id", + "type": "text", + "primaryKey": false, + "notNull": false, + "autoincrement": false + }, + "type": { + "name": "type", + "type": "text", + "primaryKey": false, + "notNull": true, + "autoincrement": false + }, + "title": { + "name": "title", + "type": "text", + "primaryKey": false, + "notNull": false, + "autoincrement": false + }, + "content": { + "name": "content", + "type": "text", + "primaryKey": false, + "notNull": true, + "autoincrement": false + }, + "language": { + "name": "language", + "type": "text", + "primaryKey": false, + "notNull": false, + "autoincrement": false + }, + "breadcrumb": { + "name": "breadcrumb", + "type": "text", + "primaryKey": false, + "notNull": false, + "autoincrement": false + 
}, + "token_count": { + "name": "token_count", + "type": "integer", + "primaryKey": false, + "notNull": false, + "autoincrement": false, + "default": 0 + }, + "created_at": { + "name": "created_at", + "type": "integer", + "primaryKey": false, + "notNull": true, + "autoincrement": false + } + }, + "indexes": {}, + "foreignKeys": { + "snippets_document_id_documents_id_fk": { + "name": "snippets_document_id_documents_id_fk", + "tableFrom": "snippets", + "tableTo": "documents", + "columnsFrom": [ + "document_id" + ], + "columnsTo": [ + "id" + ], + "onDelete": "cascade", + "onUpdate": "no action" + }, + "snippets_repository_id_repositories_id_fk": { + "name": "snippets_repository_id_repositories_id_fk", + "tableFrom": "snippets", + "tableTo": "repositories", + "columnsFrom": [ + "repository_id" + ], + "columnsTo": [ + "id" + ], + "onDelete": "cascade", + "onUpdate": "no action" + }, + "snippets_version_id_repository_versions_id_fk": { + "name": "snippets_version_id_repository_versions_id_fk", + "tableFrom": "snippets", + "tableTo": "repository_versions", + "columnsFrom": [ + "version_id" + ], + "columnsTo": [ + "id" + ], + "onDelete": "cascade", + "onUpdate": "no action" + } + }, + "compositePrimaryKeys": {}, + "uniqueConstraints": {}, + "checkConstraints": {} + } + }, + "views": {}, + "enums": {}, + "_meta": { + "schemas": {}, + "tables": {}, + "columns": {} + }, + "internal": { + "indexes": {} + } +} \ No newline at end of file diff --git a/src/lib/server/db/migrations/meta/_journal.json b/src/lib/server/db/migrations/meta/_journal.json index cccc65c..a1b1ea5 100644 --- a/src/lib/server/db/migrations/meta/_journal.json +++ b/src/lib/server/db/migrations/meta/_journal.json @@ -8,6 +8,13 @@ "when": 1774196053634, "tag": "0000_large_master_chief", "breakpoints": true + }, + { + "idx": 1, + "version": "6", + "when": 1774448049161, + "tag": "0001_quick_nighthawk", + "breakpoints": true } ] } \ No newline at end of file diff --git a/src/lib/server/db/schema.ts 
b/src/lib/server/db/schema.ts index 5581c4a..db72e05 100644 --- a/src/lib/server/db/schema.ts +++ b/src/lib/server/db/schema.ts @@ -37,6 +37,7 @@ export const repositoryVersions = sqliteTable('repository_versions', { .references(() => repositories.id, { onDelete: 'cascade' }), tag: text('tag').notNull(), // git tag or branch name title: text('title'), + commitHash: text('commit_hash'), // immutable commit SHA-1 resolved from tag state: text('state', { enum: ['pending', 'indexing', 'indexed', 'error'] }) @@ -135,7 +136,7 @@ export const repositoryConfigs = sqliteTable('repository_configs', { excludeFiles: text('exclude_files', { mode: 'json' }).$type(), rules: text('rules', { mode: 'json' }).$type(), previousVersions: text('previous_versions', { mode: 'json' }).$type< - { tag: string; title: string }[] + { tag: string; title: string; commitHash?: string }[] >(), updatedAt: integer('updated_at', { mode: 'timestamp' }).notNull() }); diff --git a/src/lib/server/mappers/repository-version.mapper.ts b/src/lib/server/mappers/repository-version.mapper.ts index b1b3d83..ee4bd56 100644 --- a/src/lib/server/mappers/repository-version.mapper.ts +++ b/src/lib/server/mappers/repository-version.mapper.ts @@ -11,6 +11,7 @@ export class RepositoryVersionMapper { repositoryId: entity.repository_id, tag: entity.tag, title: entity.title, + commitHash: entity.commit_hash, state: entity.state, totalSnippets: entity.total_snippets ?? 0, indexedAt: entity.indexed_at != null ? 
new Date(entity.indexed_at * 1000) : null, @@ -24,6 +25,7 @@ export class RepositoryVersionMapper { repositoryId: domain.repositoryId, tag: domain.tag, title: domain.title, + commitHash: domain.commitHash, state: domain.state, totalSnippets: domain.totalSnippets, indexedAt: domain.indexedAt, diff --git a/src/lib/server/models/repository-version.ts b/src/lib/server/models/repository-version.ts index cf3ddcf..b1933e6 100644 --- a/src/lib/server/models/repository-version.ts +++ b/src/lib/server/models/repository-version.ts @@ -3,6 +3,7 @@ export interface RepositoryVersionEntityProps { repository_id: string; tag: string; title: string | null; + commit_hash: string | null; state: 'pending' | 'indexing' | 'indexed' | 'error'; total_snippets: number | null; indexed_at: number | null; @@ -14,6 +15,7 @@ export class RepositoryVersionEntity { repository_id: string; tag: string; title: string | null; + commit_hash: string | null; state: 'pending' | 'indexing' | 'indexed' | 'error'; total_snippets: number | null; indexed_at: number | null; @@ -24,6 +26,7 @@ export class RepositoryVersionEntity { this.repository_id = props.repository_id; this.tag = props.tag; this.title = props.title; + this.commit_hash = props.commit_hash; this.state = props.state; this.total_snippets = props.total_snippets; this.indexed_at = props.indexed_at; @@ -36,6 +39,7 @@ export interface RepositoryVersionProps { repositoryId: string; tag: string; title: string | null; + commitHash: string | null; state: 'pending' | 'indexing' | 'indexed' | 'error'; totalSnippets: number; indexedAt: Date | null; @@ -47,6 +51,7 @@ export class RepositoryVersion { repositoryId: string; tag: string; title: string | null; + commitHash: string | null; state: 'pending' | 'indexing' | 'indexed' | 'error'; totalSnippets: number; indexedAt: Date | null; @@ -57,6 +62,7 @@ export class RepositoryVersion { this.repositoryId = props.repositoryId; this.tag = props.tag; this.title = props.title; + this.commitHash = props.commitHash; 
this.state = props.state; this.totalSnippets = props.totalSnippets; this.indexedAt = props.indexedAt; @@ -69,6 +75,7 @@ export interface RepositoryVersionDtoProps { repositoryId: string; tag: string; title: string | null; + commitHash: string | null; state: 'pending' | 'indexing' | 'indexed' | 'error'; totalSnippets: number; indexedAt: Date | null; @@ -80,6 +87,7 @@ export class RepositoryVersionDto { repositoryId: string; tag: string; title: string | null; + commitHash: string | null; state: 'pending' | 'indexing' | 'indexed' | 'error'; totalSnippets: number; indexedAt: Date | null; @@ -90,6 +98,7 @@ export class RepositoryVersionDto { this.repositoryId = props.repositoryId; this.tag = props.tag; this.title = props.title; + this.commitHash = props.commitHash; this.state = props.state; this.totalSnippets = props.totalSnippets; this.indexedAt = props.indexedAt; diff --git a/src/lib/server/search/search.service.test.ts b/src/lib/server/search/search.service.test.ts index 31c8ddf..cec2e24 100644 --- a/src/lib/server/search/search.service.test.ts +++ b/src/lib/server/search/search.service.test.ts @@ -595,8 +595,7 @@ describe('formatLibraryResults', () => { id: '/facebook/react/v18', repositoryId: '/facebook/react', tag: 'v18', - title: 'React 18', - state: 'indexed', + title: 'React 18', commitHash: null, state: 'indexed', totalSnippets: 1000, indexedAt: null, createdAt: now diff --git a/src/lib/server/services/version.service.test.ts b/src/lib/server/services/version.service.test.ts index a7c6fe7..5df1949 100644 --- a/src/lib/server/services/version.service.test.ts +++ b/src/lib/server/services/version.service.test.ts @@ -23,17 +23,34 @@ function createTestDb(): Database.Database { client.pragma('foreign_keys = ON'); const migrationsFolder = join(import.meta.dirname, '../db/migrations'); - const migrationSql = readFileSync( + + // Apply all migration files in order + const migration0 = readFileSync( join(migrationsFolder, '0000_large_master_chief.sql'), 'utf-8' ); + 
const migration1 = readFileSync( + join(migrationsFolder, '0001_quick_nighthawk.sql'), + 'utf-8' + ); - const statements = migrationSql + // Apply first migration + const statements0 = migration0 .split('--> statement-breakpoint') .map((s) => s.trim()) .filter(Boolean); - for (const stmt of statements) { + for (const stmt of statements0) { + client.exec(stmt); + } + + // Apply second migration + const statements1 = migration1 + .split('--> statement-breakpoint') + .map((s) => s.trim()) + .filter(Boolean); + + for (const stmt of statements1) { client.exec(stmt); } diff --git a/src/lib/server/services/version.service.ts b/src/lib/server/services/version.service.ts index 9e69f9e..430c532 100644 --- a/src/lib/server/services/version.service.ts +++ b/src/lib/server/services/version.service.ts @@ -34,14 +34,22 @@ export class VersionService { * Add a new version record for a repository. * The version ID follows the convention: {repositoryId}/{tag} * + * @param commitHash Optional commit hash. If not provided and repository is local, + * will attempt to resolve the tag to a commit hash automatically. + * * @throws NotFoundError when the parent repository does not exist * @throws AlreadyExistsError when the tag is already registered */ - add(repositoryId: string, tag: string, title?: string): RepositoryVersion { + add( + repositoryId: string, + tag: string, + title?: string, + commitHash?: string + ): RepositoryVersion { // Verify parent repository exists. 
const repo = this.db - .prepare(`SELECT id FROM repositories WHERE id = ?`) - .get(repositoryId) as { id: string } | undefined; + .prepare(`SELECT id, source, source_url FROM repositories WHERE id = ?`) + .get(repositoryId) as { id: string; source: string; source_url: string } | undefined; if (!repo) { throw new NotFoundError(`Repository ${repositoryId} not found`); @@ -55,15 +63,29 @@ export class VersionService { throw new AlreadyExistsError(`Version ${tag} already exists for repository ${repositoryId}`); } + // For local repositories, attempt to resolve tag to commit hash if not provided + let resolvedCommitHash = commitHash; + if (!resolvedCommitHash && repo.source === 'local') { + try { + const { resolveTagToCommit } = require('$lib/server/utils/git.js'); + resolvedCommitHash = resolveTagToCommit({ repoPath: repo.source_url, tag }); + } catch (error) { + console.warn( + `[VersionService] Could not resolve tag '${tag}' to commit hash for ${repositoryId}: ${error instanceof Error ? error.message : String(error)}` + ); + // Continue without commit hash — non-blocking + } + } + const now = Math.floor(Date.now() / 1000); this.db .prepare( `INSERT INTO repository_versions - (id, repository_id, tag, title, state, total_snippets, indexed_at, created_at) - VALUES (?, ?, ?, ?, 'pending', 0, NULL, ?)` + (id, repository_id, tag, title, commit_hash, state, total_snippets, indexed_at, created_at) + VALUES (?, ?, ?, ?, ?, 'pending', 0, NULL, ?)` ) - .run(id, repositoryId, tag, title ?? null, now); + .run(id, repositoryId, tag, title ?? null, resolvedCommitHash ?? null, now); const row = this.db .prepare(`SELECT * FROM repository_versions WHERE id = ?`) @@ -105,11 +127,14 @@ export class VersionService { * Silently skips tags that are already registered (idempotent). * All new records are created with state = 'pending'. * + * Supports optional `commitHash` field to pin a version to a specific commit, + * overriding tag resolution (TRUEREF-0019). 
+ * * @throws NotFoundError when the parent repository does not exist */ registerFromConfig( repositoryId: string, - previousVersions: { tag: string; title: string }[] + previousVersions: { tag: string; title: string; commitHash?: string }[] ): RepositoryVersion[] { // Verify parent repository exists. const repo = this.db @@ -122,7 +147,7 @@ export class VersionService { const registered: RepositoryVersion[] = []; - for (const { tag, title } of previousVersions) { + for (const { tag, title, commitHash } of previousVersions) { const existing = this.getByTag(repositoryId, tag); if (existing) { // Already registered — skip silently. @@ -130,10 +155,42 @@ export class VersionService { continue; } - const version = this.add(repositoryId, tag, title); + const version = this.add(repositoryId, tag, title, commitHash); registered.push(version); } return registered; } + + /** + * Discover all version tags from a local repository and return them + * along with their resolved commit hashes. + * + * This is used for tag auto-discovery when adding a repository or + * refreshing available versions (TRUEREF-0019). 
+ * + * @returns Array of { tag, commitHash } objects, newest first + * @throws Error when repository is not local or git operations fail + */ + discoverTags(repositoryId: string): Array<{ tag: string; commitHash: string }> { + const repo = this.db + .prepare(`SELECT id, source, source_url FROM repositories WHERE id = ?`) + .get(repositoryId) as { id: string; source: string; source_url: string } | undefined; + + if (!repo) { + throw new NotFoundError(`Repository ${repositoryId} not found`); + } + + if (repo.source !== 'local') { + throw new Error('Tag discovery is only supported for local repositories'); + } + + const { discoverVersionTags, resolveTagToCommit } = require('$lib/server/utils/git.js'); + const tags = discoverVersionTags({ repoPath: repo.source_url }); + + return tags.map((tag: string) => { + const commitHash = resolveTagToCommit({ repoPath: repo.source_url, tag }); + return { tag, commitHash }; + }); + } } diff --git a/src/lib/server/utils/git.ts b/src/lib/server/utils/git.ts new file mode 100644 index 0000000..c13a3f6 --- /dev/null +++ b/src/lib/server/utils/git.ts @@ -0,0 +1,163 @@ +/** + * Git utilities for version indexing (TRUEREF-0019). + * + * Provides: + * - Tag-to-commit resolution + * - Tag auto-discovery + * - File extraction via `git archive` to temp directories + */ + +import { execSync } from 'node:child_process'; +import { mkdirSync, rmSync } from 'node:fs'; +import { join } from 'node:path'; + +export interface ResolveTagOptions { + repoPath: string; + tag: string; +} + +export interface DiscoverTagsOptions { + repoPath: string; +} + +export interface ExtractVersionOptions { + repoPath: string; + commitHash: string; + repositoryId: string; + versionTag: string; +} + +/** + * Resolve a git tag/branch to its underlying commit hash. + * + * Uses `git rev-parse {tag}^{commit}` which automatically dereferences + * annotated tags to the commit they point at. 
+ * + * @throws Error when the tag does not exist or git command fails + */ +export function resolveTagToCommit(options: ResolveTagOptions): string { + const { repoPath, tag } = options; + + try { + const commitHash = execSync(`git -C "${repoPath}" rev-parse "${tag}^{commit}"`, { + encoding: 'utf-8', + stdio: ['ignore', 'pipe', 'pipe'] + }).trim(); + + return commitHash; + } catch (error) { + throw new Error( + `Failed to resolve tag '${tag}' in repository at ${repoPath}: ${error instanceof Error ? error.message : String(error)}` + ); + } +} + +/** + * Discover all version tags in a repository. + * + * Returns an array of tag names sorted in reverse chronological order + * (most recent first). + * + * @throws Error when git command fails + */ +export function discoverVersionTags(options: DiscoverTagsOptions): string[] { + const { repoPath } = options; + + try { + // List all tags, sorted by commit date (newest first) + const output = execSync( + `git -C "${repoPath}" tag -l --sort=-creatordate`, + { + encoding: 'utf-8', + stdio: ['ignore', 'pipe', 'pipe'] + } + ).trim(); + + if (!output) return []; + + return output.split('\n').filter((tag) => tag.length > 0); + } catch (error) { + throw new Error( + `Failed to discover tags in repository at ${repoPath}: ${error instanceof Error ? error.message : String(error)}` + ); + } +} + +/** + * Extract a clean file tree from a specific commit using `git archive`. + * + * The extracted files are placed in a temporary directory under + * `prompts/tmp/` with naming convention: + * `{repositoryId.replace(/\//g, '_')}-{versionTag}/` + * + * Example: + * repo: "/facebook/react" + * tag: "v18.3.0" + * → temp path: "{workspace}/prompts/tmp/_facebook_react-v18.3.0/" + * + * The temp directory MUST be deleted after indexing completes. 
+ * + * @returns absolute path to the extracted directory + * @throws Error when git archive fails + */ +export function extractVersionToTemp(options: ExtractVersionOptions): string { + const { repoPath, commitHash, repositoryId, versionTag } = options; + + // Create workspace-local temp directory under prompts/ + // (agent-conventions rule: never use OS temp directories) + const workspaceRoot = process.cwd(); + const sanitizedRepoId = repositoryId.replace(/\//g, '_'); + const extractDirName = `${sanitizedRepoId}-${versionTag}`; + + // Note: This assumes a JIRA context exists. For non-JIRA workflows, + // you may need to adjust the temp path or pass it as a parameter. + const tempRoot = join(workspaceRoot, 'prompts', 'tmp'); + const extractPath = join(tempRoot, extractDirName); + + // Clean up any existing extraction for this version + try { + rmSync(extractPath, { recursive: true, force: true }); + } catch { + // Directory doesn't exist yet — no problem + } + + // Create temp directory + mkdirSync(extractPath, { recursive: true }); + + try { + // Extract files from the commit using git archive + // Format: tar (pipe directly to tar -x for extraction) + execSync(`git -C "${repoPath}" archive "${commitHash}" | tar -x -C "${extractPath}"`, { + stdio: ['ignore', 'pipe', 'pipe'], + shell: '/bin/sh' + }); + + return extractPath; + } catch (error) { + // Clean up on failure + try { + rmSync(extractPath, { recursive: true, force: true }); + } catch { + // Best effort cleanup + } + + throw new Error( + `Failed to extract commit ${commitHash} to ${extractPath}: ${error instanceof Error ? error.message : String(error)}` + ); + } +} + +/** + * Clean up a temp extraction directory created by extractVersionToTemp. + * + * This should be called after indexing completes (success or failure). 
+ */ +export function cleanupTempExtraction(extractPath: string): void { + try { + rmSync(extractPath, { recursive: true, force: true }); + } catch (error) { + console.warn( + `[git.ts] Failed to cleanup temp extraction at ${extractPath}: ${error instanceof Error ? error.message : String(error)}` + ); + } +} diff --git a/src/lib/types.ts b/src/lib/types.ts index c41d8f3..37da65c 100644 --- a/src/lib/types.ts +++ b/src/lib/types.ts @@ -92,5 +92,5 @@ export interface TrueRefConfig { excludeFolders?: string[]; excludeFiles?: string[]; rules?: string[]; - previousVersions?: Array<{ tag: string; title: string }>; + previousVersions?: Array<{ tag: string; title: string; commitHash?: string }>; } diff --git a/src/routes/api/v1/api-contract.integration.test.ts b/src/routes/api/v1/api-contract.integration.test.ts index 65e90ae..981e068 100644 --- a/src/routes/api/v1/api-contract.integration.test.ts +++ b/src/routes/api/v1/api-contract.integration.test.ts @@ -35,14 +35,28 @@ function createTestDb(): Database.Database { client.pragma('foreign_keys = ON'); const migrationsFolder = join(import.meta.dirname, '../../../lib/server/db/migrations'); - const migrationSql = readFileSync(join(migrationsFolder, '0000_large_master_chief.sql'), 'utf-8'); + + // Apply all migration files in order + const migration0 = readFileSync(join(migrationsFolder, '0000_large_master_chief.sql'), 'utf-8'); + const migration1 = readFileSync(join(migrationsFolder, '0001_quick_nighthawk.sql'), 'utf-8'); - const statements = migrationSql + // Apply first migration + const statements0 = migration0 .split('--> statement-breakpoint') .map((statement) => statement.trim()) .filter(Boolean); - for (const statement of statements) { + for (const statement of statements0) { + client.exec(statement); + } + + // Apply second migration + const statements1 = migration1 + .split('--> statement-breakpoint') + .map((statement) => statement.trim()) + .filter(Boolean); + + for (const statement of statements1) { 
client.exec(statement); }