wip(TRUEREF-0018): commit version-scoped indexing work

This commit is contained in:
Giancarmine Salucci
2026-03-25 19:03:22 +01:00
parent b9d52405fa
commit fef6f66930
21 changed files with 1208 additions and 19 deletions

View File

@@ -1,2 +1,22 @@
# Drizzle # Drizzle
DATABASE_URL=local.db DATABASE_URL=local.db
# ---------------------------------------------------------------------------
# Corporate Deployment (TRUEREF-0019)
# ---------------------------------------------------------------------------
# These variables enable TrueRef to work with private git repositories
# hosted on corporate Bitbucket Server/Data Center or self-hosted GitLab.
# Corporate CA certificate (PEM or DER format, auto-detected)
# Export from Windows: certmgr.msc → Trusted Root → Export with Base64
# CORP_CA_CERT=C:/path/to/corp-ca.crt
# Git remote hostnames (without https://)
# BITBUCKET_HOST=bitbucket.corp.example.com
# GITLAB_HOST=gitlab.corp.example.com
# Personal access tokens (NEVER commit these to version control)
# Bitbucket Server: Settings → HTTP access tokens
# GitLab: User Settings → Access Tokens → read_repository scope
# GIT_TOKEN_BITBUCKET=
# GIT_TOKEN_GITLAB=

1
.github/agents vendored Symbolic link
View File

@@ -0,0 +1 @@
/home/moze/Sources/copilot-agents/.github/agents

1
.github/schemas vendored Symbolic link
View File

@@ -0,0 +1 @@
/home/moze/Sources/copilot-agents/.github/schemas

1
.github/skills vendored Symbolic link
View File

@@ -0,0 +1 @@
/home/moze/Sources/copilot-agents/.github/skills

View File

@@ -12,6 +12,9 @@ RUN npm run build
FROM node:20-alpine FROM node:20-alpine
WORKDIR /app WORKDIR /app
# Install openssl and git for corporate CA certificate handling and git credential setup (TRUEREF-0019)
RUN apk add --no-cache openssl git
# Install all deps (includes tsx for the MCP server and drizzle-kit for migrations) # Install all deps (includes tsx for the MCP server and drizzle-kit for migrations)
COPY package*.json ./ COPY package*.json ./
RUN npm ci RUN npm ci

View File

@@ -645,6 +645,96 @@ This builds the image and starts two services:
The SQLite database is stored in a named Docker volume (`trueref-data`) and persists across restarts. The SQLite database is stored in a named Docker volume (`trueref-data`) and persists across restarts.
### Corporate deployment
TrueRef supports deployment in corporate environments with private git repositories hosted on Bitbucket Server/Data Center and self-hosted GitLab instances (TRUEREF-0019).
#### Required setup
1. **Export your corporate CA certificate** (Windows):
- Open `certmgr.msc` → Trusted Root Certification Authorities
- Right-click your corporate CA → All Tasks → Export
- Choose Base64-encoded X.509 (.CER) format
- Save to a known location (e.g., `C:\certs\corp-ca.crt`)
2. **Generate personal access tokens**:
- Bitbucket Server: Settings → HTTP access tokens (requires `REPO_READ` permission)
- GitLab: User Settings → Access Tokens (requires `read_repository` scope)
3. **Update `.env` file**:
```env
# Corporate CA certificate path (PEM or DER — auto-detected)
CORP_CA_CERT=C:/path/to/corp-ca.crt
# Git remote hostnames (without https://)
BITBUCKET_HOST=bitbucket.corp.example.com
GITLAB_HOST=gitlab.corp.example.com
# Personal access tokens (NEVER commit these)
GIT_TOKEN_BITBUCKET=your-bitbucket-token-here
GIT_TOKEN_GITLAB=your-gitlab-token-here
```
4. **Uncomment volume mounts in `docker-compose.yml`**:
```yaml
services:
web:
volumes:
- trueref-data:/data
- ${USERPROFILE:-$HOME}/.ssh:/root/.ssh:ro
- ${USERPROFILE:-$HOME}/.gitconfig:/root/.gitconfig:ro
- ${CORP_CA_CERT}:/certs/corp-ca.crt:ro
environment:
BITBUCKET_HOST: "${BITBUCKET_HOST}"
GITLAB_HOST: "${GITLAB_HOST}"
GIT_TOKEN_BITBUCKET: "${GIT_TOKEN_BITBUCKET}"
GIT_TOKEN_GITLAB: "${GIT_TOKEN_GITLAB}"
```
5. **Start the services**:
```sh
docker compose up --build
```
#### How it works
The Docker entrypoint script (`docker-entrypoint.sh`) runs these steps in order:
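1. **Trust corporate CA**: Detects PEM/DER format and installs the certificate at the OS level so git and curl trust it automatically. Node.js does not read the OS trust store by default, so Node.js `fetch` trusts the certificate only when `NODE_EXTRA_CA_CERTS` points at it (or Node.js is started with `--use-openssl-ca`).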
2. **Fix SSH key permissions**: Corrects world-readable permissions from Windows NTFS mounts so SSH works properly.
3. **Configure git credentials**: Sets up per-host credential helpers that provide the correct username and token for each remote.
This setup works for:
- HTTPS cloning with personal access tokens
- SSH cloning with mounted SSH keys
- On-premise servers with custom CA certificates
- Mixed environments (multiple git remotes with different credentials)
#### SSH authentication (alternative to HTTPS)
For long-lived deployments, SSH authentication is recommended:
1. Generate an SSH key pair if you don't have one:
```sh
ssh-keygen -t ed25519 -C "trueref@your-company.com"
```
2. Add the public key to your git hosting service:
- Bitbucket: Settings → SSH keys
- GitLab: User Settings → SSH Keys
3. Ensure your `~/.ssh/config` has the correct host entries:
```
Host bitbucket.corp.example.com
IdentityFile ~/.ssh/id_ed25519
User git
```
4. Once the volume mounts from step 4 of "Required setup" are uncommented, Docker Compose mounts `~/.ssh` read-only — no additional changes are needed.
### Environment variables ### Environment variables
| Variable | Default | Description | | Variable | Default | Description |

View File

@@ -5,8 +5,19 @@ services:
- "3000:3000" - "3000:3000"
volumes: volumes:
- trueref-data:/data - trueref-data:/data
# Corporate deployment support (TRUEREF-0019)
# Uncomment these lines for corporate environments:
# - ${USERPROFILE:-$HOME}/.ssh:/root/.ssh:ro
# - ${USERPROFILE:-$HOME}/.gitconfig:/root/.gitconfig:ro
# - ${CORP_CA_CERT}:/certs/corp-ca.crt:ro
environment: environment:
DATABASE_URL: /data/trueref.db DATABASE_URL: /data/trueref.db
# Corporate git credentials (TRUEREF-0019)
# Uncomment and configure in .env for private repositories:
# BITBUCKET_HOST: "${BITBUCKET_HOST}"
# GITLAB_HOST: "${GITLAB_HOST}"
# GIT_TOKEN_BITBUCKET: "${GIT_TOKEN_BITBUCKET}"
# GIT_TOKEN_GITLAB: "${GIT_TOKEN_GITLAB}"
restart: unless-stopped restart: unless-stopped
mcp: mcp:

View File

@@ -1,6 +1,51 @@
#!/bin/sh #!/bin/sh
set -e set -e
# ---------------------------------------------------------------------------
# 1. Trust corporate CA — must run first
# ---------------------------------------------------------------------------
# When a certificate is mounted at /certs/corp-ca.crt, normalise it to PEM,
# add it to the system trust store, and expose it to Node.js. This happens
# before the git configuration below so later HTTPS traffic can verify the
# corporate servers.
CORP_CA_SRC=/certs/corp-ca.crt
CORP_CA_DEST=/usr/local/share/ca-certificates/corp-ca.crt
if [ -f "$CORP_CA_SRC" ]; then
  echo "[docker-entrypoint] Installing corporate CA certificate..."
  # openssl exits non-zero when the file is not PEM; stderr is silenced
  # because that outcome is expected for DER input.
  if openssl x509 -inform PEM -in "$CORP_CA_SRC" -noout 2>/dev/null; then
    # PEM format — copy directly
    cp "$CORP_CA_SRC" "$CORP_CA_DEST"
  else
    # DER format — convert to PEM (a file that is neither PEM nor DER fails
    # here and, under `set -e`, stops the entrypoint)
    openssl x509 -inform DER -in "$CORP_CA_SRC" -out "$CORP_CA_DEST"
  fi
  update-ca-certificates 2>/dev/null
  # Node.js uses its bundled root CAs rather than the OS trust store, so point
  # it at the PEM copy as well. A value supplied by the caller is preserved.
  export NODE_EXTRA_CA_CERTS="${NODE_EXTRA_CA_CERTS:-$CORP_CA_DEST}"
fi
# ---------------------------------------------------------------------------
# 2. Fix SSH key permissions (Windows mounts arrive world-readable)
# ---------------------------------------------------------------------------
if [ -d /root/.ssh ]; then
  echo "[docker-entrypoint] Fixing SSH key permissions..."
  # Best effort only: the documented volume is mounted read-only (:ro), where
  # chmod fails. Under `set -e` an unguarded failure would abort the entrypoint
  # before the service starts, so warn and carry on instead.
  chmod 700 /root/.ssh 2>/dev/null ||
    echo "[docker-entrypoint] Warning: could not change permissions on /root/.ssh (read-only mount?)" >&2
  chmod 600 /root/.ssh/* 2>/dev/null || true
fi
# ---------------------------------------------------------------------------
# 3. Per-host HTTPS credential helpers
# ---------------------------------------------------------------------------
# Each helper is registered only when both the host and its token are set.
# The escaped \$ keeps the token out of ~/.gitconfig: the variable name is
# stored literally and expanded from the environment when git runs the helper.
if [ -n "$GIT_TOKEN_BITBUCKET" ] && [ -n "$BITBUCKET_HOST" ]; then
  echo "[docker-entrypoint] Configuring Bitbucket credential helper for ${BITBUCKET_HOST}..."
  git config --global \
    "credential.https://${BITBUCKET_HOST}.helper" \
    "!f() { echo username=x-token-auth; echo password=\$GIT_TOKEN_BITBUCKET; }; f"
fi
if [ -n "$GIT_TOKEN_GITLAB" ] && [ -n "$GITLAB_HOST" ]; then
  echo "[docker-entrypoint] Configuring GitLab credential helper for ${GITLAB_HOST}..."
  git config --global \
    "credential.https://${GITLAB_HOST}.helper" \
    "!f() { echo username=oauth2; echo password=\$GIT_TOKEN_GITLAB; }; f"
fi
# ---------------------------------------------------------------------------
# 4. Start requested service
# ---------------------------------------------------------------------------
case "${1:-web}" in case "${1:-web}" in
web) web)
echo "Running database migrations..." echo "Running database migrations..."

View File

@@ -49,6 +49,7 @@ function makeVersion(tag: string): RepositoryVersion {
repositoryId: '/facebook/react', repositoryId: '/facebook/react',
tag, tag,
title: null, title: null,
commitHash: null,
state: 'indexed', state: 'indexed',
totalSnippets: 100, totalSnippets: 100,
indexedAt: new Date(), indexedAt: new Date(),

View File

@@ -0,0 +1 @@
ALTER TABLE `repository_versions` ADD `commit_hash` text;

View File

@@ -0,0 +1,746 @@
{
"version": "6",
"dialect": "sqlite",
"id": "60c9a1b5-449f-45fd-9b2d-1ab4cca78ab6",
"prevId": "9dec55ea-0c03-4c98-99a6-dd143b336791",
"tables": {
"documents": {
"name": "documents",
"columns": {
"id": {
"name": "id",
"type": "text",
"primaryKey": true,
"notNull": true,
"autoincrement": false
},
"repository_id": {
"name": "repository_id",
"type": "text",
"primaryKey": false,
"notNull": true,
"autoincrement": false
},
"version_id": {
"name": "version_id",
"type": "text",
"primaryKey": false,
"notNull": false,
"autoincrement": false
},
"file_path": {
"name": "file_path",
"type": "text",
"primaryKey": false,
"notNull": true,
"autoincrement": false
},
"title": {
"name": "title",
"type": "text",
"primaryKey": false,
"notNull": false,
"autoincrement": false
},
"language": {
"name": "language",
"type": "text",
"primaryKey": false,
"notNull": false,
"autoincrement": false
},
"token_count": {
"name": "token_count",
"type": "integer",
"primaryKey": false,
"notNull": false,
"autoincrement": false,
"default": 0
},
"checksum": {
"name": "checksum",
"type": "text",
"primaryKey": false,
"notNull": true,
"autoincrement": false
},
"indexed_at": {
"name": "indexed_at",
"type": "integer",
"primaryKey": false,
"notNull": true,
"autoincrement": false
}
},
"indexes": {},
"foreignKeys": {
"documents_repository_id_repositories_id_fk": {
"name": "documents_repository_id_repositories_id_fk",
"tableFrom": "documents",
"tableTo": "repositories",
"columnsFrom": [
"repository_id"
],
"columnsTo": [
"id"
],
"onDelete": "cascade",
"onUpdate": "no action"
},
"documents_version_id_repository_versions_id_fk": {
"name": "documents_version_id_repository_versions_id_fk",
"tableFrom": "documents",
"tableTo": "repository_versions",
"columnsFrom": [
"version_id"
],
"columnsTo": [
"id"
],
"onDelete": "cascade",
"onUpdate": "no action"
}
},
"compositePrimaryKeys": {},
"uniqueConstraints": {},
"checkConstraints": {}
},
"indexing_jobs": {
"name": "indexing_jobs",
"columns": {
"id": {
"name": "id",
"type": "text",
"primaryKey": true,
"notNull": true,
"autoincrement": false
},
"repository_id": {
"name": "repository_id",
"type": "text",
"primaryKey": false,
"notNull": true,
"autoincrement": false
},
"version_id": {
"name": "version_id",
"type": "text",
"primaryKey": false,
"notNull": false,
"autoincrement": false
},
"status": {
"name": "status",
"type": "text",
"primaryKey": false,
"notNull": true,
"autoincrement": false,
"default": "'queued'"
},
"progress": {
"name": "progress",
"type": "integer",
"primaryKey": false,
"notNull": false,
"autoincrement": false,
"default": 0
},
"total_files": {
"name": "total_files",
"type": "integer",
"primaryKey": false,
"notNull": false,
"autoincrement": false,
"default": 0
},
"processed_files": {
"name": "processed_files",
"type": "integer",
"primaryKey": false,
"notNull": false,
"autoincrement": false,
"default": 0
},
"error": {
"name": "error",
"type": "text",
"primaryKey": false,
"notNull": false,
"autoincrement": false
},
"started_at": {
"name": "started_at",
"type": "integer",
"primaryKey": false,
"notNull": false,
"autoincrement": false
},
"completed_at": {
"name": "completed_at",
"type": "integer",
"primaryKey": false,
"notNull": false,
"autoincrement": false
},
"created_at": {
"name": "created_at",
"type": "integer",
"primaryKey": false,
"notNull": true,
"autoincrement": false
}
},
"indexes": {},
"foreignKeys": {
"indexing_jobs_repository_id_repositories_id_fk": {
"name": "indexing_jobs_repository_id_repositories_id_fk",
"tableFrom": "indexing_jobs",
"tableTo": "repositories",
"columnsFrom": [
"repository_id"
],
"columnsTo": [
"id"
],
"onDelete": "cascade",
"onUpdate": "no action"
}
},
"compositePrimaryKeys": {},
"uniqueConstraints": {},
"checkConstraints": {}
},
"repositories": {
"name": "repositories",
"columns": {
"id": {
"name": "id",
"type": "text",
"primaryKey": true,
"notNull": true,
"autoincrement": false
},
"title": {
"name": "title",
"type": "text",
"primaryKey": false,
"notNull": true,
"autoincrement": false
},
"description": {
"name": "description",
"type": "text",
"primaryKey": false,
"notNull": false,
"autoincrement": false
},
"source": {
"name": "source",
"type": "text",
"primaryKey": false,
"notNull": true,
"autoincrement": false
},
"source_url": {
"name": "source_url",
"type": "text",
"primaryKey": false,
"notNull": true,
"autoincrement": false
},
"branch": {
"name": "branch",
"type": "text",
"primaryKey": false,
"notNull": false,
"autoincrement": false,
"default": "'main'"
},
"state": {
"name": "state",
"type": "text",
"primaryKey": false,
"notNull": true,
"autoincrement": false,
"default": "'pending'"
},
"total_snippets": {
"name": "total_snippets",
"type": "integer",
"primaryKey": false,
"notNull": false,
"autoincrement": false,
"default": 0
},
"total_tokens": {
"name": "total_tokens",
"type": "integer",
"primaryKey": false,
"notNull": false,
"autoincrement": false,
"default": 0
},
"trust_score": {
"name": "trust_score",
"type": "real",
"primaryKey": false,
"notNull": false,
"autoincrement": false,
"default": 0
},
"benchmark_score": {
"name": "benchmark_score",
"type": "real",
"primaryKey": false,
"notNull": false,
"autoincrement": false,
"default": 0
},
"stars": {
"name": "stars",
"type": "integer",
"primaryKey": false,
"notNull": false,
"autoincrement": false
},
"github_token": {
"name": "github_token",
"type": "text",
"primaryKey": false,
"notNull": false,
"autoincrement": false
},
"last_indexed_at": {
"name": "last_indexed_at",
"type": "integer",
"primaryKey": false,
"notNull": false,
"autoincrement": false
},
"created_at": {
"name": "created_at",
"type": "integer",
"primaryKey": false,
"notNull": true,
"autoincrement": false
},
"updated_at": {
"name": "updated_at",
"type": "integer",
"primaryKey": false,
"notNull": true,
"autoincrement": false
}
},
"indexes": {},
"foreignKeys": {},
"compositePrimaryKeys": {},
"uniqueConstraints": {},
"checkConstraints": {}
},
"repository_configs": {
"name": "repository_configs",
"columns": {
"repository_id": {
"name": "repository_id",
"type": "text",
"primaryKey": true,
"notNull": true,
"autoincrement": false
},
"project_title": {
"name": "project_title",
"type": "text",
"primaryKey": false,
"notNull": false,
"autoincrement": false
},
"description": {
"name": "description",
"type": "text",
"primaryKey": false,
"notNull": false,
"autoincrement": false
},
"folders": {
"name": "folders",
"type": "text",
"primaryKey": false,
"notNull": false,
"autoincrement": false
},
"exclude_folders": {
"name": "exclude_folders",
"type": "text",
"primaryKey": false,
"notNull": false,
"autoincrement": false
},
"exclude_files": {
"name": "exclude_files",
"type": "text",
"primaryKey": false,
"notNull": false,
"autoincrement": false
},
"rules": {
"name": "rules",
"type": "text",
"primaryKey": false,
"notNull": false,
"autoincrement": false
},
"previous_versions": {
"name": "previous_versions",
"type": "text",
"primaryKey": false,
"notNull": false,
"autoincrement": false
},
"updated_at": {
"name": "updated_at",
"type": "integer",
"primaryKey": false,
"notNull": true,
"autoincrement": false
}
},
"indexes": {},
"foreignKeys": {
"repository_configs_repository_id_repositories_id_fk": {
"name": "repository_configs_repository_id_repositories_id_fk",
"tableFrom": "repository_configs",
"tableTo": "repositories",
"columnsFrom": [
"repository_id"
],
"columnsTo": [
"id"
],
"onDelete": "cascade",
"onUpdate": "no action"
}
},
"compositePrimaryKeys": {},
"uniqueConstraints": {},
"checkConstraints": {}
},
"repository_versions": {
"name": "repository_versions",
"columns": {
"id": {
"name": "id",
"type": "text",
"primaryKey": true,
"notNull": true,
"autoincrement": false
},
"repository_id": {
"name": "repository_id",
"type": "text",
"primaryKey": false,
"notNull": true,
"autoincrement": false
},
"tag": {
"name": "tag",
"type": "text",
"primaryKey": false,
"notNull": true,
"autoincrement": false
},
"title": {
"name": "title",
"type": "text",
"primaryKey": false,
"notNull": false,
"autoincrement": false
},
"commit_hash": {
"name": "commit_hash",
"type": "text",
"primaryKey": false,
"notNull": false,
"autoincrement": false
},
"state": {
"name": "state",
"type": "text",
"primaryKey": false,
"notNull": true,
"autoincrement": false,
"default": "'pending'"
},
"total_snippets": {
"name": "total_snippets",
"type": "integer",
"primaryKey": false,
"notNull": false,
"autoincrement": false,
"default": 0
},
"indexed_at": {
"name": "indexed_at",
"type": "integer",
"primaryKey": false,
"notNull": false,
"autoincrement": false
},
"created_at": {
"name": "created_at",
"type": "integer",
"primaryKey": false,
"notNull": true,
"autoincrement": false
}
},
"indexes": {},
"foreignKeys": {
"repository_versions_repository_id_repositories_id_fk": {
"name": "repository_versions_repository_id_repositories_id_fk",
"tableFrom": "repository_versions",
"tableTo": "repositories",
"columnsFrom": [
"repository_id"
],
"columnsTo": [
"id"
],
"onDelete": "cascade",
"onUpdate": "no action"
}
},
"compositePrimaryKeys": {},
"uniqueConstraints": {},
"checkConstraints": {}
},
"settings": {
"name": "settings",
"columns": {
"key": {
"name": "key",
"type": "text",
"primaryKey": true,
"notNull": true,
"autoincrement": false
},
"value": {
"name": "value",
"type": "text",
"primaryKey": false,
"notNull": false,
"autoincrement": false
},
"updated_at": {
"name": "updated_at",
"type": "integer",
"primaryKey": false,
"notNull": true,
"autoincrement": false
}
},
"indexes": {},
"foreignKeys": {},
"compositePrimaryKeys": {},
"uniqueConstraints": {},
"checkConstraints": {}
},
"snippet_embeddings": {
"name": "snippet_embeddings",
"columns": {
"snippet_id": {
"name": "snippet_id",
"type": "text",
"primaryKey": true,
"notNull": true,
"autoincrement": false
},
"model": {
"name": "model",
"type": "text",
"primaryKey": false,
"notNull": true,
"autoincrement": false
},
"dimensions": {
"name": "dimensions",
"type": "integer",
"primaryKey": false,
"notNull": true,
"autoincrement": false
},
"embedding": {
"name": "embedding",
"type": "blob",
"primaryKey": false,
"notNull": true,
"autoincrement": false
},
"created_at": {
"name": "created_at",
"type": "integer",
"primaryKey": false,
"notNull": true,
"autoincrement": false
}
},
"indexes": {},
"foreignKeys": {
"snippet_embeddings_snippet_id_snippets_id_fk": {
"name": "snippet_embeddings_snippet_id_snippets_id_fk",
"tableFrom": "snippet_embeddings",
"tableTo": "snippets",
"columnsFrom": [
"snippet_id"
],
"columnsTo": [
"id"
],
"onDelete": "cascade",
"onUpdate": "no action"
}
},
"compositePrimaryKeys": {},
"uniqueConstraints": {},
"checkConstraints": {}
},
"snippets": {
"name": "snippets",
"columns": {
"id": {
"name": "id",
"type": "text",
"primaryKey": true,
"notNull": true,
"autoincrement": false
},
"document_id": {
"name": "document_id",
"type": "text",
"primaryKey": false,
"notNull": true,
"autoincrement": false
},
"repository_id": {
"name": "repository_id",
"type": "text",
"primaryKey": false,
"notNull": true,
"autoincrement": false
},
"version_id": {
"name": "version_id",
"type": "text",
"primaryKey": false,
"notNull": false,
"autoincrement": false
},
"type": {
"name": "type",
"type": "text",
"primaryKey": false,
"notNull": true,
"autoincrement": false
},
"title": {
"name": "title",
"type": "text",
"primaryKey": false,
"notNull": false,
"autoincrement": false
},
"content": {
"name": "content",
"type": "text",
"primaryKey": false,
"notNull": true,
"autoincrement": false
},
"language": {
"name": "language",
"type": "text",
"primaryKey": false,
"notNull": false,
"autoincrement": false
},
"breadcrumb": {
"name": "breadcrumb",
"type": "text",
"primaryKey": false,
"notNull": false,
"autoincrement": false
},
"token_count": {
"name": "token_count",
"type": "integer",
"primaryKey": false,
"notNull": false,
"autoincrement": false,
"default": 0
},
"created_at": {
"name": "created_at",
"type": "integer",
"primaryKey": false,
"notNull": true,
"autoincrement": false
}
},
"indexes": {},
"foreignKeys": {
"snippets_document_id_documents_id_fk": {
"name": "snippets_document_id_documents_id_fk",
"tableFrom": "snippets",
"tableTo": "documents",
"columnsFrom": [
"document_id"
],
"columnsTo": [
"id"
],
"onDelete": "cascade",
"onUpdate": "no action"
},
"snippets_repository_id_repositories_id_fk": {
"name": "snippets_repository_id_repositories_id_fk",
"tableFrom": "snippets",
"tableTo": "repositories",
"columnsFrom": [
"repository_id"
],
"columnsTo": [
"id"
],
"onDelete": "cascade",
"onUpdate": "no action"
},
"snippets_version_id_repository_versions_id_fk": {
"name": "snippets_version_id_repository_versions_id_fk",
"tableFrom": "snippets",
"tableTo": "repository_versions",
"columnsFrom": [
"version_id"
],
"columnsTo": [
"id"
],
"onDelete": "cascade",
"onUpdate": "no action"
}
},
"compositePrimaryKeys": {},
"uniqueConstraints": {},
"checkConstraints": {}
}
},
"views": {},
"enums": {},
"_meta": {
"schemas": {},
"tables": {},
"columns": {}
},
"internal": {
"indexes": {}
}
}

View File

@@ -8,6 +8,13 @@
"when": 1774196053634, "when": 1774196053634,
"tag": "0000_large_master_chief", "tag": "0000_large_master_chief",
"breakpoints": true "breakpoints": true
},
{
"idx": 1,
"version": "6",
"when": 1774448049161,
"tag": "0001_quick_nighthawk",
"breakpoints": true
} }
] ]
} }

View File

@@ -37,6 +37,7 @@ export const repositoryVersions = sqliteTable('repository_versions', {
.references(() => repositories.id, { onDelete: 'cascade' }), .references(() => repositories.id, { onDelete: 'cascade' }),
tag: text('tag').notNull(), // git tag or branch name tag: text('tag').notNull(), // git tag or branch name
title: text('title'), title: text('title'),
commitHash: text('commit_hash'), // immutable commit SHA-1 resolved from tag
state: text('state', { state: text('state', {
enum: ['pending', 'indexing', 'indexed', 'error'] enum: ['pending', 'indexing', 'indexed', 'error']
}) })
@@ -135,7 +136,7 @@ export const repositoryConfigs = sqliteTable('repository_configs', {
excludeFiles: text('exclude_files', { mode: 'json' }).$type<string[]>(), excludeFiles: text('exclude_files', { mode: 'json' }).$type<string[]>(),
rules: text('rules', { mode: 'json' }).$type<string[]>(), rules: text('rules', { mode: 'json' }).$type<string[]>(),
previousVersions: text('previous_versions', { mode: 'json' }).$type< previousVersions: text('previous_versions', { mode: 'json' }).$type<
{ tag: string; title: string }[] { tag: string; title: string; commitHash?: string }[]
>(), >(),
updatedAt: integer('updated_at', { mode: 'timestamp' }).notNull() updatedAt: integer('updated_at', { mode: 'timestamp' }).notNull()
}); });

View File

@@ -11,6 +11,7 @@ export class RepositoryVersionMapper {
repositoryId: entity.repository_id, repositoryId: entity.repository_id,
tag: entity.tag, tag: entity.tag,
title: entity.title, title: entity.title,
commitHash: entity.commit_hash,
state: entity.state, state: entity.state,
totalSnippets: entity.total_snippets ?? 0, totalSnippets: entity.total_snippets ?? 0,
indexedAt: entity.indexed_at != null ? new Date(entity.indexed_at * 1000) : null, indexedAt: entity.indexed_at != null ? new Date(entity.indexed_at * 1000) : null,
@@ -24,6 +25,7 @@ export class RepositoryVersionMapper {
repositoryId: domain.repositoryId, repositoryId: domain.repositoryId,
tag: domain.tag, tag: domain.tag,
title: domain.title, title: domain.title,
commitHash: domain.commitHash,
state: domain.state, state: domain.state,
totalSnippets: domain.totalSnippets, totalSnippets: domain.totalSnippets,
indexedAt: domain.indexedAt, indexedAt: domain.indexedAt,

View File

@@ -3,6 +3,7 @@ export interface RepositoryVersionEntityProps {
repository_id: string; repository_id: string;
tag: string; tag: string;
title: string | null; title: string | null;
commit_hash: string | null;
state: 'pending' | 'indexing' | 'indexed' | 'error'; state: 'pending' | 'indexing' | 'indexed' | 'error';
total_snippets: number | null; total_snippets: number | null;
indexed_at: number | null; indexed_at: number | null;
@@ -14,6 +15,7 @@ export class RepositoryVersionEntity {
repository_id: string; repository_id: string;
tag: string; tag: string;
title: string | null; title: string | null;
commit_hash: string | null;
state: 'pending' | 'indexing' | 'indexed' | 'error'; state: 'pending' | 'indexing' | 'indexed' | 'error';
total_snippets: number | null; total_snippets: number | null;
indexed_at: number | null; indexed_at: number | null;
@@ -24,6 +26,7 @@ export class RepositoryVersionEntity {
this.repository_id = props.repository_id; this.repository_id = props.repository_id;
this.tag = props.tag; this.tag = props.tag;
this.title = props.title; this.title = props.title;
this.commit_hash = props.commit_hash;
this.state = props.state; this.state = props.state;
this.total_snippets = props.total_snippets; this.total_snippets = props.total_snippets;
this.indexed_at = props.indexed_at; this.indexed_at = props.indexed_at;
@@ -36,6 +39,7 @@ export interface RepositoryVersionProps {
repositoryId: string; repositoryId: string;
tag: string; tag: string;
title: string | null; title: string | null;
commitHash: string | null;
state: 'pending' | 'indexing' | 'indexed' | 'error'; state: 'pending' | 'indexing' | 'indexed' | 'error';
totalSnippets: number; totalSnippets: number;
indexedAt: Date | null; indexedAt: Date | null;
@@ -47,6 +51,7 @@ export class RepositoryVersion {
repositoryId: string; repositoryId: string;
tag: string; tag: string;
title: string | null; title: string | null;
commitHash: string | null;
state: 'pending' | 'indexing' | 'indexed' | 'error'; state: 'pending' | 'indexing' | 'indexed' | 'error';
totalSnippets: number; totalSnippets: number;
indexedAt: Date | null; indexedAt: Date | null;
@@ -57,6 +62,7 @@ export class RepositoryVersion {
this.repositoryId = props.repositoryId; this.repositoryId = props.repositoryId;
this.tag = props.tag; this.tag = props.tag;
this.title = props.title; this.title = props.title;
this.commitHash = props.commitHash;
this.state = props.state; this.state = props.state;
this.totalSnippets = props.totalSnippets; this.totalSnippets = props.totalSnippets;
this.indexedAt = props.indexedAt; this.indexedAt = props.indexedAt;
@@ -69,6 +75,7 @@ export interface RepositoryVersionDtoProps {
repositoryId: string; repositoryId: string;
tag: string; tag: string;
title: string | null; title: string | null;
commitHash: string | null;
state: 'pending' | 'indexing' | 'indexed' | 'error'; state: 'pending' | 'indexing' | 'indexed' | 'error';
totalSnippets: number; totalSnippets: number;
indexedAt: Date | null; indexedAt: Date | null;
@@ -80,6 +87,7 @@ export class RepositoryVersionDto {
repositoryId: string; repositoryId: string;
tag: string; tag: string;
title: string | null; title: string | null;
commitHash: string | null;
state: 'pending' | 'indexing' | 'indexed' | 'error'; state: 'pending' | 'indexing' | 'indexed' | 'error';
totalSnippets: number; totalSnippets: number;
indexedAt: Date | null; indexedAt: Date | null;
@@ -90,6 +98,7 @@ export class RepositoryVersionDto {
this.repositoryId = props.repositoryId; this.repositoryId = props.repositoryId;
this.tag = props.tag; this.tag = props.tag;
this.title = props.title; this.title = props.title;
this.commitHash = props.commitHash;
this.state = props.state; this.state = props.state;
this.totalSnippets = props.totalSnippets; this.totalSnippets = props.totalSnippets;
this.indexedAt = props.indexedAt; this.indexedAt = props.indexedAt;

View File

@@ -595,8 +595,7 @@ describe('formatLibraryResults', () => {
id: '/facebook/react/v18', id: '/facebook/react/v18',
repositoryId: '/facebook/react', repositoryId: '/facebook/react',
tag: 'v18', tag: 'v18',
title: 'React 18', title: 'React 18', commitHash: null, state: 'indexed',
state: 'indexed',
totalSnippets: 1000, totalSnippets: 1000,
indexedAt: null, indexedAt: null,
createdAt: now createdAt: now

View File

@@ -23,17 +23,34 @@ function createTestDb(): Database.Database {
client.pragma('foreign_keys = ON'); client.pragma('foreign_keys = ON');
const migrationsFolder = join(import.meta.dirname, '../db/migrations'); const migrationsFolder = join(import.meta.dirname, '../db/migrations');
const migrationSql = readFileSync(
// Apply all migration files in order
const migration0 = readFileSync(
join(migrationsFolder, '0000_large_master_chief.sql'), join(migrationsFolder, '0000_large_master_chief.sql'),
'utf-8' 'utf-8'
); );
const migration1 = readFileSync(
join(migrationsFolder, '0001_quick_nighthawk.sql'),
'utf-8'
);
const statements = migrationSql // Apply first migration
const statements0 = migration0
.split('--> statement-breakpoint') .split('--> statement-breakpoint')
.map((s) => s.trim()) .map((s) => s.trim())
.filter(Boolean); .filter(Boolean);
for (const stmt of statements) { for (const stmt of statements0) {
client.exec(stmt);
}
// Apply second migration
const statements1 = migration1
.split('--> statement-breakpoint')
.map((s) => s.trim())
.filter(Boolean);
for (const stmt of statements1) {
client.exec(stmt); client.exec(stmt);
} }

View File

@@ -34,14 +34,22 @@ export class VersionService {
* Add a new version record for a repository. * Add a new version record for a repository.
* The version ID follows the convention: {repositoryId}/{tag} * The version ID follows the convention: {repositoryId}/{tag}
* *
* @param commitHash Optional commit hash. If not provided and repository is local,
* will attempt to resolve the tag to a commit hash automatically.
*
* @throws NotFoundError when the parent repository does not exist * @throws NotFoundError when the parent repository does not exist
* @throws AlreadyExistsError when the tag is already registered * @throws AlreadyExistsError when the tag is already registered
*/ */
add(repositoryId: string, tag: string, title?: string): RepositoryVersion { add(
repositoryId: string,
tag: string,
title?: string,
commitHash?: string
): RepositoryVersion {
// Verify parent repository exists. // Verify parent repository exists.
const repo = this.db const repo = this.db
.prepare(`SELECT id FROM repositories WHERE id = ?`) .prepare(`SELECT id, source, source_url FROM repositories WHERE id = ?`)
.get(repositoryId) as { id: string } | undefined; .get(repositoryId) as { id: string; source: string; source_url: string } | undefined;
if (!repo) { if (!repo) {
throw new NotFoundError(`Repository ${repositoryId} not found`); throw new NotFoundError(`Repository ${repositoryId} not found`);
@@ -55,15 +63,29 @@ export class VersionService {
throw new AlreadyExistsError(`Version ${tag} already exists for repository ${repositoryId}`); throw new AlreadyExistsError(`Version ${tag} already exists for repository ${repositoryId}`);
} }
// For local repositories, attempt to resolve tag to commit hash if not provided
let resolvedCommitHash = commitHash;
if (!resolvedCommitHash && repo.source === 'local') {
try {
const { resolveTagToCommit } = require('$lib/server/utils/git.js');
resolvedCommitHash = resolveTagToCommit({ repoPath: repo.source_url, tag });
} catch (error) {
console.warn(
`[VersionService] Could not resolve tag '${tag}' to commit hash for ${repositoryId}: ${error instanceof Error ? error.message : String(error)}`
);
// Continue without commit hash — non-blocking
}
}
const now = Math.floor(Date.now() / 1000); const now = Math.floor(Date.now() / 1000);
this.db this.db
.prepare( .prepare(
`INSERT INTO repository_versions `INSERT INTO repository_versions
(id, repository_id, tag, title, state, total_snippets, indexed_at, created_at) (id, repository_id, tag, title, commit_hash, state, total_snippets, indexed_at, created_at)
VALUES (?, ?, ?, ?, 'pending', 0, NULL, ?)` VALUES (?, ?, ?, ?, ?, 'pending', 0, NULL, ?)`
) )
.run(id, repositoryId, tag, title ?? null, now); .run(id, repositoryId, tag, title ?? null, resolvedCommitHash ?? null, now);
const row = this.db const row = this.db
.prepare(`SELECT * FROM repository_versions WHERE id = ?`) .prepare(`SELECT * FROM repository_versions WHERE id = ?`)
@@ -105,11 +127,14 @@ export class VersionService {
* Silently skips tags that are already registered (idempotent). * Silently skips tags that are already registered (idempotent).
* All new records are created with state = 'pending'. * All new records are created with state = 'pending'.
* *
* Supports optional `commitHash` field to pin a version to a specific commit,
* overriding tag resolution (TRUEREF-0019).
*
* @throws NotFoundError when the parent repository does not exist * @throws NotFoundError when the parent repository does not exist
*/ */
registerFromConfig( registerFromConfig(
repositoryId: string, repositoryId: string,
previousVersions: { tag: string; title: string }[] previousVersions: { tag: string; title: string; commitHash?: string }[]
): RepositoryVersion[] { ): RepositoryVersion[] {
// Verify parent repository exists. // Verify parent repository exists.
const repo = this.db const repo = this.db
@@ -122,7 +147,7 @@ export class VersionService {
const registered: RepositoryVersion[] = []; const registered: RepositoryVersion[] = [];
for (const { tag, title } of previousVersions) { for (const { tag, title, commitHash } of previousVersions) {
const existing = this.getByTag(repositoryId, tag); const existing = this.getByTag(repositoryId, tag);
if (existing) { if (existing) {
// Already registered — skip silently. // Already registered — skip silently.
@@ -130,10 +155,42 @@ export class VersionService {
continue; continue;
} }
const version = this.add(repositoryId, tag, title); const version = this.add(repositoryId, tag, title, commitHash);
registered.push(version); registered.push(version);
} }
return registered; return registered;
} }
/**
 * Discover all version tags from a local repository and return them
 * along with their resolved commit hashes.
 *
 * This is used for tag auto-discovery when adding a repository or
 * refreshing available versions (TRUEREF-0019).
 *
 * The repository row is looked up by id; its `source_url` column is passed
 * to the git helpers as the repository path. Every discovered tag is resolved
 * to a commit hash individually, so a single tag that fails to resolve makes
 * the whole call throw.
 *
 * @param repositoryId - id of the row in the `repositories` table
 * @returns Array of { tag, commitHash } objects, in the order returned by
 *   `discoverVersionTags` (newest first)
 * @throws NotFoundError when no repository row exists for `repositoryId`
 * @throws Error when repository is not local or git operations fail
 */
discoverTags(repositoryId: string): Array<{ tag: string; commitHash: string }> {
// Only the columns needed to decide whether and where git can be run are selected.
const repo = this.db
.prepare(`SELECT id, source, source_url FROM repositories WHERE id = ?`)
.get(repositoryId) as { id: string; source: string; source_url: string } | undefined;
if (!repo) {
throw new NotFoundError(`Repository ${repositoryId} not found`);
}
// Any source other than 'local' is rejected before git is touched.
if (repo.source !== 'local') {
throw new Error('Tag discovery is only supported for local repositories');
}
// NOTE(review): this relies on a CommonJS `require` being available at runtime and on
// the `$lib` alias being resolvable by it — confirm for the server build; a static
// top-level import would be the safer form.
const { discoverVersionTags, resolveTagToCommit } = require('$lib/server/utils/git.js');
const tags = discoverVersionTags({ repoPath: repo.source_url });
// One resolveTagToCommit call per tag; an error from any of them propagates to the caller.
return tags.map((tag: string) => {
const commitHash = resolveTagToCommit({ repoPath: repo.source_url, tag });
return { tag, commitHash };
});
}
} }

163
src/lib/server/utils/git.ts Normal file
View File

@@ -0,0 +1,163 @@
/**
* Git utilities for version indexing (TRUEREF-0019).
*
* Provides:
* - Tag-to-commit resolution
* - Tag auto-discovery
* - File extraction via `git archive` to temp directories
*/
import { execFileSync, execSync } from 'node:child_process';
import { mkdirSync, rmSync } from 'node:fs';
import { join } from 'node:path';
/** Input for `resolveTagToCommit`. */
export interface ResolveTagOptions {
/** Path handed to `git -C`, i.e. the directory git runs in. */
repoPath: string;
/** Ref name to resolve; it is suffixed with `^{commit}` before being given to `git rev-parse`. */
tag: string;
}
/** Input for `discoverVersionTags`. */
export interface DiscoverTagsOptions {
/** Path handed to `git -C`, i.e. the directory git runs in. */
repoPath: string;
}
/** Input for `extractVersionToTemp`. */
export interface ExtractVersionOptions {
/** Path handed to `git -C`, i.e. the directory git runs in. */
repoPath: string;
/** Tree-ish given to `git archive`; also echoed in error messages. */
commitHash: string;
/** Repository identifier; used (with `/` replaced by `_`) as the first part of the extraction directory name. */
repositoryId: string;
/** Version tag; used as the second part of the extraction directory name. */
versionTag: string;
}
/**
 * Resolve a git tag/branch to its underlying commit hash.
 *
 * Runs `git rev-parse --verify <ref>^{commit}`, which dereferences annotated
 * tags to the commit they point at and fails unless the argument names
 * exactly one object.
 *
 * Git is spawned directly with an argument vector (no shell), so characters
 * such as `"`, `$` or backticks in `repoPath` or `tag` are passed through
 * literally instead of being interpreted as shell syntax.
 *
 * @param options - repository path and the ref to resolve
 * @returns the commit hash printed by git, with surrounding whitespace trimmed
 * @throws Error when the ref is empty or starts with `-`, when it does not
 *   exist, or when the git command fails
 */
export function resolveTagToCommit(options: ResolveTagOptions): string {
	const { repoPath, tag } = options;
	try {
		// A ref starting with "-" would be parsed by git as an option rather than a revision.
		if (tag.length === 0 || tag.startsWith('-')) {
			throw new Error('ref must be non-empty and must not start with "-"');
		}
		const commitHash = execFileSync(
			'git',
			['-C', repoPath, 'rev-parse', '--verify', `${tag}^{commit}`],
			{
				encoding: 'utf-8',
				stdio: ['ignore', 'pipe', 'pipe']
			}
		).trim();
		return commitHash;
	} catch (error) {
		throw new Error(
			`Failed to resolve tag '${tag}' in repository at ${repoPath}: ${error instanceof Error ? error.message : String(error)}`
		);
	}
}
/**
 * Discover all version tags in a repository.
 *
 * Runs `git tag -l --sort=-creatordate` and returns the tag names in the
 * order git prints them, i.e. descending by git's `creatordate` field
 * (most recent first).
 *
 * Git is spawned directly with an argument vector (no shell), so `repoPath`
 * is passed through literally and cannot inject shell commands.
 *
 * @param options - path of the repository to inspect
 * @returns tag names, most recently created first; empty when there are no tags
 * @throws Error when git command fails
 */
export function discoverVersionTags(options: DiscoverTagsOptions): string[] {
	const { repoPath } = options;
	try {
		// List all tags, sorted by creation date (newest first)
		const output = execFileSync('git', ['-C', repoPath, 'tag', '-l', '--sort=-creatordate'], {
			encoding: 'utf-8',
			stdio: ['ignore', 'pipe', 'pipe']
		});
		// Accept both "\n" and "\r\n" line endings; blank lines (including the
		// trailing one) are dropped, so empty output yields an empty array.
		return output.split(/\r?\n/).filter((tag) => tag.length > 0);
	} catch (error) {
		throw new Error(
			`Failed to discover tags in repository at ${repoPath}: ${error instanceof Error ? error.message : String(error)}`
		);
	}
}
/**
 * Extract a clean file tree from a specific commit using `git archive`.
 *
 * The extracted files are placed in a workspace-local temporary directory
 * `{process.cwd()}/prompts/tmp/{repositoryId}-{versionTag}/`, where every
 * path separator (`/` or `\`) in the directory name is replaced by `_` so the
 * result is always a direct child of `prompts/tmp`.
 *
 * Example:
 * repo: "/facebook/react"
 * tag: "v18.3.0"
 * → temp path: "{workspace}/prompts/tmp/_facebook_react-v18.3.0/"
 *
 * The archive is first written to a sibling `.archive.tar` file and then
 * unpacked with `tar`. Both tools are spawned without a shell, so a failing
 * `git archive` is always reported and no input is interpreted as shell
 * syntax. Any previous extraction for the same version is removed first.
 *
 * The temp directory MUST be deleted after indexing completes.
 *
 * @param options - repository path, commit to extract and naming inputs
 * @returns absolute path to the extracted directory
 * @throws Error when the commit hash is empty or starts with `-`, or when
 *   preparing the directory, `git archive` or `tar` fails
 */
export function extractVersionToTemp(options: ExtractVersionOptions): string {
	const { repoPath, commitHash, repositoryId, versionTag } = options;

	// A value starting with "-" would be parsed by git as an option rather than a tree-ish.
	if (commitHash.length === 0 || commitHash.startsWith('-')) {
		throw new Error(
			`Failed to extract commit ${commitHash}: commit hash must be non-empty and must not start with "-"`
		);
	}

	// Create workspace-local temp directory under prompts/
	// (agent-conventions rule: never use OS temp directories)
	const tempRoot = join(process.cwd(), 'prompts', 'tmp');
	// Neutralise separators in BOTH parts: an unsanitised tag such as "../../x"
	// would otherwise move extractPath outside tempRoot, and that path is
	// removed recursively below.
	const extractDirName = `${repositoryId}-${versionTag}`.replace(/[\\/]/g, '_');
	const extractPath = join(tempRoot, extractDirName);
	const archivePath = `${extractPath}.archive.tar`;

	try {
		// Clean up any existing extraction for this version, then start from an empty directory.
		rmSync(extractPath, { recursive: true, force: true });
		mkdirSync(extractPath, { recursive: true });

		// Two separate processes instead of a shell pipeline: the exit status of
		// `git archive` is checked on its own, so a bad commit cannot be hidden
		// by a successful `tar`.
		execFileSync(
			'git',
			['-C', repoPath, 'archive', '--format=tar', `--output=${archivePath}`, commitHash],
			{ stdio: ['ignore', 'pipe', 'pipe'] }
		);
		execFileSync('tar', ['-x', '-f', archivePath, '-C', extractPath], {
			stdio: ['ignore', 'pipe', 'pipe']
		});
		return extractPath;
	} catch (error) {
		// Clean up on failure
		try {
			rmSync(extractPath, { recursive: true, force: true });
		} catch {
			// Best effort cleanup
		}
		throw new Error(
			`Failed to extract commit ${commitHash} to ${extractPath}: ${error instanceof Error ? error.message : String(error)}`
		);
	} finally {
		// The intermediate archive is never needed after this call, success or failure.
		try {
			rmSync(archivePath, { force: true });
		} catch {
			// Best effort cleanup
		}
	}
}
/**
 * Remove an extraction directory previously produced by extractVersionToTemp.
 *
 * Intended to run once indexing has finished, whether it succeeded or not.
 * Removal is recursive and forced; a failure is logged as a warning and
 * never propagated to the caller.
 *
 * @param extractPath - directory to delete
 */
export function cleanupTempExtraction(extractPath: string): void {
	const removalOptions = { recursive: true, force: true };
	try {
		rmSync(extractPath, removalOptions);
		return;
	} catch (failure) {
		const reason = failure instanceof Error ? failure.message : String(failure);
		console.warn(`[git.ts] Failed to cleanup temp extraction at ${extractPath}: ${reason}`);
	}
}

View File

@@ -92,5 +92,5 @@ export interface TrueRefConfig {
excludeFolders?: string[]; excludeFolders?: string[];
excludeFiles?: string[]; excludeFiles?: string[];
rules?: string[]; rules?: string[];
previousVersions?: Array<{ tag: string; title: string }>; previousVersions?: Array<{ tag: string; title: string; commitHash?: string }>;
} }

View File

@@ -35,14 +35,28 @@ function createTestDb(): Database.Database {
client.pragma('foreign_keys = ON'); client.pragma('foreign_keys = ON');
const migrationsFolder = join(import.meta.dirname, '../../../lib/server/db/migrations'); const migrationsFolder = join(import.meta.dirname, '../../../lib/server/db/migrations');
const migrationSql = readFileSync(join(migrationsFolder, '0000_large_master_chief.sql'), 'utf-8');
const statements = migrationSql // Apply all migration files in order
const migration0 = readFileSync(join(migrationsFolder, '0000_large_master_chief.sql'), 'utf-8');
const migration1 = readFileSync(join(migrationsFolder, '0001_quick_nighthawk.sql'), 'utf-8');
// Apply first migration
const statements0 = migration0
.split('--> statement-breakpoint') .split('--> statement-breakpoint')
.map((statement) => statement.trim()) .map((statement) => statement.trim())
.filter(Boolean); .filter(Boolean);
for (const statement of statements) { for (const statement of statements0) {
client.exec(statement);
}
// Apply second migration
const statements1 = migration1
.split('--> statement-breakpoint')
.map((statement) => statement.trim())
.filter(Boolean);
for (const statement of statements1) {
client.exec(statement); client.exec(statement);
} }