// Source file: trueref-legacy/src/lib/server/services/repository.service.ts
// Snapshot: 2026-04-01 14:09:19 +02:00 (400 lines, 11 KiB, TypeScript)

/**
* RepositoryService — CRUD operations for repositories.
* Operates directly on the raw better-sqlite3 client for synchronous queries.
*/
import type Database from 'better-sqlite3';
import { RepositoryMapper } from '$lib/server/mappers/repository.mapper.js';
import { IndexingJobMapper } from '$lib/server/mappers/indexing-job.mapper.js';
import { Repository, RepositoryEntity } from '$lib/server/models/repository.js';
import { IndexingJob, IndexingJobEntity } from '$lib/server/models/indexing-job.js';
import { SqliteVecStore } from '$lib/server/search/sqlite-vec.store.js';
import { resolveGitHubId, resolveLocalId } from '$lib/server/utils/id-resolver';
import {
AlreadyExistsError,
InvalidInputError,
InvalidUrlError,
NotFoundError
} from '$lib/server/utils/validation';
/**
 * Input accepted by `RepositoryService.add`.
 */
export interface AddRepositoryInput {
/** Origin kind; `add` uses GitHub ID resolution for 'github' and local ID resolution otherwise. */
source: 'github' | 'local';
/**
 * Location of the repository. Required: a blank or whitespace-only value is rejected.
 * For GitHub sources the expected format is https://github.com/owner/repo.
 */
sourceUrl: string;
/** Display title; when omitted, `add` derives one from the resolved ID or the path. */
title?: string;
/** Free-form description; stored as null when omitted. */
description?: string;
/** Branch name; `add` falls back to 'main' when omitted. */
branch?: string;
/** GitHub token persisted with the repository; stored as null when omitted. */
githubToken?: string;
}
/**
 * Patch accepted by `RepositoryService.update`.
 * Only properties that are not `undefined` are written; the rest keep their stored value.
 */
export interface UpdateRepositoryInput {
/** New display title. */
title?: string;
/** New description. */
description?: string;
/** New branch name. */
branch?: string;
/** New GitHub token. */
githubToken?: string;
}
/**
 * Aggregate figures returned by `RepositoryService.getStats`.
 */
export interface RepositoryStats {
/** Number of rows in `snippets` for the repository. */
totalSnippets: number;
/** Sum of `token_count` over those snippets (0 when there are none). */
totalTokens: number;
/** Number of rows in `documents` for the repository. */
totalDocuments: number;
/** The repository's own `lastIndexedAt` value. */
lastIndexedAt: Date | null;
}
/**
 * Index overview returned by `RepositoryService.getIndexSummary`.
 */
export interface RepositoryIndexSummary {
/** Number of `snippet_embeddings` rows whose snippet belongs to the repository. */
embeddingCount: number;
/**
 * De-duplicated labels of indexed content: the repository branch (when documents without a
 * version exist) followed by the tags of versions whose state is 'indexed'.
 */
indexedVersions: string[];
}
/**
 * CRUD and bookkeeping operations for repositories and their indexing jobs.
 *
 * Every query runs synchronously against the injected better-sqlite3 handle.
 */
export class RepositoryService {
  constructor(private readonly db: Database.Database) {}

  /**
   * List repositories, newest first (by `created_at`).
   *
   * @param options.state  - when set, only repositories in this state are returned
   * @param options.limit  - page size (defaults to 50)
   * @param options.offset - number of rows to skip (defaults to 0)
   */
  list(options?: { state?: Repository['state']; limit?: number; offset?: number }): Repository[] {
    const limit = options?.limit ?? 50;
    const offset = options?.offset ?? 0;
    const state = options?.state;

    // The state filter is the only part of the statement that varies.
    const where = state ? 'WHERE state = ? ' : '';
    const params: unknown[] = state ? [state, limit, offset] : [limit, offset];
    const rows = this.db
      .prepare(`SELECT * FROM repositories ${where}ORDER BY created_at DESC LIMIT ? OFFSET ?`)
      .all(...params) as RepositoryEntity[];
    return rows.map((row) => RepositoryMapper.fromEntity(new RepositoryEntity(row)));
  }

  /**
   * Count repositories, optionally restricted to a single state.
   */
  count(state?: Repository['state']): number {
    const row = (
      state
        ? this.db.prepare(`SELECT COUNT(*) as n FROM repositories WHERE state = ?`).get(state)
        : this.db.prepare(`SELECT COUNT(*) as n FROM repositories`).get()
    ) as { n: number };
    return row.n;
  }

  /**
   * Get a single repository by ID.
   *
   * @returns the repository, or `null` when no row has that ID
   */
  get(id: string): Repository | null {
    const row = this.db.prepare(`SELECT * FROM repositories WHERE id = ?`).get(id) as
      | RepositoryEntity
      | undefined;
    return row ? RepositoryMapper.fromEntity(new RepositoryEntity(row)) : null;
  }

  /**
   * Fetch a repository or fail when the ID is unknown.
   *
   * @throws NotFoundError when no repository has the given ID
   */
  private requireRepository(id: string): Repository {
    const repo = this.get(id);
    if (!repo) throw new NotFoundError(`Repository ${id} not found`);
    return repo;
  }

  /**
   * Add a new repository in the 'pending' state under a generated canonical ID.
   *
   * This method only inserts the repository row; it does not create an indexing
   * job (see `createIndexingJob`).
   *
   * @throws InvalidInputError  when `sourceUrl` is missing or blank
   * @throws InvalidUrlError    when a GitHub `sourceUrl` cannot be resolved to an ID
   * @throws AlreadyExistsError when a repository with the resolved ID already exists
   */
  add(input: AddRepositoryInput): Repository {
    if (!input.sourceUrl?.trim()) {
      throw new InvalidInputError('sourceUrl is required', [
        { field: 'sourceUrl', message: 'sourceUrl is required' }
      ]);
    }

    // Derive the canonical ID and a fallback title from the source location.
    let id: string;
    let defaultTitle: string;
    if (input.source === 'github') {
      try {
        id = resolveGitHubId(input.sourceUrl);
      } catch {
        throw new InvalidUrlError(
          `Invalid GitHub URL: ${input.sourceUrl}. Expected format: https://github.com/owner/repo`
        );
      }
      // Second non-empty segment of the ID (the repo part of owner/repo), else the ID itself.
      const idParts = id.split('/').filter(Boolean);
      defaultTitle = idParts[1] ?? id;
    } else {
      // Hand the resolver every existing ID. Reading only the id column avoids
      // both an arbitrary row cap and materialising full repository objects.
      const takenIds = (
        this.db.prepare(`SELECT id FROM repositories ORDER BY created_at DESC`).all() as {
          id: string;
        }[]
      ).map((row) => row.id);
      id = resolveLocalId(input.sourceUrl, takenIds);

      // Drop empty segments so a trailing slash ("/srv/repo/") still yields "repo"
      // and a path without any named segment falls back to 'local-repo'.
      const segments = input.sourceUrl.trim().split('/').filter(Boolean);
      defaultTitle = segments.at(-1) ?? 'local-repo';
    }
    const title = input.title ?? defaultTitle;

    if (this.get(id)) {
      throw new AlreadyExistsError(`Repository ${id} already exists`);
    }

    // Timestamps are truncated to whole seconds before being stored.
    const now = Math.floor(Date.now() / 1000);
    const repo = new Repository({
      id,
      title,
      description: input.description ?? null,
      source: input.source,
      sourceUrl: input.sourceUrl,
      branch: input.branch ?? 'main',
      state: 'pending',
      totalSnippets: 0,
      totalTokens: 0,
      trustScore: 0,
      benchmarkScore: 0,
      stars: null,
      githubToken: input.githubToken ?? null,
      lastIndexedAt: null,
      createdAt: new Date(now * 1000),
      updatedAt: new Date(now * 1000)
    });
    const entity = RepositoryMapper.toEntity(repo);
    this.db
      .prepare(
        `INSERT INTO repositories
          (id, title, description, source, source_url, branch, state,
           total_snippets, total_tokens, trust_score, benchmark_score,
           stars, github_token, last_indexed_at, created_at, updated_at)
         VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)`
      )
      .run(
        entity.id,
        entity.title,
        entity.description,
        entity.source,
        entity.source_url,
        entity.branch,
        entity.state,
        entity.total_snippets,
        entity.total_tokens,
        entity.trust_score,
        entity.benchmark_score,
        entity.stars,
        entity.github_token,
        entity.last_indexed_at,
        entity.created_at,
        entity.updated_at
      );

    // Re-read so the caller gets exactly what was persisted.
    return this.requireRepository(id);
  }

  /**
   * Update repository metadata. Only fields that are not `undefined` are written;
   * when nothing is supplied the stored repository is returned untouched.
   *
   * @throws NotFoundError when no repository has the given ID
   */
  update(id: string, input: UpdateRepositoryInput): Repository {
    const existing = this.requireRepository(id);

    // Column names are fixed literals here, never caller-provided text.
    const candidates: [column: string, value: unknown][] = [
      ['title', input.title],
      ['description', input.description],
      ['branch', input.branch],
      ['github_token', input.githubToken]
    ];
    const changes = candidates.filter(([, value]) => value !== undefined);
    if (changes.length === 0) return existing;

    const assignments = [...changes.map(([column]) => `${column} = ?`), 'updated_at = ?'];
    const values = [...changes.map(([, value]) => value), Math.floor(Date.now() / 1000), id];
    this.db.prepare(`UPDATE repositories SET ${assignments.join(', ')} WHERE id = ?`).run(...values);
    return this.requireRepository(id);
  }

  /**
   * Delete a repository. Its embeddings are removed explicitly through
   * `SqliteVecStore`, in the same transaction as the repository row.
   *
   * NOTE(review): other dependent rows are presumably removed by foreign-key
   * cascades; the schema is not visible from this file, so confirm.
   *
   * @throws NotFoundError when no repository has the given ID
   */
  remove(id: string): void {
    this.requireRepository(id);
    const sqliteVecStore = new SqliteVecStore(this.db);
    this.db.transaction(() => {
      sqliteVecStore.deleteEmbeddingsForRepository(id);
      this.db.prepare(`DELETE FROM repositories WHERE id = ?`).run(id);
    })();
  }

  /**
   * Get aggregate statistics for a repository, computed from the `snippets`
   * and `documents` tables.
   *
   * @throws NotFoundError when no repository has the given ID
   */
  getStats(id: string): RepositoryStats {
    const existing = this.requireRepository(id);
    const snippetStats = this.db
      .prepare(
        `SELECT COUNT(*) as total_snippets, COALESCE(SUM(token_count), 0) as total_tokens
         FROM snippets WHERE repository_id = ?`
      )
      .get(id) as { total_snippets: number; total_tokens: number };
    const docStats = this.db
      .prepare(`SELECT COUNT(*) as total_documents FROM documents WHERE repository_id = ?`)
      .get(id) as { total_documents: number };
    return {
      totalSnippets: snippetStats.total_snippets,
      totalTokens: snippetStats.total_tokens,
      totalDocuments: docStats.total_documents,
      lastIndexedAt: existing.lastIndexedAt
    };
  }

  /**
   * Get the tags of all versions recorded for a repository, newest first.
   * Returns an empty array when the repository has no versions or does not exist.
   */
  getVersions(repositoryId: string): string[] {
    const rows = this.db
      .prepare(
        `SELECT tag FROM repository_versions WHERE repository_id = ? ORDER BY created_at DESC`
      )
      .all(repositoryId) as { tag: string }[];
    return rows.map((r) => r.tag);
  }

  /**
   * Summarise what has been indexed for a repository: how many snippet
   * embeddings exist and which branch/version labels have indexed content.
   *
   * @throws NotFoundError when no repository has the given ID
   */
  getIndexSummary(repositoryId: string): RepositoryIndexSummary {
    const repository = this.requireRepository(repositoryId);
    const embeddingRow = this.db
      .prepare(
        `SELECT COUNT(*) AS count
         FROM snippet_embeddings se
         INNER JOIN snippets s ON s.id = se.snippet_id
         WHERE s.repository_id = ?`
      )
      .get(repositoryId) as { count: number };
    const versionRows = this.db
      .prepare(
        `SELECT tag FROM repository_versions
         WHERE repository_id = ? AND state = 'indexed'
         ORDER BY created_at DESC`
      )
      .all(repositoryId) as { tag: string }[];

    // Documents with no version_id are treated as the default-branch index.
    const hasDefaultBranchIndex = Boolean(
      this.db
        .prepare(
          `SELECT 1 AS found
           FROM documents
           WHERE repository_id = ? AND version_id IS NULL
           LIMIT 1`
        )
        .get(repositoryId)
    );
    const indexedVersions = [
      ...(hasDefaultBranchIndex ? [repository.branch ?? 'default branch'] : []),
      ...versionRows.map((row) => row.tag)
    ];
    return {
      embeddingCount: embeddingRow.count,
      // A tag may equal the branch name; report each label once.
      indexedVersions: Array.from(new Set(indexedVersions))
    };
  }

  /**
   * Create a queued indexing job for a repository (optionally for one version).
   * If a job for the same (repository, version) pair is already queued or
   * running, the most recent such job is returned instead of creating a new one.
   */
  createIndexingJob(repositoryId: string, versionId?: string): IndexingJob {
    const resolvedVersionId = versionId ?? null;

    // `version_id = NULL` never matches in SQL, so the no-version case needs
    // its own IS NULL branch; the parameter is therefore bound twice.
    const activeJob = this.db
      .prepare(
        `SELECT * FROM indexing_jobs
         WHERE repository_id = ?
           AND (version_id = ? OR (version_id IS NULL AND ? IS NULL))
           AND status IN ('queued', 'running')
         ORDER BY created_at DESC LIMIT 1`
      )
      .get(repositoryId, resolvedVersionId, resolvedVersionId) as IndexingJobEntity | undefined;
    if (activeJob) return IndexingJobMapper.fromEntity(new IndexingJobEntity(activeJob));

    const now = Math.floor(Date.now() / 1000);
    const job = new IndexingJob({
      id: crypto.randomUUID(),
      repositoryId,
      versionId: resolvedVersionId,
      status: 'queued',
      progress: 0,
      totalFiles: 0,
      processedFiles: 0,
      stage: 'queued',
      stageDetail: null,
      error: null,
      startedAt: null,
      completedAt: null,
      createdAt: new Date(now * 1000)
    });
    const entity = new IndexingJobEntity({
      id: job.id,
      repository_id: job.repositoryId,
      version_id: job.versionId,
      status: job.status,
      progress: job.progress,
      total_files: job.totalFiles,
      processed_files: job.processedFiles,
      stage: 'queued',
      stage_detail: null,
      error: job.error,
      started_at: null,
      completed_at: null,
      created_at: now
    });

    // NOTE(review): `stage` and `stage_detail` are not part of this INSERT, so
    // their stored values presumably come from column defaults — confirm the
    // schema defines them.
    this.db
      .prepare(
        `INSERT INTO indexing_jobs
          (id, repository_id, version_id, status, progress, total_files,
           processed_files, error, started_at, completed_at, created_at)
         VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)`
      )
      .run(
        entity.id,
        entity.repository_id,
        entity.version_id,
        entity.status,
        entity.progress,
        entity.total_files,
        entity.processed_files,
        entity.error,
        entity.started_at,
        entity.completed_at,
        entity.created_at
      );

    // Re-read so the returned job reflects the persisted row.
    const created = this.db
      .prepare(`SELECT * FROM indexing_jobs WHERE id = ?`)
      .get(job.id) as IndexingJobEntity;
    return IndexingJobMapper.fromEntity(new IndexingJobEntity(created));
  }
}