400 lines
11 KiB
TypeScript
400 lines
11 KiB
TypeScript
/**
 * RepositoryService — CRUD operations for repositories.
 *
 * Operates directly on the raw better-sqlite3 client for synchronous queries.
 */
|
|
|
|
import type Database from 'better-sqlite3';
|
|
import { RepositoryMapper } from '$lib/server/mappers/repository.mapper.js';
|
|
import { IndexingJobMapper } from '$lib/server/mappers/indexing-job.mapper.js';
|
|
import { Repository, RepositoryEntity } from '$lib/server/models/repository.js';
|
|
import { IndexingJob, IndexingJobEntity } from '$lib/server/models/indexing-job.js';
|
|
import { SqliteVecStore } from '$lib/server/search/sqlite-vec.store.js';
|
|
import { resolveGitHubId, resolveLocalId } from '$lib/server/utils/id-resolver';
|
|
import {
|
|
AlreadyExistsError,
|
|
InvalidInputError,
|
|
InvalidUrlError,
|
|
NotFoundError
|
|
} from '$lib/server/utils/validation';
|
|
|
|
/**
 * Payload accepted by `RepositoryService.add`.
 */
export interface AddRepositoryInput {
  /** Where the repository lives: a GitHub URL or a path on the local machine. */
  source: 'github' | 'local';
  /** GitHub URL or local path; must not be blank. */
  sourceUrl: string;
  /** Display title; when omitted, one is derived from `sourceUrl`. */
  title?: string;
  /** Free-form description; stored as `null` when omitted. */
  description?: string;
  /** Branch to track; `'main'` is used when omitted. */
  branch?: string;
  /** Access token stored alongside the repository; `null` when omitted. */
  githubToken?: string;
}
|
|
|
|
/**
 * Payload accepted by `RepositoryService.update`.
 *
 * Every field is optional; a field left `undefined` is not touched by the update.
 */
export interface UpdateRepositoryInput {
  /** New display title. */
  title?: string;
  /** New description. */
  description?: string;
  /** New branch name. */
  branch?: string;
  /** New access token. */
  githubToken?: string;
}
|
|
|
|
/**
 * Aggregate figures returned by `RepositoryService.getStats`.
 */
export interface RepositoryStats {
  /** Number of rows in `snippets` that belong to the repository. */
  totalSnippets: number;
  /** Sum of `token_count` over those snippets (0 when there are none). */
  totalTokens: number;
  /** Number of rows in `documents` that belong to the repository. */
  totalDocuments: number;
  /** The repository's own `lastIndexedAt` value, or `null` when unset. */
  lastIndexedAt: Date | null;
}
|
|
|
|
/**
 * Index overview returned by `RepositoryService.getIndexSummary`.
 */
export interface RepositoryIndexSummary {
  /** Number of `snippet_embeddings` rows joined to the repository's snippets. */
  embeddingCount: number;
  /** De-duplicated labels of what has been indexed (default branch and/or version tags). */
  indexedVersions: string[];
}
|
|
|
|
/**
 * Synchronous data-access service for repositories and their indexing jobs.
 *
 * All SQL runs on the better-sqlite3 handle supplied to the constructor, so
 * every method returns its result directly rather than a promise.
 */
export class RepositoryService {
  constructor(private readonly db: Database.Database) {}

  /**
   * List repositories, newest first (`created_at DESC`).
   *
   * @param options.state  When set, only repositories in this state are returned.
   * @param options.limit  Maximum number of rows; defaults to 50.
   * @param options.offset Number of rows to skip; defaults to 0.
   */
  list(options?: { state?: Repository['state']; limit?: number; offset?: number }): Repository[] {
    const limit = options?.limit ?? 50;
    const offset = options?.offset ?? 0;
    const state = options?.state;

    const rows = (
      state
        ? this.db
            .prepare(
              `SELECT * FROM repositories WHERE state = ? ORDER BY created_at DESC LIMIT ? OFFSET ?`
            )
            .all(state, limit, offset)
        : this.db
            .prepare(`SELECT * FROM repositories ORDER BY created_at DESC LIMIT ? OFFSET ?`)
            .all(limit, offset)
    ) as RepositoryEntity[];

    return rows.map((row) => RepositoryService.toRepository(row));
  }

  /**
   * Count repositories, optionally restricted to one state.
   */
  count(state?: Repository['state']): number {
    const row = (
      state
        ? this.db.prepare(`SELECT COUNT(*) as n FROM repositories WHERE state = ?`).get(state)
        : this.db.prepare(`SELECT COUNT(*) as n FROM repositories`).get()
    ) as { n: number };
    return row.n;
  }

  /**
   * Fetch one repository by ID.
   *
   * @returns The repository, or `null` when no row has that ID.
   */
  get(id: string): Repository | null {
    const row = this.db.prepare(`SELECT * FROM repositories WHERE id = ?`).get(id) as
      | RepositoryEntity
      | undefined;
    return row ? RepositoryService.toRepository(row) : null;
  }

  /**
   * Insert a new repository in the `'pending'` state and return the stored row.
   *
   * The ID is derived from `sourceUrl` via `resolveGitHubId` / `resolveLocalId`.
   * This method does not create an indexing job; call `createIndexingJob` for that.
   *
   * @throws InvalidInputError  when `sourceUrl` is missing or blank.
   * @throws InvalidUrlError    when a GitHub `sourceUrl` cannot be resolved to an ID.
   * @throws AlreadyExistsError when a repository with the derived ID already exists.
   */
  add(input: AddRepositoryInput): Repository {
    if (!input.sourceUrl?.trim()) {
      throw new InvalidInputError('sourceUrl is required', [
        { field: 'sourceUrl', message: 'sourceUrl is required' }
      ]);
    }

    let id: string;
    let title: string;

    if (input.source === 'github') {
      try {
        id = resolveGitHubId(input.sourceUrl);
      } catch {
        throw new InvalidUrlError(
          `Invalid GitHub URL: ${input.sourceUrl}. Expected format: https://github.com/owner/repo`
        );
      }
      // Default title is the second non-empty segment of the ID (the repo name).
      const segments = id.split('/').filter(Boolean);
      title = input.title ?? segments[1] ?? id;
    } else {
      // Read the IDs directly: no row cap and no need to map full rows just for their IDs.
      const idRows = this.db
        .prepare(`SELECT id FROM repositories ORDER BY created_at DESC`)
        .all() as { id: string }[];
      id = resolveLocalId(
        input.sourceUrl,
        idRows.map((row) => row.id)
      );
      title = input.title ?? RepositoryService.defaultLocalTitle(input.sourceUrl);
    }

    if (this.get(id)) {
      throw new AlreadyExistsError(`Repository ${id} already exists`);
    }

    // Timestamps are truncated to whole seconds before being turned into Dates.
    const now = Math.floor(Date.now() / 1000);
    const repo = new Repository({
      id,
      title,
      description: input.description ?? null,
      source: input.source,
      sourceUrl: input.sourceUrl,
      branch: input.branch ?? 'main',
      state: 'pending',
      totalSnippets: 0,
      totalTokens: 0,
      trustScore: 0,
      benchmarkScore: 0,
      stars: null,
      githubToken: input.githubToken ?? null,
      lastIndexedAt: null,
      createdAt: new Date(now * 1000),
      updatedAt: new Date(now * 1000)
    });
    const entity = RepositoryMapper.toEntity(repo);

    this.db
      .prepare(
        `INSERT INTO repositories
          (id, title, description, source, source_url, branch, state,
           total_snippets, total_tokens, trust_score, benchmark_score,
           stars, github_token, last_indexed_at, created_at, updated_at)
         VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)`
      )
      .run(
        entity.id,
        entity.title,
        entity.description,
        entity.source,
        entity.source_url,
        entity.branch,
        entity.state,
        entity.total_snippets,
        entity.total_tokens,
        entity.trust_score,
        entity.benchmark_score,
        entity.stars,
        entity.github_token,
        entity.last_indexed_at,
        entity.created_at,
        entity.updated_at
      );

    return this.requireRepository(id);
  }

  /**
   * Update the editable metadata of a repository.
   *
   * Only fields that are not `undefined` are written. When nothing is supplied,
   * the current row is returned untouched and `updated_at` is left as is.
   *
   * @throws NotFoundError when no repository has the given ID.
   */
  update(id: string, input: UpdateRepositoryInput): Repository {
    const existing = this.requireRepository(id);

    // Column names are fixed literals, so interpolating them into the SQL is safe.
    const candidates: Array<[string, unknown]> = [
      ['title', input.title],
      ['description', input.description],
      ['branch', input.branch],
      ['github_token', input.githubToken]
    ];
    const changes = candidates.filter(([, value]) => value !== undefined);
    if (changes.length === 0) return existing;

    const now = Math.floor(Date.now() / 1000);
    const assignments = [...changes.map(([column]) => `${column} = ?`), 'updated_at = ?'];
    const values: unknown[] = [...changes.map(([, value]) => value), now, id];

    this.db
      .prepare(`UPDATE repositories SET ${assignments.join(', ')} WHERE id = ?`)
      .run(...values);

    return this.requireRepository(id);
  }

  /**
   * Delete a repository together with its embeddings.
   *
   * Embeddings are removed explicitly through `SqliteVecStore`; both deletes run
   * in one transaction. Other dependent rows are expected to go via foreign-key
   * cascades — NOTE(review): the schema is not visible from this file, confirm.
   *
   * @throws NotFoundError when no repository has the given ID.
   */
  remove(id: string): void {
    this.requireRepository(id);

    const vecStore = new SqliteVecStore(this.db);
    const deleteAll = this.db.transaction(() => {
      vecStore.deleteEmbeddingsForRepository(id);
      this.db.prepare(`DELETE FROM repositories WHERE id = ?`).run(id);
    });
    deleteAll();
  }

  /**
   * Compute snippet, token and document totals for a repository.
   *
   * @throws NotFoundError when no repository has the given ID.
   */
  getStats(id: string): RepositoryStats {
    const repository = this.requireRepository(id);

    const snippetStats = this.db
      .prepare(
        `SELECT COUNT(*) as total_snippets, COALESCE(SUM(token_count), 0) as total_tokens
         FROM snippets WHERE repository_id = ?`
      )
      .get(id) as { total_snippets: number; total_tokens: number };

    const docStats = this.db
      .prepare(`SELECT COUNT(*) as total_documents FROM documents WHERE repository_id = ?`)
      .get(id) as { total_documents: number };

    return {
      totalSnippets: snippetStats.total_snippets,
      totalTokens: snippetStats.total_tokens,
      totalDocuments: docStats.total_documents,
      lastIndexedAt: repository.lastIndexedAt
    };
  }

  /**
   * List every version tag recorded for a repository, newest first.
   *
   * Returns an empty array when there are none; the repository's existence is
   * not checked.
   */
  getVersions(repositoryId: string): string[] {
    const rows = this.db
      .prepare(
        `SELECT tag FROM repository_versions WHERE repository_id = ? ORDER BY created_at DESC`
      )
      .all(repositoryId) as { tag: string }[];
    return rows.map((row) => row.tag);
  }

  /**
   * Summarise what has been indexed for a repository.
   *
   * `indexedVersions` lists the repository's branch first (when at least one
   * document exists with no `version_id`), followed by the tags of versions in
   * the `'indexed'` state, newest first, with duplicates removed.
   *
   * @throws NotFoundError when no repository has the given ID.
   */
  getIndexSummary(repositoryId: string): RepositoryIndexSummary {
    const repository = this.requireRepository(repositoryId);

    const embeddingRow = this.db
      .prepare(
        `SELECT COUNT(*) AS count
         FROM snippet_embeddings se
         INNER JOIN snippets s ON s.id = se.snippet_id
         WHERE s.repository_id = ?`
      )
      .get(repositoryId) as { count: number };

    const versionRows = this.db
      .prepare(
        `SELECT tag FROM repository_versions
         WHERE repository_id = ? AND state = 'indexed'
         ORDER BY created_at DESC`
      )
      .all(repositoryId) as { tag: string }[];

    // A document without a version_id counts as an index of the default branch.
    const hasDefaultBranchIndex = Boolean(
      this.db
        .prepare(
          `SELECT 1 AS found
           FROM documents
           WHERE repository_id = ? AND version_id IS NULL
           LIMIT 1`
        )
        .get(repositoryId)
    );

    const indexedVersions = [
      ...(hasDefaultBranchIndex ? [repository.branch ?? 'default branch'] : []),
      ...versionRows.map((row) => row.tag)
    ];

    return {
      embeddingCount: embeddingRow.count,
      indexedVersions: Array.from(new Set(indexedVersions))
    };
  }

  /**
   * Create a queued indexing job for a repository (optionally for one version).
   *
   * If a job for the same (repository, version) pair is already `'queued'` or
   * `'running'`, the most recent such job is returned and nothing is inserted.
   */
  createIndexingJob(repositoryId: string, versionId?: string): IndexingJob {
    const resolvedVersionId = versionId ?? null;

    // The version parameter is bound twice so a NULL version matches only NULL rows.
    const activeRow = this.db
      .prepare(
        `SELECT * FROM indexing_jobs
         WHERE repository_id = ?
           AND (version_id = ? OR (version_id IS NULL AND ? IS NULL))
           AND status IN ('queued', 'running')
         ORDER BY created_at DESC LIMIT 1`
      )
      .get(repositoryId, resolvedVersionId, resolvedVersionId) as IndexingJobEntity | undefined;

    if (activeRow) return IndexingJobMapper.fromEntity(new IndexingJobEntity(activeRow));

    const now = Math.floor(Date.now() / 1000);
    const job = new IndexingJob({
      id: crypto.randomUUID(),
      repositoryId,
      versionId: resolvedVersionId,
      status: 'queued',
      progress: 0,
      totalFiles: 0,
      processedFiles: 0,
      stage: 'queued',
      stageDetail: null,
      error: null,
      startedAt: null,
      completedAt: null,
      createdAt: new Date(now * 1000)
    });
    const entity = new IndexingJobEntity({
      id: job.id,
      repository_id: job.repositoryId,
      version_id: job.versionId,
      status: job.status,
      progress: job.progress,
      total_files: job.totalFiles,
      processed_files: job.processedFiles,
      stage: 'queued',
      stage_detail: null,
      error: job.error,
      started_at: null,
      completed_at: null,
      created_at: now
    });

    // NOTE(review): `stage` and `stage_detail` are not part of this INSERT, so the
    // stored values come from whatever the table defines — presumably column
    // defaults; confirm against the schema.
    this.db
      .prepare(
        `INSERT INTO indexing_jobs
          (id, repository_id, version_id, status, progress, total_files,
           processed_files, error, started_at, completed_at, created_at)
         VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)`
      )
      .run(
        entity.id,
        entity.repository_id,
        entity.version_id,
        entity.status,
        entity.progress,
        entity.total_files,
        entity.processed_files,
        entity.error,
        entity.started_at,
        entity.completed_at,
        entity.created_at
      );

    // Read the row back so the returned job reflects what is actually stored.
    const created = this.db
      .prepare(`SELECT * FROM indexing_jobs WHERE id = ?`)
      .get(job.id) as IndexingJobEntity;
    return IndexingJobMapper.fromEntity(new IndexingJobEntity(created));
  }

  /** Fetch a repository by ID, throwing `NotFoundError` when it does not exist. */
  private requireRepository(id: string): Repository {
    const repository = this.get(id);
    if (!repository) throw new NotFoundError(`Repository ${id} not found`);
    return repository;
  }

  /** Convert a raw `repositories` row into the domain model. */
  private static toRepository(row: RepositoryEntity): Repository {
    return RepositoryMapper.fromEntity(new RepositoryEntity(row));
  }

  /**
   * Default title for a local repository: the last non-empty `/`-separated
   * segment of the path, so a trailing slash does not yield an empty title.
   * Falls back to `'local-repo'` when the path has no usable segment.
   */
  private static defaultLocalTitle(sourceUrl: string): string {
    const segments = sourceUrl.split('/').filter((segment) => segment.trim() !== '');
    return segments[segments.length - 1] ?? 'local-repo';
  }
}
|