Initial commit: trueref v0.1.0-SNAPSHOT
Some checks failed
Build and publish Docker image / Build and push (push) Failing after 1m27s

Java 21 / Spring Boot 3.5.3 multi-module Maven project.
Hybrid BM25+HNSW search with RRF, cross-encoder reranker,
ONNX Runtime 1.22.0 (CPU + CUDA 12 GPU variants).
This commit is contained in:
moze
2026-05-06 00:49:16 +02:00
commit c5f950c2c0
132 changed files with 11287 additions and 0 deletions

21
trueref-domain/pom.xml Normal file
View File

@@ -0,0 +1,21 @@
<?xml version="1.0" encoding="UTF-8"?>
<project xmlns="http://maven.apache.org/POM/4.0.0"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 https://maven.apache.org/xsd/maven-4.0.0.xsd">
<modelVersion>4.0.0</modelVersion>
<parent>
<groupId>com.trueref</groupId>
<artifactId>trueref-parent</artifactId>
<version>0.1.0-SNAPSHOT</version>
</parent>
<artifactId>trueref-domain</artifactId>
<name>trueref-domain</name>
<description>Pure domain model + ports. No Spring, no I/O, no third-party libs beyond JSpecify.</description>
<!-- Hexagonal contract: domain has ZERO runtime dependencies beyond JSpecify (annotations only). -->
<dependencies>
<!--
Intentionally empty: JSpecify and the test dependencies are inherited from trueref-parent.
Declaring anything else here would break the zero-dependency contract stated above.
-->
</dependencies>
</project>

View File

@@ -0,0 +1,9 @@
package com.trueref.domain.error;
import org.jspecify.annotations.Nullable;
/**
 * Domain error reported with the stable code {@code "ingestion_failed"}.
 *
 * <p>The caller supplies the message and may attach the underlying failure as the cause.
 */
public final class IngestionFailed extends TrueRefException {

  /** Stable error code exposed through {@link #code()}. */
  private static final String CODE = "ingestion_failed";

  /**
   * @param message human-readable description, used unchanged as the exception message
   * @param cause underlying failure, or {@code null} when there is none
   */
  public IngestionFailed(String message, @Nullable Throwable cause) {
    super(CODE, message, cause);
  }
}

View File

@@ -0,0 +1,7 @@
package com.trueref.domain.error;
/** Domain error reported with the stable code {@code "invalid_search_request"}; it has no cause. */
public final class InvalidSearchRequest extends TrueRefException {

  /** Stable error code exposed through {@link #code()}. */
  private static final String CODE = "invalid_search_request";

  /**
   * @param message human-readable description, used unchanged as the exception message
   */
  public InvalidSearchRequest(String message) {
    super(CODE, message, null);
  }
}

View File

@@ -0,0 +1,7 @@
package com.trueref.domain.error;
/**
 * Domain error reported with the stable code {@code "repository_already_registered"} for a
 * repository name that is already registered.
 */
public final class RepositoryAlreadyRegistered extends TrueRefException {

  /** Stable error code exposed through {@link #code()}. */
  private static final String CODE = "repository_already_registered";

  /**
   * @param name repository name that is already registered; appended to the exception message
   */
  public RepositoryAlreadyRegistered(String name) {
    super(CODE, describe(name), null);
  }

  /** Builds the exception message for the given repository name. */
  private static String describe(String name) {
    return "Repository already registered: " + name;
  }
}

View File

@@ -0,0 +1,7 @@
package com.trueref.domain.error;
/**
 * Domain error reported with the stable code {@code "repository_not_found"} when a repository
 * cannot be found by the given id or name.
 */
public final class RepositoryNotFound extends TrueRefException {

  /** Stable error code exposed through {@link #code()}. */
  private static final String CODE = "repository_not_found";

  /**
   * @param idOrName identifier or name that was looked up; appended to the exception message
   */
  public RepositoryNotFound(String idOrName) {
    super(CODE, describe(idOrName), null);
  }

  /** Builds the exception message for the given lookup key. */
  private static String describe(String idOrName) {
    return "Repository not found: " + idOrName;
  }
}

View File

@@ -0,0 +1,7 @@
package com.trueref.domain.error;
/**
 * Domain error reported with the stable code {@code "tag_not_found"} when a tag does not exist in
 * a repository.
 */
public final class TagNotFound extends TrueRefException {

  /** Stable error code exposed through {@link #code()}. */
  private static final String CODE = "tag_not_found";

  /**
   * @param repo repository that was searched; rendered before the {@code @} in the message
   * @param tag tag that was not found; rendered after the {@code @} in the message
   */
  public TagNotFound(String repo, String tag) {
    super(CODE, describe(repo, tag), null);
  }

  /** Builds the exception message in the form {@code <repo>@<tag>}. */
  private static String describe(String repo, String tag) {
    return "Tag not found in repository: " + repo + "@" + tag;
  }
}

View File

@@ -0,0 +1,25 @@
package com.trueref.domain.error;
import org.jspecify.annotations.Nullable;
/**
 * Root of all domain errors.
 *
 * <p>Every error carries a stable string {@link #code()} intended for client-side localization.
 * The hierarchy is sealed, so the permitted subclasses listed in the declaration are the complete
 * set of domain errors.
 */
public abstract sealed class TrueRefException extends RuntimeException
    permits RepositoryAlreadyRegistered,
        RepositoryNotFound,
        VersionNotFound,
        VersionNotIndexed,
        TagNotFound,
        IngestionFailed,
        InvalidSearchRequest {

  /** Stable identifier of the error kind, fixed at construction. */
  private final String code;

  /**
   * @param code stable identifier later returned by {@link #code()}
   * @param message human-readable description, forwarded to {@link RuntimeException}
   * @param cause underlying failure, or {@code null} when there is none
   */
  protected TrueRefException(String code, String message, @Nullable Throwable cause) {
    super(message, cause);
    this.code = code;
  }

  /** Returns the stable error code supplied at construction. */
  public String code() {
    return this.code;
  }
}

View File

@@ -0,0 +1,7 @@
package com.trueref.domain.error;
/**
 * Domain error reported with the stable code {@code "version_not_found"} when a version of a
 * repository cannot be found.
 */
public final class VersionNotFound extends TrueRefException {

  /** Stable error code exposed through {@link #code()}. */
  private static final String CODE = "version_not_found";

  /**
   * @param repo repository that was searched; rendered before the {@code @} in the message
   * @param version version that was not found; rendered after the {@code @} in the message
   */
  public VersionNotFound(String repo, String version) {
    super(CODE, describe(repo, version), null);
  }

  /** Builds the exception message in the form {@code <repo>@<version>}. */
  private static String describe(String repo, String version) {
    return "Version not found: " + repo + "@" + version;
  }
}

View File

@@ -0,0 +1,8 @@
package com.trueref.domain.error;
/**
 * Raised when a search request targets a version that is known but has not been indexed yet.
 * Reported with the stable code {@code "version_not_indexed"}.
 */
public final class VersionNotIndexed extends TrueRefException {

  /** Stable error code exposed through {@link #code()}. */
  private static final String CODE = "version_not_indexed";

  /**
   * @param repo repository of the version; rendered before the {@code @} in the message
   * @param version version that is not indexed yet; rendered after the {@code @} in the message
   */
  public VersionNotIndexed(String repo, String version) {
    super(CODE, describe(repo, version), null);
  }

  /** Builds the exception message in the form {@code <repo>@<version>}. */
  private static String describe(String repo, String version) {
    return "Version not yet indexed: " + repo + "@" + version;
  }
}

View File

@@ -0,0 +1,5 @@
/**
* Sealed exception hierarchy for the domain. Adapters translate these to HTTP / JSON-RPC responses.
*/
@org.jspecify.annotations.NullMarked
package com.trueref.domain.error;

View File

@@ -0,0 +1,18 @@
package com.trueref.domain.model;
import org.jspecify.annotations.Nullable;
/**
 * A piece of content (function, class, markdown section, or sliding-window fallback) that is
 * deduplicated globally. Identity is defined by {@link #contentHash()}: chunks sharing a hash are
 * the same chunk, no matter which repo, tag, or file they came from.
 *
 * @param id identifier of the chunk
 * @param contentHash hash that identifies the chunk's content; the deduplication key
 * @param content the chunk content
 * @param language language of the content
 * @param symbol AST symbol name where one applies (e.g. a function or class); {@code null} for
 *     prose chunks
 * @param tokenCount token count recorded for the chunk
 */
public record Chunk(
    ChunkId id,
    String contentHash,
    String content,
    String language,
    @Nullable String symbol,
    int tokenCount) {}

View File

@@ -0,0 +1,16 @@
package com.trueref.domain.model;
/**
 * Type-safe identifier for a {@link Chunk}, wrapping a {@link java.util.UUID}.
 *
 * @param value the underlying UUID; must not be {@code null}
 */
public record ChunkId(java.util.UUID value) {

  /**
   * Rejects a {@code null} UUID at construction, so an invalid identifier fails fast instead of
   * surfacing later as a {@link NullPointerException} from {@link #toString()}.
   *
   * @throws NullPointerException if {@code value} is {@code null}
   */
  public ChunkId {
    java.util.Objects.requireNonNull(value, "value");
  }

  /** Creates an identifier backed by a freshly generated random UUID. */
  public static ChunkId random() {
    return new ChunkId(java.util.UUID.randomUUID());
  }

  /**
   * Parses the string form of a UUID into an identifier.
   *
   * @param s textual UUID, as accepted by {@link java.util.UUID#fromString(String)}
   * @throws IllegalArgumentException if {@code s} is not a valid UUID string
   */
  public static ChunkId of(String s) {
    return new ChunkId(java.util.UUID.fromString(s));
  }

  /** Returns the string form of the wrapped UUID rather than the default record rendering. */
  @Override
  public String toString() {
    return value.toString();
  }
}

View File

@@ -0,0 +1,8 @@
package com.trueref.domain.model;
/**
 * Edge of the many-to-many relation between a {@link Chunk} and a {@link Version}. It records
 * where the chunk is located inside that version's source tree.
 *
 * @param chunkId chunk on one side of the edge
 * @param versionId version on the other side of the edge
 * @param filePath path of the file that contains the chunk
 * @param startLine line at which the chunk starts
 * @param endLine line at which the chunk ends
 */
public record ChunkVersion(
    ChunkId chunkId,
    VersionId versionId,
    String filePath,
    int startLine,
    int endLine) {}

View File

@@ -0,0 +1,19 @@
package com.trueref.domain.model;
/**
 * Vector representation of a {@link Chunk}. Dense float vector; sparse channel deferred.
 *
 * <p>The array is copied on the way in and on the way out, so an instance is effectively
 * immutable. Records compare array components by reference, which combined with the defensive
 * copy would make two embeddings built from the same data unequal. {@link #equals(Object)} and
 * {@link #hashCode()} are therefore overridden to compare the vector's contents.
 *
 * @param chunkId chunk this vector belongs to
 * @param vector vector components; must not be {@code null}
 */
public record Embedding(ChunkId chunkId, float[] vector) {

  /**
   * Takes a private copy of {@code vector} so later changes to the caller's array do not leak in.
   *
   * @throws NullPointerException if {@code vector} is {@code null}
   */
  public Embedding {
    // Defensive copy to make the record effectively immutable.
    vector = vector.clone();
  }

  /** Returns a fresh copy of the vector; mutating it does not affect this record. */
  @Override
  public float[] vector() {
    return vector.clone();
  }

  /** Returns the number of components in the vector. */
  public int dimension() {
    return vector.length;
  }

  /**
   * Two embeddings are equal when their chunk ids are equal and their vectors hold the same
   * components in the same order, compared as by {@link java.util.Arrays#equals(float[], float[])}.
   */
  @Override
  public boolean equals(Object obj) {
    if (this == obj) {
      return true;
    }
    return obj instanceof Embedding other
        && java.util.Objects.equals(chunkId, other.chunkId)
        && java.util.Arrays.equals(vector, other.vector);
  }

  /** Content-based hash, consistent with {@link #equals(Object)}. */
  @Override
  public int hashCode() {
    return 31 * java.util.Objects.hashCode(chunkId) + java.util.Arrays.hashCode(vector);
  }
}

View File

@@ -0,0 +1,25 @@
package com.trueref.domain.model;
import java.time.Instant;
import java.util.List;
import org.jspecify.annotations.Nullable;
/**
 * A unit of orchestrated work, made up of {@link JobStage stages} that are executed in sequence.
 *
 * <p>The stage list is snapshotted with {@link List#copyOf} on construction, so the record holds
 * an unmodifiable list that is independent of the caller's.
 *
 * @param id identifier of the job
 * @param repoId repository the job belongs to
 * @param versionId version the job targets; {@code null} for repo-level jobs (e.g.
 *     {@link JobType#DISCOVER_TAGS})
 * @param type kind of job
 * @param status status of the job
 * @param startedAt start time, or {@code null}
 * @param finishedAt finish time, or {@code null}
 * @param stages stages of the job; neither the list nor its elements may be {@code null}
 */
public record IngestionJob(
    JobId id,
    RepositoryId repoId,
    @Nullable VersionId versionId,
    JobType type,
    JobStatus status,
    @Nullable Instant startedAt,
    @Nullable Instant finishedAt,
    List<JobStage> stages) {

  /** Replaces {@code stages} with an unmodifiable copy. */
  public IngestionJob {
    stages = List.copyOf(stages);
  }
}

View File

@@ -0,0 +1,16 @@
package com.trueref.domain.model;
/**
 * Type-safe identifier for an {@link IngestionJob}, wrapping a {@link java.util.UUID}.
 *
 * @param value the underlying UUID; must not be {@code null}
 */
public record JobId(java.util.UUID value) {

  /**
   * Rejects a {@code null} UUID at construction, so an invalid identifier fails fast instead of
   * surfacing later as a {@link NullPointerException} from {@link #toString()}.
   *
   * @throws NullPointerException if {@code value} is {@code null}
   */
  public JobId {
    java.util.Objects.requireNonNull(value, "value");
  }

  /** Creates an identifier backed by a freshly generated random UUID. */
  public static JobId random() {
    return new JobId(java.util.UUID.randomUUID());
  }

  /**
   * Parses the string form of a UUID into an identifier.
   *
   * @param s textual UUID, as accepted by {@link java.util.UUID#fromString(String)}
   * @throws IllegalArgumentException if {@code s} is not a valid UUID string
   */
  public static JobId of(String s) {
    return new JobId(java.util.UUID.fromString(s));
  }

  /** Returns the string form of the wrapped UUID rather than the default record rendering. */
  @Override
  public String toString() {
    return value.toString();
  }
}

View File

@@ -0,0 +1,20 @@
package com.trueref.domain.model;
import java.time.Instant;
import org.jspecify.annotations.Nullable;
/**
 * One observability event emitted for an ingestion job. Events are streamed to the UI via SSE.
 *
 * @param jobId job the event belongs to
 * @param ts timestamp of the event
 * @param level severity of the event
 * @param stage stage the event relates to, or {@code null} (presumably for events not tied to a
 *     particular stage; confirm against the emitter)
 * @param message log text
 */
public record JobLogEvent(
    JobId jobId,
    Instant ts,
    Level level,
    JobStage.@Nullable StageName stage,
    String message) {

  /** Severity of a {@link JobLogEvent}. */
  public enum Level {
    DEBUG,
    INFO,
    WARN,
    ERROR
  }
}

View File

@@ -0,0 +1,37 @@
package com.trueref.domain.model;
import java.time.Instant;
import org.jspecify.annotations.Nullable;
/**
 * One stage of an {@link IngestionJob}, with its status, timing and progress counters.
 *
 * @param jobId job this stage belongs to
 * @param name which stage this is
 * @param status status of the stage
 * @param startedAt start time, or {@code null}
 * @param finishedAt finish time, or {@code null}
 * @param itemsProcessed items processed so far (the unit is presumably stage-specific; confirm)
 * @param itemsTotal total number of items for the stage
 * @param bytesProcessed bytes processed so far
 * @param errorMessage error description, or {@code null} (presumably set only for
 *     {@link StageStatus#FAILED}; confirm)
 */
public record JobStage(
    JobId jobId,
    StageName name,
    StageStatus status,
    @Nullable Instant startedAt,
    @Nullable Instant finishedAt,
    long itemsProcessed,
    long itemsTotal,
    long bytesProcessed,
    @Nullable String errorMessage) {

  /** The named stages a job can consist of. */
  public enum StageName {
    CLONE,
    FETCH,
    CHECKOUT,
    DISCOVER_FILES,
    DIFF_FILES,
    PARSE,
    CHUNK,
    EMBED,
    INDEX,
    COMMIT
  }

  /** Status of a single stage. */
  public enum StageStatus {
    PENDING,
    RUNNING,
    SUCCEEDED,
    FAILED,
    SKIPPED
  }
}

View File

@@ -0,0 +1,9 @@
package com.trueref.domain.model;
/** Overall status of an {@link IngestionJob}. */
public enum JobStatus {
  QUEUED,
  RUNNING,
  SUCCEEDED,
  FAILED,
  CANCELLED
}

View File

@@ -0,0 +1,8 @@
package com.trueref.domain.model;
/** Kind of work an {@link IngestionJob} performs. */
public enum JobType {
  /** Repo-level job; such jobs carry no version id. */
  DISCOVER_TAGS,
  /** Indexing of one specific version. */
  INDEX_VERSION,
  REFRESH,
  /** Presumably reclaims chunks that are no longer referenced; confirm against the job runner. */
  COMPACT
}

View File

@@ -0,0 +1,37 @@
package com.trueref.domain.model;
import java.time.Duration;
import java.time.Instant;
import java.util.List;
import org.jspecify.annotations.Nullable;
/**
 * A registered git repository, either local or cloned from a remote. {@code localPath} is always
 * present; for a remote repository it points at our managed clone directory and
 * {@code managedClone} is {@code true}.
 *
 * <p>Both list components are snapshotted with {@link List#copyOf} on construction, so the record
 * holds unmodifiable lists that are independent of the caller's.
 *
 * @param id identifier of the repository
 * @param name name of the repository
 * @param remoteUrl git URL when {@code managedClone} is {@code true}; {@code null} otherwise
 * @param localPath path of the repository on disk
 * @param managedClone whether {@code localPath} is a clone managed by us
 * @param ignoreGlobs per-repo globs ANDed with .gitignore and the built-in defaults
 * @param maxFileSizeBytes files larger than this are skipped during ingestion
 * @param pollInterval scheduled fetch interval; {@link Duration#ZERO} disables polling
 * @param tagCap maximum number of most-recent tags to auto-index; UI/MCP can index more on demand
 * @param versionMappingRules ordered patterns mapping a client version (e.g. {@code "1.2.3"}) to
 *     a tag
 * @param createdAt creation timestamp
 * @param updatedAt last-update timestamp
 */
public record Repository(
    RepositoryId id,
    String name,
    @Nullable String remoteUrl,
    String localPath,
    boolean managedClone,
    List<String> ignoreGlobs,
    long maxFileSizeBytes,
    Duration pollInterval,
    int tagCap,
    List<TagPattern> versionMappingRules,
    Instant createdAt,
    Instant updatedAt) {

  /** Replaces both list components with unmodifiable copies. */
  public Repository {
    ignoreGlobs = List.copyOf(ignoreGlobs);
    versionMappingRules = List.copyOf(versionMappingRules);
  }
}

View File

@@ -0,0 +1,17 @@
package com.trueref.domain.model;
/**
 * Type-safe identifier for a registered repository, wrapping a {@link java.util.UUID}.
 *
 * @param value the underlying UUID; must not be {@code null}
 */
public record RepositoryId(java.util.UUID value) {

  /**
   * Rejects a {@code null} UUID at construction, so an invalid identifier fails fast instead of
   * surfacing later as a {@link NullPointerException} from {@link #toString()}.
   *
   * @throws NullPointerException if {@code value} is {@code null}
   */
  public RepositoryId {
    java.util.Objects.requireNonNull(value, "value");
  }

  /** Creates an identifier backed by a freshly generated random UUID. */
  public static RepositoryId random() {
    return new RepositoryId(java.util.UUID.randomUUID());
  }

  /**
   * Parses the string form of a UUID into an identifier.
   *
   * @param s textual UUID, as accepted by {@link java.util.UUID#fromString(String)}
   * @throws IllegalArgumentException if {@code s} is not a valid UUID string
   */
  public static RepositoryId of(String s) {
    return new RepositoryId(java.util.UUID.fromString(s));
  }

  /** Returns the string form of the wrapped UUID rather than the default record rendering. */
  @Override
  public String toString() {
    return value.toString();
  }
}

View File

@@ -0,0 +1,18 @@
package com.trueref.domain.model;
import org.jspecify.annotations.Nullable;
/**
 * One ranked snippet in a search result.
 *
 * @param chunkId chunk the snippet was taken from
 * @param repoId repository the hit belongs to
 * @param versionId version the hit belongs to
 * @param repoName name of the repository
 * @param tag tag of the version
 * @param filePath path of the file that contains the snippet
 * @param startLine line at which the snippet starts
 * @param endLine line at which the snippet ends
 * @param language language of the snippet
 * @param symbol symbol name associated with the snippet, or {@code null}
 * @param content snippet text
 * @param score ranking score of the hit
 */
public record SearchHit(
    ChunkId chunkId,
    RepositoryId repoId,
    VersionId versionId,
    String repoName,
    String tag,
    String filePath,
    int startLine,
    int endLine,
    String language,
    @Nullable String symbol,
    String content,
    double score) {}

View File

@@ -0,0 +1,16 @@
package com.trueref.domain.model;
import java.util.List;
/**
 * The (repo, version) scope of a search request. Several references are combined with OR, so a
 * single query may cover, for example, both "spring-boot v3.5.4" and "spring-boot v3.4.0".
 *
 * @param refs the (repo, version) pairs to search; snapshotted with {@link List#copyOf}, so
 *     neither the list nor its elements may be {@code null}
 */
public record SearchScope(List<RepoVersionRef> refs) {

  /**
   * A single (repository, version) pair.
   *
   * @param repoId repository to search
   * @param versionId version of that repository to search
   */
  public record RepoVersionRef(RepositoryId repoId, VersionId versionId) {}

  /** Replaces {@code refs} with an unmodifiable copy. */
  public SearchScope {
    refs = List.copyOf(refs);
  }
}

View File

@@ -0,0 +1,24 @@
package com.trueref.domain.model;
/**
 * Strategy that maps a client-supplied version string onto a git tag of a repository. Patterns
 * are tried in order and the first one that matches wins.
 *
 * <p>{@link Exact}, {@link VPrefix}, {@link ReleasePrefix} and {@link SemverFuzzy} are the
 * built-in strategies. {@link Custom} carries a user-supplied template such as
 * {@code "release-{semver}"}.
 */
public sealed interface TagPattern
    permits TagPattern.Exact,
        TagPattern.VPrefix,
        TagPattern.ReleasePrefix,
        TagPattern.SemverFuzzy,
        TagPattern.Custom {

  /** Uses the version verbatim: {@code "1.2.3"} maps to tag {@code "1.2.3"}. */
  record Exact() implements TagPattern {}

  /** Prepends {@code v}: {@code "1.2.3"} maps to tag {@code "v1.2.3"}. */
  record VPrefix() implements TagPattern {}

  /** Prepends {@code release-}: {@code "1.2.3"} maps to tag {@code "release-1.2.3"}. */
  record ReleasePrefix() implements TagPattern {}

  /** Selects the tag whose semver is closest to the requested version. */
  record SemverFuzzy() implements TagPattern {}

  /**
   * User-defined mapping.
   *
   * @param template template containing {@code {version}} or {@code {semver}} placeholders
   */
  record Custom(String template) implements TagPattern {}
}

View File

@@ -0,0 +1,15 @@
package com.trueref.domain.model;
import java.time.Instant;
import org.jspecify.annotations.Nullable;
/**
 * A specific git tag (or branch) of a {@link Repository} that may be indexed independently.
 *
 * @param id identifier of the version
 * @param repoId repository the version belongs to
 * @param tag name of the tag (or branch)
 * @param commitSha commit SHA recorded for the version
 * @param status indexing status of the version
 * @param indexedAt indexing timestamp, or {@code null}
 * @param chunkCount chunk count recorded for the version
 * @param errorMessage error from the last indexing attempt when {@code status} is
 *     {@link VersionStatus#FAILED}, or {@code null}
 */
public record Version(
    VersionId id,
    RepositoryId repoId,
    String tag,
    String commitSha,
    VersionStatus status,
    @Nullable Instant indexedAt,
    int chunkCount,
    @Nullable String errorMessage) {}

View File

@@ -0,0 +1,16 @@
package com.trueref.domain.model;
/**
 * Type-safe identifier for a {@link Version}, wrapping a {@link java.util.UUID}.
 *
 * @param value the underlying UUID; must not be {@code null}
 */
public record VersionId(java.util.UUID value) {

  /**
   * Rejects a {@code null} UUID at construction, so an invalid identifier fails fast instead of
   * surfacing later as a {@link NullPointerException} from {@link #toString()}.
   *
   * @throws NullPointerException if {@code value} is {@code null}
   */
  public VersionId {
    java.util.Objects.requireNonNull(value, "value");
  }

  /** Creates an identifier backed by a freshly generated random UUID. */
  public static VersionId random() {
    return new VersionId(java.util.UUID.randomUUID());
  }

  /**
   * Parses the string form of a UUID into an identifier.
   *
   * @param s textual UUID, as accepted by {@link java.util.UUID#fromString(String)}
   * @throws IllegalArgumentException if {@code s} is not a valid UUID string
   */
  public static VersionId of(String s) {
    return new VersionId(java.util.UUID.fromString(s));
  }

  /** Returns the string form of the wrapped UUID rather than the default record rendering. */
  @Override
  public String toString() {
    return value.toString();
  }
}

View File

@@ -0,0 +1,14 @@
package com.trueref.domain.model;
/** Indexing status of a {@link Version}. */
public enum VersionStatus {

  /** The tag is known but has not been indexed yet. */
  DISCOVERED,

  /** An indexing job is currently running. */
  INDEXING,

  /** Indexing succeeded; the version is queryable. */
  INDEXED,

  /** The last indexing attempt failed; see {@link Version#errorMessage()}. */
  FAILED,

  /** The tag no longer exists upstream; its chunks are reclaimable by compaction. */
  INACTIVE
}

View File

@@ -0,0 +1,7 @@
/**
* Pure domain model for trueref. Contains records and enums describing repositories, versions,
* chunks, ingestion jobs, and search results. <strong>Must remain free of any I/O, Spring,
* Jackson, or other framework concerns.</strong> JSpecify nullability annotations are allowed.
*/
@org.jspecify.annotations.NullMarked
package com.trueref.domain.model;

View File

@@ -0,0 +1,12 @@
package com.trueref.domain.port.in;
import com.trueref.domain.model.RepositoryId;
import com.trueref.domain.model.Version;
import java.util.List;
/** Use case: discover or refresh the git tags of a repository. */
public interface DiscoverVersions {

  /**
   * Performs a git fetch (for managed repositories) followed by tag enumeration.
   *
   * @param repoId repository whose tags should be discovered
   * @return the versions known once discovery has finished
   */
  List<Version> discover(RepositoryId repoId);
}

View File

@@ -0,0 +1,12 @@
package com.trueref.domain.port.in;
import com.trueref.domain.model.JobId;
import com.trueref.domain.model.RepositoryId;
import com.trueref.domain.model.VersionId;
/** Use case: schedule indexing of one specific (repo, tag/version). */
public interface IndexVersion {

  /**
   * Enqueues an {@code INDEX_VERSION} job and returns immediately.
   *
   * @param repoId repository that owns the version
   * @param versionId version to index
   * @param force presumably requests re-indexing even when the version is already indexed;
   *     confirm against the implementation
   * @return id of the enqueued job
   */
  JobId enqueue(RepositoryId repoId, VersionId versionId, boolean force);
}

View File

@@ -0,0 +1,27 @@
package com.trueref.domain.port.in;
import com.trueref.domain.model.IngestionJob;
import com.trueref.domain.model.JobId;
import com.trueref.domain.model.JobLogEvent;
import com.trueref.domain.model.JobStatus;
import com.trueref.domain.model.RepositoryId;
import com.trueref.domain.model.VersionId;
import java.util.List;
import java.util.Optional;
import java.util.function.Consumer;
import org.jspecify.annotations.Nullable;
/** Use case: read jobs and subscribe to job and log streams (used for SSE in the UI). */
public interface ObserveJobs {

  /**
   * Looks up a single job.
   *
   * @param id id of the job
   * @return the job, or an empty {@link Optional} if none is found
   */
  Optional<IngestionJob> findJob(JobId id);

  /**
   * Lists jobs. The three filter arguments are nullable; presumably {@code null} means "do not
   * filter on this criterion" (confirm against the implementation).
   *
   * @param repoId repository filter, or {@code null}
   * @param versionId version filter, or {@code null}
   * @param status status filter, or {@code null}
   * @param limit limit on the number of jobs returned
   * @return the matching jobs
   */
  List<IngestionJob> listJobs(
      @Nullable RepositoryId repoId,
      @Nullable VersionId versionId,
      @Nullable JobStatus status,
      int limit);

  /**
   * Subscribes to live status updates of all jobs.
   *
   * @param listener receives the updated jobs
   * @return a handle that unsubscribes the listener
   */
  AutoCloseable subscribeJobs(Consumer<IngestionJob> listener);

  /**
   * Subscribes to the log events of a single job.
   *
   * @param jobId job whose log events are wanted
   * @param listener receives the log events
   * @return a handle that unsubscribes the listener
   */
  AutoCloseable subscribeLogs(JobId jobId, Consumer<JobLogEvent> listener);
}

View File

@@ -0,0 +1,17 @@
package com.trueref.domain.port.in;
import com.trueref.domain.model.Repository;
import com.trueref.domain.model.RepositoryId;
import com.trueref.domain.model.Version;
import java.util.List;
import java.util.Optional;
/** Use case: read-only access to repositories and their versions. */
public interface QueryCatalog {

  /** Returns the registered repositories. */
  List<Repository> listRepositories();

  /**
   * Looks up a single repository.
   *
   * @param id id of the repository
   * @return the repository, or an empty {@link Optional} if none is found
   */
  Optional<Repository> findRepository(RepositoryId id);

  /**
   * Lists the versions of a repository.
   *
   * @param repoId repository whose versions are wanted
   * @return the versions of that repository
   */
  List<Version> listVersions(RepositoryId repoId);
}

View File

@@ -0,0 +1,32 @@
package com.trueref.domain.port.in;
import com.trueref.domain.model.Repository;
import com.trueref.domain.model.RepositoryId;
import com.trueref.domain.model.TagPattern;
import java.time.Duration;
import java.util.List;
import org.jspecify.annotations.Nullable;
/** Use case: register a new repository (local path or remote URL), and unregister it again. */
public interface RegisterRepository {

  /**
   * Registers a repository as described by {@code cmd}.
   *
   * @param cmd registration parameters
   * @return the registered repository
   */
  Repository register(Command cmd);

  /**
   * Removes a previously registered repository.
   *
   * @param id id of the repository to remove
   */
  void unregister(RepositoryId id);

  /**
   * Parameters of a registration request. Both lists are snapshotted with {@link List#copyOf},
   * so neither the lists nor their elements may be {@code null}.
   *
   * @param name name of the repository
   * @param remoteUrl remote git URL, or {@code null}
   * @param localPath local path, or {@code null}
   * @param ignoreGlobs per-repo ignore globs
   * @param maxFileSizeBytes file-size limit, or {@code null} (presumably to use a default;
   *     confirm against the implementation)
   * @param pollInterval fetch interval, or {@code null} (presumably to use a default; confirm)
   * @param tagCap tag cap, or {@code null} (presumably to use a default; confirm)
   * @param versionMappingRules ordered patterns mapping a client version to a tag
   */
  record Command(
      String name,
      @Nullable String remoteUrl,
      @Nullable String localPath,
      List<String> ignoreGlobs,
      @Nullable Long maxFileSizeBytes,
      @Nullable Duration pollInterval,
      @Nullable Integer tagCap,
      List<TagPattern> versionMappingRules) {

    /** Replaces both list components with unmodifiable copies. */
    public Command {
      ignoreGlobs = List.copyOf(ignoreGlobs);
      versionMappingRules = List.copyOf(versionMappingRules);
    }
  }
}

View File

@@ -0,0 +1,40 @@
package com.trueref.domain.port.in;
import com.trueref.domain.model.RepositoryId;
import com.trueref.domain.model.VersionId;
import com.trueref.domain.model.VersionStatus;
import java.util.List;
import org.jspecify.annotations.Nullable;
/**
 * Use case: turn a fuzzy library name (and optional version) into one or more concrete (repo,
 * version) handles, ranked by relevance. Mirrors Context7's {@code resolve-library-id}.
 */
public interface ResolveLibraryId {

  /**
   * Resolves the library described by {@code query}.
   *
   * @param query what to look for
   * @return the matches found
   */
  Result resolve(Query query);

  /**
   * Input of a resolution request.
   *
   * @param libraryName fuzzy name of the library
   * @param query optional query text, or {@code null}
   * @param version optional version, or {@code null}
   */
  record Query(String libraryName, @Nullable String query, @Nullable String version) {}

  /**
   * Outcome of a resolution request.
   *
   * @param matches the matches; snapshotted with {@link List#copyOf}
   */
  record Result(List<Match> matches) {

    /** Replaces {@code matches} with an unmodifiable copy. */
    public Result {
      matches = List.copyOf(matches);
    }
  }

  /**
   * One candidate repository.
   *
   * @param repoId id of the matched repository
   * @param libraryId library id in the form {@code "/owner/repo[/version]"}
   * @param name name of the match
   * @param description description of the match, or {@code null}
   * @param snippetCount snippet count reported for the match
   * @param availableVersions versions of the matched repository; snapshotted with
   *     {@link List#copyOf}
   * @param score relevance score of the match
   */
  record Match(
      RepositoryId repoId,
      String libraryId,
      String name,
      @Nullable String description,
      int snippetCount,
      List<VersionRef> availableVersions,
      double score) {

    /** Replaces {@code availableVersions} with an unmodifiable copy. */
    public Match {
      availableVersions = List.copyOf(availableVersions);
    }
  }

  /**
   * Reference to one version of a matched repository.
   *
   * @param versionId id of the version
   * @param tag tag of the version
   * @param status indexing status of the version
   */
  record VersionRef(VersionId versionId, String tag, VersionStatus status) {}
}

View File

@@ -0,0 +1,30 @@
package com.trueref.domain.port.in;
import com.trueref.domain.model.SearchHit;
import com.trueref.domain.model.SearchScope;
import java.util.List;
import org.jspecify.annotations.Nullable;
/**
 * Use case: hybrid (BM25 + dense) search with rerank, scoped to specific (repo, version) pairs.
 */
public interface SearchLibraryDocs {

  /**
   * Runs a search.
   *
   * @param query search text, scope and limits
   * @return the ranked hits together with their cumulative token count
   */
  Result search(Query query);

  /**
   * Input of a search request.
   *
   * @param text search text
   * @param topic optional topic, or {@code null}
   * @param scope (repo, version) pairs to search
   * @param tokensBudget token budget the returned snippets are packed to fit within
   * @param maxHits limit on the number of hits
   */
  record Query(
      String text,
      @Nullable String topic,
      SearchScope scope,
      int tokensBudget,
      int maxHits) {}

  /**
   * Outcome of a search request.
   *
   * @param hits ranked snippets, packed to fit within {@link Query#tokensBudget()}; snapshotted
   *     with {@link List#copyOf}
   * @param totalTokensReturned cumulative token count of the returned snippets
   */
  record Result(List<SearchHit> hits, int totalTokensReturned) {

    /** Replaces {@code hits} with an unmodifiable copy. */
    public Result {
      hits = List.copyOf(hits);
    }
  }
}

View File

@@ -0,0 +1,6 @@
/**
* Driving ports — interfaces implemented by the application layer and called by adapters
* (REST controllers, MCP tool handlers, scheduled tasks, etc.).
*/
@org.jspecify.annotations.NullMarked
package com.trueref.domain.port.in;