chore(FEEDBACK-0001): linting

2026-03-27 02:23:01 +01:00
parent 16436bfab2
commit 5a3c27224d
102 changed files with 5108 additions and 4976 deletions
--- a/docs/features/TRUEREF-0005.md
+++ b/docs/features/TRUEREF-0005.md
@@ -30,19 +30,19 @@ Implement the document parsing and chunking pipeline that transforms raw file co

 ## Supported File Types

-| Extension | Parser Strategy |
-|-----------|----------------|
-| `.md`, `.mdx` | Heading-based section splitting + code block extraction |
-| `.txt`, `.rst` | Paragraph-based splitting |
-| `.ts`, `.tsx`, `.js`, `.jsx` | AST-free: function/class boundary detection via regex |
-| `.py` | `def`/`class` boundary detection |
-| `.go` | `func`/`type` boundary detection |
-| `.rs` | `fn`/`impl`/`struct` boundary detection |
-| `.java`, `.cs`, `.kt`, `.swift` | Class/method boundary detection |
-| `.rb` | `def`/`class` boundary detection |
-| `.json`, `.yaml`, `.yml`, `.toml` | Structural chunking (top-level keys) |
-| `.html`, `.svelte`, `.vue` | Text content extraction + script block splitting |
-| Other code | Line-count-based sliding window (200 lines per chunk) |
+| Extension                         | Parser Strategy                                         |
+| --------------------------------- | ------------------------------------------------------- |
+| `.md`, `.mdx`                     | Heading-based section splitting + code block extraction |
+| `.txt`, `.rst`                    | Paragraph-based splitting                               |
+| `.ts`, `.tsx`, `.js`, `.jsx`      | AST-free: function/class boundary detection via regex   |
+| `.py`                             | `def`/`class` boundary detection                        |
+| `.go`                             | `func`/`type` boundary detection                        |
+| `.rs`                             | `fn`/`impl`/`struct` boundary detection                 |
+| `.java`, `.cs`, `.kt`, `.swift`   | Class/method boundary detection                         |
+| `.rb`                             | `def`/`class` boundary detection                        |
+| `.json`, `.yaml`, `.yml`, `.toml` | Structural chunking (top-level keys)                    |
+| `.html`, `.svelte`, `.vue`        | Text content extraction + script block splitting        |
+| Other code                        | Line-count-based sliding window (200 lines per chunk)   |

 ---

@@ -52,9 +52,9 @@ Use a simple character-based approximation (no tokenizer library needed for v1):

 ```typescript
 function estimateTokens(text: string): number {
-  // Empirically: ~4 chars per token for English prose
-  // ~3 chars per token for code (more symbols)
-  return Math.ceil(text.length / 3.5);
+	// Empirically: ~4 chars per token for English prose
+	// ~3 chars per token for code (more symbols); 3.5 is the midpoint of the two
+	return Math.ceil(text.length / 3.5);
 }
 ```

@@ -74,49 +74,49 @@ The Markdown parser is the most important parser as most documentation is Markdo

 ```typescript
 interface MarkdownSection {
-  headings: string[];    // heading stack at this point
-  content: string;       // text content (sans code blocks)
-  codeBlocks: { language: string; code: string }[];
+	headings: string[]; // heading stack at this point
+	content: string; // text content (sans code blocks)
+	codeBlocks: { language: string; code: string }[];
 }

 function parseMarkdown(content: string, filePath: string): Snippet[] {
-  const sections = splitIntoSections(content);
-  const snippets: Snippet[] = [];
+	const sections = splitIntoSections(content);
+	const snippets: Snippet[] = [];

-  for (const section of sections) {
-    const breadcrumb = section.headings.join(' > ');
-    const title = section.headings.at(-1) ?? path.basename(filePath);
+	for (const section of sections) {
+		const breadcrumb = section.headings.join(' > ');
+		const title = section.headings.at(-1) ?? path.basename(filePath);

-    // Emit info snippet for text content
-    if (section.content.trim().length >= 20) {
-      const chunks = chunkText(section.content, MAX_TOKENS, OVERLAP_TOKENS);
-      for (const chunk of chunks) {
-        snippets.push({
-          type: 'info',
-          title,
-          content: chunk,
-          breadcrumb,
-          tokenCount: estimateTokens(chunk),
-        });
-      }
-    }
+		// Emit info snippet for text content
+		if (section.content.trim().length >= 20) {
+			const chunks = chunkText(section.content, MAX_TOKENS, OVERLAP_TOKENS);
+			for (const chunk of chunks) {
+				snippets.push({
+					type: 'info',
+					title,
+					content: chunk,
+					breadcrumb,
+					tokenCount: estimateTokens(chunk)
+				});
+			}
+		}

-    // Emit code snippets for each code block
-    for (const block of section.codeBlocks) {
-      if (block.code.trim().length >= 20) {
-        snippets.push({
-          type: 'code',
-          title,
-          content: block.code,
-          language: block.language || detectLanguage('.' + block.language),
-          breadcrumb,
-          tokenCount: estimateTokens(block.code),
-        });
-      }
-    }
-  }
+		// Emit code snippets for each code block
+		for (const block of section.codeBlocks) {
+			if (block.code.trim().length >= 20) {
+				snippets.push({
+					type: 'code',
+					title,
+					content: block.code,
+					language: block.language || detectLanguage('.' + block.language),
+					breadcrumb,
+					tokenCount: estimateTokens(block.code)
+				});
+			}
+		}
+	}

-  return snippets;
+	return snippets;
 }
 ```

@@ -135,43 +135,41 @@ For non-Markdown code files, use regex-based function/class boundary detection.

 ```typescript
 const BOUNDARY_PATTERNS: Record<string, RegExp> = {
-  typescript: /^(export\s+)?(async\s+)?(function|class|interface|type|const|let|var)\s+\w+/m,
-  python: /^(async\s+)?(def|class)\s+\w+/m,
-  go: /^(func|type|var|const)\s+\w+/m,
-  rust: /^(pub\s+)?(fn|impl|struct|enum|trait)\s+\w+/m,
-  java: /^(public|private|protected|static).*?(class|interface|enum|void|\w+)\s+\w+\s*[({]/m,
+	typescript: /^(export\s+)?(async\s+)?(function|class|interface|type|const|let|var)\s+\w+/m,
+	python: /^(async\s+)?(def|class)\s+\w+/m,
+	go: /^(func|type|var|const)\s+\w+/m,
+	rust: /^(pub\s+)?(fn|impl|struct|enum|trait)\s+\w+/m,
+	java: /^(public|private|protected|static).*?(class|interface|enum|void|\w+)\s+\w+\s*[({]/m
 };

-function parseCodeFile(
-  content: string,
-  filePath: string,
-  language: string
-): Snippet[] {
-  const pattern = BOUNDARY_PATTERNS[language];
-  const breadcrumb = filePath;
-  const title = path.basename(filePath);
+function parseCodeFile(content: string, filePath: string, language: string): Snippet[] {
+	const pattern = BOUNDARY_PATTERNS[language];
+	const breadcrumb = filePath;
+	const title = path.basename(filePath);

-  if (!pattern) {
-    // Fallback: sliding window
-    return slidingWindowChunks(content, filePath, language);
-  }
+	if (!pattern) {
+		// Fallback: sliding window
+		return slidingWindowChunks(content, filePath, language);
+	}

-  const chunks = splitAtBoundaries(content, pattern);
-  return chunks
-    .filter(chunk => chunk.trim().length >= 20)
-    .flatMap(chunk => {
-      if (estimateTokens(chunk) <= MAX_TOKENS) {
-        return [{
-          type: 'code' as const,
-          title,
-          content: chunk,
-          language,
-          breadcrumb,
-          tokenCount: estimateTokens(chunk),
-        }];
-      }
-      return slidingWindowChunks(chunk, filePath, language);
-    });
+	const chunks = splitAtBoundaries(content, pattern);
+	return chunks
+		.filter((chunk) => chunk.trim().length >= 20)
+		.flatMap((chunk) => {
+			if (estimateTokens(chunk) <= MAX_TOKENS) {
+				return [
+					{
+						type: 'code' as const,
+						title,
+						content: chunk,
+						language,
+						breadcrumb,
+						tokenCount: estimateTokens(chunk)
+					}
+				];
+			}
+			return slidingWindowChunks(chunk, filePath, language);
+		});
 }
 ```

@@ -188,27 +186,23 @@ const MIN_CONTENT_LENGTH = 20; // characters
 ### Sliding Window Chunker

 ```typescript
-function chunkText(
-  text: string,
-  maxTokens: number,
-  overlapTokens: number
-): string[] {
-  const words = text.split(/\s+/);
-  const wordsPerToken = 0.75; // ~0.75 words per token
-  const maxWords = Math.floor(maxTokens * wordsPerToken);
-  const overlapWords = Math.floor(overlapTokens * wordsPerToken);
+function chunkText(text: string, maxTokens: number, overlapTokens: number): string[] {
+	const words = text.split(/\s+/);
+	const wordsPerToken = 0.75; // ~0.75 words per token
+	const maxWords = Math.floor(maxTokens * wordsPerToken);
+	const overlapWords = Math.floor(overlapTokens * wordsPerToken);

-  const chunks: string[] = [];
-  let start = 0;
+	const chunks: string[] = [];
+	let start = 0;

-  while (start < words.length) {
-    const end = Math.min(start + maxWords, words.length);
-    chunks.push(words.slice(start, end).join(' '));
-    if (end === words.length) break;
-    start = end - overlapWords;
-  }
+	while (start < words.length) {
+		const end = Math.min(start + maxWords, words.length);
+		chunks.push(words.slice(start, end).join(' '));
+		if (end === words.length) break;
+		start = end - overlapWords;
+	}

-  return chunks;
+	return chunks;
 }
 ```

@@ -218,34 +212,42 @@ function chunkText(

 ```typescript
 const LANGUAGE_MAP: Record<string, string> = {
-  '.ts': 'typescript', '.tsx': 'typescript',
-  '.js': 'javascript', '.jsx': 'javascript',
-  '.py': 'python',
-  '.rb': 'ruby',
-  '.go': 'go',
-  '.rs': 'rust',
-  '.java': 'java',
-  '.cs': 'csharp',
-  '.cpp': 'cpp', '.c': 'c', '.h': 'c',
-  '.swift': 'swift',
-  '.kt': 'kotlin',
-  '.php': 'php',
-  '.scala': 'scala',
-  '.sh': 'bash', '.bash': 'bash', '.zsh': 'bash',
-  '.md': 'markdown', '.mdx': 'markdown',
-  '.json': 'json',
-  '.yaml': 'yaml', '.yml': 'yaml',
-  '.toml': 'toml',
-  '.html': 'html',
-  '.css': 'css',
-  '.svelte': 'svelte',
-  '.vue': 'vue',
-  '.sql': 'sql',
+	'.ts': 'typescript',
+	'.tsx': 'typescript',
+	'.js': 'javascript',
+	'.jsx': 'javascript',
+	'.py': 'python',
+	'.rb': 'ruby',
+	'.go': 'go',
+	'.rs': 'rust',
+	'.java': 'java',
+	'.cs': 'csharp',
+	'.cpp': 'cpp',
+	'.c': 'c',
+	'.h': 'c',
+	'.swift': 'swift',
+	'.kt': 'kotlin',
+	'.php': 'php',
+	'.scala': 'scala',
+	'.sh': 'bash',
+	'.bash': 'bash',
+	'.zsh': 'bash',
+	'.md': 'markdown',
+	'.mdx': 'markdown',
+	'.json': 'json',
+	'.yaml': 'yaml',
+	'.yml': 'yaml',
+	'.toml': 'toml',
+	'.html': 'html',
+	'.css': 'css',
+	'.svelte': 'svelte',
+	'.vue': 'vue',
+	'.sql': 'sql'
 };

 function detectLanguage(filePath: string): string {
-  const ext = path.extname(filePath).toLowerCase();
-  return LANGUAGE_MAP[ext] ?? 'text';
+	const ext = path.extname(filePath).toLowerCase();
+	return LANGUAGE_MAP[ext] ?? 'text';
 }
 ```

@@ -255,32 +257,32 @@ function detectLanguage(filePath: string): string {

 ```typescript
 export interface ParseOptions {
-  repositoryId: string;
-  documentId: string;
-  versionId?: string;
+	repositoryId: string;
+	documentId: string;
+	versionId?: string;
 }

-export function parseFile(
-  file: CrawledFile,
-  options: ParseOptions
-): NewSnippet[] {
-  const language = detectLanguage(file.path);
-  let rawSnippets: Omit<NewSnippet, 'id' | 'repositoryId' | 'documentId' | 'versionId' | 'createdAt'>[];
+export function parseFile(file: CrawledFile, options: ParseOptions): NewSnippet[] {
+	const language = detectLanguage(file.path);
+	let rawSnippets: Omit<
+		NewSnippet,
+		'id' | 'repositoryId' | 'documentId' | 'versionId' | 'createdAt'
+	>[];

-  if (language === 'markdown') {
-    rawSnippets = parseMarkdown(file.content, file.path);
-  } else {
-    rawSnippets = parseCodeFile(file.content, file.path, language);
-  }
+	if (language === 'markdown') {
+		rawSnippets = parseMarkdown(file.content, file.path);
+	} else {
+		rawSnippets = parseCodeFile(file.content, file.path, language);
+	}

-  return rawSnippets.map(s => ({
-    ...s,
-    id: crypto.randomUUID(),
-    repositoryId: options.repositoryId,
-    documentId: options.documentId,
-    versionId: options.versionId ?? null,
-    createdAt: new Date(),
-  }));
+	return rawSnippets.map((s) => ({
+		...s,
+		id: crypto.randomUUID(),
+		repositoryId: options.repositoryId,
+		documentId: options.documentId,
+		versionId: options.versionId ?? null,
+		createdAt: new Date()
+	}));
 }
 ```