/**
 * Unit tests for the Markdown parser (TRUEREF-0005).
 *
 * Each suite feeds a small Markdown document to `parseMarkdown` and checks
 * the resulting snippets (type, title, breadcrumb, language, token count).
 */
import { describe, it, expect } from 'vitest';
import { parseMarkdown } from './markdown.parser.js';

// ---------------------------------------------------------------------------
// Helpers
// ---------------------------------------------------------------------------

/**
 * Build a backtick-fenced code block string without nesting backticks in
 * template literals.
 *
 * @param lang - Info string placed directly after the opening fence (may be empty).
 * @param code - Body of the code block, without surrounding newlines.
 * @returns The opening fence, the code, and the closing fence joined by newlines.
 */
function fence(lang: string, code: string): string {
  return '```' + lang + '\n' + code + '\n' + '```';
}

/**
 * Build a tilde-fenced code block string; same layout as `fence` but using
 * `~~~` as the delimiter.
 *
 * @param lang - Info string placed directly after the opening fence (may be empty).
 * @param code - Body of the code block, without surrounding newlines.
 * @returns The opening fence, the code, and the closing fence joined by newlines.
 */
function tildeFence(lang: string, code: string): string {
  return '~~~' + lang + '\n' + code + '\n' + '~~~';
}

// ---------------------------------------------------------------------------
// Basic section splitting
// ---------------------------------------------------------------------------

describe('parseMarkdown — section splitting', () => {
  it('produces no snippets for empty content', () => {
    expect(parseMarkdown('', 'README.md')).toHaveLength(0);
  });

  it('skips content shorter than 20 characters', () => {
    const result = parseMarkdown('# Title\n\nShort.\n', 'README.md');
    expect(result).toHaveLength(0);
  });

  it('parses a single heading section into an info snippet', () => {
    const source = [
      '# Introduction',
      '',
      'This is a paragraph with enough content to pass the minimum length check.'
    ].join('\n');

    const snippets = parseMarkdown(source, 'README.md');
    expect(snippets.length).toBeGreaterThanOrEqual(1);

    const info = snippets.find((s) => s.type === 'info');
    expect(info).toBeDefined();
    expect(info?.title).toBe('Introduction');
    expect(info?.breadcrumb).toBe('Introduction');
  });

  it('builds correct breadcrumb for nested headings', () => {
    const source = [
      '# Getting Started',
      '',
      'Intro text that is long enough to be included here.',
      '',
      '## Installation',
      '',
      'Install by running the command shown below in your terminal.'
    ].join('\n');

    const snippets = parseMarkdown(source, 'README.md');
    const installation = snippets.find((s) => s.title === 'Installation');
    expect(installation).toBeDefined();
    expect(installation?.breadcrumb).toBe('Getting Started > Installation');
  });

  it('resets heading stack correctly when headings ascend', () => {
    const source = [
      '# H1',
      '',
      'Some introductory prose that is longer than twenty characters.',
      '',
      '## H2',
      '',
      'More content here, also long enough to pass the threshold check.',
      '',
      '# Second H1',
      '',
      'Content for second top-level heading, long enough to be included.'
    ].join('\n');

    const snippets = parseMarkdown(source, 'doc.md');
    const secondH1 = snippets.find((s) => s.title === 'Second H1');
    expect(secondH1).toBeDefined();
    // A new top-level heading must not inherit the previous H1/H2 trail.
    expect(secondH1?.breadcrumb).toBe('Second H1');
  });

  it('falls back to filename when no heading is present', () => {
    const source = 'This is some standalone prose content that is long enough to pass.';

    const snippets = parseMarkdown(source, 'notes.md');
    expect(snippets.length).toBeGreaterThanOrEqual(1);
    expect(snippets[0]?.title).toBe('notes.md');
  });
});

// ---------------------------------------------------------------------------
// Fenced code block extraction
// ---------------------------------------------------------------------------

describe('parseMarkdown — code block extraction', () => {
  it('extracts a fenced code block as a code snippet', () => {
    const codeBlock = fence(
      'typescript',
      'function hello(name: string): string {\n return `Hello, ${name}!`;\n}'
    );
    const source = [
      '# Example',
      '',
      'Some prose here that is long enough to pass the minimum check.',
      '',
      codeBlock
    ].join('\n');

    const snippets = parseMarkdown(source, 'README.md');
    const code = snippets.find((s) => s.type === 'code');
    expect(code).toBeDefined();
    expect(code?.language).toBe('typescript');
    expect(code?.content).toContain('function hello');
  });

  it('extracts multiple code blocks from the same section', () => {
    const bashBlock = fence('bash', 'npm install my-library --save-dev');
    const jsBlock = fence('javascript', "const lib = require('my-lib');\nlib.doSomething();");
    const source = [
      '# Usage',
      '',
      'Description of the usage pattern with enough text here.',
      '',
      bashBlock,
      '',
      'More text in between the two code blocks, just enough.',
      '',
      jsBlock
    ].join('\n');

    const snippets = parseMarkdown(source, 'README.md');
    const codeSnippets = snippets.filter((s) => s.type === 'code');
    expect(codeSnippets.length).toBe(2);

    const langs = codeSnippets.map((s) => s.language);
    expect(langs).toContain('bash');
    expect(langs).toContain('javascript');
  });

  it('skips code blocks shorter than 20 characters', () => {
    const shortBlock = fence('', 'x = 1');
    const source = [
      '# Example',
      '',
      'Some prose here that is long enough to pass.',
      '',
      shortBlock
    ].join('\n');

    const snippets = parseMarkdown(source, 'README.md');
    // Guard against a vacuous pass: `every` is true for an empty array, so
    // first require that the prose was actually emitted.
    expect(snippets.length).toBeGreaterThan(0);
    expect(snippets.every((s) => s.type === 'info')).toBe(true);
  });

  it('handles tilde-fenced code blocks', () => {
    const pyBlock = tildeFence('python', 'def greet(name):\n return f"Hello, {name}"');
    const source = [
      '# Section',
      '',
      'Long enough prose content for the section to be included here.',
      '',
      pyBlock
    ].join('\n');

    const snippets = parseMarkdown(source, 'README.md');
    const code = snippets.find((s) => s.type === 'code');
    expect(code).toBeDefined();
    expect(code?.language).toBe('python');
  });

  it('preserves breadcrumb on code snippets', () => {
    const codeBlock = fence(
      'typescript',
      'function connect(url: string): Promise {\n return Promise.resolve();\n}'
    );
    const source = [
      '# API Reference',
      '',
      '## Methods',
      '',
      'Overview of the methods available in this library.',
      '',
      codeBlock
    ].join('\n');

    const snippets = parseMarkdown(source, 'API.md');
    const code = snippets.find((s) => s.type === 'code');
    expect(code).toBeDefined();
    expect(code?.breadcrumb).toBe('API Reference > Methods');
  });
});

// ---------------------------------------------------------------------------
// Token counting
// ---------------------------------------------------------------------------

describe('parseMarkdown — token counting', () => {
  it('attaches a non-zero tokenCount to every snippet', () => {
    const source = [
      '# Overview',
      '',
      'This section contains enough text to produce an info snippet for the test.'
    ].join('\n');

    const snippets = parseMarkdown(source, 'README.md');
    // Without this check the loop below would assert nothing if the parser
    // returned an empty list.
    expect(snippets.length).toBeGreaterThan(0);
    for (const s of snippets) {
      expect(s.tokenCount).toBeGreaterThan(0);
    }
  });
});

// ---------------------------------------------------------------------------
// Large content chunking
// ---------------------------------------------------------------------------

describe('parseMarkdown — large content chunking', () => {
  it('splits a very large prose section into multiple snippets', () => {
    // Generate ~4 000 characters of prose (well above the ~1 800-char window)
    const longParagraph = 'word '.repeat(800).trim();
    const source = `# Big Section\n\n${longParagraph}`;

    const snippets = parseMarkdown(source, 'big.md');
    const infoSnippets = snippets.filter((s) => s.type === 'info');
    expect(infoSnippets.length).toBeGreaterThan(1);
  });
});

// ---------------------------------------------------------------------------
// Real-world sample
// ---------------------------------------------------------------------------

describe('parseMarkdown — real-world sample', () => {
  it('correctly parses a realistic README excerpt', () => {
    const bashInstall = fence('bash', 'npm install my-library');
    const tsUsage = fence(
      'typescript',
      "import { doTheThing } from 'my-library';\n\ndoTheThing({ verbose: true });"
    );
    const source = [
      '# My Library',
      '',
      'A handy library for doing things quickly and efficiently.',
      '',
      '## Installation',
      '',
      'Install via npm using the following command in your project directory:',
      '',
      bashInstall,
      '',
      '## Usage',
      '',
      'Import the library and call the main function as shown below:',
      '',
      tsUsage,
      '',
      '## API',
      '',
      '### doTheThing(options)',
      '',
      'Performs the main operation. Options are passed as a plain object.'
    ].join('\n');

    const snippets = parseMarkdown(source, 'README.md');

    // Should have both info and code snippets
    expect(snippets.some((s) => s.type === 'info')).toBe(true);
    expect(snippets.some((s) => s.type === 'code')).toBe(true);

    // Breadcrumb depth check
    const apiSnippet = snippets.find((s) => s.title === 'doTheThing(options)');
    expect(apiSnippet).toBeDefined();
    expect(apiSnippet?.breadcrumb).toBe('My Library > API > doTheThing(options)');
  });
});