Skip to content

Commit

Permalink
add support for links inside headings (#21)
Browse files (browse the repository at this point in the history)
  • Loading branch information
yielder committed Jan 10, 2025
1 parent 0682dea commit 0096bb3
Show file tree
Hide file tree
Showing 6 changed files with 82 additions and 23 deletions.
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -3,3 +3,4 @@ dist_browser
node_modules
.yarn
version.sh
*.copa
2 changes: 1 addition & 1 deletion package.json
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@
"rollup": "^4.20.0",
"ts-jest": "^29.2.4",
"tslib": "^2.6.3",
"typescript": "^5.5.4"
"typescript": "^5.6.3"
},
"directories": {
"example": "examples",
Expand Down
11 changes: 6 additions & 5 deletions src/core/htmlToMarkdownAST.ts
Original file line number Diff line number Diff line change
Expand Up @@ -26,11 +26,12 @@ export function htmlToMarkdownAST(element: Element, options?: ConversionOptions,
const elem = childElement as Element;
if (/^h[1-6]$/i.test(elem.tagName)) {
const level = parseInt(elem.tagName.substring(1)) as 1 | 2 | 3 | 4 | 5 | 6;
const content = escapeMarkdownCharacters(elem.textContent || '').trim();
if (content) {
debugLog(`Heading ${level}: '${elem.textContent}'`);
result.push({type: 'heading', level, content});
}
debugLog(`Heading ${level}`);
result.push({
type: 'heading',
level,
content: htmlToMarkdownAST(elem, options) // Process child elements
});
} else if (elem.tagName.toLowerCase() === 'p') {
debugLog("Paragraph");
result.push(...htmlToMarkdownAST(elem, options));
Expand Down
13 changes: 8 additions & 5 deletions src/core/markdownASTToString.ts
Original file line number Diff line number Diff line change
Expand Up @@ -85,16 +85,16 @@ function markdownContentASTToString(nodes: SemanticMarkdownAST[], options?: Conv
let content = node.content as string; // might be a nodes array but we take care of that below
if (Array.isArray(node.content)) {
content = markdownContentASTToString(node.content, options, indentLevel);
}
const isMarkdownStringNotEmpty = markdownString.length > 0;
}

const isMarkdownStringNotEmpty = markdownString.length > 0;
const isFirstCharOfContentWhitespace = /\s/.test(content.slice(0, 1));
const isLastCharOfMarkdownWhitespace = /\s/.test(markdownString.slice(-1));
const isContentPunctuation = content.length === 1 && /^[.,!?;:]/.test(content);

if (isMarkdownStringNotEmpty && !isContentPunctuation && !isFirstCharOfContentWhitespace && !isLastCharOfMarkdownWhitespace) {
markdownString += ' ';
}
}

if (node.type === 'text') {
markdownString += `${indent}${content}`;
Expand Down Expand Up @@ -122,7 +122,10 @@ function markdownContentASTToString(nodes: SemanticMarkdownAST[], options?: Conv
if (!isEndsWithNewLine) {
markdownString += '\n';
}
markdownString += `${'#'.repeat(node.level)} ${node.content}\n\n`;
const headingContent = typeof node.content === 'string'
? node.content
: markdownContentASTToString(node.content, options, indentLevel);
markdownString += `${'#'.repeat(node.level)} ${headingContent}\n\n`;
break;
case 'image':
if (!node.alt?.trim() || !!node.src?.trim()) {
Expand Down
2 changes: 1 addition & 1 deletion src/types/markdownTypes.ts
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ export type StrikethroughNode = {
export type HeadingNode = {
type: 'heading';
level: 1 | 2 | 3 | 4 | 5 | 6;
content: string;
content: string | SemanticMarkdownAST[];
};
// Define links and images
export type LinkNode = {
Expand Down
76 changes: 65 additions & 11 deletions tests/markdownConversion.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ import {
convertElementToMarkdown,
ConversionOptions
} from '../src';
import {_Node} from "../src/core/ElementNode";

// Helper function to create a DOM element
function createElement(html: string): Element {
Expand Down Expand Up @@ -33,6 +34,48 @@ describe('HTML to Markdown conversion', () => {
expect(convertHtmlToMarkdown(html, {overrideDOMParser: new dom.window.DOMParser()}).trim()).toBe(expected);
});

test('converts headings with links', () => {
    // Three heading levels (h1-h3), each containing an anchor inside its text.
    // NOTE(review): the mailto address appears as the placeholder
    // "[email protected]" in this view — confirm the real fixture value in the repository.
    const markup = `
<h1>Welcome to <a href="/home">Homepage</a></h1>
<h2>Read our <a href="/blog">Blog</a> section</h2>
<h3>Contact <a href="mailto:[email protected]">Support</a></h3>
`;

    // Each heading is expected to render with its link inline; consecutive
    // headings are separated by a blank line.
    const wanted = [
        '# Welcome to [Homepage](/home)\n\n',
        '## Read our [Blog](/blog) section\n\n',
        '### Contact [Support](mailto:[email protected])',
    ].join('');

    const parser = new dom.window.DOMParser();
    const rendered = convertHtmlToMarkdown(markup, {overrideDOMParser: parser}).trim();

    expect(rendered).toBe(wanted);
});

test('converts headings with mixed inline elements', () => {
    // Headings that combine links with <strong> and <em> children.
    const markup = `
<h1>Welcome to <a href="/home">Home</a> - <strong>Start Here</strong></h1>
<h2>Latest <em>Updates</em> and <a href="/news">News</a></h2>
`;

    // Links, bold and emphasis are all expected to survive inside the heading line.
    const wanted = [
        '# Welcome to [Home](/home) - **Start Here**\n\n',
        '## Latest *Updates* and [News](/news)',
    ].join('');

    const parser = new dom.window.DOMParser();
    const rendered = convertHtmlToMarkdown(markup, {overrideDOMParser: parser}).trim();

    expect(rendered).toBe(wanted);
});

test('converts headings with nested links', () => {
    // The anchors sit one level deeper, each wrapped in a <span> inside the heading.
    const markup = `
<h2>Resources: <span><a href="/docs">Documentation</a></span> and <span><a href="/api">API</a></span></h2>
`;

    // The wrapping spans are expected to leave no trace in the output.
    const wanted = '## Resources: [Documentation](/docs) and [API](/api)';

    const parser = new dom.window.DOMParser();
    const rendered = convertHtmlToMarkdown(markup, {overrideDOMParser: parser}).trim();

    expect(rendered).toBe(wanted);
});


test('converts unordered list', () => {
const html = '<ul><li>Item 1</li><li>Item 2</li><li>Item 3</li></ul>';
const expected = '- Item 1\n- Item 2\n- Item 3';
Expand Down Expand Up @@ -172,11 +215,11 @@ describe('HTML to Markdown conversion', () => {
`;

const expected =
`| TITLE A <!-- col-0 --> <!-- colspan: 5 --> | | | | |\n` +
`| ROW 1 COL 1 <!-- col-0 --> | ROW 1 COL 2 <!-- col-1 --> | ROW 1 COL 3 <!-- col-2 --> | ROW 1 COL 4 <!-- col-3 --> | ROW 1 COL 5 <!-- col-4 --> |\n` +
`| ROW 2-3 COL 1 <!-- col-0 --> <!-- rowspan: 2 --> | ROW 2 COL 2-3 <!-- col-1 --> <!-- colspan: 2 --> | | ROW 2 COL 4 <!-- col-3 --> | ROW 2 COL 5 <!-- col-4 --> |\n` +
`| ROW 3 COL 1 <!-- col-0 --> | ROW 3 COL 2 <!-- col-1 --> | ROW 3 COL 3 <!-- col-2 --> | ROW 3 COL 4 <!-- col-3 --> | |\n` +
`| TITLE B <!-- col-0 --> <!-- colspan: 5 --> | | | | |\n`;
`| TITLE A <!-- col-0 --> <!-- colspan: 5 --> | | | | |\n` +
`| ROW 1 COL 1 <!-- col-0 --> | ROW 1 COL 2 <!-- col-1 --> | ROW 1 COL 3 <!-- col-2 --> | ROW 1 COL 4 <!-- col-3 --> | ROW 1 COL 5 <!-- col-4 --> |\n` +
`| ROW 2-3 COL 1 <!-- col-0 --> <!-- rowspan: 2 --> | ROW 2 COL 2-3 <!-- col-1 --> <!-- colspan: 2 --> | | ROW 2 COL 4 <!-- col-3 --> | ROW 2 COL 5 <!-- col-4 --> |\n` +
`| ROW 3 COL 1 <!-- col-0 --> | ROW 3 COL 2 <!-- col-1 --> | ROW 3 COL 3 <!-- col-2 --> | ROW 3 COL 4 <!-- col-3 --> | |\n` +
`| TITLE B <!-- col-0 --> <!-- colspan: 5 --> | | | | |\n`;

expect(convertHtmlToMarkdown(html, {
overrideDOMParser: new dom.window.DOMParser(),
Expand Down Expand Up @@ -264,7 +307,7 @@ describe('HTML to Markdown conversion', () => {
}).trim()
).toBe(expected);
});

});

describe('Custom Element Processing and Rendering', () => {
Expand Down Expand Up @@ -311,8 +354,13 @@ describe('Custom Element Processing and Rendering', () => {
const html = '<h1>Title</h1>';
const options: ConversionOptions = {
overrideNodeRenderer: (node) => {
if (node.type === 'heading' && node.level === 1) {
return `==== ${node.content} ====\n`;
if (node.type === 'heading' && node.level === 1 && typeof node.content !== 'string') {
if (node.content[0].type === 'text') {
return `==== ${node.content[0].content} ====\n`;
} else {
return "unexpected"
}

}
},
overrideDOMParser: new dom.window.DOMParser()
Expand All @@ -325,8 +373,12 @@ describe('Custom Element Processing and Rendering', () => {
const html = '<custom-element>Custom content</custom-element><h1>Title</h1>';
const options: ConversionOptions = {
overrideElementProcessing: (element) => {
if (element.tagName.toLowerCase() === 'custom-element') {
if (element.nodeType === _Node.TEXT_NODE && element.textContent) {
return [{type: "text", content: element.textContent}]
} else if (element.tagName.toLowerCase() === 'custom-element') {
return [{type: 'custom', content: element.textContent}];
} else {
return undefined;
}
},
renderCustomNode: (node) => {
Expand All @@ -335,8 +387,10 @@ describe('Custom Element Processing and Rendering', () => {
}
},
overrideNodeRenderer: (node) => {
if (node.type === 'heading' && node.level === 1) {
return `==== ${node.content} ====\n`;
if (node.type === 'heading' && node.level === 1
&& typeof node.content !== "string"
&& node.content[0].type === 'text') {
return `==== ${node.content[0].content} ====\n`;
}
},
overrideDOMParser: new dom.window.DOMParser()
Expand Down

0 comments on commit 0096bb3

Please sign in to comment.