markdown-live-preview icon indicating copy to clipboard operation
markdown-live-preview copied to clipboard

Consider adding a feature to reverse HTML to Markdown

Open amlan-sw opened this issue 1 year ago • 0 comments

This ChatGPT-generated code, which has already been tuned and tested, can convert markdown-live-preview's default HTML output back to Markdown.

file: html_to_markdown.html

<!DOCTYPE html>
<head>
    <meta charset="utf-8">
    <title>HTML to MD</title>
    <meta name="description" content="">
    <meta name="author" content="">
    <meta name="viewport" content="width=device-width, initial-scale=1, maximum-scale=1">
</head>
<body>

<script type="text/javascript">

/**
 * Convert an HTML fragment (for example the HTML rendered by
 * markdown-live-preview) back into Markdown text.
 *
 * Supported elements: headings, paragraphs, line breaks, horizontal rules,
 * bold/italic/underline, links, images, ordered/unordered (nested) lists,
 * blockquotes, inline code, fenced code blocks and tables. Any other element
 * contributes only the Markdown of its children (optionally wrapped according
 * to an inline `style` attribute, see handleStyle).
 *
 * @param {string} html - HTML markup to convert; null/undefined is treated as ''.
 * @returns {string} Markdown text, trimmed and terminated by a single newline.
 */
function html_to_markdown(html) {
    const source = html == null ? '' : String(html);

    // Parse into an inert document when possible: assigning untrusted markup to
    // innerHTML of an element owned by the live document can still run
    // onerror/onload handlers (e.g. <img onerror=...>).
    let root;
    if (typeof DOMParser === 'function') {
        root = new DOMParser().parseFromString(source, 'text/html').body;
    } else {
        root = document.createElement('div');
        root.innerHTML = source;
    }

    // Lower-cased tag name of an element node, '' for every other node type.
    function tagOf(node) {
        return node.nodeType === Node.ELEMENT_NODE ? node.tagName.toLowerCase() : '';
    }

    // Direct element children of `node` whose tag is listed in `tags`.
    function elementChildren(node, tags) {
        return Array.from(node.childNodes).filter(child => tags.includes(tagOf(child)));
    }

    // Collapse every whitespace run that spans a blank line into exactly one blank line.
    function collapseBlankLines(text) {
        return text.replace(/\s*\n\s*\n+/g, '\n\n');
    }

    // Wrap the non-blank core of `text` in open/close markers, keeping leading and
    // trailing whitespace outside of them ("** x **" is not valid emphasis).
    // Empty or whitespace-only text is returned unchanged.
    function wrapInline(open, text, close) {
        const core = text.trim();
        if (!open || !core) {
            return text;
        }
        const start = text.indexOf(core);
        return text.slice(0, start) + open + core + close + text.slice(start + core.length);
    }

    // Backtick fence that is longer than any backtick run inside `text`.
    function backtickFence(text, minimum) {
        const runs = text.match(/`+/g) || [];
        const longest = runs.reduce((max, run) => Math.max(max, run.length), 0);
        return '`'.repeat(Math.max(minimum, longest + 1));
    }

    // Markdown markers implied by an inline style attribute, outermost first.
    function handleStyle(element) {
        const style = element.getAttribute('style') || '';
        const markers = [];
        if (/font-weight\s*:\s*bold/i.test(style)) {
            markers.push('**');
        }
        if (/text-decoration\s*:\s*underline/i.test(style)) {
            markers.push('__');
        }
        return markers;
    }

    // Markdown for a single DOM node (text or element); other node types yield ''.
    function parseNode(node) {
        if (node.nodeType === Node.TEXT_NODE) {
            return node.nodeValue;
        }
        if (node.nodeType !== Node.ELEMENT_NODE) {
            return '';
        }

        const tag = node.tagName.toLowerCase();

        const heading = /^h([1-6])$/.exec(tag);
        if (heading) {
            // An ATX heading must stay on one line.
            const title = parseChildren(node).replace(/\s+/g, ' ').trim();
            return '\n' + '#'.repeat(Number(heading[1])) + ' ' + title + '\n\n';
        }

        switch (tag) {
            case 'b':
            case 'strong':
                return wrapInline('**', parseChildren(node), '**');
            case 'i':
            case 'em':
                return wrapInline('*', parseChildren(node), '*');
            case 'u':
                return wrapInline('__', parseChildren(node), '__');
            case 'a': {
                const label = parseChildren(node);
                const href = node.getAttribute('href');
                if (!href) {
                    // Anchor without a target: keep the text instead of emitting "(null)".
                    return label;
                }
                const linkTitle = node.getAttribute('title');
                return '[' + label + '](' + href + (linkTitle ? ` "${linkTitle}"` : '') + ')';
            }
            case 'img': {
                const src = node.getAttribute('src') || '';
                const alt = node.getAttribute('alt') || '';
                const imageTitle = node.getAttribute('title');
                return '![' + alt + '](' + src + (imageTitle ? ` "${imageTitle}"` : '') + ')';
            }
            case 'table':
                return parseTable(node);
            case 'br':
                return '\n';
            case 'hr':
                return '\n\n---\n\n';
            case 'p':
                return '\n\n' + parseChildren(node) + '\n\n';
            case 'ul':
                return '\n' + parseList(node, '* ') + '\n';
            case 'ol':
                return '\n' + parseList(node, '1. ') + '\n';
            case 'blockquote': {
                const quoted = collapseBlankLines(parseChildren(node)).trim();
                const lines = quoted.split('\n').map(line => (line ? '> ' + line : '>'));
                return '\n\n' + lines.join('\n') + '\n\n';
            }
            case 'code': {
                // Code is literal text: read textContent so nested markup adds no markers.
                const code = node.textContent;
                if (!code) {
                    return '';
                }
                const fence = backtickFence(code, 1);
                const pad = /^`|`$/.test(code) ? ' ' : '';
                return fence + pad + code + pad + fence;
            }
            case 'pre': {
                const codeChild = elementChildren(node, ['code'])[0];
                const language = codeChild
                    ? /(?:^|\s)lang(?:uage)?-(\S+)/.exec(codeChild.getAttribute('class') || '')
                    : null;
                // Drop blank lines at the start and whitespace at the end, but keep
                // the indentation of the first code line.
                const code = node.textContent.replace(/^\s*\n/, '').replace(/\s+$/, '');
                const fence = backtickFence(code, 3);
                return '\n\n' + fence + (language ? language[1] : '') + '\n' + code + '\n' + fence + '\n\n';
            }
            case 'script':
            case 'style':
                // Their text content is not document content.
                return '';
            default: {
                const markers = handleStyle(node);
                // Close markers in reverse order so the wrappers nest correctly.
                return wrapInline(markers.join(''), parseChildren(node), markers.slice().reverse().join(''));
            }
        }
    }

    // Concatenated Markdown of all child nodes.
    function parseChildren(node) {
        let result = '';
        node.childNodes.forEach(child => {
            result += parseNode(child);
        });
        return result;
    }

    // Column alignment requested by a cell via `align` or `style="text-align: ..."`.
    function cellAlignment(cell) {
        const attribute = (cell.getAttribute('align') || '').toLowerCase();
        const styled = /text-align\s*:\s*(left|center|right)/i.exec(cell.getAttribute('style') || '');
        const align = attribute || (styled ? styled[1].toLowerCase() : '');
        return ['left', 'center', 'right'].includes(align) ? align : '';
    }

    // Render a <table> as a pipe table with padded columns; the first row is the header.
    function parseTable(node) {
        // Only rows that belong to this table (not to tables nested inside cells).
        const rows = [];
        node.childNodes.forEach(child => {
            const childTag = tagOf(child);
            if (childTag === 'tr') {
                rows.push(child);
            } else if (['thead', 'tbody', 'tfoot'].includes(childTag)) {
                elementChildren(child, ['tr']).forEach(row => rows.push(row));
            }
        });

        const columnWidths = [];
        const alignments = [];
        const tableData = rows.map(row =>
            elementChildren(row, ['th', 'td']).map((cell, column) => {
                // A cell must stay on one line and must not contain a bare pipe.
                const content = parseChildren(cell)
                    .trim()
                    .replace(/\s*\n\s*/g, ' ')
                    .replace(/\|/g, '\\|');
                // Delimiter cells need at least three characters.
                columnWidths[column] = Math.max(columnWidths[column] || 3, content.length);
                if (alignments[column] === undefined) {
                    alignments[column] = cellAlignment(cell);
                }
                return content;
            })
        );

        if (columnWidths.length === 0) {
            return '\n';
        }

        const formatRow = cells =>
            '| ' + columnWidths.map((width, column) => (cells[column] || '').padEnd(width, ' ')).join(' | ') + ' |';

        // Alignment lives in the delimiter row, not in the cell text.
        const separator = '| ' + columnWidths.map((width, column) => {
            switch (alignments[column]) {
                case 'center':
                    return ':' + '-'.repeat(width - 2) + ':';
                case 'right':
                    return '-'.repeat(width - 1) + ':';
                case 'left':
                    return ':' + '-'.repeat(width - 1);
                default:
                    return '-'.repeat(width);
            }
        }).join(' | ') + ' |';

        const lines = [formatRow(tableData[0]), separator];
        tableData.slice(1).forEach(row => lines.push(formatRow(row)));
        return '\n\n' + lines.join('\n') + '\n\n';
    }

    // Render the <li> children of a list. `bullet` is '1. ' for ordered lists
    // (numbered from the list's `start` attribute, default 1) or the literal
    // marker for unordered lists. Nested lists are rendered recursively and
    // indented together with the item's continuation lines; the DOM is not modified.
    function parseList(node, bullet) {
        const ordered = bullet === '1. ';
        const start = ordered ? Number.parseInt(node.getAttribute('start'), 10) : NaN;
        let counter = Number.isNaN(start) ? 1 : start;
        let result = '';

        elementChildren(node, ['li']).forEach(item => {
            const marker = ordered ? (counter++) + '. ' : bullet;
            let text = '';
            let nested = '';

            item.childNodes.forEach(child => {
                const childTag = tagOf(child);
                if (childTag === 'ul' || childTag === 'ol') {
                    nested += parseList(child, childTag === 'ol' ? '1. ' : '* ');
                } else {
                    text += parseNode(child);
                }
            });

            let body = text.trim();
            if (nested) {
                body += '\n' + nested.replace(/\n+$/, '');
            }
            // Indent every following non-empty line so it stays inside this item.
            result += marker + body.replace(/\n(?=[^\n])/g, '\n    ') + '\n';
        });

        return result;
    }

    return collapseBlankLines(parseChildren(root)).trim() + '\n';
}

/**
 * Demo driver: converts a sample HTML document with html_to_markdown(),
 * copies the resulting Markdown to the clipboard and reports the outcome
 * in an alert. Takes no arguments and returns nothing.
 */
function run_test(){

// Example

    const html = `
<div>

    <h1>Markdown syntax guide</h1>

    <h2>Headers</h2>
    <h1>This is a Heading h1</h1>
    <h2>This is a Heading h2</h2>
    <h6>This is a Heading h6</h6>

    <h2>Emphasis</h2>
    <p><em>This text will be italic</em><br><em>This will also be italic</em></p>
    <p><strong>This text will be bold</strong><br><strong>This will also be bold</strong></p>
    <p><em>You <strong>can</strong> combine them</em></p>

    <h2>Lists</h2>

    <h3>Unordered</h3>
    <ul>
        <li>Item 1</li>
        <li>Item 2</li>
        <li>Item 2a</li>
        <li>Item 2b</li>
    </ul>

    <h3>Ordered</h3>
    <ol>
        <li>Item 1</li>
        <li>Item 2</li>
        <li>
            Item 3
            <ol>
                <li>Item 3a</li>
                <li>Item 3b</li>
            </ol>
        </li>
    </ol>

    <h2>Images</h2>

    <p><img title="This is a sample image." alt="This is an alt text." src="image/sample.webp"> </p>

    <h2>Links</h2>

    <p>You may be using <a href="https://markdownlivepreview.com/">Markdown Live Preview</a>.</p>

    <h2>Blockquotes</h2>

    <blockquote>
        <p>Markdown is a lightweight markup language with plain-text-formatting syntax, created in 2004 by John Gruber with Aaron Swartz.</p>
        <blockquote>
            <p>Markdown is often used to format readme files, for writing messages in online discussion forums, and to create rich text using a plain text editor.</p>
        </blockquote>
    </blockquote>

    <h2>Tables</h2>

    <table>
        <thead>
            <tr>
                <th>Left columns</th>
                <th align="center">Right columns</th>
            </tr>
        </thead>
        <tbody>
            <tr>
                <td>left foo</td>
                <td align="center">right foo</td>
            </tr>
            <tr>
                <td>left bar</td>
                <td align="center">right bar</td>
            </tr>
            <tr>
                <td>left baz</td>
                <td align="center">right baz</td>
            </tr>
        </tbody>
    </table>

    <h2>Blocks of code</h2>

    <pre>
        <code>
let message = 'Hello world';
alert(message);
        </code>
    </pre>

    <h2>Inline code</h2>

    <p>This web site is using <code>markedjs/marked</code>.</p>
</div>

    `;

    const md = html_to_markdown(html);

    // The Clipboard API is missing in insecure contexts and older browsers.
    if (!navigator.clipboard || !navigator.clipboard.writeText) {
        alert('CLIPBOARD UNAVAILABLE:\n\n' + md);
        return;
    }

    // Copy first, while the click still counts as a user gesture, and only
    // announce success once the write has actually completed.
    navigator.clipboard.writeText(md).then(
        function () {
            alert('COPIED TO CLIPBOARD:\n\n' + md);
        },
        function (err) {
            alert('COPY FAILED (' + err + '):\n\n' + md);
        }
    );
}

</script>

<button onclick="run_test()"> Test and Copy </button>

</body>
</html>

amlan-sw avatar Sep 15 '24 03:09 amlan-sw