14

Does JavaScript have a way of truncating HTML text without all the headaches of matching tags etc etc?

Thank you.

Francisc
  • 77,430
  • 63
  • 180
  • 276

9 Answers9

23

I know this question is old, but I recently had the same problem. I wrote the following library, which truncates valid HTML safely: https://github.com/arendjr/text-clipper

arendjr
  • 679
  • 6
  • 12
11

I had the same problem, and wound up writing the following to deal with it. It truncates HTML to a given length, cleans up any start / end tags that might have gotten snipped off at the end, and then closes any tags left unclosed:

/**
 * Cuts an HTML string to `length` characters and then tries to repair the
 * markup: a tag that was sliced in half at the cut point is removed or
 * completed, and a closing tag is appended for opening tags that have no
 * closing tag after them.
 *
 * The cut is made on the raw string, so markup characters count towards
 * `length`.
 *
 * NOTE(review): the tag patterns below only recognise attributes made of
 * word characters (`\w`), so tags whose attribute values contain spaces,
 * hyphens, slashes, etc. are not matched by the repair steps.
 *
 * @param {string} text - HTML source to truncate.
 * @param {number} length - Number of characters of `text` to keep before repair.
 * @returns {string} The truncated string with the repairs described above applied.
 */
function truncateHTML(text, length) {
    var truncated = text.substring(0, length);
    // Remove line breaks and surrounding whitespace
    truncated = truncated.replace(/(\r\n|\n|\r)/gm,"").trim();
    // If the text ends with an incomplete start tag (a "<name attr=..." with no closing ">"), trim it off
    truncated = truncated.replace(/<(\w*)(?:(?:\s\w+(?:={0,1}(["']{0,1})\w*\2{0,1})))*$/g, '');
    // If the text ends with a truncated end tag ("</" optionally followed by part of a name), fix it.
    var truncatedEndTagExpr = /<\/((?:\w*))$/g;
    var truncatedEndTagMatch = truncatedEndTagExpr.exec(truncated);
    if (truncatedEndTagMatch != null) {
        var truncatedEndTag = truncatedEndTagMatch[1];
        // Check to see if there's an identifiable tag in the end tag
        if (truncatedEndTag.length > 0) {
            // If so, find the start tag, and close it.
            // The pattern accepts the partial name followed by at most one more word character.
            var startTagExpr = new RegExp(
                "<(" + truncatedEndTag + "\\w?)(?:(?:\\s\\w+(?:=([\"\'])\\w*\\2)))*>");
            var testString = truncated;
            var startTagMatch = startTagExpr.exec(testString);

            // Strip matches one at a time so that `startTag` ends up holding the
            // name of the LAST matching start tag in the text.
            var startTag = null;
            while (startTagMatch != null) {
                startTag = startTagMatch[1];
                testString = testString.replace(startTagExpr, '');
                startTagMatch = startTagExpr.exec(testString);
            }
            // Complete the partial end tag using the full name that was found;
            // if no start tag matched, the partial end tag is left untouched.
            if (startTag != null) {
                truncated = truncated.replace(truncatedEndTagExpr, '</' + startTag + '>');
            }
        } else {
            // Otherwise, cull off the broken end tag
            truncated = truncated.replace(truncatedEndTagExpr, '');
        }
    }
    // Now the tricky part. Reverse the text, and look for opening tags. For each opening tag,
    //  check to see that the closing tag before it is for that tag. If not, append a closing tag.
    // (`testString` is re-declared here; with `var` this is the same function-scoped variable as above.)
    var testString = reverseHtml(truncated);
    // In the reversed text an opening tag reads "<", then its attributes (value first), then the
    // reversed tag name, then ">". Capture group 2 is the reversed tag name.
    var reverseTagOpenExpr = /<(?:(["'])\w*\1=\w+ )*(\w*)>/;
    var tagMatch = reverseTagOpenExpr.exec(testString);
    while (tagMatch != null) {
        var tag = tagMatch[0];
        var tagName = tagMatch[2];
        var startPos = tagMatch.index;
        var endPos = startPos + tag.length;
        // Everything from the start of the remaining reversed text up to and including this tag,
        // i.e. the part of the original text that comes AFTER this opening tag.
        var fragment = testString.substring(0, endPos);
        // Test to see if an end tag is found in the fragment. If not, append one to the end
        //  of the truncated HTML, thus closing the last unclosed tag.
        // A reversed end tag reads "<name/>"; the "\/" in the string literal is just "/".
        if (!new RegExp("<" + tagName + "\/>").test(fragment)) {
            truncated += '</' + reverseHtml(tagName) + '>';
        }
        // Get rid of the already tested fragment
        testString = testString.replace(fragment, '');
        // Get another tag to test
        tagMatch = reverseTagOpenExpr.exec(testString);
    }
    return truncated;
}

/**
 * Reverses a string and mirrors its angle brackets, so that reversed markup
 * still reads as tags (e.g. "<b>hi</b>" becomes "<b/>ih<b>").
 *
 * The swap is done per character in a single pass, so no placeholder
 * character is needed and every other character (including "Î", U+00CE)
 * passes through unchanged. Iteration is by code point, so surrogate pairs
 * such as emoji stay intact.
 *
 * @param {string} str - Text to reverse.
 * @returns {string} The reversed text with every "<" and ">" exchanged.
 */
function reverseHtml(str) {
    var mirrored = Array.from(str, function (ch) {
        if (ch === '<') {
            return '>';
        }
        if (ch === '>') {
            return '<';
        }
        return ch;
    });
    return mirrored.reverse().join('');
}
Minouris
  • 147
  • 1
  • 6
  • it broke the HTML provided. Tested with:

    Heading 1


    Heading 2

     

    Titulo 3
     

    O Lorem Ipsum é um texto modelo da indústria tipográfica e de impressão. O Lorem Ipsum tem vindo a ser o texto padrão usado por estas indústrias desde o ano de 1500, quando uma misturou os caracteres de um texto para criar um espécime de livro. Este texto não só sobreviveu 5 séculos, mas também o salto para a tipografia electrónica, mantendo-se essencialmente inalterada.

    – Carlos Oliveira Oct 21 '20 at 19:35
7

There's nothing built into JavaScript for this. There's a jQuery plugin that you might take a look at.

Grzegorz Rożniecki
  • 27,415
  • 11
  • 90
  • 112
Darin Dimitrov
  • 1,023,142
  • 271
  • 3,287
  • 2,928
2

There's a mootools plugin which does exactly what you need: mooReadAll at mootools forge

abidibo
  • 4,175
  • 2
  • 25
  • 33
1

If you want a light-weight solution in vanilla JS, this should do the trick, although it'll leave empty elements around, so it depends on if you care about those. Also note that it mutates the nodes in-place.

/**
 * Trims the text under `node` in place so that at most `limit` characters
 * of text remain, walking the tree in document order.
 *
 * Text nodes are shortened; element nodes are kept even when all of their
 * text has been cut away.
 *
 * @param {Node} node - Root of the subtree to truncate (mutated in place).
 * @param {number} limit - Number of text characters still allowed.
 * @returns {number} The character budget left over after processing `node`.
 */
function truncateNode(node, limit) {
  const isText = node.nodeType === Node.TEXT_NODE;

  if (!isText) {
    // Thread the remaining budget through each child in turn.
    let remaining = limit;
    node.childNodes.forEach((kid) => {
      remaining = truncateNode(kid, remaining);
    });
    return remaining;
  }

  // Text node: keep only what the budget allows and report what is left.
  const kept = node.textContent.substring(0, limit);
  node.textContent = kept;
  return limit - node.textContent.length;
}
// Usage example for truncateNode: build a small DOM fragment, truncate it to
// 5 characters of text, and check the resulting markup.
// NOTE(review): `expect(...).toEqual(...)` looks like a Jest/Jasmine-style
// assertion, so this presumably runs inside such a test runner with a DOM.
const span = document.createElement('span');
span.innerHTML = '<b>foo</b><i>bar</i><u>baz</u>';
// Mutates `span` in place; the return value (leftover budget) is ignored here.
truncateNode(span, 5);
// "foo" is kept, "bar" is cut to "ba", and the emptied <u> element remains.
expect(span.outerHTML).toEqual('<span><b>foo</b><i>ba</i><u></u></span>');
Alec
  • 2,432
  • 2
  • 18
  • 28
1

This works with multiple nesting levels:

/**
 * Truncates an HTML string to `maxLength` characters of text while keeping
 * the markup well-formed, by parsing it into a detached <div> and pruning
 * the resulting DOM tree.
 *
 * @param {string} content - HTML to truncate.
 * @param {number} [maxLength=255] - Text length at which truncation happens.
 * @param {string} [append='…'] - Suffix added to the node where the limit is hit.
 * @returns {string} The container's innerHTML after pruning.
 */
let truncate = (content, maxLength = 255, append = '…') => {
    const holder = document.createElement('div');
    holder.innerHTML = content;

    let total = 0;
    let done = false;

    // Depth-first walk: count text on leaf nodes until the limit is reached,
    // then delete every node visited afterwards.
    function visit(current) {
        if (done) {
            current.remove();
            return;
        }

        // Snapshot the children so removals do not disturb the iteration.
        const kids = Array.from(current.childNodes);
        if (kids.length) {
            for (const kid of kids) {
                visit(kid);
            }
            return;
        }

        total += current.textContent.length;
        if (!(total >= maxLength)) {
            return;
        }

        done = true;
        if (total > maxLength) {
            // Drop the characters that overshoot the limit.
            current.textContent = current.textContent.slice(0, -(total - maxLength));
        }
        current.textContent += append;
    }

    visit(holder);

    return holder.innerHTML;
};
Albion S.
  • 218
  • 2
  • 13
0

None of the above solutions fit my use case perfectly, so I wrote a small vanilla JavaScript function. It leaves empty elements behind, but that could easily be fixed.

/**
 * Truncates the visible text of an HTML string to roughly `length`
 * characters while keeping every tag, and wraps the result in a
 * <span title="..."> holding the full, tag-free text so it shows on hover.
 *
 * When the input contains tags, the visible text is cut to `length - 1`
 * characters followed by a single "…". Tags after the cut are kept, so
 * empty elements may remain. When the input contains no tags, the text is
 * cut to `length` characters (then trimmed) followed by "…".
 *
 * @param {string} string - HTML to truncate.
 * @param {number} length - Maximum number of visible characters.
 * @returns {string} `string` unchanged if its text already fits, otherwise
 *   the truncated markup wrapped in a <span> with a `title` attribute.
 */
const truncateWithHTML = (string, length) => {
    // string = "<span class='className'>My long string that</span> I want shorter<span> but just a little bit</span>"

    // The title is emitted inside double quotes, so a raw `"` would end the attribute early.
    const escapeAttr = (value) => value.replace(/"/g, '&quot;');

    const noHTML = string.replace(/<[^>]*>/g, '');

    // if the string does not need to be truncated
    if (noHTML.length <= length) {
        return string;
    }

    // if the string does not contain tags
    if (noHTML.length === string.length) {
        // add <span title=""> to allow complete string to appear on hover
        return `<span title="${escapeAttr(string)}">${string.substring(0, length).trim()}…</span>`;
    }

    const substrings = string.split(/(<[^>]*>)/g).filter(Boolean);
    // substrings = ["<span class='className'>","My long string that","</span>"," I want shorter","<span>"," but just a little bit","</span>"]

    let count = 0;
    let isTruncated = false;
    const truncated = [];
    for (const substr of substrings) {
        // HTML tags are always kept so the markup stays balanced
        if (substr.startsWith('<')) {
            truncated.push(substr);
            continue;
        }
        // once the ellipsis has been emitted, all further text is dropped
        if (isTruncated) {
            continue;
        }
        // one character of the budget is reserved for the ellipsis
        const room = length - count - 1;
        if (substr.length > room) {
            truncated.push(`${substr.substring(0, room)}…`);
            isTruncated = true;
        } else {
            truncated.push(substr);
        }
        count += substr.length;
    }

    // The ellipsis is already placed at the cut point, so none is appended here.
    return `<span title="${escapeAttr(noHTML)}">${truncated.join('')}</span>`;
};

Examples:

// Sample input for truncateWithHTML. Declared with `const` so the example
// does not create an implicit global (which throws in strict mode / ES modules).
const string = "<span class='className'>My long string that</span> I want shorter<span> but just a little bit</span>";

// The trailing comments show the intended results.
truncateWithHTML(string,10); // "<span title='My long string that I want shorter but just a little bit'><span class='className'>My long s…</span><span></span></span>"
truncateWithHTML(string,22); // "<span title='My long string that I want shorter but just a little bit'><span class='className'>My long string that</span> I…<span></span></span>"
Seglinglin
  • 447
  • 1
  • 4
  • 17
0

I just recently finished a jQuery function to do this using the width & height of the container. Test it out and see if it works for you. I'm not yet sure of all the compatibility issues, bugs, or limitations but I've tested it in FF, Chrome, and IE7.

-4

That's quite challenging.

If you don't have any HTML markup, the following might be useful.

TK.
  • 27,073
  • 20
  • 64
  • 72