3

I have a string as follows:

<div> Here is some text&nbsp;</div>
<div>Here is some text </div>
<ul>
    <li>test 1 </li>
    <li> test 2</li>
    <li>&nbsp;test 3&nbsp;</li>
</ul>
<div> Here is some text </div>

I need to loop through each element and trim the whitespace and any leading or trailing &nbsp; entities using JavaScript so that the end result is as follows:

<div>Here is some text</div>
<div>Here is some text</div>
<ul>
    <li>test 1</li>
    <li>test 2</li>
    <li>test 3</li>
</ul>
<div>Here is some text</div>

To provide more background, this is a string that is being pasted into a WYSIWYG editor. So I am simply attempting to clean up the string as it is being pasted. I'm not great with JavaScript, but even if I were to use the method suggested in the Javascript: How to loop through ALL DOM elements on a page? post, I am uncertain as to how to utilise document.getElementsByTagName("*"); in a string.

UPDATE: Using @Bharata's answer, I was able to achieve the clean-up with the following:

// Sample of the pasted markup: each entry is one fragment, joined with no
// separator so the result is a single continuous HTML string.
var str = [
    "<div> Here is some text&nbsp;</div>",
    "<div>Here is some text </div>",
    "<ul>",
    "    <li>test 1 </li>",
    "    <li> test 2</li>",
    "    <li>&nbsp;test 3&nbsp;</li>",
    "</ul>",
    "<div> Here is some text </div>"
].join("");

// Pass the sample through cleanUp and keep the resulting markup.
var cleanHtml = cleanUp(str);

/**
 * Parses an HTML string and strips leading/trailing padding from the content
 * of every element it contains.
 *
 * "Padding" means any run of whitespace and/or `&nbsp;` entities at the very
 * start or very end of an element's markup. Padding in the middle of the
 * content is left untouched.
 *
 * @param {string} content - HTML markup to clean (e.g. pasted editor content).
 * @returns {string} The cleaned markup, re-serialized by the browser.
 */
function cleanUp(content) {
    // innerHTML serializes a non-breaking space as the literal text "&nbsp;",
    // which String.prototype.trim() does not touch, so the entity has to be
    // matched explicitly alongside ordinary whitespace.
    var edgePadding = /^(?:&nbsp;|\s)+|(?:&nbsp;|\s)+$/g;

    // Detached container: the markup is parsed without being added to the page.
    var dom = document.createElement("div");
    dom.innerHTML = content;

    var elems = dom.getElementsByTagName('*');
    for (var i = 0; i < elems.length; i++) {
        var html = elems[i].innerHTML;
        if (html) {
            var trimmed = html.replace(edgePadding, '');
            // Only write back when something changed, to avoid re-parsing
            // (and rebuilding the children of) elements that were already clean.
            if (trimmed !== html) {
                elems[i].innerHTML = trimmed;
            }
        }
    }
    return dom.innerHTML;
}
Scho
  • 351
  • 1
  • 2
  • 12
  • 2
    Possible duplicate of [Javascript: How to loop through ALL DOM elements on a page?](https://stackoverflow.com/questions/4256339/javascript-how-to-loop-through-all-dom-elements-on-a-page) ... once you have done this, you may simply run `trim()` on the inner HTML of each element. – Tim Biegeleisen Sep 16 '18 at 15:18
  • I'm not looping through all the elements in a page, but a string. – Scho Sep 16 '18 at 15:31
  • 1
    What environment are you running in, nodejs? If so that affects how you would do this as you would need (or well should) use a DOM library like jsDOM to parse the string into elements to make it easier in trimming the contents. – Patrick Evans Sep 16 '18 at 15:39
  • is this being done in browser or other environment? – charlietfl Sep 16 '18 at 15:40
  • Sorry for the confusion guys - please see my edit. – Scho Sep 16 '18 at 16:00
  • this should probably help: https://stackoverflow.com/questions/2579666/getelementsbytagname-equivalent-for-textnodes – Yukulélé Sep 16 '18 at 16:04

2 Answers

3

You have to get all the text nodes and trim their content:

// Visit every text node under #my-div and strip padding from the ones that
// sit at the start or at the end of their parent element.
var walker = document.createTreeWalker(
    document.querySelector('#my-div'),
    NodeFilter.SHOW_TEXT,
    null,
    false
);

var node;

while ((node = walker.nextNode())) {
    // textContent holds decoded characters, so an &nbsp; entity shows up here
    // as U+00A0, which \s already matches. Matching the literal text "&nbsp;"
    // would only ever strip visible "&nbsp;" text the author typed on purpose.

    // No previous sibling: this text node opens its parent, so trim its start.
    if (!node.previousSibling) {
        node.textContent = node.textContent.replace(/^\s+/, '');
    }

    // No next sibling: this text node closes its parent, so trim its end.
    if (!node.nextSibling) {
        node.textContent = node.textContent.replace(/\s+$/, '');
    }
}
Yukulélé
  • 15,644
  • 10
  • 70
  • 94
2

You can use the JS DOM API for this case as follows:

  • Add your text to a hidden div element
  • Replace all spaces in innerHTML content using trim() and replace() functions.

Full example

// Sample markup to clean, built up one fragment per line.
var str = "<div> Here is some text&nbsp;</div>" +
"<div>Here is some text </div>" +
"<ul>" +
"    <li>test 1 </li>" +
"    <li> test 2</li>" +
"    <li>&nbsp;test 3&nbsp;</li>" +
"</ul>" +
"<div> Here is some text </div>";

// Let the browser parse the markup inside the hidden container element.
var hiddenDiv = document.querySelector('#hidden-div');
hiddenDiv.innerHTML = str;

var hiddenElems = hiddenDiv.getElementsByTagName('*');

for(var i = 0; i < hiddenElems.length; i++)
{
    if(hiddenElems[i].innerHTML)
    {
        // Strip whitespace and &nbsp; entities only at the two ends of the
        // element's markup. A single anchored pattern handles mixed runs such
        // as " &nbsp; " and leaves any &nbsp; inside the content in place.
        hiddenElems[i].innerHTML = hiddenElems[i].innerHTML.replace(/^(?:&nbsp;|\s)+|(?:&nbsp;|\s)+$/g, '');
    }
}

// Write the cleaned markup into the page.
document.write('<pre>' + hiddenDiv.innerHTML + '</pre>');
<div id="hidden-div" style="display:none"></div>
Bharata
  • 13,509
  • 6
  • 36
  • 50