1

I have no control over the source string; all I can do is call html() on it, but I need to clean up the mess in the source.

I want to remove every <div class="page"></div> wrapper but keep its content. The content itself also contains HTML tags, which may be incomplete within a single wrapper, like <div class="

http://jsbin.com/vukogobuze/1/edit?html,js,console,output

expected output

<div class="nodejs-class">La France est à déployer ses porte-avions Charles de Gaulle pour soutenir les opérations contre l'État islamique ( IS) en Syrie et en Irak la présidence française a déclaré après une réunion du cabinet de défense</div>
Rohit Nayal
  • 272
  • 4
  • 15

2 Answers

1

Iterate over the .pageWrap elements. For each one, create a temporary element whose HTML is that element's text content, read the HTML inside the resulting .page element, and finally join all of the pieces together.

// Sample input markup: a #source-wrapper div holding a series of .pageWrap
// divs. The text of each .pageWrap is an HTML-escaped <div class="page">
// element, and the content of those .page elements is escaped once more.
// Read in order, the .page contents are consecutive slices of a single
// <div class="nodejs-class">…</div> element, so its opening and closing tags
// are cut across neighbouring slices.
var str = '<div id="source-wrapper"><div class="pageWrap">&lt;div class="page"&gt;&amp;lt;div class="nodejs-c&lt;/div&gt;</div><div class="pageWrap">&lt;div class="page"&gt;lass"&amp;gt;La France est &lt;/div&gt;</div><div class="pageWrap">&lt;div class="page"&gt;à déployer ses porte&lt;/div&gt;</div><div class="pageWrap">&lt;div class="page"&gt;-avions Charles de G&lt;/div&gt;</div><div class="pageWrap">&lt;div class="page"&gt;aulle pour soutenir &lt;/div&gt;</div><div class="pageWrap">&lt;div class="page"&gt;les opérations contr&lt;/div&gt;</div><div class="pageWrap">&lt;div class="page"&gt;e l\'État islamique (&lt;/div&gt;</div><div class="pageWrap">&lt;div class="page"&gt; IS) en Syrie et en &lt;/div&gt;</div><div class="pageWrap">&lt;div class="page"&gt;Irak la présidence f&lt;/div&gt;</div><div class="pageWrap">&lt;div class="page"&gt;rançaise a déclaré a&lt;/div&gt;</div><div class="pageWrap">&lt;div class="page"&gt;près une réunion du &lt;/div&gt;</div><div class="pageWrap">&lt;div class="page"&gt;cabinet de défense&amp;lt;/&lt;/div&gt;</div><div class="pageWrap">&lt;div class="page"&gt;div&amp;gt;&lt;/div&gt;</div></div>';


// Unescape every .pageWrap by one level, take what its <div class="page">
// holds, and write the concatenation of those pieces into <body>.
$('body').html(
  // $.map yields a plain array of the callback results, in document order.
  $.map($(str).find('.pageWrap'), function(wrapper) {
    // The wrapper's text is itself markup (an escaped <div class="page">),
    // so hand it to a detached scratch <div> to have it parsed.
    var scratch = $('<div>').html($(wrapper).text());

    // Alternative noted by the author, parsing the text directly instead of
    // building a scratch element:
    // return $.parseHTML($(wrapper).text())[0].innerHTML

    // Inner HTML of the parsed .page element.
    return scratch.find('.page').html();
  })
  // Glue the pieces back together with no separator.
  .join('')
);
<script src="https://ajax.googleapis.com/ajax/libs/jquery/2.1.1/jquery.min.js"></script>

UPDATE: If you want to write the cleaned content back into the #source-wrapper div, use the html() method with a callback.

// Sample input markup: a #source-wrapper div holding a series of .pageWrap
// divs. The text of each .pageWrap is an HTML-escaped <div class="page">
// element, and the content of those .page elements is escaped once more.
// Read in order, the .page contents are consecutive slices of a single
// <div class="nodejs-class">…</div> element, so its opening and closing tags
// are cut across neighbouring slices.
var str = '<div id="source-wrapper"><div class="pageWrap">&lt;div class="page"&gt;&amp;lt;div class="nodejs-c&lt;/div&gt;</div><div class="pageWrap">&lt;div class="page"&gt;lass"&amp;gt;La France est &lt;/div&gt;</div><div class="pageWrap">&lt;div class="page"&gt;à déployer ses porte&lt;/div&gt;</div><div class="pageWrap">&lt;div class="page"&gt;-avions Charles de G&lt;/div&gt;</div><div class="pageWrap">&lt;div class="page"&gt;aulle pour soutenir &lt;/div&gt;</div><div class="pageWrap">&lt;div class="page"&gt;les opérations contr&lt;/div&gt;</div><div class="pageWrap">&lt;div class="page"&gt;e l\'État islamique (&lt;/div&gt;</div><div class="pageWrap">&lt;div class="page"&gt; IS) en Syrie et en &lt;/div&gt;</div><div class="pageWrap">&lt;div class="page"&gt;Irak la présidence f&lt;/div&gt;</div><div class="pageWrap">&lt;div class="page"&gt;rançaise a déclaré a&lt;/div&gt;</div><div class="pageWrap">&lt;div class="page"&gt;près une réunion du &lt;/div&gt;</div><div class="pageWrap">&lt;div class="page"&gt;cabinet de défense&amp;lt;/&lt;/div&gt;</div><div class="pageWrap">&lt;div class="page"&gt;div&amp;gt;&lt;/div&gt;</div></div>';


// Replace the content of every .pageWrap with what its escaped
// <div class="page"> holds, then render the whole #source-wrapper into <body>.
$('body').html(
  $(str)
  .find('.pageWrap')
  // .html(callback) runs once per .pageWrap and stores the returned string
  // as that element's new inner HTML.
  .html(function() {
    // The element's text is markup one escape level down; parse it by
    // loading it into a detached scratch <div>.
    var escapedMarkup = $(this).text();
    var scratch = $('<div>').html(escapedMarkup);

    // Alternative noted by the author, parsing the text directly instead of
    // building a scratch element:
    // return $.parseHTML($(this).text())[0].innerHTML

    // Inner HTML of the parsed .page element.
    return scratch.find('.page').html();
  })
  // Climb back up to the wrapper and serialise it, wrapper tag included.
  .closest('#source-wrapper')
  .get(0)
  .outerHTML
);
<script src="https://ajax.googleapis.com/ajax/libs/jquery/2.1.1/jquery.min.js"></script>
Pranav C Balan
  • 113,687
  • 23
  • 165
  • 188
0
// Unwrap every <div class="page"> in the document: the wrapper's child nodes
// are moved into its parent at the wrapper's position, then the empty wrapper
// is removed. Working on nodes (rather than editing outerHTML strings) keeps
// nested markup, including nested <div>s, intact.
//
// Notes on the approach:
//  - Elements expose their class via className/classList, not `.class`, so
//    the selector below is used to match on the "page" class. It also matches
//    divs that carry additional classes alongside "page".
//  - querySelectorAll returns a static list, so removing wrappers while
//    looping does not shift the remaining entries.
//  - Assigning outerHTML replaces the element in the tree and leaves the
//    variable pointing at the detached original, so it cannot be used for a
//    second edit of the same element.
var list = document.querySelectorAll("div.page");
for (var i = 0; i < list.length; i++) {
  var div = list[i];
  var parent = div.parentNode;

  // A wrapper with no parent has nowhere to unwrap into; leave it alone.
  if (!parent) {
    continue;
  }

  // insertBefore moves the node, so firstChild advances on every pass until
  // the wrapper is empty.
  while (div.firstChild) {
    parent.insertBefore(div.firstChild, div);
  }
  parent.removeChild(div);
}
WIPocket
  • 78
  • 7