2

I want to replace specific texts on a page with JavaScript. For simplicity, let's say I want to replace all letters A with the letter X. It is important that this does not break inline HTML.

Is there a simple way to iterate over all DOM elements and only change actual texts?

<span>hello world <a href="/">abcd</a>..</span>

should become

<span>hello world <a href="/">xbcd</a>..</span>

and not

<spxn>hello world <x href="/">xbcd</x>..</spxn>
boop
  • 7,413
  • 13
  • 50
  • 94
  • Assign all the spans the same class id then use JQuery to change the text of all elements with that class name. – SPlatten Sep 06 '19 at 10:57
  • Possible duplicate of [Javascript .replace command replace page text?](https://stackoverflow.com/questions/7275650/javascript-replace-command-replace-page-text) – wOxxOm Sep 06 '19 at 11:01

3 Answers3

3

Iterate over all text nodes, and change their nodeValue if they contain an a:

/**
 * Collects every text node found beneath a root node, in document order.
 *
 * @param {Node} [root=document.body] - Node whose descendants are searched.
 *     Defaults to the page body, which is what the zero-argument call scans.
 * @returns {Text[]} A static array of the text nodes that were found.
 */
function getAllTextNodes(root = document.body) {
    // Two arguments are enough: the filter defaults to "accept everything"
    // and the historical fourth (entity reference) argument is obsolete.
    const walker = document.createTreeWalker(root, NodeFilter.SHOW_TEXT);
    const textNodes = [];

    // nextNode() yields null once the walker has run past the last text node.
    let node = walker.nextNode();
    while (node) {
        textNodes.push(node);
        node = walker.nextNode();
    }
    return textNodes;
}

// Swap every lowercase "a" for "x" in the text nodes returned by
// getAllTextNodes(), writing back only when the text actually changed.
for (const textNode of getAllTextNodes()) {
  const original = textNode.nodeValue;
  const replaced = original.replace(/a/g, 'x');
  if (replaced === original) {
    continue;
  }
  textNode.nodeValue = replaced;
}
<a href="/">abcd</a>

You can also create a whitelist or blacklist of parents whose text nodes are changeable, if you want:

/**
 * Gathers the text nodes that a TreeWalker rooted at document.body yields.
 *
 * @returns {Text[]} The text nodes, in the order the walker produced them.
 */
function getAllTextNodes() {
    const collected = [];
    const treeWalker = document.createTreeWalker(
        document.body,
        NodeFilter.SHOW_TEXT,
        null,
        false
    );

    // The walker returns a falsy value once there is nothing left to visit.
    for (let current = treeWalker.nextNode(); current; current = treeWalker.nextNode()) {
        collected.push(current);
    }
    return collected;
}

// Parent elements whose text is code rather than prose. Rewriting it would
// corrupt inline scripts / embedded JSON (SCRIPT) and style sheets (STYLE).
const tagNamesToKeepUnchanged = ['SCRIPT', 'STYLE'];

// Swap every lowercase "a" for "x" in all remaining text nodes, writing back
// only when the text actually changed.
getAllTextNodes().forEach((node) => {
  if (tagNamesToKeepUnchanged.includes(node.parentNode.tagName)) {
    return;
  }
  const { nodeValue } = node;
  const newValue = nodeValue.replace(/a/g, 'x');
  if (newValue !== nodeValue) {
    node.nodeValue = newValue;
  }
});

// Parse the JSON held by the first <script type="application/json"> element
// and log its `key` property.
const jsonScript = document.querySelector('script[type="application/json"]');
const obj = JSON.parse(jsonScript.textContent);
console.log(obj.key);
<a href="/">abcd</a>
<p>foo bar</p>
<script type="application/json">{"key":"value"}</script>

This will preserve tag names, event listeners, and pretty much everything except the content of certain text nodes.

CertainPerformance
  • 356,069
  • 52
  • 309
  • 320
  • This changes the content of `<script>` elements – boop Sep 06 '19 at 11:01
  • 1
    Oh, that's true, though luckily it probably doesn't matter since the scripts will have already been executed. You can make a whitelist of parent elements whose text is changeable (or a blacklist) if you want – CertainPerformance Sep 06 '19 at 11:03
  • 1
    It doesn't matter because scripts run just once, when they are added, and it's really trivial to check if for some reason someone wants that. – wOxxOm Sep 06 '19 at 11:03
  • 1
    This is a cool answer, I didn't know about `createTreeWalker`! +1 – Tomáš Zato Sep 06 '19 at 11:11
  • 1
    it actually matters. it manipulates functions within script blocks which gets me exceptions later on. – boop Sep 06 '19 at 11:13
2

I usually use this:

/**
 * Executes an operation over every text node at or below the given node.
 *
 * Each child list is copied before it is walked, so the callback may remove
 * or replace the text node it receives without siblings being skipped.
 * Children detached by the callback after the copy was taken are still visited.
 *
 * @param {Node} element - Node whose subtree is searched; may itself be a text node.
 * @param {function(Text):void} callback - Invoked once for each text node found.
 */
function processTextNodes(element, callback) {
    // A text node is a leaf: hand it to the callback and stop.
    if (element.nodeType === Node.TEXT_NODE) {
        callback(element);
        return;
    }
    // childNodes is a live list; iterate over a snapshot so that mutations
    // made by the callback cannot shift nodes out from under the loop.
    for (const childNode of Array.from(element.childNodes)) {
        processTextNodes(childNode, callback);
    }
}

For example try this:

// Upper-case the text of every text node under <body>.
processTextNodes(document.body, (textNode) => {
    textNode.data = textNode.data.toUpperCase();
});

I used this in several userscripts that replace words in news articles to make them more fun.

Tomáš Zato
  • 50,171
  • 52
  • 268
  • 778
0

The crawler by @CertainPerformance made JSFiddle.net crash for me. I also need to replace the text node with an HTML element, so I had to move away from text nodes, and I settled for a more modest solution with an extensive regex lookaround to ensure HTML tags, properties and values are (mostly) not edited.

// Words passed to colorThoseWords() below.
var list = [
  "crabe",
  "eau",
  "voir",
  "nom",
  "de",
  "des",
  "le",
  "les",
];
/**
 * Wraps each whole-word occurrence of the given words inside <p>, <li>, <h2>,
 * <h3> and <a> elements in an <i class='signit-colored'> element.
 *
 * The replacement runs on each element's HTML string, so lookarounds are used
 * to (mostly) avoid touching tag names, attribute names and attribute values.
 *
 * @param {string[]} arr - Words to highlight. Entries are joined with "|" and
 *     inserted into the regular expression as-is, so regex syntax is honoured.
 */
var colorThoseWords = function(arr) {
  // An empty list would build `\b()\b`, which matches the empty string at
  // every word boundary and would inject empty <i> elements everywhere.
  if (arr.length === 0) {
    return;
  }
  // Declared locally: the bare assignment used before created an implicit
  // global and throws a ReferenceError in strict mode / ES modules.
  const words = arr.join('|');
  // Regex lookaround: https://regular-expressions.info/lookaround.html
  // Regex `negative lookbehind` and `negative lookahead`
  // Run it: https://regex101.com/r/NZ5LQZ/1
  // Note the doubled backslash before `]`: inside a template literal a single
  // `\]` collapses to `]`, which closed the lookahead character class early.
  const s = `(?<![<=#"'\`:;,./({[-])\\b(${words})\\b(?![>=#"'\`:)\\]}-])`;
  const r = new RegExp(s, 'gi');
  console.log({r});

  $("p,li,h2,h3,a").each(function() {
    const text = $(this).html();
    $(this).html(text.replace(r, "<i class='signit-colored'>$1</i>"));
  });
};
/**
 * Replaces every element carrying the `signit-colored` class with its own
 * plain text, removing the wrappers that colorThoseWords() inserts.
 */
var uncolorWords = function() {
  $(".signit-colored").each(function() {
    const text = $(this).text();
    // A string handed to replaceWith() is parsed as HTML; a text node keeps
    // characters such as "<" and "&" literal.
    $(this).replaceWith(document.createTextNode(text));
  });
};
// Highlight the words in `list` when this script runs.
colorThoseWords(list);
// Uncomment to strip the highlighting again:
// uncolorWords();

See https://jsfiddle.net/x7f24qnv/13/

I suspect @Tomáš_Zato_-_Reinstate_Monica's traversal solution may be best if we edit the TEXT_NODE's parent.

Hugolpz
  • 17,296
  • 26
  • 100
  • 187