2

Given a DOM structure like this:

<div>
  <div>
    <span>
      <img/>
      <i>
        <span></span>
        <meter></meter>
      </i>
      <a><span></span></a>
    </span>
  </div>
  <nav>
    <form>
      <input/>
      <button></button>
    </form>
  </nav>
</div>

I'm wondering how to take that and return a flat array of selectors, one for each leaf element:

[
  'div > div > span > img',
  'div > div > span > i > span',
  'div > div > span > i > meter',
  'div > div > span > a > span',
  'div > nav > form > input',
  'div > nav > form > button'
]

My attempt hasn't gotten anywhere:

/**
 * Appends the tagName of `node`, followed by the tagNames of all of its
 * descendant elements in document (pre-order) order, to `array`.
 *
 * @param {Array} array - Accumulator that receives the tag names; mutated in place.
 * @param {Object} node - Element-like object exposing `tagName` and `children`.
 * @returns {undefined} Nothing; the result is delivered through `array`.
 */
function outputSelectors(array, node) {
  array.push(node.tagName);
  // Snapshot the children once, then recurse into each one in order.
  Array.from(node.children).forEach((kid) => {
    outputSelectors(array, kid);
  });
}

// Starts the walk at the first element child of <body>. The array literal
// passed here is not stored anywhere, so the collected tags are discarded.
outputSelectors([], document.body.children[0])

Not sure where to go from here.

Lance
  • 75,200
  • 93
  • 289
  • 503

4 Answers4

1

You can map all elements on a page using the getPath method from this answer.

Best try this in your own console, as the snippet takes some time to run, and the snippet's console doesn't seem to handle the output properly.

jQuery.fn.extend({
    /**
     * Builds a '>'-separated selector path for the first element of this
     * jQuery set by walking up through its ancestors.
     *
     * A step gets an `:nth-child(n)` suffix only when its parent has more
     * than one child with the same tag name AND the element is not the
     * first child (n > 1).
     *
     * @returns {string|undefined} The path, or undefined when the set is
     *     empty or its first node has no localName.
     */
    getPath: function () {
        let path;
        let node = this;
        while (node.length) {
            const realNode = node[0];
            let name = realNode.localName;
            // Nodes without a localName end the climb.
            if (!name) break;
            name = name.toLowerCase();

            const parent = node.parent();

            const sameTagSiblings = parent.children(name);
            if (sameTagSiblings.length > 1) {
                // Must be a declared local: assigning to an undeclared name
                // creates a global in sloppy mode and throws in strict mode.
                const allSiblings = parent.children();
                const index = allSiblings.index(realNode) + 1;
                if (index > 1) {
                    name += ':nth-child(' + index + ')';
                }
            }

            // Prepend, because the walk goes from the element towards the root.
            path = name + (path ? '>' + path : '');
            node = parent;
        }

        return path;
    }
});

// Select every element on the page.
const allElements = $("*");
// The second callback argument is the element; wrap it to call getPath on it.
const allPaths = allElements.map((_, e) => $(e).getPath());

console.log(allPaths);
<script src="https://ajax.googleapis.com/ajax/libs/jquery/2.1.1/jquery.min.js"></script>

Here is a version without jQuery, if that's preferable:

/**
 * Builds a '>'-separated selector path for `node` by walking up through
 * `parentElement`. The topmost ancestor (the one without a parentElement)
 * is not included in the path.
 *
 * A step gets an `:nth-child(n)` suffix only when its parent has more than
 * one child with the same tag name AND the element is not the first child
 * (n > 1).
 *
 * @param {Element} node - Element to describe; needs `localName` and `parentElement`.
 * @returns {string|undefined} The path, or undefined when `node` has no parentElement.
 */
function getPath (node) {
    let path;
    while (node.parentElement) {
        if (!node.localName) break;
        let name = node.localName.toLowerCase();

        const parent = node.parentElement;

        // Materialise the sibling list once; it is used for both the
        // same-tag count and the position lookup.
        const allSiblings = [...parent.children];
        const sameTagCount = allSiblings.filter(e => e.localName === name).length;
        if (sameTagCount > 1) {
            const index = allSiblings.indexOf(node) + 1;
            if (index > 1) {
                name += ':nth-child(' + index + ')';
            }
        }

        // Prepend, because the walk goes from the element towards the root.
        path = name + (path ? '>' + path : '');
        node = parent;
    }

    return path;
}

// Grab every element in the document and compute one path per element.
const allElements = document.querySelectorAll("*");
// Spread the returned list into a real array before mapping over it.
const allPaths = [...allElements].map(e => getPath(e));

console.log(allPaths);
Cerbrus
  • 70,800
  • 18
  • 132
  • 147
  • So this algorithm essentially finds every leaf node then builds a path up to the top. Interesting. That cuts out the part of the algorithm of traversing down the nodes, the `*` in jquery. Was wondering if there is a way to build it top down as you go, maybe that is the wrong approach. – Lance Jan 30 '18 at 13:43
  • Why jQuery? From OP: *I would like to know how to do the algorithm, not use a built in method.* – Ele Jan 30 '18 at 13:44
  • @Ele: There isn't very much jQuery in there. Just slightly more readable code. The algorithm is right there, in plain JS. – Cerbrus Jan 30 '18 at 13:45
  • I know, but I think using a jQuery plugin for this is very verbose. – Ele Jan 30 '18 at 13:47
  • @Ele: I added a non-jQuery version :-) – Cerbrus Jan 30 '18 at 13:54
  • @Cerbrus Nice! :-) – Ele Jan 30 '18 at 13:55
1

One possible non-recursive approach, going from the top (the root, to be precise) to the bottom:

/**
 * Iteratively walks the element subtree under `root` (depth-first, document
 * order) and returns one ' > '-joined tag path for every leaf element, i.e.
 * every element without element children. Paths start at `root`.
 *
 * The walk never leaves the subtree: if `root` itself is a leaf, the result
 * is just its own tag name, regardless of root's siblings or ancestors.
 *
 * @param {Element|null} root - Subtree root; a null/undefined root yields [].
 * @returns {string[]} Leaf paths in document order.
 */
function collectLeafNodePathes(root) {
  const paths = [];
  if (!root) {
    return paths;
  }

  // Tag names of the ancestors of `el`, from root down to el's parent.
  const selectorParts = [];
  let el = root;
  while (el) {
    const tagName = el.tagName.toLowerCase();
    if (el.childElementCount) {
      // Not a leaf: record the tag and descend.
      selectorParts.push(tagName);
      el = el.firstElementChild;
      continue;
    }

    paths.push([...selectorParts, tagName].join(' > '));

    // Climb until there is a sibling to visit, but never above root.
    // Checking for root first is what keeps a leaf root from escaping.
    while (el !== root && !el.nextElementSibling) {
      el = el.parentNode;
      selectorParts.pop();
    }
    el = el === root ? null : el.nextElementSibling;
  }
  return paths;
}

// Use the element with id "xxx" as the subtree root and log its leaf paths.
const selectors = collectLeafNodePathes(document.getElementById('xxx'));
console.log(selectors);
<div id="xxx">
  <div>
    <span>
      <img/>
      <i>
        <span></span>
        <meter></meter>
      </i>
      <a><span></span></a>
    </span>
  </div>
  <nav>
    <form>
      <input/>
      <button></button>
    </form>
  </nav>
</div>

That last part (do-while loop) is a bit rough around the edges, though; open to any improvement.

I've used helper properties (childElementCount, firstElementChild, nextElementSibling) to skip checking for text nodes and stuff. If that's not an option (because of compatibility reasons), it's easy to either implement polyfills or just 'rewind' the loop on non-element nodes.

raina77ow
  • 103,633
  • 15
  • 192
  • 229
0

Slightly modifying this solution to get the path, and this one to get the leaf nodes:

/**
 * Builds a ' > '-separated path of nodeName values from the outermost
 * ancestor down to `node`. The walk follows `parentNode` and stops at the
 * first node that has no parent; that topmost node is not part of the path.
 *
 * @param {Node} node - Node to describe; needs `nodeName` and `parentNode`.
 * @returns {string|undefined} The path, or undefined when `node` has no parentNode.
 */
function getPath(node) 
{
  let path;
  while (node.parentNode) 
  {
    // Keep this a declared local: a bare `name = ...` would write to
    // window.name in a classic script and throw in strict/module code.
    const name = node.nodeName;
    if (!name) break;

    // Prepend, because the walk goes from the node towards the root.
    path = name + (path ? ' > ' + path : '');
    node = node.parentNode;
  }
  return path;
}

/**
 * Returns every element in the document that has no element children.
 *
 * Leaves are detected with `childElementCount` rather than
 * `hasChildNodes()`, because the latter also counts text and comment nodes
 * and would therefore miss elements that merely contain text or whitespace.
 *
 * @returns {Element[]} Leaf elements in document order.
 */
function getLeafNodes() 
{
    const allNodes = document.getElementsByTagName("*");
    return Array.from(allNodes).filter(function (elem) {
        return elem.childElementCount === 0;
    });
}

// Collect the leaf elements, then build one path string per leaf and log them.
// NOTE(review): `leadNodes` looks like a typo for `leafNodes`.
var leadNodes = getLeafNodes() ;
var output = leadNodes.map( s => getPath(s) );
console.log(output);
<div>
  <div>
    <span>
      <img/>
      <i>
        <span></span>
        <meter></meter>
      </i>
      <a><span></span></a>
    </span>
  </div>
  <nav>
    <form>
      <input/>
      <button></button>
    </form>
  </nav>
</div>
gurvinder372
  • 66,980
  • 10
  • 72
  • 94
  • Now what if you have duplicate siblings? Then you won't have valid selectors any more. – Cerbrus Jan 30 '18 at 13:46
  • OP hasn't asked for unique selector in his question. – gurvinder372 Jan 30 '18 at 13:48
  • Hmm, this would work but I would like to try to do it top-down, similar to this: https://github.com/hughsk/flat/blob/b6633d0f2c1f06080e6fb51913ec5de749b884dc/index.js#L14-L35, was hoping for an answer along those lines. Can't figure it out. – Lance Jan 30 '18 at 13:49
  • @gurvinder372: True, but it's not an uncommon use case in HTML – Cerbrus Jan 30 '18 at 13:56
  • @Cerbrus In case a valid selector is required (for later access), then I would rather approach this by generating and assigning unique ids to the leaf nodes and keep the array of such ids. – gurvinder372 Jan 30 '18 at 14:05
0

You can create a recursive function and check whether the current element contains children using the children() method.

// Leaf paths collected by print(); print() appends to this array.
const result = [];

// Lower-cased tag name of a jQuery-wrapped element.
const getTag = (el) => {
  const tagName = el.prop('tagName');
  return tagName.toLowerCase();
};

/**
 * Recursively walks the jQuery-wrapped element `el` and pushes one
 * ' > '-joined tag path onto `result` for every element without children.
 *
 * @param {Object} el - jQuery object wrapping the element to walk.
 * @param {string} [prev=''] - Path accumulated so far, including el's own
 *     tag; when empty, el's tag name is used as the start of the path.
 */
function print(el, prev = '') {
  const base = prev.length ? prev : getTag(el);
  const kids = el.children();

  // No children: this element is a leaf, so its path is complete.
  if (!kids.length) {
    result.push(base);
    return;
  }

  const separator = base.length ? ' > ' : '';
  kids.each(function () {
    const child = $(this);
    print(child, base + separator + getTag(child));
  });
}

// Walk the tree rooted at #start, then log the paths collected in `result`.
print($('#start'))
console.log(result)
<script src="https://ajax.googleapis.com/ajax/libs/jquery/2.1.1/jquery.min.js"></script>
<div id="start">
  <div>
    <span>
      <img/>
      <i>
        <span></span>
        <meter></meter>
      </i>
      <a><span></span></a>
    </span>
  </div>
  <nav>
    <form>
      <input/>
      <button></button>
    </form>
  </nav>
</div>

To get an array of unique selectors, you can use a Set on the final result to remove duplicates.

// Leaf paths collected by print(); declared with `let` because it is
// reassigned later in this snippet.
let result = [];

// Lower-cased tag name of a jQuery-wrapped element.
const getTag = (el) => {
  const tagName = el.prop('tagName');
  return tagName.toLowerCase();
};

/**
 * Recursively walks the jQuery-wrapped element `el` and pushes one
 * ' > '-joined tag path onto `result` for every element without children.
 * Identical sibling structures produce identical (duplicate) entries.
 *
 * @param {Object} el - jQuery object wrapping the element to walk.
 * @param {string} [prev=''] - Path accumulated so far, including el's own
 *     tag; when empty, el's tag name is used as the start of the path.
 */
function print(el, prev = '') {
  const base = prev.length ? prev : getTag(el);
  const kids = el.children();

  // No children: this element is a leaf, so its path is complete.
  if (!kids.length) {
    result.push(base);
    return;
  }

  const separator = base.length ? ' > ' : '';
  kids.each(function () {
    const child = $(this);
    print(child, base + separator + getTag(child));
  });
}

// Walk the tree rooted at #start to fill `result`.
print($('#start'))
// Round-trip through a Set to drop duplicate path strings.
result = [...new Set(result)]
console.log(result)
<script src="https://ajax.googleapis.com/ajax/libs/jquery/2.1.1/jquery.min.js"></script>
<div id="start">
  <div>
    <span>
      <img/>
      <i>
        <span></span>
        <meter></meter>
      </i>
      <a><span></span></a>
      <a><span></span></a>
    </span>
  </div>
  <nav>
    <form>
      <input/>
      <button></button>
    </form>
  </nav>
</div>
Nenad Vracar
  • 118,580
  • 15
  • 151
  • 176