82

Is there any way to return an XPath string for a DOM element in JavaScript?

Louis
  • 4,172
  • 4
  • 45
  • 62

9 Answers9

96

I refactored this from another example. It checks whether the element (or one of its ancestors) has an id that is unique in the document and, if so, uses that id to shorten the expression.

Please note if one of the nodes has siblings using the same class attribute value, the XPath will not differentiate which sibling you are trying to select.

/**
 * Builds an XPath expression for a DOM element by walking up its ancestors.
 *
 * For each element on the way up, the step is chosen as follows:
 *   - if it has an `id` that occurs exactly once in the document, the path is
 *     anchored there with `id("...")` and returned immediately;
 *   - if it has a non-unique `id`, the step is `tag[@id="..."]`;
 *   - otherwise, if it has a `class` attribute, the step is `tag[@class="..."]`;
 *   - otherwise the step is `tag[n]`, where n is the 1-based position among
 *     preceding siblings with the same local name.
 *
 * Limitation: steps built from `@class` (or a duplicated `@id`) carry no
 * positional index, so siblings sharing the same attribute value are not
 * told apart by the resulting expression.
 *
 * @param {Element} elm - Element to describe.
 * @returns {?string} The XPath expression, or null when `elm` is not an element.
 */
function createXPathFromElement(elm) {
    const allNodes = document.getElementsByTagName('*');
    const segs = [];

    // Walk a local cursor instead of mutating the parameter.
    for (let node = elm; node && node.nodeType === 1; node = node.parentNode) {
        const tag = node.localName.toLowerCase();

        if (node.hasAttribute('id')) {
            const id = node.getAttribute('id');

            // Count elements sharing this id; stop as soon as a duplicate is found.
            let sameIdCount = 0;
            for (let n = 0; n < allNodes.length && sameIdCount < 2; n++) {
                if (allNodes[n].hasAttribute('id') && allNodes[n].id === node.id) {
                    sameIdCount++;
                }
            }

            if (sameIdCount === 1) {
                // A unique id is an absolute anchor: no need to climb further.
                segs.unshift(`id("${id}")`);
                return segs.join('/');
            }
            segs.unshift(`${tag}[@id="${id}"]`);
        } else if (node.hasAttribute('class')) {
            segs.unshift(`${tag}[@class="${node.getAttribute('class')}"]`);
        } else {
            // `position` and `sib` are block-scoped; the original leaked them as
            // implicit globals, which throws in strict mode and ES modules.
            let position = 1;
            for (let sib = node.previousSibling; sib; sib = sib.previousSibling) {
                if (sib.localName === node.localName) {
                    position++;
                }
            }
            segs.unshift(`${tag}[${position}]`);
        }
    }

    return segs.length ? `/${segs.join('/')}` : null;
}

/**
 * Resolves an XPath expression to the first matching node in the document.
 *
 * The expression is evaluated with `document.documentElement` as the context
 * node, requesting a FIRST_ORDERED_NODE_TYPE result.
 *
 * @param {string} path - XPath expression to evaluate.
 * @returns {?Node} The `singleNodeValue` of the evaluation result.
 */
function lookupElementByXPath(path) {
    const { singleNodeValue } = new XPathEvaluator().evaluate(
        path,
        document.documentElement,
        null,
        XPathResult.FIRST_ORDERED_NODE_TYPE,
        null
    );
    return singleNodeValue;
}
Nate Anderson
  • 18,334
  • 18
  • 100
  • 135
stijn de ryck
  • 961
  • 1
  • 6
  • 3
  • Have tested the XPaths these produce using PHP's DOMDocument and DOMXPath objects – they seem to work really well. – tonyhb Feb 18 '13 at 14:43
  • 1
    This is great! I've looking for something like this for a while, and this is really the most complete solution I've seen. You got my +1. Thanks! – Alejandro Piad Mar 18 '14 at 02:42
  • 1
    `segs` becomes a global variable here. – mattsven Apr 16 '14 at 16:59
  • 2
    This doesn't work on [this page](http://www.icanvas.com/anderson-design-group-marthas-vineyard-maryland-blue-canvas-print-art.html?utm_source=google+utm_medium=cse+utm_campaign=GoogleProducts&gclid=CI2ghPf4isUCFYpgfgod6l8Ang), e.g. 1. In Chrome dev tools, click on an unselected DOM element for a price where the price is not the first listed price. Save that element into variable. 2. Run the algorithm on that element. 3. It brings you back to the first element in that pane only. – yangmillstheory May 05 '15 at 19:53
  • This is awesome !! – Ashok Krishnamoorthy Dec 14 '16 at 10:57
  • This is a great function! Is there something similar to this for getting the CSS selector of an element? – Deven Jan 03 '18 at 12:30
  • This is excellent work! btw, if your node is case sensitive, simply remove all `.toLowerCase()` – MewX Oct 31 '18 at 02:55
  • 4
    not entirely accurate since when two siblings have the same `class` attribute the first will always get picked up... – Andrei Roba Nov 09 '19 at 14:08
  • 1
    @AndreiRoba, I see the same problem. When there is siblings, only the first child's xpath get picked up. Have you found an alertnative? – oldpride Sep 18 '22 at 01:24
49

There's not a unique XPath to a node, so you'll have to decide what's the most appropriate way of constructing a path. Use IDs where available? Numeral position in the document? Position relative to other elements?

See getPathTo() in this answer for one possible approach.

Community
  • 1
  • 1
bobince
  • 528,062
  • 107
  • 651
  • 834
  • 1
    Hey, thanks that looks like a nice function. I made another question more appropriate and more context: http://stackoverflow.com/questions/2661918/javascript-crazy-idea-finding-a-node . In retrospect I should have edited this one... oops lol. – Louis Apr 18 '10 at 10:38
  • +1 “there’s not a unique XPath to a node” (and the feasible alternatives). – dakab Aug 22 '15 at 17:10
  • 2
    XPath is well defined as path to a node from root of the document. – Tomáš Zato Sep 27 '16 at 11:56
  • I'm taking "there's not a unique XPath" to mean "there's many ways to skin this cat". – Nick Grealy Oct 09 '19 at 23:36
27

Here is a functional programming style ES6 function for the job:

/**
 * Computes an XPath for an element as a chain of `tag[n]` steps.
 *
 * The walk climbs from `element` towards the root. It stops early at the
 * first element whose id is the one `document.getElementById` returns for
 * that id; the path is then anchored with `id("...")`. Otherwise the path is
 * absolute (it starts with '/').
 *
 * @param {?Element} element - Element to describe.
 * @returns {string} The XPath; an empty string when `element` is not an element.
 */
function getXPathForElement(element) {
    const steps = [];

    for (let node = element; node && node.nodeType === 1; node = node.parentNode) {
        if (node.id && document.getElementById(node.id) === node) {
            steps.unshift(`id("${node.id}")`);
            return steps.join('/');
        }

        // 1-based position among preceding element siblings with the same name.
        let position = 1;
        let prev = node.previousElementSibling;
        while (prev) {
            if (prev.localName == node.localName) {
                position += 1;
            }
            prev = prev.previousElementSibling;
        }

        steps.unshift(`${node.localName.toLowerCase()}[${position}]`);
    }

    // A leading empty segment makes join() produce the initial '/'.
    steps.unshift('');
    return steps.join('/');
}

/**
 * Evaluates an XPath expression against the document and returns the first
 * node it selects (in document order).
 *
 * @param {string} path - XPath expression.
 * @returns {?Node} The result's `singleNodeValue`.
 */
function getElementByXPath(path) {
    const evaluator = new XPathEvaluator();
    const contextNode = document.documentElement;
    const resultType = XPathResult.FIRST_ORDERED_NODE_TYPE;
    const result = evaluator.evaluate(path, contextNode, null, resultType, null);
    return result.singleNodeValue;
}

// Demo: round-trip an element through getXPathForElement / getElementByXPath.
// Pick the first element matching `li:nth-child(2)` in the page.
const li = document.querySelector('li:nth-child(2)');
// Compute its XPath and print it.
const path = getXPathForElement(li);
console.log(path);
// Resolving the generated path should yield the very same element.
console.log(li === getElementByXPath(path)); // true
<div>
    <table id="start"></table>
    <div>
        <ul><li>option</li></ul>
        <span>title</span>
        <ul>
            <li>abc</li>
            <li>select this</li>
        </ul>
    </div>
</div>

It will use an id selector, unless the element is not the first one with that id. Class selectors are not used, because in interactive web pages classes may change often.

trincot
  • 317,000
  • 35
  • 244
  • 286
  • 1
    instead of ```document.querySelector(`#${elm.id}`)``` just use `document.getElementById(elm.id)` because the former fails on Chrome when id contains only digits. – erdos Jan 12 '19 at 21:05
  • Just wanted to point to `Rohit Luthra's` modification of this anser to address SVG elements. https://stackoverflow.com/a/55793129/3917091. – Regular Jo Jun 05 '21 at 23:38
  • 1
    "Class selectors are not used, because in interactive web pages classes may change often" -- this also applies to XPaths in general. Here's an example I just copied in devtools: `id("mG61Hd")/div[2]/div[1]/div[2]/div[12]/div[1]/div[1]/div[2]/div[1]/div[4]/div[1]/div[1]/div[2]` <-- this is not going to be a very reliable selector – thdoan Feb 05 '22 at 00:29
  • 1
    @thdoan, true, but this question is about xpaths... ;-) And if we must choose *within* the context of xpaths, the bet on *structure* is more reliable than on *classes*. – trincot Feb 05 '22 at 09:19
18

I've adapted the algorithm Chromium uses to calculate the XPath from devtools below.

To use this as-written you'd call Elements.DOMPath.xPath(<some DOM node>, false). The last parameter controls whether you get the shorter "Copy XPath" (if true) or "Copy full XPath".

// Copyright 2018 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

/**
 * Namespace for the XPath helpers adapted from Chromium DevTools.
 *
 * Declared explicitly: assigning to an undeclared `Elements` creates an
 * implicit global in sloppy mode and throws a ReferenceError in strict mode
 * and ES modules.
 */
const Elements = {};
Elements.DOMPath = {};

/**
 * Computes an XPath expression for a DOM node.
 *
 * Walks from `node` up to the document, collecting one step per node. With
 * `optimized` set, the walk stops at the first element that has an `id` and
 * the path starts with `//*[@id="..."]` ("Copy XPath"); otherwise a full path
 * from the document root is produced ("Copy full XPath").
 *
 * @param {!Node} node
 * @param {boolean=} optimized
 * @return {string}
 */
Elements.DOMPath.xPath = function (node, optimized) {
    if (node.nodeType === Node.DOCUMENT_NODE) {
        return '/';
    }

    const steps = [];
    let contextNode = node;
    while (contextNode) {
        const step = Elements.DOMPath._xPathValue(contextNode, optimized);
        if (!step) {
            break;  // Error - bail out early.
        }
        steps.push(step);
        if (step.optimized) {
            break;
        }
        // Attr nodes have a null parentNode in the DOM; their owning element is
        // reachable through ownerElement, so climb through that instead.
        contextNode = contextNode.nodeType === Node.ATTRIBUTE_NODE
            ? contextNode.ownerElement
            : contextNode.parentNode;
    }

    steps.reverse();
    // An optimized first step already carries its own leading slashes (or is
    // the empty document step), so no extra '/' is prepended in that case.
    return (steps.length && steps[0].optimized ? '' : '/') + steps.join('/');
};

/**
 * Builds the XPath step for a single node.
 *
 * @param {!Node} node
 * @param {boolean=} optimized
 * @return {?Elements.DOMPath.Step} The step, or null when the node's index
 *     could not be determined.
 */
Elements.DOMPath._xPathValue = function (node, optimized) {
    let ownValue;
    const ownIndex = Elements.DOMPath._xPathIndex(node);
    if (ownIndex === -1) {
        return null;  // Error.
    }

    switch (node.nodeType) {
        case Node.ELEMENT_NODE:
            if (optimized && node.getAttribute('id')) {
                return new Elements.DOMPath.Step('//*[@id="' + node.getAttribute('id') + '"]', true);
            }
            ownValue = node.localName;
            break;
        case Node.ATTRIBUTE_NODE:
            ownValue = '@' + node.nodeName;
            break;
        case Node.TEXT_NODE:
        case Node.CDATA_SECTION_NODE:
            ownValue = 'text()';
            break;
        case Node.PROCESSING_INSTRUCTION_NODE:
            ownValue = 'processing-instruction()';
            break;
        case Node.COMMENT_NODE:
            ownValue = 'comment()';
            break;
        case Node.DOCUMENT_NODE:
            ownValue = '';
            break;
        default:
            ownValue = '';
            break;
    }

    if (ownIndex > 0) {
        ownValue += '[' + ownIndex + ']';
    }

    // The document step is flagged as optimized so the walk in xPath() ends there.
    return new Elements.DOMPath.Step(ownValue, node.nodeType === Node.DOCUMENT_NODE);
};

/**
 * Computes the positional index of a node among siblings that would match
 * the same XPath node test.
 *
 * @param {!Node} node
 * @return {number} -1 in case of error, 0 if no sibling matches the same
 *     expression, otherwise the 1-based XPath index among matching siblings.
 */
Elements.DOMPath._xPathIndex = function (node) {
    function areNodesSimilar(left, right) {
        if (left === right) {
            return true;
        }

        if (left.nodeType === Node.ELEMENT_NODE && right.nodeType === Node.ELEMENT_NODE) {
            return left.localName === right.localName;
        }

        if (left.nodeType === right.nodeType) {
            return true;
        }

        // XPath treats CDATA as text nodes.
        const leftType = left.nodeType === Node.CDATA_SECTION_NODE ? Node.TEXT_NODE : left.nodeType;
        const rightType = right.nodeType === Node.CDATA_SECTION_NODE ? Node.TEXT_NODE : right.nodeType;
        return leftType === rightType;
    }

    // childNodes (not children) is required here: `children` only lists
    // elements, so text/comment/CDATA nodes would never find themselves or
    // their peers and would lose their positional index.
    const siblings = node.parentNode ? node.parentNode.childNodes : null;
    if (!siblings) {
        return 0;  // Root node - no siblings.
    }

    let hasSimilarSibling = false;
    for (let i = 0; i < siblings.length; ++i) {
        if (siblings[i] !== node && areNodesSimilar(node, siblings[i])) {
            hasSimilarSibling = true;
            break;
        }
    }
    if (!hasSimilarSibling) {
        return 0;
    }

    let ownIndex = 1;  // XPath indices start with 1.
    for (let i = 0; i < siblings.length; ++i) {
        if (areNodesSimilar(node, siblings[i])) {
            if (siblings[i] === node) {
                return ownIndex;
            }
            ++ownIndex;
        }
    }
    return -1;  // An error occurred: |node| not found in parent's child nodes.
};

/**
 * One step of an XPath expression.
 *
 * @unrestricted
 */
Elements.DOMPath.Step = class {
    /**
     * @param {string} value - Text of the step.
     * @param {boolean} optimized - True when the step terminates the upward walk.
     */
    constructor(value, optimized) {
        this.value = value;
        this.optimized = optimized || false;
    }

    /**
     * @override
     * @return {string}
     */
    toString() {
        return this.value;
    }
};

Update 2022-08-14: Here is a TypeScript version.

dcmorse
  • 1,011
  • 11
  • 15
  • The link you provided doesn't work anymore and I think they maybe updated the source code, because I downloaded the latest version and couldn't find any trace of this code. However, the code you provided does it's job very well! And could you maybe let know which file you found the original code for this? – Vasco Nov 29 '19 at 13:00
  • 2
    Swapped the link with the github mirror, should work now. – dcmorse Nov 29 '19 at 20:08
  • What if we want to get classes instead of xpaths? Can you please share me the code here https://stackoverflow.com/questions/60524774/is-there-any-way-to-get-class-of-the-page-in-pyqt5-in-browser? @dcmorse – Abhay Salvi Mar 04 '20 at 12:44
  • 2
    @dcmorse, Can you please give a short block of code illustrating how to use the code you posted above, Thanks :), (no-js developer) – octopus May 16 '20 at 13:43
  • 2
    @octopus , Just paste the above code and pass the element window.onclick = function (e) { // alert(e.target) x = Elements.DOMPath.xPath(e.target) alert(x) } – PankajKushwaha Jan 08 '22 at 11:10
6

A similar solution is given by the function getXPathForElement on the MDN

The following function allows one to pass an element and an XML document to find a unique string XPath expression leading back to that element.

Note: this function works on XML documents, but may not work on HTML documents because HTML upper-cases the nodeName value, as seen in the comments...

Also this may not produce a "unique string XPath"; not unique in either sense :

/**
 * Builds an absolute XPath expression leading from the root element of `xml`
 * down to `el`.
 *
 * Every step has the form
 *   *[local-name()='...' and namespace-uri()='...'][n]
 * where n is the 1-based position of the element among its preceding
 * siblings with the same local name and namespace. The root step carries no
 * positional predicate.
 *
 * Steps are matched on `localName` / `local-name()` rather than `nodeName` /
 * `name()`: in HTML documents `nodeName` is upper-cased while XPath `name()`
 * yields the lower-case name, so a nodeName-based expression matches nothing.
 *
 * @param {Element} el - Element to locate; must be `xml.documentElement` or
 *     one of its descendants.
 * @param {Document} xml - Document that contains `el`.
 * @returns {string} XPath expression selecting `el`.
 * @throws {TypeError} If `el` is not an element inside `xml.documentElement`.
 */
function getXPathForElement(el, xml) {
    const root = xml.documentElement;
    const nodeTest = (node) =>
        `*[local-name()='${node.localName}' and namespace-uri()='${node.namespaceURI ?? ''}']`;

    const steps = [];
    for (let current = el; current !== root; current = current.parentNode) {
        // Leaving the element tree before reaching the root means `el` does
        // not belong to `xml`; fail with a clear message instead of a null
        // dereference.
        if (!current || current.nodeType !== 1) {
            throw new TypeError('getXPathForElement: el is not an element inside xml.documentElement');
        }

        // Count this element plus its preceding siblings with the same expanded name.
        let pos = 0;
        for (let sib = current; sib; sib = sib.previousSibling) {
            if (
                sib.nodeType === 1 &&
                sib.localName === current.localName &&
                sib.namespaceURI === current.namespaceURI
            ) {
                pos += 1;
            }
        }

        steps.unshift(`${nodeTest(current)}[${pos}]`);
    }

    steps.unshift(`/${nodeTest(root)}`);
    return steps.join('/');
}

Also XMLSerializer might be worth a try.

Nate Anderson
  • 18,334
  • 18
  • 100
  • 135
axsk
  • 175
  • 2
  • 9
  • Please note `document.documentElement.nodeName` returns **uppercase `'HTML'`** [as mentioned in documentation](https://developer.mozilla.org/en-US/docs/Web/API/Node/nodeName) But if you search for it using `/*[name()='HTML']`, **you get no results** `document.evaluate("/*[name()='HTML']", document.documentElement).iterateNext()` produces `null`. Whereas if you **use lowercase** `/*[name()='html']` you will find results. The XPath [`name()` function returns QName, which seems case sensitive!](https://developer.mozilla.org/en-US/docs/Web/XPath/Functions/name). – Nate Anderson Apr 14 '23 at 20:53
  • If you're dealing with HTML / Element API , consider using the [`el.localName` attribute](https://developer.mozilla.org/en-US/docs/Web/API/Element/localName) instead of the `el.nodeName`, and the [`local-name()` XPath function](https://developer.mozilla.org/en-US/docs/Web/XPath/Functions/local-name) instead of the `name()` XPath function – Nate Anderson Apr 14 '23 at 20:57
  • My concerns may only apply when dealing with HTML documents, the `nodeName` property is uppercased with HTML, but ["may be cased differently for XML/XHTML documents)."](https://developer.mozilla.org/en-US/docs/Web/API/Element/tagName); later it says: "tag names of elements in an XML DOM tree are returned in the same case in which they're written in the original XML file" – Nate Anderson Apr 14 '23 at 21:03
5

/**
 * Returns an XPath expression for an element.
 *
 * - An element with an id yields `//*[@id="..."]`.
 * - `<body>` yields `/html/body`.
 * - Any other element yields its parent's path followed by `/tag`, with a
 *   1-based `[n]` index only when the parent has several children of the
 *   same node name.
 * - An element whose parent is not an element (the root element, or a
 *   detached node) yields `/tag`.
 *
 * @param {?Element} element - Element to describe.
 * @returns {?string} The XPath, or null when no element is given.
 */
function getElementXPath (element) {
  if (!element) return null;

  if (element.id) {
    // The id value must be quoted: an unquoted `@id=foo` compares against a
    // child element named `foo` (or is a syntax error for ids such as `1a`).
    return `//*[@id="${element.id}"]`;
  }
  if (element.tagName === 'BODY') {
    return '/html/body';
  }

  const tag = element.tagName.toLowerCase();
  const parent = element.parentNode;

  // Stop at the top of the element tree instead of recursing into the
  // document node, which has neither a tagName nor a parent.
  if (!parent || parent.nodeType !== 1) {
    return `/${tag}`;
  }

  const sameTagSiblings = Array.from(parent.childNodes)
    .filter((node) => node.nodeName === element.nodeName);
  const position = sameTagSiblings.indexOf(element) + 1;
  const suffix = sameTagSiblings.length > 1 ? `[${position}]` : '';

  return `${getElementXPath(parent)}/${tag}${suffix}`;
}

// Demo: log the XPath of the first <div> descendant of the element with id "a".
console.log(getElementXPath(document.querySelector('#a div')))
<div id="a">
 <div>def</div>
</div>
fenghen
  • 66
  • 1
  • 3
3

I checked every solution provided here but none of them works with svg elements (code getElementByXPath(getXPathForElement(elm)) === elm returns false for svg or path elements)

So I added the Touko's svg fix to the trincot's solution and got this code:

/**
 * Computes an XPath for an element, with support for non-HTML elements
 * such as SVG.
 *
 * The walk climbs from `element` to the root and stops early at the first
 * element whose id is the one `document.getElementById` returns for that id
 * (the path then starts with `id("...")`). HTMLElement instances produce
 * `tag[n]` steps; every other element produces
 * `*[local-name() = "tag"][n]`.
 *
 * @param {?Element} element - Element to describe.
 * @returns {string} The XPath; an empty string when `element` is not an element.
 */
function getXPathForElement(element) {
    const steps = [];

    for (let node = element; node && node.nodeType === 1; node = node.parentNode) {
        if (node.id && document.getElementById(node.id) === node) {
            steps.unshift(`id("${node.id}")`);
            return steps.join('/');
        }

        // 1-based position among preceding element siblings with the same name.
        let position = 1;
        let prev = node.previousElementSibling;
        while (prev) {
            if (prev.localName == node.localName) {
                position += 1;
            }
            prev = prev.previousElementSibling;
        }

        if (node instanceof HTMLElement) {
            steps.unshift(`${node.localName}[${position}]`);
        } else {
            steps.unshift(`*[local-name() = "${node.localName}"][${position}]`);
        }
    }

    // A leading empty segment makes join() produce the initial '/'.
    steps.unshift('');
    return steps.join('/');
}

The difference is it returns *[local-name() = "tag"][n] instead of tag[n] if element is not an instance of HTMLElement (svgs are SVGElement but I decided not to stick with checking only svg).

Example:

Before:
.../div[2]/div[2]/span[1]/svg[1]/path[1]

After:
.../div[2]/div[2]/span[1]/*[local-name() = "svg"][1]/*[local-name() = "path"][1]

OddMorning
  • 451
  • 4
  • 5
1

Just pass the element in function getXPathOfElement and you will get the Xpath.

/**
 * Builds an absolute XPath for an element from the tag names of the element
 * and its ancestors.
 *
 * Each step is the element's `tagName`, followed by `[n]` when the index
 * reported by getElementIdx() is greater than 1.
 *
 * @param {?Element} elt - Element to describe.
 * @returns {string} The XPath; an empty string when `elt` is not an element.
 */
function getXPathOfElement(elt)
{
    let path = "";
    // Walk a local cursor up the tree; stop at the first non-element (the document).
    for (let node = elt; node && node.nodeType === 1; node = node.parentNode)
    {
        // `idx` and `xname` are declared locally; the original assigned them as
        // implicit globals, which throws in strict mode and ES modules.
        const idx = getElementIdx(node);
        let xname = node.tagName;
        if (idx > 1) xname += "[" + idx + "]";
        path = "/" + xname + path;
    }

    return path;
}
/**
 * Returns the 1-based position of an element among its preceding siblings
 * that are elements with the same tag name.
 *
 * @param {Element} elt - Element whose position is wanted.
 * @returns {number} 1 plus the number of preceding same-tag element siblings.
 */
function getElementIdx(elt)
{
    let position = 1;
    let cursor = elt.previousSibling;
    while (cursor)
    {
        const isSameTagElement = cursor.nodeType == 1 && cursor.tagName == elt.tagName;
        if (isSameTagElement) position += 1;
        cursor = cursor.previousSibling;
    }

    return position;
}
Rohit Luthra
  • 1,256
  • 17
  • 27
0

Get xPath by giving a dom element

This function returns the full XPath selector (without any id or class). This type of selector is helpful when a site generates random ids or classes.

/**
 * Builds a purely structural XPath (no ids or classes) for an element.
 *
 * Each step is the lower-cased tag name, followed by a 1-based `[n]` index
 * only when the parent has more than one child with that tag name. The walk
 * ends at the topmost element, i.e. the one without a parent element; for an
 * element attached to a page that is `<html>`, so the path starts with
 * `/html`.
 *
 * @param {?Element} element - Element to describe.
 * @returns {string} The XPath; an empty string when no element is given.
 */
function getXPath(element) {
    let selector = '';

    for (let current = element; current; current = current.parentElement) {
        const tagName = current.tagName.toLowerCase();
        const parentElement = current.parentElement;

        // Topmost element (<html>, or the root of a detached subtree): it has
        // no siblings to index. The original dereferenced the null parent here.
        if (!parentElement) {
            selector = `/${tagName}${selector}`;
            break;
        }

        // Children of the parent that share the current tag name.
        const sameTagChildren = [...parentElement.children]
            .filter((child) => child.tagName.toLowerCase() === tagName);

        if (sameTagChildren.length > 1) {
            const position = sameTagChildren.indexOf(current) + 1;
            selector = `/${tagName}[${position}]${selector}`;
        } else {
            selector = `/${tagName}${selector}`;
        }
    }

    return selector;
}
Karim
  • 11
  • 4