If you need to generate XPath strings that uniquely identify an element in an XML document based on a predefined attribute name, you can combine the approaches from these two answers
to create the following extension methods:
/// <summary>
/// LINQ to XML helpers that build an XPath expression locating one specific element,
/// preferring a caller-chosen attribute (for example <c>title</c>) over the element name
/// whenever an element carries that attribute.
/// </summary>
public static class XExtensions
{
    /// <summary>
    /// Builds an XPath for <paramref name="element"/> made of one step per ancestor plus
    /// one for the element itself.
    /// </summary>
    /// <remarks>
    /// Each step is <c>*[@attr=value][n]</c> when the element has the attribute, otherwise
    /// <c>*[local-name()=name][n]</c> for namespaced elements or <c>name[n]</c> for elements
    /// in no namespace. An element without a parent gets no leading slash and no index, so
    /// the resulting path is meant to be evaluated from the node that contains the topmost
    /// element (e.g. the document).
    /// </remarks>
    /// <param name="element">The element to locate.</param>
    /// <param name="attributeName">
    /// Attribute used to identify elements. Either a plain name (<c>title</c>) or an expanded
    /// name (<c>{namespace}local</c>), as accepted by the string to <c>XName</c> conversion.
    /// </param>
    /// <returns>The XPath expression as a string.</returns>
    /// <exception cref="ArgumentNullException"><paramref name="element"/> is null.</exception>
    public static string GetAbsoluteXPathByAttribute(this XElement element, string attributeName)
    {
        Func<XElement, string> relativeXPath = e => RelativeXPathByAttribute(e, attributeName);
        return GetXPath(element, relativeXPath);
    }

    /// <summary>
    /// Produces the single location step for <paramref name="element"/>, including the
    /// leading slash and positional predicate when the element has a parent.
    /// </summary>
    static string RelativeXPathByAttribute(XElement element, string attributeName)
    {
        // Convert once instead of once per sibling inside the predicate below.
        XName attrName = attributeName;
        var attr = element.Attribute(attrName);
        if (attr != null)
        {
            // "@{ns}local" is not valid XPath, so a namespace-qualified attribute is matched
            // through local-name()/namespace-uri(), which needs no prefix bindings.
            string attrTest = attrName.NamespaceName.Length == 0
                ? "@" + attrName.LocalName
                : string.Format(
                    "@*[local-name()={0} and namespace-uri()={1}]",
                    XPathLiteral(attrName.LocalName),
                    XPathLiteral(attrName.NamespaceName));
            string value = attr.Value;
            string step = string.Format("*[{0}={1}]", attrTest, XPathLiteral(value));
            // The position must count exactly the siblings the predicate above selects.
            int index = IndexPosition(element, e =>
            {
                var a = e.Attribute(attrName);
                return a != null && a.Value == value;
            });
            return AppendPosition(step, index);
        }

        string localName = element.Name.LocalName;
        if (element.Name.NamespaceName.Length != 0)
        {
            // Namespaced element: match on the local name only so that no prefix is required.
            string step = string.Format("*[local-name()={0}]", XPathLiteral(localName));
            int index = IndexPosition(element, e => e.Name.LocalName == localName);
            return AppendPosition(step, index);
        }

        return AppendPosition(localName, IndexPosition(element));
    }

    /// <summary>
    /// Turns <paramref name="step"/> into <c>/step[index]</c>; a step whose index is -1
    /// (element without a parent) is returned unchanged.
    /// </summary>
    static string AppendPosition(string step, int index)
    {
        if (index == -1)
        {
            return step;
        }
        return string.Format(NumberFormatInfo.InvariantInfo, "/{0}[{1}]", step, index);
    }

    /// <summary>
    /// Returns the 1-based position of <paramref name="element"/> among the sibling elements
    /// that have the same name, or -1 when it has no parent.
    /// </summary>
    /// <exception cref="ArgumentNullException"><paramref name="element"/> is null.</exception>
    public static int IndexPosition(this XElement element)
    {
        return IndexPosition(element, e => e.Name == element.Name);
    }

    /// <summary>
    /// Returns the 1-based position of <paramref name="element"/> among the child elements of
    /// its parent that satisfy <paramref name="isRelevant"/>, or -1 when it has no parent.
    /// </summary>
    /// <param name="element">The element whose position is wanted.</param>
    /// <param name="isRelevant">Filter selecting the siblings that are counted.</param>
    /// <exception cref="ArgumentNullException">An argument is null.</exception>
    /// <exception cref="InvalidOperationException">
    /// <paramref name="element"/> was not found among the siblings selected by
    /// <paramref name="isRelevant"/>.
    /// </exception>
    public static int IndexPosition(XElement element, Func<XElement, bool> isRelevant)
    {
        if (element == null)
        {
            throw new ArgumentNullException("element");
        }
        if (isRelevant == null)
        {
            throw new ArgumentNullException("isRelevant");
        }
        if (element.Parent == null)
        {
            return -1;
        }

        int i = 1; // XPath positions start at 1, not 0
        foreach (var sibling in element.Parent.Elements().Where(isRelevant))
        {
            if (sibling == element)
            {
                return i;
            }
            i++;
        }

        // The filter never yielded the element itself, so it has no position to report.
        throw new InvalidOperationException("element was not found among the siblings selected by isRelevant.");
    }

    /// <summary>
    /// Concatenates the steps of all ancestors (outermost first) followed by the step of
    /// <paramref name="element"/> itself.
    /// </summary>
    static string GetXPath(XElement element, Func<XElement, string> relativeXPath)
    {
        if (element == null)
        {
            throw new ArgumentNullException("element");
        }
        if (relativeXPath == null)
        {
            throw new ArgumentNullException("relativeXPath");
        }

        // Ancestors() yields the nearest ancestor first; reverse to get root-to-leaf order.
        var ancestors = from e in element.Ancestors()
                        select relativeXPath(e);
        return string.Concat(ancestors.Reverse().ToArray()) +
               relativeXPath(element);
    }

    /// <summary>
    /// Produce an XPath literal equal to the value if possible; if not, produce
    /// an XPath expression that will match the value.
    /// From https://stackoverflow.com/questions/1341847/special-character-in-xpath-query
    ///
    /// Note that this function will produce very long XPath expressions if a value
    /// contains a long run of double quotes.
    /// </summary>
    /// <param name="value">The value to match.</param>
    /// <returns>If the value contains only single or double quotes, an XPath
    /// literal equal to the value. If it contains both, an XPath expression,
    /// using concat(), that evaluates to the value.</returns>
    static string XPathLiteral(string value)
    {
        // Without a double quote the value can simply be wrapped in double quotes.
        if (!value.Contains("\""))
        {
            return "\"" + value + "\"";
        }
        // It has double quotes but no single quote: wrap it in single quotes instead.
        if (!value.Contains("'"))
        {
            return "'" + value + "'";
        }

        // Both kinds of quote are present. Split on the double quotes and emit
        //
        //   concat("foo", '"', "bar")
        //
        // where every double quote becomes the single-quoted literal '"' and every
        // non-empty piece in between is double-quoted.
        var sb = new StringBuilder("concat(");
        string[] substrings = value.Split('"');
        bool needComma = false;
        for (int i = 0; i < substrings.Length; i++)
        {
            if (substrings[i].Length != 0)
            {
                if (needComma)
                {
                    sb.Append(", ");
                }
                sb.Append('"').Append(substrings[i]).Append('"');
                needComma = true;
            }
            // Every piece except the last was followed by a double quote in the input.
            if (i < substrings.Length - 1)
            {
                if (needComma)
                {
                    sb.Append(", ");
                }
                sb.Append("'\"'");
                needComma = true;
            }
        }
        sb.Append(")");
        return sb.ToString();
    }
}
And then, to test with and without namespaces:
/// <summary>
/// Round-trip check: generates a path for every element of two sample documents
/// (one without and one with a default namespace) and verifies that evaluating the
/// path selects that same element again.
/// </summary>
public class TestClass
{
    /// <summary>Sample document whose elements are in no namespace.</summary>
    static string GetXml()
    {
        return @"<?xml version=""1.0"" encoding=""utf-8""?>
<parent>
<element href=""www.something.com"" title=""First"">
<element href=""www.something.com"" title=""Second"">
<element href=""www.something.com"" title=""Third"">
</element>
</element>
</element>
<element href=""www.something.com"" title=""Some title""></element>
<element href=""www.something.com"" title=""Another"">
<element href=""www.something.com"" title=""Extra"">
<element href=""www.something.com"" title=""Page"">
<element href=""www.something.com"" title=""Target"">
</element>
</element>
</element>
</element>
</parent>";
    }

    /// <summary>Same structure as <see cref="GetXml"/>, but with a default namespace on the root.</summary>
    static string GetXmlWithNamespace()
    {
        return @"<?xml version=""1.0"" encoding=""utf-8""?>
<parent
xmlns=""urn:schemas-microsoft-com:office:spreadsheet""
xmlns:o=""urn:schemas-microsoft-com:office:office"">
<element href=""www.something.com"" title=""First"">
<element href=""www.something.com"" title=""Second"">
<element href=""www.something.com"" title=""Third"">
</element>
</element>
</element>
<element href=""www.something.com"" title=""Some title""></element>
<element href=""www.something.com"" title=""Another"">
<element href=""www.something.com"" title=""Extra"">
<element href=""www.something.com"" title=""Page"">
<element href=""www.something.com"" title=""Target"">
</element>
</element>
</element>
</element>
</parent>";
    }

    /// <summary>Runs the round-trip check against both sample documents.</summary>
    public static void Test()
    {
        foreach (string sample in new[] { GetXml(), GetXmlWithNamespace() })
        {
            Test(sample);
        }
    }

    /// <summary>
    /// Parses <paramref name="xml"/>, writes the generated paths to the debug output as
    /// indented JSON, and asserts that each path selects the element it was generated for.
    /// </summary>
    /// <param name="xml">The XML document text to test.</param>
    public static void Test(string xml)
    {
        XDocument document = XDocument.Parse(xml);

        // Materialise once so the dump and the verification see the same paths.
        var located = (from node in document.Root.DescendantsAndSelf()
                       select new { Element = node, Path = node.GetAbsoluteXPathByAttribute("title") }).ToList();

        var pathsOnly = located.Select(item => item.Path);
        string json = JsonConvert.SerializeObject(pathsOnly, Formatting.Indented);
        Debug.WriteLine(json);

        foreach (var item in located)
        {
            XElement found = document.XPathSelectElement(item.Path);
            Debug.Assert(found == item.Element); // No asserts
        }
    }
}
Which produces the following paths, each of which correctly evaluates to the element in question:
"parent",
"parent/*[@title=\"First\"][1]",
"parent/*[@title=\"First\"][1]/*[@title=\"Second\"][1]",
"parent/*[@title=\"First\"][1]/*[@title=\"Second\"][1]/*[@title=\"Third\"][1]",
"parent/*[@title=\"Some title\"][1]",
"parent/*[@title=\"Another\"][1]",
"parent/*[@title=\"Another\"][1]/*[@title=\"Extra\"][1]",
"parent/*[@title=\"Another\"][1]/*[@title=\"Extra\"][1]/*[@title=\"Page\"][1]",
"parent/*[@title=\"Another\"][1]/*[@title=\"Extra\"][1]/*[@title=\"Page\"][1]/*[@title=\"Target\"][1]"