0

I want to turn an XML document with repeated values into a data frame. I have read several posts on the topic, but I can't get it to work!

<tns:DataSet xmlns:tns="www.test.org" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">
    <tns:Curves>
        <tns:Name v="Name1"/>
    <tns:Point>
            <tns:Xvalue v="0"/>
        </tns:Point>
    </tns:Curves>
    <tns:Curves>
        <tns:Name v="Name2"/>
        <tns:Point>
            <tns:Xvalue v="5"/>
        </tns:Point>
        <tns:Point>
            <tns:Xvalue v="20"/>
        </tns:Point>
    </tns:Curves>
</tns:DataSet>

Here is a post with a similar question, but it does not quite match my case: "Load XML to Dataframe in R with parent node attributes".

library(XML)

# Parse the file into an internal document so that XPath queries can be run
# against it.
doc <- xmlTreeParse("./sample.xml", useInternalNodes = TRUE)

# Convert one <tns:Curves> node into a data frame with one row per Xvalue.
#
# Args:
#   x:          A <tns:Curves> node taken from the parsed document.
#   namespaces: Named character vector mapping the XPath prefix to its
#               namespace URI. Passed explicitly because the prefix is
#               declared on the root element, not on the node itself.
#
# Returns:
#   A data frame with character columns `Name` and `Xvalue`; it has zero
#   rows when the curve contains no points.
bodyToDF <- function(x, namespaces = c(tns = "www.test.org")) {
  # Paths start with "./" so they are evaluated relative to `x`. A leading
  # "/" restarts at the document root, where neither tns:Name nor tns:Point
  # exists, so nothing would match.
  curve_name <- as.character(unlist(
    xpathSApply(x, "./tns:Name", xmlGetAttr, "v",
                default = NA_character_, namespaces = namespaces)
  ))
  if (length(curve_name) == 0L) {
    curve_name <- NA_character_
  }

  x_values <- as.character(unlist(
    xpathSApply(x, "./tns:Point/tns:Xvalue", xmlGetAttr, "v",
                default = NA_character_, namespaces = namespaces)
  ))

  # The curve name is repeated for every point that belongs to the curve.
  data.frame(
    Name = rep(curve_name[1L], length(x_values)),
    Xvalue = x_values,
    stringsAsFactors = FALSE
  )
}

# One data frame per curve, stacked into a single result.
res <- xpathApply(doc, "/tns:DataSet/tns:Curves", bodyToDF)
temp.df <- do.call(rbind, res)

I expect a data frame with the columns Name and Xvalue:

   Name XValue
1 Name1      0
2 Name2      5
3 Name2     20
Jacques
  • 3
  • 2

1 Answer

0

An xml2 approach, using a bit of XPath 'magic':

library(xml2)
library(magrittr)

# Sample document, inlined so that the example is self-contained.
doc <- read_xml('<tns:DataSet xmlns:tns="www.test.org" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">
                <tns:Curves>
                  <tns:Name v="Name1"/>
                  <tns:Point>
                    <tns:Xvalue v="0"/>
                  </tns:Point>
                </tns:Curves>
                <tns:Curves>
                  <tns:Name v="Name2"/>
                  <tns:Point>
                    <tns:Xvalue v="5"/>
                  </tns:Point>
                  <tns:Point>
                    <tns:Xvalue v="20"/>
                  </tns:Point>
                </tns:Curves>
                </tns:DataSet>')

# Get all tns:Xvalue nodes; each one becomes a row of the result.
xvalue <- xml_find_all(doc, "//tns:Xvalue")

# Build the data frame.
# Name column: from each Xvalue node, walk up to the nearest enclosing
# tns:Curves element (ancestor::tns:Curves[1]), step down to its tns:Name
# child, and read that node's "v" attribute.
# xValue column: the "v" attribute of the Xvalue node itself.
df <- data.frame(
  Name = xml_find_first(xvalue, "ancestor::tns:Curves[1]/tns:Name") %>%
    xml_attr("v"),
  xValue = xvalue %>% xml_attr("v"),
  stringsAsFactors = FALSE
)

df

#    Name xValue
# 1 Name1      0
# 2 Name2      5
# 3 Name2     20
Wimpel
  • 26,031
  • 1
  • 20
  • 37