Perhaps this will settle the question; it reuses the code from the previous answer.
Sample data:
# Sample input: a two-row data frame with an integer `id` column and an `xml`
# column holding one XML document per row as a character string. Each document
# has a single <Product> containing Test_ID, Effective_Date, Membership and two
# <Request> children; the payload of every leaf is stored in its `value` attribute.
myframe <- structure(list(id = 1:2, xml = c("<Root>\n<Product>\n <Test_ID value=\"1\" />\n <Effective_Date value=\"2022-01-01\" />\n <Membership value=\"Yes\" />\n <Request>\n <Request_ID value=\"1\" />\n <Request_type value=\"Simple\" />\n </Request>\n <Request>\n <Request_ID value=\"2\" />\n <Request_type value=\"Complex\" />\n </Request>\n</Product>\n</Root>", "<Root>\n<Product>\n <Test_ID value=\"1\" />\n <Effective_Date value=\"2022-01-01\" />\n <Membership value=\"Yes\" />\n <Request>\n <Request_ID value=\"3\" />\n <Request_type value=\"Simple\" />\n </Request>\n <Request>\n <Request_ID value=\"4\" />\n <Request_type value=\"Complex\" />\n </Request>\n</Product>\n</Root>" )), class = "data.frame", row.names = c(NA, -2L))
# Variant of the sample input with the same two-column layout (`id`, `xml`),
# except that the second XML document has no <Membership> element. It is used
# to show what happens when the per-document results have different columns.
myframe2 <- structure(list(id = 1:2, xml = c("<Root>\n<Product>\n <Test_ID value=\"1\" />\n <Effective_Date value=\"2022-01-01\" />\n <Membership value=\"Yes\" />\n <Request>\n <Request_ID value=\"1\" />\n <Request_type value=\"Simple\" />\n </Request>\n <Request>\n <Request_ID value=\"2\" />\n <Request_type value=\"Complex\" />\n </Request>\n</Product>\n</Root>", "<Root>\n<Product>\n <Test_ID value=\"1\" />\n <Effective_Date value=\"2022-01-01\" />\n <Request>\n <Request_ID value=\"3\" />\n <Request_type value=\"Simple\" />\n </Request>\n <Request>\n <Request_ID value=\"4\" />\n <Request_type value=\"Complex\" />\n </Request>\n</Product>\n</Root>" )), class = "data.frame", row.names = c(NA, -2L))
Functions from the previous answer:
# Recursively replace every unnamed node of a nested list with its "value"
# attribute. Nodes that have names are treated as containers and are walked
# element by element; nodes without names are treated as leaves and yield
# attr(node, "value") (NULL when that attribute is absent).
func1 <- function(z) {
  has_children <- !is.null(names(z))
  if (has_children) {
    return(lapply(z, func1))
  }
  attr(z, "value")
}
# Row-wise merge of two named lists of vectors ("column lists").
#
# Each list is first padded so that all of its elements have the same length
# (the longest element in that list). The result contains the union of the
# names of `A` and `B`, with A's names first, and every element is A's part
# followed by B's part:
#   * names in both lists   -> c(A[[nm]], B[[nm]])
#   * names only in `A`     -> A[[nm]] followed by NA padding for B's rows
#   * names only in `B`     -> NA padding for A's rows followed by B[[nm]]
#
# Args:
#   A, B: named lists whose elements are vectors.
# Returns:
#   A named list in which every element has length n_a + n_b.
#
# NOTE(review): the name has the form of an S3 method for merge() on lists;
# it is kept unchanged because callers refer to it as `merge.list`.
merge.list <- function(A, B) {
  # max(0L, ...) keeps the row counts at 0 for an empty list, where a bare
  # max() would return -Inf with a warning.
  n_a <- max(0L, lengths(A))
  n_b <- max(0L, lengths(B))

  # normalize lengths, just in case, since there may be more than one $Product
  A <- lapply(A, `length<-`, n_a)
  B <- lapply(B, `length<-`, n_b)

  only_b <- setdiff(names(B), names(A))
  only_a <- setdiff(names(A), names(B))
  shared <- intersect(names(A), names(B))

  # Names new to A: pad for A's rows AND keep B's values. Previously only the
  # NA padding was stored, so B's values were dropped and the element ended
  # up shorter than the others.
  A[only_b] <- lapply(B[only_b], function(z) c(rep(NA, n_a), z))
  # Names missing from B: pad for B's rows.
  A[only_a] <- lapply(A[only_a], function(z) c(z, rep(NA, n_b)))
  # Names present in both: simple concatenation.
  A[shared] <- Map(c, A[shared], B[shared])
  A
}
Processing the column of xmls:
# Parse every XML string in the column into a nested R list.
intermediate <- lapply(
  myframe$xml,
  function(doc) xml2::as_list(xml2::read_xml(doc))
)

# Build one data frame per parsed document. For each child of <Root>:
# group its children by tag name, fold same-named children together with
# merge.list(), and turn the result into a data frame; the per-child data
# frames are then row-bound.
final <- lapply(intermediate, function(parsed) {
  per_product <- lapply(func1(parsed$Root), function(product) {
    by_tag <- split(product, names(product))
    merged <- lapply(by_tag, function(same_tag) Reduce(merge.list, same_tag))
    as.data.frame(merged)
  })
  do.call(rbind.data.frame, per_product)
})
final
# [[1]]
# Effective_Date Membership Request.Request_ID Request.Request_type Test_ID
# Product.1 2022-01-01 Yes 1 Simple 1
# Product.2 2022-01-01 Yes 2 Complex 1
# [[2]]
# Effective_Date Membership Request.Request_ID Request.Request_type Test_ID
# Product.1 2022-01-01 Yes 3 Simple 1
# Product.2 2022-01-01 Yes 4 Complex 1
Depending on the structures, you might be able to do:
# Stack the per-document data frames into one; base rbind() only works here
# because every element of `final` has the same columns.
do.call(rbind, final)
# Effective_Date Membership Request.Request_ID Request.Request_type Test_ID
# Product.1 2022-01-01 Yes 1 Simple 1
# Product.2 2022-01-01 Yes 2 Complex 1
# Product.11 2022-01-01 Yes 3 Simple 1
# Product.21 2022-01-01 Yes 4 Complex 1
If there are any differences (such as missing columns), then you will need one of the `rbind` variants provided by other packages. For instance, if the second value of `myframe$xml` did not have `Membership` (as in `myframe2` above), then
# Parse every XML string of the second sample into a nested R list.
intermediate2 <- lapply(
  myframe2$xml,
  function(doc) xml2::as_list(xml2::read_xml(doc))
)

# Same per-document processing as before: group each <Root> child's children
# by tag name, fold same-named children with merge.list(), convert to a data
# frame, then row-bind the per-child data frames.
final2 <- lapply(intermediate2, function(parsed) {
  per_product <- lapply(func1(parsed$Root), function(product) {
    by_tag <- split(product, names(product))
    merged <- lapply(by_tag, function(same_tag) Reduce(merge.list, same_tag))
    as.data.frame(merged)
  })
  do.call(rbind.data.frame, per_product)
})
final2
# [[1]]
# Effective_Date Membership Request.Request_ID Request.Request_type Test_ID
# Product.1 2022-01-01 Yes 1 Simple 1
# Product.2 2022-01-01 Yes 2 Complex 1
# [[2]]
# Effective_Date Request.Request_ID Request.Request_type Test_ID
# Product.1 2022-01-01 3 Simple 1
# Product.2 2022-01-01 4 Complex 1
and unfortunately
# Base rbind() fails here: the second data frame has no Membership column,
# so the column counts differ.
do.call(rbind, final2)
# Error in rbind(deparse.level, ...) :
# numbers of columns of arguments do not match
but we can do
# bind_rows() matches columns by name and fills columns that are missing
# from a data frame with NA.
dplyr::bind_rows(final2)
# Effective_Date Membership Request.Request_ID Request.Request_type Test_ID
# Product.1...1 2022-01-01 Yes 1 Simple 1
# Product.2...2 2022-01-01 Yes 2 Complex 1
# Product.1...3 2022-01-01 <NA> 3 Simple 1
# Product.2...4 2022-01-01 <NA> 4 Complex 1
# use.names = TRUE binds by column name; fill = TRUE pads columns that are
# absent from an element with NA. The printed result shows plain row numbers
# instead of the Product.* row names.
data.table::rbindlist(final2, fill = TRUE, use.names = TRUE)
# Effective_Date Membership Request.Request_ID Request.Request_type Test_ID
# <char> <char> <char> <char> <char>
# 1: 2022-01-01 Yes 1 Simple 1
# 2: 2022-01-01 Yes 2 Complex 1
# 3: 2022-01-01 <NA> 3 Simple 1
# 4: 2022-01-01 <NA> 4 Complex 1