0

I am not very advanced in R. For a project, I need to read the following XML file into R. Could you please help me with some code?

Thank you so much!

<?xml version="1.0" encoding="UTF-8"?>
<
-->
<Games timestamp="2017-12-27T18:53:32">
  <Game id="726877" away_score="2" away_team_id="164" away_team_name="Bayer 04 Leverkusen" competition_id="5" competition_name="Champions League" game_date="2013-09-17T19:45:00" home_score="4" home_team_id="1" home_team_name="Manchester United" matchday="1" period_1_start="2013-09-17T19:45:40" period_2_start="2013-09-17T20:45:56" season_id="2013" season_name="Season 2013/2014">
    <Event id="1992096475" event_id="1" type_id="34" period_id="16" min="0" sec="0" team_id="1" outcome="1" x="0.0" y="0.0" timestamp="2013-09-17T18:47:44.144" last_modified="2013-09-17T19:07:31">
      <Q id="126212742" qualifier_id="59" value="1, 12, 3, 31, 5, 15, 25, 16, 20, 10, 26, 6, 8, 13, 14, 18, 22, 23" />
      <Q id="1442327870" qualifier_id="194" value="14965" />
      <Q id="733173531" qualifier_id="44" value="1, 2, 2, 3, 2, 2, 3, 3, 4, 3, 3, 5, 5, 5, 5, 5, 5, 5" />
      <Q id="826253563" qualifier_id="227" value="0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0" />
      <Q id="767631931" qualifier_id="130" value="8" />
      <Q id="1228198122" qualifier_id="131" value="1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 0, 0, 0, 0, 0, 0, 0" />
      <Q id="1973319984" qualifier_id="30" value="51940, 55909, 14075, 41184, 2034, 14965, 20695, 2404, 12297, 13017, 83090, 37642, 27258, 39725, 43020, 18892, 54771, 43250" />
    </Event>
    <Event id="693448427" event_id="1" type_id="34" period_id="16" min="0" sec="0" team_id="164" outcome="1" x="0.0" y="0.0" timestamp="2013-09-17T18:49:35.764" last_modified="2013-09-17T19:46:59">
      <Q id="1835631099" qualifier_id="130" value="4" />
      <Q id="1265328962" qualifier_id="59" value="1, 26, 17, 3, 21, 5, 10, 6, 11, 18, 7, 4, 8, 9, 14, 15, 23, 25" />
      <Q id="1078621513" qualifier_id="44" value="1, 2, 2, 3, 2, 2, 3, 3, 4, 4, 4, 5, 5, 5, 5, 5, 5, 5" />
      <Q id="1841403032" qualifier_id="227" value="0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0" />
      <Q id="536206380" qualifier_id="131" value="1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 0, 0, 0, 0, 0, 0, 0" />
      <Q id="1909738980" qualifier_id="30" value="80201, 80149, 27795, 50309, 50022, 20301, 112338, 10233, 16141, 38239, 85971, 69960, 38117, 42181, 20726, 165174, 95743, 6107" />
      <Q id="149852270" qualifier_id="197" value="79" />
      <Q id="1188894429" qualifier_id="194" value="10233" />
    </Event>
    <Event id="1157700789" event_id="2" type_id="32" period_id="1" min="0" sec="0" team_id="1" outcome="1" x="0.0" y="0.0" timestamp="2013-09-17T19:45:40.853" last_modified="2013-09-17T19:45:41">
      <Q id="934036070" qualifier_id="127" value="Left to Right" />
    </Event>
    <Event id="1702860990" event_id="3" type_id="32" period_id="1" min="0" sec="0" team_id="164" outcome="1" x="0.0" y="0.0" timestamp="2013-09-17T19:45:40.853" last_modified="2013-09-17T19:45:40">
      <Q id="1911763730" qualifier_id="127" value="Right to Left" />
    </Event>
    <Event id="2117870340" event_id="13" type_id="12" period_id="1" min="0" sec="25" player_id="50022" 

I already tried

# Attempt from the question: read the whole XML file into a data frame with a
# single call and store it in `results`.
# NOTE(review): xmlToDataFrame presumably builds its columns from the text of
# child elements, whereas the sample above keeps its data in attributes --
# confirm against the XML package documentation.
results <- xmlToDataFrame("data/5_Champions_League/F1/F1_20132014.xml")

1 Answer

1

Since you didn't provide complete data or describe what you want to get from it, here's a way to get the Game information:

# Parse the literal XML text below into a document made of internal nodes,
# which is the form the XPath helpers of the XML package operate on.
doc <- XML::xmlParse(
  '<Games timestamp="2017-12-27T18:53:32">
  <Game id="726877" away_score="2" away_team_id="164" away_team_name="Bayer 04 Leverkusen" competition_id="5" competition_name="Champions League" game_date="2013-09-17T19:45:00" home_score="4" home_team_id="1" home_team_name="Manchester United" matchday="1" period_1_start="2013-09-17T19:45:40" period_2_start="2013-09-17T20:45:56" season_id="2013" season_name="Season 2013/2014">
    <Event id="1992096475" event_id="1" type_id="34" period_id="16" min="0" sec="0" team_id="1" outcome="1" x="0.0" y="0.0" timestamp="2013-09-17T18:47:44.144" last_modified="2013-09-17T19:07:31">
      <Q id="126212742" qualifier_id="59" value="1, 12, 3, 31, 5, 15, 25, 16, 20, 10, 26, 6, 8, 13, 14, 18, 22, 23" />
      <Q id="1442327870" qualifier_id="194" value="14965" />
      <Q id="733173531" qualifier_id="44" value="1, 2, 2, 3, 2, 2, 3, 3, 4, 3, 3, 5, 5, 5, 5, 5, 5, 5" />
      <Q id="826253563" qualifier_id="227" value="0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0" />
      <Q id="767631931" qualifier_id="130" value="8" />
      <Q id="1228198122" qualifier_id="131" value="1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 0, 0, 0, 0, 0, 0, 0" />
      <Q id="1973319984" qualifier_id="30" value="51940, 55909, 14075, 41184, 2034, 14965, 20695, 2404, 12297, 13017, 83090, 37642, 27258, 39725, 43020, 18892, 54771, 43250" />
    </Event>
    <Event id="693448427" event_id="1" type_id="34" period_id="16" min="0" sec="0" team_id="164" outcome="1" x="0.0" y="0.0" timestamp="2013-09-17T18:49:35.764" last_modified="2013-09-17T19:46:59">
      <Q id="1835631099" qualifier_id="130" value="4" />
      <Q id="1265328962" qualifier_id="59" value="1, 26, 17, 3, 21, 5, 10, 6, 11, 18, 7, 4, 8, 9, 14, 15, 23, 25" />
      <Q id="1078621513" qualifier_id="44" value="1, 2, 2, 3, 2, 2, 3, 3, 4, 4, 4, 5, 5, 5, 5, 5, 5, 5" />
      <Q id="1841403032" qualifier_id="227" value="0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0" />
      <Q id="536206380" qualifier_id="131" value="1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 0, 0, 0, 0, 0, 0, 0" />
      <Q id="1909738980" qualifier_id="30" value="80201, 80149, 27795, 50309, 50022, 20301, 112338, 10233, 16141, 38239, 85971, 69960, 38117, 42181, 20726, 165174, 95743, 6107" />
      <Q id="149852270" qualifier_id="197" value="79" />
      <Q id="1188894429" qualifier_id="194" value="10233" />
    </Event>
    <Event id="1157700789" event_id="2" type_id="32" period_id="1" min="0" sec="0" team_id="1" outcome="1" x="0.0" y="0.0" timestamp="2013-09-17T19:45:40.853" last_modified="2013-09-17T19:45:41">
      <Q id="934036070" qualifier_id="127" value="Left to Right" />
    </Event>
    <Event id="1702860990" event_id="3" type_id="32" period_id="1" min="0" sec="0" team_id="164" outcome="1" x="0.0" y="0.0" timestamp="2013-09-17T19:45:40.853" last_modified="2013-09-17T19:45:40">
      <Q id="1911763730" qualifier_id="127" value="Right to Left" />
    </Event>
  </Game>
</Games>',
  asText = TRUE
)

# Collect the attributes of every <Game> node: a list holding one named
# character vector per matched node.
game_attrs <- XML::xpathApply(doc, "//Game", XML::xmlAttrs)

# Union of the attribute names over all nodes, in order of first appearance,
# so that nodes with differing attribute sets still line up column-wise.
attr_names <- unique(unlist(lapply(game_attrs, names)))

# One row per node. Flattening everything with unlist() would mix the
# attributes of different nodes into a single vector with duplicated names,
# which breaks as soon as the XPath matches more than one node.
game_rows <- lapply(game_attrs, function(node_attrs) {
  if (is.null(node_attrs)) {
    # A node without any attributes still gets a (fully missing) row.
    return(rep(NA_character_, length(attr_names)))
  }
  # Indexing by name yields NA for attributes this node does not carry.
  unname(node_attrs[attr_names])
})

# Assemble the rows into a character matrix; as.character() keeps the call
# valid when no node matched and unlist() returns NULL.
game_matrix <- matrix(
  as.character(unlist(game_rows)),
  nrow = length(game_rows),
  ncol = length(attr_names),
  byrow = TRUE,
  dimnames = list(NULL, attr_names)
)

# Wide tibble: one row per <Game>, one character column per attribute.
games <- tibble::as_tibble(
  as.data.frame(game_matrix, stringsAsFactors = FALSE)
)

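If you need event data, start from XML::xpathApply(doc, "//Event", XML::xmlAttrs) instead. Because that query matches several nodes, keep each node's attribute vector as its own row (bind the vectors row-wise) rather than flattening them all into one vector with unlist(); otherwise the attributes of different events get mixed together.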

Alberson Miranda
  • 1,248
  • 7
  • 25