I'm trying to parse the following xml files and put the atbat num attribute value with each of the pitch records in the data.frame.
<atbat num="1" b="1" s="3" o="1" start_tfs="231201" start_tfs_zulu="2014-06- 10T23:12:01Z" batter="571697" stand="L" b_height="5-10" pitcher="493137" p_throws="R" des="Scooter Gennett called out on strikes. " des_es="Scooter Gennett se poncha sin tirarle. " event="Strikeout">
<pitch des="Ball" des_es="Bola mala" id="3" type="B" tfs="231236" tfs_zulu="2014-06-10T23:12:36Z" x="148.50" y="111.39" sv_id="140610_191405" start_speed="88.9" end_speed="81.1" sz_top="3.49" sz_bot="1.77" pfx_x="-7.11" pfx_z="6.3" px="-1.447" pz="3.935" x0="-2.51" y0="50.0" z0="5.896" vx0="5.084" vy0="-130.343" vz0="-0.852" ax="-12.102" ay="29.981" az="-21.38" break_y="23.7" break_angle="26.9" break_length="5.8" pitch_type="FT" type_confidence="2.000" zone="11" nasty="35" spin_dir="228.269" spin_rate="1804.956" cc="" mt=""/>
<pitch des="Called Strike" des_es="Strike cantado" id="4" type="S" tfs="231250" tfs_zulu="2014-06-10T23:12:50Z" x="85.84" y="158.88" sv_id="140610_191419" start_speed="90.7" end_speed="83.4" sz_top="3.31" sz_bot="1.44" pfx_x="-6.26" pfx_z="7.63" px="0.402" pz="1.822" x0="-2.405" y0="50.0" z0="5.533" vx0="9.46" vy0="-132.494" vz0="-6.149" ax="-11.109" ay="28.773" az="-18.554" break_y="23.8" break_angle="24.7" break_length="5.0" pitch_type="FF" type_confidence=".874" zone="9" nasty="45" spin_dir="219.201" spin_rate="1924.531" cc="" mt=""/>
<pitch des="Called Strike" des_es="Strike cantado" id="5" type="S" tfs="231308" tfs_zulu="2014-06-10T23:13:08Z" x="105.58" y="166.65" sv_id="140610_191437" start_speed="80.4" end_speed="74.1" sz_top="3.2" sz_bot="1.41" pfx_x="9.8" pfx_z="2.15" px="-0.242" pz="1.644" x0="-2.525" y0="50.0" z0="5.977" vx0="2.346" vy0="-117.836" vz0="-3.748" ax="13.625" ay="24.687" az="-29.117" break_y="23.8" break_angle="-25.1" break_length="9.6" pitch_type="SL" type_confidence="2.000" zone="7" nasty="34" spin_dir="102.646" spin_rate="1719.198" cc="" mt=""/>
<pitch des="Foul" des_es="Foul" id="6" type="S" tfs="231325" tfs_zulu="2014-06-10T23:13:25Z" x="125.32" y="132.97" sv_id="140610_191454" start_speed="91.2" end_speed="83.4" sz_top="3.13" sz_bot="1.44" pfx_x="-4.45" pfx_z="7.42" px="-0.822" pz="2.988" x0="-2.524" y0="50.0" z0="5.617" vx0="5.993" vy0="-133.61" vz0="-3.337" ax="-7.988" ay="30.874" az="-18.794" break_y="23.7" break_angle="18.8" break_length="4.7" pitch_type="FF" type_confidence="2.000" zone="11" nasty="59" spin_dir="210.836" spin_rate="1692.064" cc="" mt=""/>
<pitch des="Called Strike" des_es="Strike cantado" id="7" type="S" tfs="231351" tfs_zulu="2014-06-10T23:13:51Z" x="123.61" y="161.47" sv_id="140610_191520" start_speed="89.7" end_speed="83.2" sz_top="3.27" sz_bot="1.52" pfx_x="0.14" pfx_z="7.56" px="-0.796" pz="1.706" x0="-2.612" y0="50.0" z0="5.657" vx0="4.67" vy0="-131.367" vz0="-6.647" ax="0.25" ay="26.547" az="-18.826" break_y="23.8" break_angle="-3.2" break_length="4.7" pitch_type="FC" type_confidence="2.000" zone="13" nasty="62" spin_dir="178.929" spin_rate="1474.376" cc="" mt=""/>
</atbat>
Below is the r script that I'm using to parse the xml to a data.frame and it works perfectly. I have commented out the atbat num line because all that does is give me a row number mismatch.
library(XML)
library(dplyr)
library(plyr)
library(RMySQL)
require(XML)
baseURL <- 'http://gd2.mlb.com/components/game/'
testURL <- 'http://gd2.mlb.com/components/game/mlb/year_2014/month_06/day_10/gid_2014_06_10_milmlb_nynmlb_1/game_events.xml'
inningallURL <- 'http://gd2.mlb.com/components/game/mlb/year_2014/month_06/day_10/gid_2014_06_10_milmlb_nynmlb_1/inning/inning_all.xml'
data <- xmlTreeParse(testURL, useInternalNodes = TRUE)
ia_data <- xmlTreeParse(inningallURL, useInternalNodes = TRUE)
inningall_df <- data.frame (
game_id = substr(inningallURL, 66, 95),
#num = xpathSApply(ia_data, '//*/atbat', xmlGetAttr,'num'),
des = xpathSApply(ia_data, '//*/pitch', xmlGetAttr,'des'),
des_es = xpathSApply(ia_data, '//*/pitch', xmlGetAttr,'des_es'),
id = xpathSApply(ia_data, '//*/pitch', xmlGetAttr,'id'),
type = xpathSApply(ia_data, '//*/pitch', xmlGetAttr,'type'),
tfs = xpathSApply(ia_data, '//*/pitch', xmlGetAttr,'tfs'),
tfs_zulu = xpathSApply(ia_data, '//*/pitch', xmlGetAttr,'tfs_zulu'),
x = xpathSApply(ia_data, '//*/pitch', xmlGetAttr,'x'),
y = xpathSApply(ia_data, '//*/pitch', xmlGetAttr,'y'),
sv_id = xpathSApply(ia_data, '//*/pitch', xmlGetAttr,'sv_id'),
start_speed = xpathSApply(ia_data, '//*/pitch', xmlGetAttr,'start_speed'),
end_speed = xpathSApply(ia_data, '//*/pitch', xmlGetAttr,'end_speed'),
sz_top = xpathSApply(ia_data, '//*/pitch', xmlGetAttr,'sz_top'),
sz_bot = xpathSApply(ia_data, '//*/pitch', xmlGetAttr,'sz_bot'),
pfx_x = xpathSApply(ia_data, '//*/pitch', xmlGetAttr,'pfx_x'),
pfx_z = xpathSApply(ia_data, '//*/pitch', xmlGetAttr,'pfx_z'),
px = xpathSApply(ia_data, '//*/pitch', xmlGetAttr,'px'),
pz = xpathSApply(ia_data, '//*/pitch', xmlGetAttr,'pz'),
x0 = xpathSApply(ia_data, '//*/pitch', xmlGetAttr,'x0'),
y0 = xpathSApply(ia_data, '//*/pitch', xmlGetAttr,'y0'),
z0 = xpathSApply(ia_data, '//*/pitch', xmlGetAttr,'z0'),
vx0 = xpathSApply(ia_data, '//*/pitch', xmlGetAttr,'vx0'),
vy0 = xpathSApply(ia_data, '//*/pitch', xmlGetAttr,'vy0'),
vz0 = xpathSApply(ia_data, '//*/pitch', xmlGetAttr,'vz0'),
ax = xpathSApply(ia_data, '//*/pitch', xmlGetAttr,'ax'),
ay = xpathSApply(ia_data, '//*/pitch', xmlGetAttr,'ay'),
az = xpathSApply(ia_data, '//*/pitch', xmlGetAttr,'az'),
break_y = xpathSApply(ia_data, '//*/pitch', xmlGetAttr,'break_y'),
break_angle = xpathSApply(ia_data, '//*/pitch', xmlGetAttr,'break_angle'),
break_length = xpathSApply(ia_data, '//*/pitch', xmlGetAttr,'break_length'),
pitch_type = xpathSApply(ia_data, '//*/pitch', xmlGetAttr,'pitch_type'),
type_confidence = xpathSApply(ia_data, '//*/pitch', xmlGetAttr,'type_confidence'),
zone = xpathSApply(ia_data, '//*/pitch', xmlGetAttr,'zone'),
nasty = xpathSApply(ia_data, '//*/pitch', xmlGetAttr,'nasty'),
spin_dir = xpathSApply(ia_data, '//*/pitch', xmlGetAttr,'spin_dir'),
spin_rate = xpathSApply(ia_data, '//*/pitch', xmlGetAttr,'spin_rate'),
cc = xpathSApply(ia_data, '//*/pitch', xmlGetAttr,'cc'),
mt = xpathSApply(ia_data, '//*/pitch', xmlGetAttr,'mt')
)
I have searched and found a couple of examples, one here that looked like it should work, but all I did was successfully break the code that was working for the data.frame. HA! What I would like to end up with is a data frame with the at bat value duplicated for each of the children below it like below. I left off most of the columns to save your eyes.
game_id atbatnum des des_es id type
gid_2014_06_10_milmlb_nynmlb_1 1 Ball Bola mala 3 B
gid_2014_06_10_milmlb_nynmlb_1 1 Called Strike Strike cantado 4 S
gid_2014_06_10_milmlb_nynmlb_1 1 Called Strike Strike cantado 5 S
gid_2014_06_10_milmlb_nynmlb_1 1 Foul Foul 6 S
gid_2014_06_10_milmlb_nynmlb_1 1 Called Strike Strike cantado 7 S
gid_2014_06_10_milmlb_nynmlb_1 2 Called Strike Strike cantado 11 S
gid_2014_06_10_milmlb_nynmlb_1 2 Ball Bola mala 12 B
gid_2014_06_10_milmlb_nynmlb_1 2 Ball Bola mala 13 B
gid_2014_06_10_milmlb_nynmlb_1 2 Ball Bola mala 14 B
gid_2014_06_10_milmlb_nynmlb_1 2 Ball Bola mala 15 B