Please forgive me if the answer to this is obvious, I am very new to R.
I am trying to aggregate this set of data but one of the columns keeps returning NA.
> dput(head(DrivingDistance,50))
structure(list(player_name = c("Brian Stuard", "Billy Hurley III",
"Greg Chalmers", "William McGirt", "Russell Knox", "Cody Gribble",
"Tony Finau", "Dustin Johnson", "Justin Thomas", "Vaughn Taylor",
"Jason Day", "Brendan Steele", "Si Woo Kim", "Brandt Snedeker",
"Jason Dufner", "Ryan Moore", "Rod Pampling", "Fabián Gómez",
"Jimmy Walker", "Jim Herman", "Pat Perez", "Daniel Berger", "Patrick Reed",
"James Hahn", "Mackenzie Hughes", "Branden Grace", "Jordan Spieth",
"Hideki Matsuyama", "Charley Hoffman", "Jhonattan Vegas", "Aaron Baddeley",
"Bubba Watson", "J.T. Poston", "Shawn Stefani", "Stewart Cink",
"William McGirt", "Fabián Gómez", "David Lingmerth", "Henrik Norlander",
"Tim Wilkinson", "Gonzalo Fernandez-Castaño", "Daniel Summerhays",
"Webb Simpson", "Peter Malnati", "Jason Bohn", "Vaughn Taylor",
"Daniel Berger", "Zac Blair", "Ryan Brehm", "Chez Reavie"), date = structure(c(17174,
17174, 17174, 17174, 17174, 17174, 17174, 17174, 17174, 17174,
17174, 17174, 17174, 17174, 17174, 17174, 17174, 17174, 17174,
17174, 17174, 17174, 17174, 17174, 17174, 17174, 17174, 17174,
17174, 17174, 17174, 17174, 17181, 17181, 17181, 17181, 17181,
17181, 17181, 17181, 17181, 17181, 17181, 17181, 17181, 17181,
17181, 17181, 17181, 17181), class = "Date"), DrDis = c("263.1",
"265.4", "266.5", "267.9", "269.3", "270.8", "304.8", "319.6",
"301.6", "269.6", "300.4", "288.5", "271.6", "271.9", "272.0",
"272.6", "275.1", "275.4", "275.6", "276.6", "278.4", "278.5",
"279.3", "279.8", "280.4", "283.3", "283.4", "283.6", "286.0",
"286.3", "287.9", "300.3", "304.3", "304.1", "304.0", "303.9",
"303.5", "303.3", "304.5", "303.0", "301.6", "301.6", "299.6",
"298.9", "297.6", "296.3", "302.6", "295.1", "305.3", "305.5"
)), row.names = c(NA, -50L), class = c("tbl_df", "tbl", "data.frame"
))
Here is the return after trying to aggregate.
player_name date DrDis
<chr> <date> <dbl>
1 A.J. McInerney 2018-02-21 NA
2 Aaron Baddeley 2018-08-01 NA
3 Aaron Rai 2019-06-06 NA
4 Aaron Wise 2018-10-28 NA
5 Abraham Ancer 2019-02-13 NA
6 Adam Bland 2018-03-04 NA
7 Adam Hadwin 2018-08-11 NA
8 Adam Long 2019-09-22 NA
9 Adam Schenk 2019-03-03 NA
10 Adam Scott 2018-08-12 NA
# ... with 551 more rows
There were 50 or more warnings (use warnings() to see the first 50)
Here is the code I am using to create Driving Distance and then aggregate this set of data.
DrivingDistance <-CurrentData[CurrentData$statistic == 'Driving Distance' & CurrentData$variable == 'AVG.',] %>%
select(player_name, date, value) %>%
dplyr::rename(DrDis = value)
DrivingDistance %>%
group_by(player_name) %>%
summarize_all(mean, na.rm = TRUE)