I've been trying to load up this function that will take data from three different data frames with some baseball data and then produce a fourth frame with averages of that data that is weighted depending on if the player appears in all 3 frames (played all 3 years) or less, and so on.
The data frames are linked by the fact that each has a playerid which is what I'm using to check how many and which years they specifically appear in. I also have a fourth data frame that has a master list of the playerids and corresponding names that I use to establish the list of IDs to check.
Below is what my data frames with the stats look like of which I have 3 spanning back to 2016
> head(batters_18)
player_id player_name launch_speed launch_angle
1 592450 Aaron Judge 94.7 12.4
2 408234 Miguel Cabrera 94.4 7.3
3 443558 Nelson Cruz 93.9 12.8
4 608336 Joey Gallo 93.8 21.5
5 519317 Giancarlo Stanton 93.7 11.6
6 623520 David Bote 93.5 3.5
this is my actual function code
# to combine batting stats from the 3 seasons in the appropriate categories
# but with a weighting of 45% in 2018, 35% in 2017, and 20% in 2016 for sake
# of favoring recent form and performance, but in each seasons all players have
# at least 50 events
combine.batting.stats <- function(batters_16, batters_17, batters_18, playerID_map){
#using the stats for each year along with the player ID map
b18 = batters_18
b17 = batters_17
b16 = batters_16
playerID_map = playerID_map
playerid = playerID_map$MLBID
# so first my weights with the scenarios being
# exists in all 3 years, exits in exactly two, and finally exists exactly one
# the check for whether something is in a data frame is as below
# SOMETHING %in% DATAFRAME$COLUMN
# this should be used to code three different scenarios where I weight
# the value of season stats depending on how may seasons they qualify in
if(playerid %in% b18$player_id = TRUE & playerid %in% b17$player_id = TRUE
& playerid %in% b16$player_id = TRUE) {
#calculation for case of 3 year player
# 18 is 45%, 17 is 35%, and 16 is 20%
average_launch_speed = (((b18$launch_speed * 0.45) + (b17$launch_speed * 0.35)
+ (b16$launch_speed * 0.2)) / 3)
average_launch_angle = (((b18$launch_angle * 0.45) + (b17$launch_angle * 0.35)
+ (b16$launch_angle * 0.2)) / 3)
}
if(playerid %in% b18$player_id = TRUE & playerid %in% b17$player_id = TRUE
& playerid %in% b16$player_id = FALSE) {
#calculation for player in b18 and b17 but not b16....should be extended to
#other 2 year player situations that is b17 and b16 but not b18 as well as
#b18 and b16 but not b17 (which I would like to skew even more to b18 stats)
#than players who have played the most recent 2 years to reflect potential
#post injury change
average_launch_speed = (((b18$launch_speed * 0.6) + (b17$launch_speed * 0.4))
/ 2)
average_launch_angle = (((b18$launch_angle * 0.6) + (b17$launch_angle * 0.4))
/ 2)
}
if(playerid %in% b18$player_id = TRUE & playerid %in% b17$player_id = FALSE
& playerid %in% b16$player_id = TRUE) {
#in b18 and b16 but not b17
average_launch_speed = (((b18$launch_speed * 0.6) + (b16$launch_speed * 0.4))
/ 2)
average_launch_angle = (((b18$launch_angle * 0.6) + (b16$launch_angle * 0.4))
/ 2)
}
if(playerid %in% b18$player_id = FALSE & playerid %in% b17$player_id = TRUE
& playerid %in% b16$player_id = TRUE) {
#in b17 and b16 but not b18
average_launch_speed = (((b17$launch_speed * 0.6) + (b16$launch_speed * 0.4))
/ 2)
average_launch_angle = (((b17$launch_angle * 0.6) + (b16$launch_angle * 0.4))
/ 2)
}
# next are those in only one single frame/year
# this one is only in 18
if(playerid %in% b18$player_id = TRUE & playerid %in% b17$player_id = FALSE
& playerid %in% b16$player_id = FALSE){
average_launch_speed = b18$launch_speed
average_launch_angle = b18$launch_angle
}
# only in b17
if(playerid %in% b18$player_id = FALSE & playerid %in% b17$player_id = TRUE
& playerid %in% b16$player_id = FALSE){
average_launch_speed = b17$launch_speed
average_launch_angle = b17$launch_angle
}
#only in b16
if(playerid %in% b18$player_id = FALSE & playerid %in% b17$player_id = FALSE
& playerid %in% b16$player_id = TRUE){
average_launch_speed = b16$launch_speed
average_launch_angle = b16$launch_angle
}
# returning a data frame from the function
combined_stats = data.frame(playerid, average_launch_speed, average_launch_angle)
}
and then this is the error I get in the console which I just don't understand the issue with my function
> # to combine batting stats from the 3 seasons in the appropriate categories
> # but with a weighting of 45% in 2018, 35% in 2017, and 20% in 2016 for sake
> # of favoring recent form and performance, but in each seasons all players have
> # at least 50 events
>
> combine.batting.stats <- function(batters_16, batters_17, batters_18, playerID_map){
+
+ #using the stats for each year along with the player ID map
+
+ b18 = batters_18
+ b17 = batters_17
+ b16 = batters_16
+ playerID_map = playerID_map
+ playerid = playerID_map$MLBID
+
+ # so first my weights with the scenarios being
+ # exists in all 3 years, exits in exactly two, and finally exists exactly one
+
+
+
+ # the check for whether something is in a data frame is as below
+ # SOMETHING %in% DATAFRAME$COLUMN
+ # this should be used to code three different scenarios where I weight
+ # the value of season stats depending on how may seasons they qualify in
+
+ if(playerid %in% b18$player_id = TRUE & playerid %in% b17$player_id = TRUE
Error: unexpected '=' in:
"
if(playerid %in% b18$player_id ="
> & playerid %in% b16$player_id = TRUE) {
Error: unexpected '&' in " &"
>
> #calculation for case of 3 year player
> # 18 is 45%, 17 is 35%, and 16 is 20%
>
> average_launch_speed = (((b18$launch_speed * 0.45) + (b17$launch_speed * 0.35)
+ + (b16$launch_speed * 0.2)) / 3)
Error: object 'b18' not found
>
> average_launch_angle = (((b18$launch_angle * 0.45) + (b17$launch_angle * 0.35)
+ + (b16$launch_angle * 0.2)) / 3)
Error: object 'b18' not found
>
> }
Error: unexpected '}' in " }"
>
> if(playerid %in% b18$player_id = TRUE & playerid %in% b17$player_id = TRUE
Error: unexpected '=' in " if(playerid %in% b18$player_id ="
> & playerid %in% b16$player_id = FALSE) {
Error: unexpected '&' in " &"
>
> #calculation for player in b18 and b17 but not b16....should be extended to
> #other 2 year player situations that is b17 and b16 but not b18 as well as
> #b18 and b16 but not b17 (which I would like to skew even more to b18 stats)
> #than players who have played the most recent 2 years to reflect potential
> #post injury change
>
> }
Error: unexpected '}' in " }"
>
>
> data.frame(check.rows = FALSE)
data frame with 0 columns and 0 rows
>
> }
Error: unexpected '}' in "}"
> }
Error: unexpected '}' in "}"
> # to combine batting stats from the 3 seasons in the appropriate categories
> # but with a weighting of 45% in 2018, 35% in 2017, and 20% in 2016 for sake
> # of favoring recent form and performance, but in each seasons all players have
> # at least 50 events
>
> combine.batting.stats <- function(batters_16, batters_17, batters_18, playerID_map){
+
+ #using the stats for each year along with the player ID map
+
+ b18 = batters_18
+ b17 = batters_17
+ b16 = batters_16
+ playerID_map = playerID_map
+ playerid = playerID_map$MLBID
+
+ # so first my weights with the scenarios being
+ # exists in all 3 years, exits in exactly two, and finally exists exactly one
+
+
+
+ # the check for whether something is in a data frame is as below
+ # SOMETHING %in% DATAFRAME$COLUMN
+ # this should be used to code three different scenarios where I weight
+ # the value of season stats depending on how may seasons they qualify in
+
+ if(playerid %in% b18$player_id = TRUE & playerid %in% b17$player_id = TRUE
Error: unexpected '=' in:
"
if(playerid %in% b18$player_id ="
> & playerid %in% b16$player_id = TRUE) {
Error: unexpected '&' in " &"
>
> #calculation for case of 3 year player
> # 18 is 45%, 17 is 35%, and 16 is 20%
>
> average_launch_speed = (((b18$launch_speed * 0.45) + (b17$launch_speed * 0.35)
+ + (b16$launch_speed * 0.2)) / 3)
Error: object 'b18' not found
>
> average_launch_angle = (((b18$launch_angle * 0.45) + (b17$launch_angle * 0.35)
+ + (b16$launch_angle * 0.2)) / 3)
Error: object 'b18' not found
>
> }
Error: unexpected '}' in " }"
>
> if(playerid %in% b18$player_id = TRUE & playerid %in% b17$player_id = TRUE
Error: unexpected '=' in " if(playerid %in% b18$player_id ="
> & playerid %in% b16$player_id = FALSE) {
Error: unexpected '&' in " &"
>
> #calculation for player in b18 and b17 but not b16....should be extended to
> #other 2 year player situations that is b17 and b16 but not b18 as well as
> #b18 and b16 but not b17 (which I would like to skew even more to b18 stats)
> #than players who have played the most recent 2 years to reflect potential
> #post injury change
>
> average_launch_speed = (((b18$launch_speed * 0.6) + (b17$launch_speed * 0.4))
+ / 2)
Error: object 'b18' not found
>
> average_launch_angle = (((b18$launch_angle * 0.6) + (b17$launch_angle * 0.4))
+ / 2)
Error: object 'b18' not found
>
> }
Error: unexpected '}' in " }"
>
> if(playerid %in% b18$player_id = TRUE & playerid %in% b17$player_id = FALSE
Error: unexpected '=' in " if(playerid %in% b18$player_id ="
> & playerid %in% b16$player_id = TRUE) {
Error: unexpected '&' in " &"
>
> #in b18 and b16 but not b17
>
>
> average_launch_speed = (((b18$launch_speed * 0.6) + (b16$launch_speed * 0.4))
+ / 2)
Error: object 'b18' not found
>
> average_launch_angle = (((b18$launch_angle * 0.6) + (b16$launch_angle * 0.4))
+ / 2)
Error: object 'b18' not found
>
> }
Error: unexpected '}' in " }"
>
> if(playerid %in% b18$player_id = FALSE & playerid %in% b17$player_id = TRUE
Error: unexpected '=' in " if(playerid %in% b18$player_id ="
> & playerid %in% b16$player_id = TRUE) {
Error: unexpected '&' in " &"
>
> #in b17 and b16 but not b18
>
>
> average_launch_speed = (((b17$launch_speed * 0.6) + (b16$launch_speed * 0.4))
+ / 2)
Error: object 'b17' not found
>
> average_launch_angle = (((b17$launch_angle * 0.6) + (b16$launch_angle * 0.4))
+ / 2)
Error: object 'b17' not found
>
> }
Error: unexpected '}' in " }"
>
> # next are those in only one single frame/year
> # this one is only in 18
>
> if(playerid %in% b18$player_id = TRUE & playerid %in% b17$player_id = FALSE
Error: unexpected '=' in " if(playerid %in% b18$player_id ="
> & playerid %in% b16$player_id = FALSE){
Error: unexpected '&' in " &"
>
> average_launch_speed = b18$launch_speed
Error: object 'b18' not found
>
> average_launch_angle = b18$launch_angle
Error: object 'b18' not found
>
> }
Error: unexpected '}' in " }"
>
> # only in b17
>
> if(playerid %in% b18$player_id = FALSE & playerid %in% b17$player_id = TRUE
Error: unexpected '=' in " if(playerid %in% b18$player_id ="
> & playerid %in% b16$player_id = FALSE){
Error: unexpected '&' in " &"
>
> average_launch_speed = b17$launch_speed
Error: object 'b17' not found
>
> average_launch_angle = b17$launch_angle
Error: object 'b17' not found
>
> }
Error: unexpected '}' in " }"
>
> #only in b16
>
> if(playerid %in% b18$player_id = FALSE & playerid %in% b17$player_id = FALSE
Error: unexpected '=' in " if(playerid %in% b18$player_id ="
> & playerid %in% b16$player_id = TRUE){
Error: unexpected '&' in " &"
>
> average_launch_speed = b16$launch_speed
Error: object 'b16' not found
>
> average_launch_angle = b16$launch_angle
Error: object 'b16' not found
>
> }
Error: unexpected '}' in " }"
>
> # returning a data frame from the function
> combined_stats = data.frame(playerid, average_launch_speed, average_launch_angle)
Error in data.frame(playerid, average_launch_speed, average_launch_angle) :
object 'average_launch_speed' not found
>
> }
Error: unexpected '}' in "}"