I am doing a baseball-related project using utility players as the subject. My definition of a utility player for this is a player that either plays an infield and outfield position, plays 3 infield position, or plays catcher and an outfield/infield (not including 1B) position.
I would like to try to filter my dataset out by players who fit that definition, however, I don't know how to do this as all of the players are separated by their position. Here's an example of the dataset.
18 Addison Russell CHC 2B 381.2 NA NA NA 0 NA 0 1 NA 1 110 88 0.800 23 NA NA NA 0.0
19 Addison Russell CHC SS 143.2 NA NA NA 0 NA 0 1 NA 1 37 24 0.649 17 NA NA NA -0.3
20 Adeiny Hechavarria - - - 2B 204.0 NA NA NA 1 NA 0 -2 NA -1 43 33 0.767 7 NA NA NA 0.1
21 Adeiny Hechavarria - - - 3B 71.0 NA NA NA 0 NA 0 0 NA 0 12 9 0.750 0 NA NA NA 0.0
22 Adeiny Hechavarria - - - SS 182.0 NA NA NA 0 NA 1 0 NA 1 53 39 0.736 10 NA NA NA 0.6
So for example of what I would like to do; in my definition of a utility player, Addison Russell doesn't count because he only plays 2 infield positions, however, Adeiny Hechavarria does as he plays 3. So how can I filter out the data set to have only players that match my definition? I've never really done anything like this, so I don't really even know where to start.
> dput(head(utility_plyr_df))
structure(list(ï..Name = c("A.J. Pollock", "A.J. Pollock", "Aaron Altherr",
"Aaron Hicks", "Aaron Judge", "Abraham Almonte"), Team = c("LAD",
"LAD", "- - -", "NYY", "NYY", "ARI"), Pos = c("CF", "LF", "CF",
"CF", "RF", "RF"), Inn = c(510, 145, 52, 499.1, 775.1, 54), rSZ = c(NA_integer_,
NA_integer_, NA_integer_, NA_integer_, NA_integer_, NA_integer_
), rCERA = c(NA_integer_, NA_integer_, NA_integer_, NA_integer_,
NA_integer_, NA_integer_), rSB = c(NA_integer_, NA_integer_,
NA_integer_, NA_integer_, NA_integer_, NA_integer_), rGDP = c(NA_integer_,
NA_integer_, NA_integer_, NA_integer_, NA_integer_, NA_integer_
), rARM = c(-1L, -1L, 0L, -1L, 3L, 1L), rGFP = c(0L, -1L, 0L,
0L, 3L, 0L), rPM = c(-8L, 1L, 0L, 0L, 14L, 2L), rTS = c(NA, NA,
NA, NA, NA, NA), DRS = c(-9L, -1L, 0L, -1L, 20L, 3L), BIZ = c(75L,
17L, 14L, 93L, 112L, 10L), Plays = c(67L, 14L, 13L, 83L, 104L,
10L), RZR = c(0.893, 0.824, 0.929, 0.892, 0.929, 1), OOZ = c(24L,
11L, 5L, 32L, 73L, 5L), FSR = c(NA, NA, NA, NA, NA, NA), FRM = c(NA_real_,
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_), ARM = c(-1.9,
-1.4, -0.2, -1.1, 4, 0.4), DPR = c(NA_real_, NA_real_, NA_real_,
NA_real_, NA_real_, NA_real_), RngR = c(-4.6, 0.3, -0.2, 1.2,
7.6, 1.1), ErrR = c(0.4, -0.1, 0.1, 0.5, 1.1, -0.2), UZR = c(-6.1,
-1.1, -0.3, 0.6, 12.7, 1.4), UZR.150 = c(-20.2, -13.3, -4.9,
0.8, 24.2, 53.2), Def = c(-5.2, -1.9, -0.2, 1.4, 8.7, 1.1), playerid = c(9256L,
9256L, 11270L, 5297L, 15640L, 5486L), outfld = c(375, 375, 375,
375, 375, 375), infld = c(487, 487, 487, 487, 487, 487), pos_1b = c(97,
97, 97, 97, 97, 97), pos_cr = c(93, 93, 93, 93, 93, 93)), row.names = c(NA,
6L), class = "data.frame")
New attempted code @Parfait
uzr.2019 <- within(uzr.2019, {
pos_cr <- ave(ifelse(Pos == "C", 1, 0), 'ï..Name', FUN=sum)
pos_1b <- ave(ifelse(Pos == "1B", 1, 0), 'ï..Name', FUN=sum)
infld <- ave(ifelse(Pos %in% c("C", "P", "1B", "2B", "3B", "SS") , 1, 0), 'ï..Name', FUN=sum)
outfld <- ave(ifelse(Pos %in% c("LF", "CF", "RF"), 1, 0), 'ï..Name', FUN=sum)
})
# SUBSET DATA FRAME BY OR CONDITIONS
utility_plyr_df <- subset(uzr.2019,
(infld >= 1 & oufld >= 1) | # plays either an infield and outfield position
(infld == 3) | # plays 3 infield positions
(pos_cr == 1 & infld >= 1 &
oufld >= 1 & pos_1b == 0) # plays catcher and an outfield/infield (not including 1B)
)