A combination of `group_by`, to do the analysis per company (or per `cyl` in this example), and `do`, to find the first instance in which years (or `mpg`) is not `NA`, should work:
# Example data: a tibble with 32 rows and 12 columns (model, mpg, cyl, disp,
# hp, drat, wt, qsec, vs, am, gear, carb). Rows are ordered by `cyl`
# (4, then 6, then 8), and `mpg` contains NA values, including a run of
# leading NAs at the start of every `cyl` group plus one interior NA.
# NOTE(review): looks like dput() output of an mtcars-derived table with
# some `mpg` values blanked out - confirm against the original question.
df <- structure(list(model = c("Datsun 710", "Merc 240D", "Merc 230",
"Fiat 128", "Honda Civic", "Toyota Corolla", "Toyota Corona",
"Fiat X1-9", "Porsche 914-2", "Lotus Europa", "Volvo 142E", "Mazda RX4",
"Mazda RX4 Wag", "Hornet 4 Drive", "Valiant", "Merc 280", "Merc 280C",
"Ferrari Dino", "Hornet Sportabout", "Duster 360", "Merc 450SE",
"Merc 450SL", "Merc 450SLC", "Cadillac Fleetwood", "Lincoln Continental",
"Chrysler Imperial", "Dodge Challenger", "AMC Javelin", "Camaro Z28",
"Pontiac Firebird", "Ford Pantera L", "Maserati Bora"), mpg = c(NA,
NA, NA, NA, NA, 33.9, 21.5, NA, 26, 30.4, 21.4, NA, NA, NA, 18.1,
19.2, 17.8, 19.7, NA, NA, NA, NA, 15.2, 10.4, 10.4, 14.7, 15.5,
15.2, 13.3, 19.2, 15.8, 15), cyl = c(4, 4, 4, 4, 4, 4, 4, 4,
4, 4, 4, 6, 6, 6, 6, 6, 6, 6, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
8, 8, 8), disp = c(108, 146.7, 140.8, 78.7, 75.7, 71.1, 120.1,
79, 120.3, 95.1, 121, 160, 160, 258, 225, 167.6, 167.6, 145,
360, 360, 275.8, 275.8, 275.8, 472, 460, 440, 318, 304, 350,
400, 351, 301), hp = c(93, 62, 95, 66, 52, 65, 97, 66, 91, 113,
109, 110, 110, 110, 105, 123, 123, 175, 175, 245, 180, 180, 180,
205, 215, 230, 150, 150, 245, 175, 264, 335), drat = c(3.85,
3.69, 3.92, 4.08, 4.93, 4.22, 3.7, 4.08, 4.43, 3.77, 4.11, 3.9,
3.9, 3.08, 2.76, 3.92, 3.92, 3.62, 3.15, 3.21, 3.07, 3.07, 3.07,
2.93, 3, 3.23, 2.76, 3.15, 3.73, 3.08, 4.22, 3.54), wt = c(2.32,
3.19, 3.15, 2.2, 1.615, 1.835, 2.465, 1.935, 2.14, 1.513, 2.78,
2.62, 2.875, 3.215, 3.46, 3.44, 3.44, 2.77, 3.44, 3.57, 4.07,
3.73, 3.78, 5.25, 5.424, 5.345, 3.52, 3.435, 3.84, 3.845, 3.17,
3.57), qsec = c(18.61, 20, 22.9, 19.47, 18.52, 19.9, 20.01, 18.9,
16.7, 16.9, 18.6, 16.46, 17.02, 19.44, 20.22, 18.3, 18.9, 15.5,
17.02, 15.84, 17.4, 17.6, 18, 17.98, 17.82, 17.42, 16.87, 17.3,
15.41, 17.05, 14.5, 14.6), vs = c(1, 1, 1, 1, 1, 1, 1, 1, 0,
1, 1, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0), am = c(1, 0, 0, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 0, 0, 0,
0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1), gear = c(4,
4, 4, 4, 4, 4, 3, 4, 5, 5, 4, 4, 4, 3, 3, 4, 4, 5, 3, 3, 3, 3,
3, 3, 3, 3, 3, 3, 3, 3, 5, 5), carb = c(1, 2, 2, 1, 2, 1, 1,
1, 2, 2, 2, 4, 4, 1, 1, 4, 4, 6, 2, 4, 3, 3, 3, 4, 4, 4, 2, 2,
4, 2, 4, 8)), row.names = c(NA, -32L), class = c("tbl_df", "tbl",
"data.frame"))
# Per `cyl` group, drop the leading rows in which `mpg` is NA and keep
# everything from the first non-NA `mpg` onward (later NAs are retained).
#
# The running count of non-NA values is 0 only before the first observation,
# so `cumsum(...) > 0` selects exactly the rows from that observation on.
# Unlike `first(which(...)):nrow(.)`, this also copes with a group whose
# `mpg` is entirely NA: it yields zero rows for that group instead of
# failing on an `NA:nrow(.)` sequence.
#
# NOTE: `do()` is superseded in current dplyr; an equivalent without it is
#   df %>% group_by(cyl) %>% filter(cumsum(!is.na(mpg)) > 0)
df %>%
  group_by(cyl) %>%
  do(
    .[cumsum(!is.na(.$mpg)) > 0, ]
  )