Base R solution:
# Function to deteremine data.frame vector indices that are numeric:
# resolve_num_cols => function()
resolve_num_cols <- function(df){
# Store a vector of numeric column names:
# num_cols => logical vector
num_cols <- which(
vapply(
df,
is.numeric,
logical(1),
USE.NAMES = FALSE
)
)
# Explicitly define the returned object: logical vector => env
return(num_cols)
}
# Function to impute median values for each numeric vector in data.frame
# impute_median_vals_in_df => function()
impute_median_vals_in_df <- function(df, num_col_idx){
# Replace the na. values in each numeric vector: df => data.frame
df[,num_col_idx] <- lapply(
num_col_idx,
function(col_idx){
df[,col_idx] <- ifelse(
is.na(df[,col_idx]),
median(df[,col_idx], na.rm = TRUE),
df[,col_idx]
)
}
)
# Return the data.frame object: data.frame => env
return(df)
}
# Apply the function to resolve the numeric vectors in data.frame:
# num_cols => integer vector
num_cols <- resolve_num_cols(df1)
# Apply the function: clean_df => data.frame
clean_df <- impute_median_vals_in_df(df1, num_cols)
Data used:
# Import data: df1 => data.frame
df1 <- structure(list(mpg = c(NA, 21, NA, 21.4, 18.7, 18.1, 14.3, 24.4,
22.8, 19.2, NA, 16.4, 17.3, NA, NA, 10.4, 14.7, 32.4, 30.4, 33.9,
21.5, NA, 15.2, 13.3, 19.2, 27.3, 26, NA, NA, 19.7, 15, 21.4),
cyl = c(NA, 6, NA, 6, 8, NA, NA, 4, 4, 6, 6, 8, 8, 8, 8,
8, 8, NA, NA, 4, 4, 8, 8, 8, 8, 4, 4, 4, 8, NA, NA, 4), disp = c(160,
160, 108, 258, 360, 225, 360, 146.7, 140.8, 167.6, 167.6,
NA, NA, 275.8, NA, 460, 440, 78.7, 75.7, 71.1, NA, 318, NA,
350, 400, NA, NA, 95.1, 351, NA, 301, 121), hp = c(110, 110,
93, 110, NA, 105, 245, 62, 95, 123, 123, 180, NA, 180, 205,
215, NA, 66, NA, 65, 97, NA, NA, 245, 175, 66, 91, NA, 264,
175, NA, 109), drat = c(3.9, 3.9, 3.85, 3.08, 3.15, 2.76,
3.21, NA, 3.92, 3.92, 3.92, 3.07, NA, 3.07, 2.93, 3, 3.23,
4.08, NA, 4.22, NA, 2.76, 3.15, 3.73, 3.08, NA, NA, 3.77,
4.22, 3.62, NA, NA), wt = c(2.62, 2.875, 2.32, 3.215, 3.44,
3.46, 3.57, NA, 3.15, 3.44, 3.44, 4.07, NA, NA, 5.25, 5.424,
5.345, 2.2, 1.615, 1.835, NA, NA, 3.435, 3.84, NA, NA, NA,
1.513, 3.17, 2.77, 3.57, 2.78), qsec = c(16.46, 17.02, 18.61,
19.44, NA, NA, NA, 20, NA, 18.3, 18.9, 17.4, 17.6, 18, NA,
17.82, NA, 19.47, 18.52, 19.9, NA, 16.87, NA, 15.41, 17.05,
18.9, 16.7, 16.9, 14.5, 15.5, 14.6, 18.6), vs = c(0, NA,
1, 1, NA, NA, 0, NA, 1, NA, 1, 0, 0, 0, 0, 0, 0, 1, NA, 1,
1, 0, 0, 0, NA, 1, NA, 1, 0, 0, 0, 1), am = c(NA, NA, NA,
0, NA, 0, NA, NA, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, NA,
0, 0, 0, NA, 1, 1, 1, 1, 1, 1, 1), gear = c(4, 4, 4, 3, NA,
3, 3, NA, NA, 4, 4, 3, 3, 3, 3, 3, NA, 4, 4, 4, 3, 3, 3,
3, 3, 4, 5, NA, NA, NA, NA, 4), carb = c(4, 4, 1, 1, 2, 1,
4, NA, NA, 4, NA, 3, NA, 3, NA, 4, 4, 1, 2, 1, 1, 2, 2, 4,
NA, NA, 2, 2, NA, 6, 8, 2)), row.names = c("Mazda RX4", "Mazda RX4 Wag",
"Datsun 710", "Hornet 4 Drive", "Hornet Sportabout", "Valiant",
"Duster 360", "Merc 240D", "Merc 230", "Merc 280", "Merc 280C",
"Merc 450SE", "Merc 450SL", "Merc 450SLC", "Cadillac Fleetwood",
"Lincoln Continental", "Chrysler Imperial", "Fiat 128", "Honda Civic",
"Toyota Corolla", "Toyota Corona", "Dodge Challenger", "AMC Javelin",
"Camaro Z28", "Pontiac Firebird", "Fiat X1-9", "Porsche 914-2",
"Lotus Europa", "Ford Pantera L", "Ferrari Dino", "Maserati Bora",
"Volvo 142E"), class = "data.frame")