I am having difficulty resolving the error "there should be the same number of samples in x and y" when calling `rfe()` from the caret package. Others have posted on this site about this error, but their solutions have not worked for me. An abbreviated version of my dataset is included below.
`x_train` is here:
# Predictor table: 10 rows x 15 columns, reconstructed from dput() output.
# Categorical predictors are stored as character vectors (not factors);
# age, SurgeryYear and operativetime are integers and bmi is double.
# The object carries class c("data.table", "data.frame").
x_train <- structure(list(laterality = c("Left", "Right", "Right", "Right",
"Left", "Left", "Left", "Left", "Left", "Right"), age = c(66L,
56L, 69L, 49L, 60L, 70L, 58L, 53L, 59L, 64L), insurance = c("MEDICARE",
"UNITED", "MEDICARE", "UNITED", "COMMERCIAL", "MEDICARE", "AETNA",
"AETNA", "OXFORD", "MEDICARE_MANAGED"), employment = c("Retired",
"FullTime", "Retired", "FullTime", "Disabled", "SelfEmployed",
"Retired", "FullTime", "FullTime", "Disabled"), sex = c("Female",
"Male", "Female", "Female", "Female", "Female", "Male", "Male",
"Female", "Male"), race = c("WhiteorCaucasian", "WhiteorCaucasian",
"WhiteorCaucasian", "WhiteorCaucasian", "WhiteorCaucasian", "WhiteorCaucasian",
"Other", "BlackorAfricanAmerican", "WhiteorCaucasian", "WhiteorCaucasian"
), ethnicity = c("NotHispanicorLatino", "NotHispanicorLatino",
"NotHispanicorLatino", "NotHispanicorLatino", "NotHispanicorLatino",
"NotHispanicorLatino", "NotHispanicorLatino", "NotHispanicorLatino",
"NotHispanicorLatino", "NotHispanicorLatino"), bmi = c(22.3,
33, 34.3, 36, 30, 20, 29.5, 33.4, 26.5, 34.2), PreferredLanguage = c("English",
"English", "English", "English", "English", "English", "English",
"English", "English", "English"), married = c("Married", "Married",
"Married", "Married", "Married", "Married", "Divorced", "Single",
"Married", "Married"), RadiographSevere = c("No", "No", "No",
"No", "No", "No", "No", "No", "No", "No"), HxAnxietyDepression = c("No",
"No", "No", "Yes", "Yes", "Yes", "No", "No", "No", "No"), SurgeryYear = c(2017L,
2017L, 2017L, 2017L, 2017L, 2017L, 2017L, 2017L, 2017L, 2017L
), operativetime = c(82L, 79L, 85L, 76L, 84L, 86L, 67L, 75L,
72L, 100L), HipApproach = c("Anterior", "Posterior", "Posterior",
"Posterior", "Posterior", "Anterior", "Posterior", "Posterior",
"Posterior", "Posterior")), row.names = c(NA, -10L), class = c("data.table",
"data.frame"))
`y_train` is here:
# Outcome table: 10 rows x 1 column (POD1AverageNrsScoreCut), reconstructed
# from dput() output. The interval labels "[0,5)" / "[5,10)" are stored as a
# character vector inside a one-column data.table, so y_train itself is a
# list-like table (length(y_train) is 1, the column count), not a vector.
y_train <- structure(list(POD1AverageNrsScoreCut = c("[0,5)", "[0,5)", "[0,5)",
"[0,5)", "[5,10)", "[0,5)", "[0,5)", "[5,10)", "[0,5)", "[0,5)"
)), row.names = c(NA, -10L), class = c("data.table", "data.frame"
))
The code I am using for `rfe()` is here:
library(caret)

# Recursive feature elimination with random-forest helper functions,
# resampled by repeated k-fold cross-validation.
control <- rfeControl(
  functions = rfFuncs,   # random forest
  method = "repeatedcv", # repeated cv
  repeats = 3,           # number of repeats
  number = 10            # number of folds
)

# rfe() expects `x` as a matrix or data frame of predictors. Convert the
# data.table to a plain data.frame and turn character columns into factors
# so the random forest treats them as categorical predictors.
x_rfe <- as.data.frame(x_train)
is_chr <- vapply(x_rfe, is.character, logical(1))
x_rfe[is_chr] <- lapply(x_rfe[is_chr], factor)

# rfe() expects `y` as a vector/factor with one element per row of `x`.
# `y_train` is a one-column table, so length(y_train) is 1 (its column
# count), which is what triggers "there should be the same number of
# samples in x and y". Extract the outcome column as a factor instead.
y_rfe <- factor(y_train[["POD1AverageNrsScoreCut"]])
stopifnot(length(y_rfe) == nrow(x_rfe))

# Candidate subset sizes cannot exceed the number of predictors, so derive
# them from the data rather than hard-coding 1:30.
result_rfe <- rfe(
  x = x_rfe,
  y = y_rfe,
  sizes = seq_len(ncol(x_rfe)),
  rfeControl = control
)