I get the following error:
Error in model.frame.default(form = Elite ~ ., data = C_test, na.action = na.fail) : variable lengths differ (found for 'Privat1')
I check the length and not sure why it is different. How can I fix the length?
My code:
fix(colleges)
rownames(colleges)=colleges [,1]
fix(colleges)
colleges=colleges [,-1]
fix(colleges)
#create "Elite" variable
Elite=rep("No",nrow(colleges))
Elite[colleges$Top10perc >50]=" Yes"
Elite=as.factor(Elite)
colleges=data.frame(colleges ,Elite)
names(colleges)
attach(colleges)
#Change 'Elite' from factor to numeric so you can run lm
colleges$Elite<-as.numeric(colleges$Elite)
#change Private to binary to be able to use normalization
Privat1=rep(2 , length(Private))
Privat1[Private=="Yes"] <- 1
colleges=data.frame(colleges, Privat1)
#remove Private column for cleaner results
colleges=colleges [,-1]
fix(colleges)
#To tell which varibles are statistacally sig.
Linear_reg<- lm(Elite ~ ., data=colleges)
summary(Linear_reg)
#start to prep for kNN
C_Subset<-data.frame(colleges[c('Privat1', 'Apps', 'Accept', 'Top10perc', 'Top25perc', 'Outstate',
'Room.Board', 'PhD', 'Terminal', 'Expend')])
#Normalize
normalize <- function(x){return((x - min(x)) / (max(x) - min(x)))}
C_Norm<- as.data.frame(lapply(C_Subset[1:10], normalize))
set.seed(46)
C_Dat<- sample(1:nrow(C_Subset), size = nrow(C_Subset)*.85, replace = TRUE)
C_train<- C_Subset[C_Dat,]
C_test<- C_Subset[-C_Dat,]
C_train_lab<-C_Subset[C_Dat,1]
C_test_lab<-C_Subset[-C_Dat,1]
library(class)
train_control<- trainControl(method = 'cv', number = 5)
model<- train(Elite ~., data = C_test, trControl = train_control, method = 'rpart')
# this is where the error occurs