Apanalysis<-newApfbage[c("type","engagement","postdata")]
#It contains data of engagement(int), type of facebook post (char) and postdata(char)
Apanalysis$textlength<- nchar(as.character(Apanalysis$postdata))
Apanalysis$textlength[is.na(Apanalysis$textlength)]<-0
#creating training dataset and dfm
index<- createDataPartition(Apnewanalysis$type, times = 1, p=0.7, list=FALSE)
train<- Apnewanalysis[index,]
train.tokens<- tokenize(train$postdata, what="word",removeNumbers = TRUE, removePunct = TRUE, removeSymbols = TRUE, removeSeparators = TRUE, removeHyphens = TRUE)
train.tokens<- toLower(train.tokens)
train.tokens<- selectFeatures(train.tokens, stopwords(), selection = "remove")
train.tokens<- wordstem(train.tokens, language = "english")
train.tokens.matrix<- dfm(train.tokens)
train.tokens.df<- cbind(Label=train$type,
as.data.frame(train.tokens.matrix))
names(train.tokens.df)<- make.names(names(train.tokens.df))
#function for term frequency
term.frequency<- function(row){
row/sum(row)
}
#function for inverse document frequency
invsere.doc.freq<- function(col){
corpus.size<- length(col)
doc.count<- length(which(col>0))
log10(corpus.size/doc.count)
}
#function for TF-IDF
tf.idf<- function(tf, idf){
tf*idf
}
train.tokens.df<- apply(train.tokens.matrix, 1,term.frequency)
train.tokens.idf<- apply(train.tokens.matrix, 2,invsere.doc.freq)
train.tokens.tfidf<- apply(train.tokens.df, 2, tf.idf, idf=train.tokens.idf)
train.tokens.tfidf<- t(train.tokens.tfidf)
incompletecases<-which(!complete.cases(train.tokens.tfidf))
train.tokens.tfidf[incompletecases,]<- rep(0.0, ncol(train.tokens.tfidf))
train.tokens.tfidf.df<- cbind(Label=as.character(train$type),
train.tokens.tfidf)
names(train.tokens.tfidf.df)<- make.names(train.tokens.tfidf.df)
cv.folds<- createMultiFolds(train.tokens.tfidf.tf$type, k=10, times = 3)
cv.ctrl<-trainControl(method = "repeatedcv", number = 10, repeats = 3, index = cv.folds)
cl<- makeCluster(3, type = "SOCK")
registerDoSNOW(cl)
n1<- names(train.tokens.tfidf.df)
f1 <- as.formula(paste("Label ~", paste(sprintf("`%s`",n1[!n1 %in% "Label"]) , collapse = " + ")))
rpart.cv.2<-train(f1,data=train.tokens.tfidf.df, method="rpart",trControl=cv.ctrl, tuneLength=7)
This is my code. I'm trying to run the train function but it is giving an error
Error in `[.data.frame`(m, labs) : undefined columns selected
Called from: [.data.frame
(m, labs)
I've tried finding a relevant solution regarding this and has tried different solutions but every solution is resulting in the similar result. Thank you