0
Apanalysis<-newApfbage[c("type","engagement","postdata")]
#It contains data of engagement(int), type of facebook post (char) and postdata(char)

Apanalysis$textlength<- nchar(as.character(Apanalysis$postdata))
Apanalysis$textlength[is.na(Apanalysis$textlength)]<-0

#creating training dataset and dfm 

index<- createDataPartition(Apnewanalysis$type, times = 1, p=0.7, list=FALSE)
train<- Apnewanalysis[index,]
train.tokens<- tokenize(train$postdata, what="word",removeNumbers = TRUE, removePunct = TRUE, removeSymbols = TRUE, removeSeparators = TRUE, removeHyphens = TRUE)
train.tokens<- toLower(train.tokens)
train.tokens<- selectFeatures(train.tokens, stopwords(), selection = "remove")
train.tokens<- wordstem(train.tokens, language = "english")
train.tokens.matrix<- dfm(train.tokens)
train.tokens.df<- cbind(Label=train$type, 
as.data.frame(train.tokens.matrix))
names(train.tokens.df)<- make.names(names(train.tokens.df))

#function for term frequency 
 term.frequency<- function(row){ 
 row/sum(row)
  }

#function for inverse document frequency 
invsere.doc.freq<- function(col){
corpus.size<- length(col)
doc.count<- length(which(col>0))

log10(corpus.size/doc.count)
 }

#function for TF-IDF 
tf.idf<- function(tf, idf){
tf*idf 
 }

train.tokens.df<- apply(train.tokens.matrix, 1,term.frequency)

train.tokens.idf<- apply(train.tokens.matrix, 2,invsere.doc.freq)

train.tokens.tfidf<- apply(train.tokens.df, 2, tf.idf, idf=train.tokens.idf)
train.tokens.tfidf<- t(train.tokens.tfidf)

incompletecases<-which(!complete.cases(train.tokens.tfidf))
train.tokens.tfidf[incompletecases,]<- rep(0.0, ncol(train.tokens.tfidf))

train.tokens.tfidf.df<- cbind(Label=as.character(train$type), 
train.tokens.tfidf)
names(train.tokens.tfidf.df)<- make.names(train.tokens.tfidf.df)

cv.folds<- createMultiFolds(train.tokens.tfidf.tf$type, k=10, times = 3)
cv.ctrl<-trainControl(method = "repeatedcv", number = 10, repeats = 3, index = cv.folds)

cl<- makeCluster(3, type = "SOCK")
registerDoSNOW(cl)
n1<- names(train.tokens.tfidf.df)
f1 <- as.formula(paste("Label ~", paste(sprintf("`%s`",n1[!n1 %in% "Label"]) , collapse = " + ")))
rpart.cv.2<-train(f1,data=train.tokens.tfidf.df, method="rpart",trControl=cv.ctrl, tuneLength=7)

This is my code. I'm trying to run the train function but it is giving an error

Error in `[.data.frame`(m, labs) : undefined columns selected

Called from: [.data.frame(m, labs)

I've tried finding a relevant solution regarding this and has tried different solutions but every solution is resulting in the similar result. Thank you

  • I would suggest to check the formula f1. Is it coming properly? – Prem Jul 22 '17 at 20:34
  • 1
    That's most probably has something to do with your data set. Please read [How to make a great reproducible example in R?](https://stackoverflow.com/questions/5963269/how-to-make-a-great-r-reproducible-example) to present your dataset appropriately. – M-- Jul 22 '17 at 23:57
  • @Prem f1 is working properly. I've checked that. – Sumit Dargan Jul 23 '17 at 06:37
  • @Masoud train.tokens.tf.idf contains the tokenize data of posts on a Facebook page. I didn't find any appropriate solution here. – Sumit Dargan Jul 23 '17 at 06:45
  • Please make your example reproducible or at least show enough information about intermediate steps so that we can judge if things are working fine. Clearly they're not, so you need to pinpoint where they go awry. – Roman Luštrik Jul 23 '17 at 13:59

0 Answers0