-1

Suppose my dataset is iris, which has 3 classes, and I want to implement a one-versus-one SVM approach. However, when I subset the training set for each classifier by class i and class j, I get an empty subset (see the line below the comment #selecting subset of training set where Species equal to class i and class j).

# Distinct class labels of the iris data set; unique() on a factor returns a factor.
Species <-iris$Species
class <- unique(Species)
# Fixed seed so the random split below is reproducible.
set.seed(123)
# Draw 100 of the 150 row indices for training; the remaining rows are the test set.
s<- sample (150,100)
data_train<- iris[s,]
data_test<- iris[-s,]
train <-data_train
test <-data_test
# Visit every unordered pair of classes (i < j).
# `:` binds tighter than binary `-`, so the outer range is (2:n) - 1, i.e. 1:(n - 1).
for(i in 2:length(unique(Species))-1){
  for(j in (i+1):length(unique(Species))){
    print(paste(class[i],class[j],sep=","))


    #selecting subset of training set where Species equal to class i and class j
    # NOTE(review): class[i] and class[j] are factors; on R versions before 4.1
    # c() on factors yields their integer codes, which never equal the Species
    # labels -- presumably the cause of the empty result shown below; confirm.
    # NOTE(review): `train` is overwritten here, so every later pair is drawn
    # from the already-filtered frame rather than from the full training set.
    train <-subset(train, Species %in% c(class[i],class[j]))
   str(train)


  }}
[1] "setosa,versicolor"
'data.frame':   0 obs. of  5 variables:
 $ Sepal.Length: num 
 $ Sepal.Width : num 
 $ Petal.Length: num 
 $ Petal.Width : num 
 $ Species     : Factor w/ 3 levels "setosa","versicolor",..: 
[1] "setosa,virginica"
'data.frame':   0 obs. of  5 variables:
 $ Sepal.Length: num 
 $ Sepal.Width : num 
 $ Petal.Length: num 
 $ Petal.Width : num 
 $ Species     : Factor w/ 3 levels "setosa","versicolor",..: 
[1] "versicolor,virginica"
'data.frame':   0 obs. of  5 variables:
 $ Sepal.Length: num 
 $ Sepal.Width : num 
 $ Petal.Length: num 
 $ Petal.Width : num 
 $ Species     : Factor w/ 3 levels "setosa","versicolor",..: 
Rawia Hemdan
  • 91
  • 1
  • 11

1 Answers1

1

This should be working:

library(dplyr)
# Reproducible split of iris: 100 sampled rows for training, the rest for testing.
set.seed(123)
s <- sample(150, 100)
data_train <- iris[s, ]
data_test <- iris[-s, ]
train <- data_train
test <- data_test

# Distinct class labels (a factor); converted to character where they are compared.
class <- unique(iris$Species)
n_class <- length(class)

# One-versus-one: visit every unordered pair of classes (i < j).
# seq_len(n_class - 1) states the intended range 1..(n - 1) explicitly; the
# former `2:n - 1` only produced it because `:` binds tighter than `-`.
for (i in seq_len(n_class - 1)) {
  for (j in (i + 1):n_class) {
    print(paste(class[i], class[j], sep = ","))

    # Compare labels as character strings so the test does not depend on how
    # c() combines factors (integer codes before R 4.1, a factor afterwards).
    pair <- as.character(class[c(i, j)])

    # Logical row filter. Indexing with match() positions is wrong here: it
    # yields 1 for every hit, which selects the first row of `train` over and
    # over instead of the matching rows. Subsetting into a fresh `train2`
    # leaves `train` intact for the next pair.
    train2 <- train[train$Species %in% pair, ]

    # Species still carries all three factor levels; apply droplevels(train2)
    # if the classifier requires exactly two.
    str(train2)
  }
}

What I did was assign the iris$Species values directly to class and change the subsetting step slightly. Let me know if this works as expected.

Barbara
  • 1,118
  • 2
  • 11
  • 34
  • Unfortunately i got incorrect results because > table(data_train$Species) setosa versicolor virginica 36 31 33 – Rawia Hemdan Nov 23 '17 at 10:31
  • the results [1] "setosa,versicolor" 'data.frame': 100 obs. of 5 variables:...[1] "setosa,virginica" 'data.frame': 100 obs. of 5 variables:[1] "versicolor,virginica"'data.frame': 0 obs. of 5 variables: – Rawia Hemdan Nov 23 '17 at 10:32
  • By this example it should be "setosa,versicolor" a df of 67 observations "setosa,virginica" a df of 69 observations and "versicolor,virginica" a df of 64 observations – Rawia Hemdan Nov 23 '17 at 10:35
  • @RawiaHemdan updated my answer, now it returns the expected results – Barbara Nov 23 '17 at 11:14