2

Here is the scenario:

# Fit an rpart tree on a 90/10 train/test split of the intrusion data, then
# threshold the test-set class probabilities at 0.7 and tabulate the labels.
library(rpart); library(dplyr); library(caret)
data <- read.csv("NetworkIntrusionValidatedata.csv") #50 example rows provided below
# NOTE(review): `y` is never defined in this snippet; presumably it is the
# normal/anomaly label vector aligned with the rows of `data` -- confirm.
# The trailing `%>% c()` presumably flattens the returned indices into a
# plain vector -- confirm against the caret documentation.
traindata <- createDataPartition(y,p=0.9,list = F)  %>% c()
# Split predictors and labels with the same index vector.
train <- data[traindata,]
test <- data[-traindata,]
trainy <- y[traindata]
testy <- y[-traindata]
# Attach the label as column `trainy` so the formula below can refer to it.
train <- cbind(train,trainy)
model2 <- rpart(trainy~., data = train)
# NOTE(review): `prunedmodel` is not used below; the predictions come from the
# unpruned `model2`.
prunedmodel <- prune(model2, cp = 0.27)
# With type = "prob" the result is a matrix with one probability column per
# class, not a single vector.
test2pred <- predict(model2, newdata = test, type = "prob")
# ifelse() is evaluated on every cell of that matrix, so the factor built here
# has nrow * ncol elements rather than one label per test row.
test2pred <- factor(ifelse(test2pred>0.7,"normal","anomaly"))
table(test2pred)

The result is:

test2pred
anomaly  normal 
   2254    2254 

But there are only 2254 observations in total in the 'test' data. How can there be 2254 values in both anomaly and normal? I ran a 'bayesglm' on the same data and it works.

Sample data:

structure(list(duration = c(0L, 0L, 2L, 0L, 1L, 0L, 0L, 0L, 0L,  0L,
0L, 0L, 0L, 0L, 37L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,  0L, 0L, 0L,
 0L, 0L, 0L, 805L, 0L, 0L, 0L, 0L, 0L, 0L, 8L, 0L,  0L, 0L, 0L, 0L, 0L,
 0L, 0L, 0L, 0L, 0L, 0L), protocol_type = structure(c(2L,  2L, 2L, 1L,
 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,  2L, 3L, 2L, 2L,
 2L, 2L, 2L, 2L, 2L, 2L, 2L, 1L, 3L, 2L, 2L, 2L,  3L, 2L, 2L, 2L, 2L,
 3L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,  3L), .Label = c("icmp",
 "tcp", "udp"), class = "factor"), service = structure(c(46L,  46L,
 20L, 14L, 56L, 23L, 50L, 56L, 23L, 19L, 56L, 50L, 46L, 56L,  56L, 23L,
 23L, 23L, 46L, 46L, 29L, 44L, 23L, 23L, 6L, 10L, 23L,  23L, 15L, 46L,
 23L, 50L, 23L, 46L, 46L, 25L, 23L, 19L, 12L, 20L,  46L, 23L, 23L, 23L,
 32L, 23L, 55L, 23L, 56L, 46L), .Label = c("IRC",  "X11", "Z39_50",
 "auth", "bgp", "courier", "csnet_ns", "ctf",  "daytime", "discard",
 "domain", "domain_u", "echo", "eco_i",  "ecr_i", "efs", "exec",
 "finger", "ftp", "ftp_data", "gopher",  "hostnames", "http",
 "http_443", "imap4", "iso_tsap", "klogin",  "kshell", "ldap", "link",
 "login", "mtp", "name", "netbios_dgm",  "netbios_ns", "netbios_ssn",
 "netstat", "nnsp", "nntp", "ntp_u",  "other", "pm_dump", "pop_2",
 "pop_3", "printer", "private", "remote_job",  "rje", "shell", "smtp",
 "sql_net", "ssh", "sunrpc", "supdup",  "systat", "telnet", "tftp_u",
 "tim_i", "time", "urp_i", "uucp",  "uucp_path", "vmnet", "whois"),
 class = "factor"), flag = structure(c(2L,  2L, 10L, 10L, 3L, 10L, 10L,
 10L, 10L, 10L, 10L, 10L, 2L, 6L,  10L, 10L, 10L, 10L, 10L, 2L, 2L, 6L,
 10L, 10L, 2L, 3L, 10L, 10L,  10L, 10L, 5L, 10L, 10L, 10L, 2L, 3L, 10L,
 10L, 10L, 10L, 6L,  10L, 10L, 10L, 2L, 10L, 6L, 10L, 6L, 10L), .Label
 = c("OTH",  "REJ", "RSTO", "RSTOS0", "RSTR", "S0", "S1", "S2", "S3", "SF",  "SH"), class = "factor"), src_bytes = c(0L, 0L, 12983L, 20L, 
 0L, 267L, 1022L, 129L, 327L, 26L, 0L, 616L, 0L, 0L, 773L, 350L,  213L,
 246L, 45L, 0L, 0L, 0L, 196L, 277L, 0L, 0L, 294L, 300L,  520L, 54L,
 76944L, 720L, 301L, 1L, 0L, 0L, 209L, 220L, 43L, 88382L,  0L, 277L,
 321L, 335L, 0L, 234L, 0L, 54540L, 0L, 46L), dst_bytes = c(0L,  0L, 0L,
 0L, 15L, 14515L, 387L, 174L, 467L, 157L, 0L, 330L, 0L,  0L, 364200L,
 3610L, 659L, 2090L, 44L, 0L, 0L, 0L, 1823L, 1816L,  0L, 0L, 6442L,
 440L, 0L, 51L, 1L, 281L, 19794L, 1L, 0L, 44L,  12894L, 688L, 71L, 0L,
 0L, 4968L, 2715L, 3228L, 0L, 3236L, 0L,  8314L, 0L, 45L), land = c(0L,
 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,  0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
 0L, 0L, 0L, 0L, 0L, 0L, 0L,  0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
 0L, 0L, 0L, 0L, 0L, 0L,  0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L),
 wrong_fragment = c(0L, 0L,  0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
 0L, 0L, 0L, 0L, 0L, 0L,  0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
 0L, 0L, 0L, 0L, 0L,  0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
 0L, 0L, 0L, 0L ), urgent = c(0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
 0L, 0L,  0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
 0L,  0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 
 0L, 0L, 0L, 0L, 0L, 0L), hot = c(0L, 0L, 0L, 0L, 0L, 0L, 0L,  0L, 0L,
 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,  0L, 0L, 0L,
 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 4L, 0L,  0L, 0L, 0L, 0L,
 0L, 0L, 0L, 0L, 2L, 0L, 0L), num_failed_logins = c(0L,  0L, 0L, 0L,
 0L, 0L, 0L, 1L, 0L, 1L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,  0L, 0L, 0L, 0L,
 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,  0L, 0L, 0L, 0L, 0L,
 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,  0L), logged_in = c(0L,
 0L, 0L, 0L, 0L, 1L, 1L, 0L, 1L, 0L, 0L,  1L, 0L, 0L, 1L, 1L, 1L, 1L,
 0L, 0L, 0L, 0L, 1L, 1L, 0L, 0L, 1L,  1L, 0L, 0L, 1L, 1L, 1L, 0L, 0L,
 0L, 1L, 1L, 0L, 0L, 0L, 1L, 1L,  1L, 0L, 1L, 0L, 1L, 0L, 0L),
 num_compromised = c(0L, 0L, 0L,  0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
 0L, 0L, 0L, 0L, 0L, 0L, 0L,  0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
 0L, 0L, 0L, 0L, 0L, 0L,  0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
 0L, 1L, 0L, 0L), 
     root_shell = c(0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 
     0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 
     0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 
     0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L), su_attempted = c(0L, 
     0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 
     0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 
     0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 
     0L, 0L, 0L, 0L), num_root = c(0L, 0L, 0L, 0L, 0L, 0L, 0L, 
     0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 
     0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 
     0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L), num_file_creations = c(0L, 
     0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 
     0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 
     0L, 0L, 0L, 0L, 0L, 0L, 4L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 
     0L, 0L, 0L, 0L), num_shells = c(0L, 0L, 0L, 0L, 0L, 0L, 0L, 
     0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 
     0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 
     0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L), num_access_files = c(0L, 
     0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 
     0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 
     0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 
     0L, 0L, 0L, 0L), num_outbound_cmds = c(0L, 0L, 0L, 0L, 0L, 
     0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 
     0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 
     0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L
     ), is_host_login = c(0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 
     0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 
     0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 
     0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L), is_guest_login = c(0L, 
     0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 1L, 0L, 0L, 0L, 0L, 0L, 0L, 
     0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 
     0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 
     0L, 0L, 0L, 0L), count = c(229L, 136L, 1L, 1L, 1L, 4L, 1L, 
     1L, 33L, 1L, 1L, 1L, 111L, 120L, 1L, 8L, 24L, 16L, 505L, 
     204L, 118L, 1L, 17L, 17L, 116L, 273L, 22L, 7L, 511L, 511L, 
     12L, 1L, 15L, 40L, 483L, 2L, 11L, 1L, 113L, 15L, 144L, 13L, 
     29L, 49L, 266L, 8L, 281L, 4L, 1L, 68L), srv_count = c(10L, 
     1L, 1L, 65L, 8L, 4L, 3L, 1L, 47L, 1L, 1L, 2L, 2L, 120L, 1L, 
     8L, 24L, 16L, 505L, 18L, 19L, 1L, 17L, 18L, 8L, 13L, 46L, 
     7L, 511L, 511L, 12L, 2L, 15L, 3L, 1L, 2L, 11L, 1L, 113L, 
     15L, 8L, 13L, 37L, 50L, 8L, 21L, 6L, 24L, 12L, 68L), serror_rate = c(0, 
     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 
     0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.05, 0, 0, 0, 
     0, 0, 1, 0, 0.03, 0, 0, 0, 1, 0, 1, 0), srv_serror_rate = c(0, 
     0, 0, 0, 0.12, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 
     0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
     0, 0, 1, 0, 0.05, 0, 0, 0, 1, 0, 0.33, 0), rerror_rate = c(1, 
     1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 
     1, 0, 0, 0, 1, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0.92, 1, 0, 0, 
     0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0), srv_rerror_rate = c(1, 
     1, 0, 0, 0.5, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 
     1, 0, 0, 0, 1, 1, 0, 0, 0, 0, 1, 0, 0, 0, 1, 1, 0, 0, 0, 
     0, 0, 0, 0, 0, 1, 0, 0, 0, 0.67, 0), same_srv_rate = c(0.04, 
     0.01, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0.02, 1, 1, 1, 1, 1, 
     1, 0.09, 0.16, 1, 1, 1, 0.07, 0.05, 1, 1, 1, 1, 1, 1, 1, 
     0.08, 0, 0.5, 1, 1, 1, 1, 0.06, 1, 1, 1, 0.03, 1, 0.02, 1, 
     1, 1), diff_srv_rate = c(0.06, 0.06, 0, 0, 0, 0, 0, 0, 0, 
     0, 0, 0, 0.07, 0, 0, 0, 0, 0, 0, 0.07, 0.05, 0, 0, 0, 0.07, 
     0.06, 0, 0, 0, 0, 0, 0, 0, 0.38, 1, 1, 0, 0, 0, 0, 0.06, 
     0, 0, 0, 0.06, 0, 0.06, 0, 0, 0), srv_diff_host_rate = c(0, 
     0, 0, 1, 0.75, 0, 1, 0, 0.04, 0, 0, 1, 0, 0, 0, 0, 0, 0, 
     0, 0, 0, 0, 0, 0.11, 0, 0, 0.11, 0, 0, 0, 0, 1, 0, 0, 0, 
     1, 0, 0, 0, 0, 0, 0, 0.08, 0.04, 0, 0.1, 0, 0.08, 0.92, 0
     ), dst_host_count = c(255L, 255L, 134L, 3L, 29L, 155L, 255L, 
     255L, 151L, 52L, 255L, 255L, 255L, 235L, 38L, 71L, 255L, 
     35L, 255L, 255L, 255L, 255L, 255L, 36L, 255L, 255L, 180L, 
     255L, 46L, 255L, 241L, 158L, 20L, 255L, 255L, 185L, 255L, 
     53L, 255L, 203L, 255L, 13L, 29L, 255L, 255L, 255L, 255L, 
     255L, 91L, 255L), dst_host_srv_count = c(10L, 1L, 86L, 57L, 
     86L, 255L, 28L, 255L, 255L, 26L, 128L, 129L, 2L, 171L, 73L, 
     255L, 255L, 255L, 255L, 18L, 19L, 87L, 255L, 255L, 8L, 13L, 
     255L, 255L, 59L, 255L, 238L, 82L, 255L, 3L, 1L, 59L, 255L, 
     27L, 254L, 114L, 8L, 255L, 255L, 255L, 8L, 255L, 6L, 250L, 
     86L, 255L), dst_host_same_srv_rate = c(0.04, 0, 0.61, 1, 
     0.31, 1, 0.11, 1, 1, 0.5, 0.5, 0.51, 0.01, 0.73, 0.16, 1, 
     1, 1, 1, 0.07, 0.07, 0.34, 1, 1, 0.03, 0.05, 1, 1, 1, 1, 
     0.99, 0.52, 1, 0.01, 0, 0.24, 1, 0.51, 1, 0.38, 0.03, 1, 
     1, 1, 0.03, 1, 0.02, 0.98, 0.34, 1), dst_host_diff_srv_rate = c(0.06, 
     0.06, 0.04, 0, 0.17, 0, 0.72, 0, 0, 0.08, 0.01, 0.03, 0.07, 
     0.07, 0.05, 0, 0, 0, 0, 0.07, 0.05, 0.01, 0, 0, 0.06, 0.06, 
     0, 0, 0, 0, 0.01, 0.06, 0, 0.58, 1, 0.03, 0, 0.08, 0.01, 
     0.01, 0.06, 0, 0, 0, 0.06, 0, 0.07, 0.01, 0.03, 0), dst_host_same_src_port_rate = c(0, 
     0, 0.61, 1, 0.03, 0.01, 0, 0, 0.01, 0.02, 0, 0, 0, 0, 0.03, 
     0.01, 0, 0.03, 1, 0, 0, 0.01, 0, 0.03, 0, 0, 0.01, 0, 1, 
     0.83, 0, 0.01, 0.05, 0.99, 0, 0.01, 0, 0.02, 0, 0.38, 0, 
     0.08, 0.03, 0, 0, 0, 0, 0, 0.01, 0.26), dst_host_srv_diff_host_rate = c(0, 
     0, 0.02, 0.28, 0.02, 0.03, 0, 0, 0.03, 0, 0, 0, 0, 0, 0.04, 
     0.04, 0, 0.05, 0, 0, 0, 0, 0, 0.02, 0, 0, 0.01, 0, 0.14, 
     0, 0, 0, 0.02, 0, 0, 0.03, 0, 0, 0, 0.02, 0, 0.01, 0.04, 
     0, 0, 0, 0, 0, 0.03, 0), dst_host_serror_rate = c(0, 0, 0, 
     0, 0, 0.01, 0, 0.01, 0, 0, 0, 0, 0, 0.69, 0, 0, 0, 0, 0, 
     0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.01, 0.05, 0, 0, 0.01, 
     0, 0, 0, 0, 1, 0, 0.03, 0, 0, 0, 1, 0, 1, 0), dst_host_srv_serror_rate = c(0, 
     0, 0, 0, 0, 0, 0, 0.01, 0, 0, 0, 0, 0, 0.95, 0.77, 0, 0, 
     0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
     0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 1, 0), dst_host_rerror_rate = c(1, 
     1, 0, 0, 0.83, 0, 0.72, 0.02, 0, 0, 0.66, 0.33, 1, 0.02, 
     0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0.07, 0, 
     0, 0.01, 0.96, 0.89, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0.06, 
     0, 0), dst_host_srv_rerror_rate = c(1, 1, 0, 0, 0.71, 0, 
     0.04, 0.02, 0, 0, 0.32, 0, 1, 0, 0.07, 0, 0, 0, 0, 1, 1, 
     0, 0, 0, 1, 1, 0, 0, 0, 0, 0.07, 0, 0, 0, 1, 0.95, 0, 0, 
     0, 0, 0, 0, 0, 0, 1, 0, 0, 0.06, 0, 0)), row.names = c(NA,  50L), class = "data.frame")
DanY
  • 5,920
  • 1
  • 13
  • 33
  • Welcome to SO! This community has a few [rules](https://stackoverflow.com/help/on-topic) and [norms](https://stackoverflow.com/help/how-to-ask) and following them will help you get a good answer to your question. In particular, it’s best to provide an [MCVE](https://stackoverflow.com/help/mcve) (a minimum, complete, and verifiable example). Good advice for R-specific MVCEs is available [here](https://stackoverflow.com/questions/5963269/how-to-make-a-great-r-reproducible-example/5963610#5963610) and [here](https://reprex.tidyverse.org/articles/reprex-dos-and-donts.html). – DanY Sep 11 '18 at 05:08

1 Answer

0

welcome to Stack Overflow and thank you for reading the Tour.

You do not include the data for y so I cannot exactly reproduce your example. Nevertheless, I can explain why this is happening and how to fix it. To do so, I will use the built-in mtcars data set. This data has a binary variable. am indicates whether or not the vehicle has an automatic transmission. Since it is not given as a factor, I start by changing it to one. Then use rpart much as you did.

# Reproduce the situation on the built-in mtcars data, where `am` is a
# binary 0/1 column.
library(rpart)

# Convert the response to a factor so rpart fits a classification tree.
mtcars[["am"]] <- factor(mtcars[["am"]])

# Fit the tree on all remaining columns, then ask for class probabilities
# on the same rows.
model2 <- rpart(formula = am ~ ., data = mtcars)
test2pred <- predict(object = model2, newdata = mtcars, type = "prob")

You then change test2pred to a factor. I want to be able to look at the output of predict, so in converting it to a factor, I will give it a new name.

test2pred.fact <- factor(ifelse(test2pred>0.7,"normal","anomaly"))
table(test2pred.fact)
test2pred.fact
anomaly  normal 
     32      32 

Notice that the mtcars data set has 32 instances and, like in your example, it produced one "anomaly" and one "normal" per instance. Let's look at why. To do so, we need to go back to what came out of predict.

head(test2pred)
                          0          1
Mazda RX4         0.1428571 0.85714286
Mazda RX4 Wag     0.1428571 0.85714286
Datsun 710        0.1428571 0.85714286
Hornet 4 Drive    0.9444444 0.05555556
Hornet Sportabout 0.9444444 0.05555556
Valiant           0.9444444 0.05555556

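By specifying type = "prob", your predict statement asked for the class probabilities, so it returns a matrix with one column per class: the probability of no and the probability of yes. ifelse() is applied to every element of that matrix, not to each row, so you get two labels per observation, which is why the counts in the table add up to twice the number of rows. Moreover, the two probabilities in a row sum to 1, so if one of them is > 0.7, the other is necessarily < 0.7; such a row contributes one "normal" and one "anomaly". I think what you meant to do was just look at whether the probability of a yes was > 0.7. So instead of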

test2pred.fact <- factor(ifelse(test2pred>0.7,"normal","anomaly"))

just test the first probability with:

# Threshold a single column so that each row yields exactly one label.
# Column 1 holds the probability of the first factor level of the response;
# check colnames(test2pred) to confirm which class that is for your data.
test2pred.fact <- factor(ifelse(test2pred[,1]>0.7,"normal","anomaly"))

For my mtcars example, this gives:

table(test2pred.fact)
test2pred.fact
anomaly  normal 
     14      18

Now there are some of each, and the counts total the right number of instances (32).

G5W
  • 36,531
  • 10
  • 47
  • 80