0

When ever I put this line of code:

> m1 <- knn( train = trainSetNorm[,c(1:41)], test = testSetNorm[,c(1:41)], 
cl = trainSetNorm[,c(42)], k = 703)

I get the following error:

Error in knn(train = trainSetNorm[, c(1:41)], test = testSetNorm[, c(1:41)],: 
k = 0 must be at least 1







I used the KDD Cup 99 corrected as the corrected 10 percent data set. The 10 percent is used and the training data and the corrected is used as the test data. Here's the exact code:

setwd("C:/Users/admin/Desktop/BIGLOU")

Load Data Set

testSet <- read.delim('corrected', sep = ',', header = FALSE)
colnames(testSet) <- c("duration", "protocol_type", "service", "flag", 
"src_bytes", "dst_bytes", "land", 
                    "wrong_fragment", "urgent", "hot", "num_failed_logins", 
"logged_in", "num_compromised",
                    "root_shell", "su_attempted", "num_root", 
"num_file_creations", "num_shells", 
                    "num_access_files", "num_outbound_cmds", 
"is_host_login","is_guest_login", "count", "srv_count", 
                    "serror_rate", "srv_serror_rate", "rerror_rate", 
"srv_rerror_rate", "same_srv_rate", "diff_srv_rate",
                    "srv_diff_host_rate", "dst_host_count", 
"dst_host_srv_count", "dst_host_same_srv_rate", 
                    "dst_host_diff_srv_rate", "dst_host_same_src_port_rate", 
"dst_host_srv_diff_host_rate",
                    "dst_host_serror_rate", "dst_host_srv_serror_rate", 
"dst_host_rerror_rate", "dst_host_srv_rerror_rate",
                    "connection_type")

trainSet <- read.delim('kddcup.data_10_percent_corrected', sep = ',', header = FALSE)
colnames(trainSet) <- c("duration", "protocol_type", "service", "flag", "src_bytes", "dst_bytes", "land", 
                    "wrong_fragment", "urgent", "hot", "num_failed_logins", 
"logged_in", "num_compromised",
                    "root_shell", "su_attempted", "num_root", 
"num_file_creations", "num_shells", 
                    "num_access_files", "num_outbound_cmds", 
"is_host_login","is_guest_login", "count", "srv_count", 
                    "serror_rate", "srv_serror_rate", "rerror_rate", 
"srv_rerror_rate", "same_srv_rate", "diff_srv_rate",
                    "srv_diff_host_rate", "dst_host_count", 
"dst_host_srv_count", "dst_host_same_srv_rate", 
                    "dst_host_diff_srv_rate", "dst_host_same_src_port_rate", 
"dst_host_srv_diff_host_rate",
                    "dst_host_serror_rate", "dst_host_srv_serror_rate", 
"dst_host_rerror_rate", "dst_host_srv_rerror_rate",
                    "connection_type")

Change Catagorical Values to Numerical Values.

testSet[,c(2)] <- as.integer(testSet[,c(2)])
testSet[,c(3)] <- as.integer(testSet[,c(3)])
testSet[,c(4)] <- as.integer(testSet[,c(4)])
testSet[,c(7)] <- as.integer(testSet[,c(7)])
testSet[,c(12)] <- as.integer(testSet[,c(12)])
testSet[,c(21)] <- as.integer(testSet[,c(21)])
testSet[,c(22)] <- as.integer(testSet[,c(22)])

trainSet[,c(2)] <- as.integer(trainSet[,c(2)])
trainSet[,c(3)] <- as.integer(trainSet[,c(3)])
trainSet[,c(4)] <- as.integer(trainSet[,c(4)])
trainSet[,c(7)] <- as.integer(trainSet[,c(7)])
trainSet[,c(12)] <- as.integer(trainSet[,c(12)])
trainSet[,c(21)] <- as.integer(trainSet[,c(21)])
trainSet[,c(22)] <- as.integer(trainSet[,c(22)])

Randomize the Data Set

set.seed(60223)
rand <-runif(nrow(testSet))
testSet <- testSet[order(rand),]
set.seed(12558)
rand <-runif(nrow(trainSet))
trainSet <- trainSet[order(rand),]

Normalize the input data

normalize <- function(x) {return( abs((x - min(x))/(max(x) - min(x))))}
testSetNorm <- as.data.frame(lapply(testSet[,c(1:41)],normalize))
trainSetNorm <- as.data.frame(lapply(trainSet[,c(1:41)],normalize))

testSetNorm <-cbind(testSetNorm, testSet[,c(42)])
colnames(testSet)[42] <- "connection_type"
trainSetNorm <-cbind(trainSetNorm, trainSet[,c(42)])
colnames(trainSet)[42] <- "connection_type"

Remove any missing data points

trainSetNorm <- trainSetNorm[complete.cases(trainSetNorm),]
testSetNorm <- testSetNorm[complete.cases(testSetNorm),]


require(class)
m1 <- knn( train = trainSetNorm[,c(1:41)], test = testSetNorm[,c(1:41)], cl = trainSetNorm[,c(42)], k = 703)
Cœur
  • 37,241
  • 25
  • 195
  • 267

0 Answers0