I am comparing Keras with Random forest. I follow research paper and it give keras model is high accuracy as compare to random forest model, but when i implemented it not give me. Accuracy and STD of RF* 0.997 0.0006 Accuracy of Keras 0.0079
#Importing dataset
dataset = pd.read_csv('KDD_Dataset.csv')
X = dataset.iloc[:, :-1].values
y = dataset.iloc[:, 41:42].values
from sklearn.preprocessing import LabelEncoder
labelencoder_X = LabelEncoder()
X[:,0] = labelencoder_X.fit_transform(X[:,0])
X[:,1] = labelencoder_X.fit_transform(X[:,1])
X[:,2] = labelencoder_X.fit_transform(X[:,2])
#
from sklearn.preprocessing import OneHotEncoder
onehotencoder_0 = OneHotEncoder(categorical_features=[0])
onehotencoder_1 = OneHotEncoder(categorical_features=[1])
onehotencoder_2 = OneHotEncoder(categorical_features=[2])
X = onehotencoder_0.fit_transform(X).toarray()
X = onehotencoder_1.fit_transform(X).toarray()
X = onehotencoder_2.fit_transform(X).toarray()
Encoding categorical data y
from sklearn.preprocessing import LabelEncoder
labelencoder_y = LabelEncoder()
y = labelencoder_y.fit_transform(y)
max(y)
Splitting the dataset into the Training set and Test set
#from sklearn.cross_validation import train_test_split
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y,
test_size = 0.2,
random_state = 1)
"""sc_y = StandardScaler()
y_train = sc_y.fit_transform(y_train)"""
#Feature Scaling
from sklearn.preprocessing import StandardScaler
sc = StandardScaler()
X_train = sc.fit_transform(X_train)
X_test = sc.transform(X_test)
Fitting Random Forest Classification to the Training set
from sklearn.ensemble import RandomForestClassifier
classifier = RandomForestClassifier(n_estimators = 500,
criterion = 'entropy',
random_state = 0,
oob_score = True)
classifier.fit(X_train, y_train)
y_pred = classifier.predict(X_test)
Making the Confusion Matrix
from sklearn.metrics import confusion_matrix
cm = confusion_matrix(y_test, y_pred)
from sklearn.model_selection import cross_val_score
accuracies = cross_val_score(estimator= classifier,
X = X_train,
y = y_train,
cv=10)
accuracies_mean = accuracies.mean()
accuracies_std = accuracies.std()
print("Accuracy and STD of RF")
print(accuracies_mean)
print(accuracies_std)
Keras Model
model = Sequential()
model.add(Dense(12, input_dim=45, activation='relu'))
model.add(Dense(8, activation='relu'))
model.add(Dense(1, activation='sigmoid'))
from keras import optimizers
numpy.random.seed(7)
import datetime, os
logdir = os.path.join("logs", datetime.datetime.now().strftime("%Y%m%d-%H%M%S"))
tensorboard_callback = tf.keras.callbacks.TensorBoard(logdir, histogram_freq=1)
sgd = optimizers.SGD(lr=0.01, decay=1e-6, momentum=0.9, nesterov=True)
model.compile(loss='mean_squared_error', optimizer=sgd, metrics=['accuracy'])
sgd = optimizers.SGD(lr=0.01, clipnorm=1.)
model.fit(X_train, y_train,
batch_size=50000,
epochs=10,
verbose=1,
validation_data=(X_test, y_test),
callbacks=None)
y_pred = model.predict(X_test)
score = model.evaluate(X_test, y_test, verbose=1)
suggest me how i can improve accuracy of keras