I am working on insider threat detection in TensorFlow, using the US-CERT insider threat data set.

Here is the code:

import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow import feature_column
from tensorflow.keras import layers
from sklearn.model_selection import train_test_split
from pandas.api.types import CategoricalDtype

#Use Pandas to create a dataframe
#On Windows, to read a file from a path other than the run directory, see:
#https://stackoverflow.com/questions/16952632/read-a-csv-into-pandas-from-f-drive-on-windows-7

URL = 'https://raw.githubusercontent.com/dc401/tensorflow-insiderthreat/master/scenario2-training-dataset-transformed-tf.csv'
dataframe = pd.read_csv(URL)
#print(dataframe.head())

#show dataframe details for column types
#print(dataframe.info())

#print(pd.unique(dataframe['user']))
#https://pbpython.com/categorical-encoding.html
dataframe["user"] = dataframe["user"].astype('category')
dataframe["source"] = dataframe["source"].astype('category')
dataframe["action"] = dataframe["action"].astype('category')
dataframe["user_cat"] = dataframe["user"].cat.codes
dataframe["source_cat"] = dataframe["source"].cat.codes
dataframe["action_cat"] = dataframe["action"].cat.codes

#print(dataframe.info())
#print(dataframe.head())

#save dataframe with new columns for future data mapping
dataframe.to_csv('dataframe-export-allcolumns.csv')

#remove old columns
del dataframe["user"]
del dataframe["source"]
del dataframe["action"]
#restore original names of columns
dataframe.rename(columns={"user_cat": "user", "source_cat": "source", "action_cat": "action"}, inplace=True)
print(dataframe.head())
print(dataframe.info())

#save dataframe cleaned up
dataframe.to_csv('dataframe-export-int-cleaned.csv')


#Split the dataframe into train, validation, and test
train, test = train_test_split(dataframe, test_size=0.2)
train, val = train_test_split(train, test_size=0.2)
print(len(train), 'train examples')
print(len(val), 'validation examples')
print(len(test), 'test examples')

#Create an input pipeline using tf.data
# A utility method to create a tf.data dataset from a Pandas Dataframe
def df_to_dataset(dataframe, shuffle=True, batch_size=32):
  dataframe = dataframe.copy()
  labels = dataframe.pop('insiderthreat')
  ds = tf.data.Dataset.from_tensor_slices((dict(dataframe), labels))
  if shuffle:
    ds = ds.shuffle(buffer_size=len(dataframe))
  ds = ds.batch(batch_size)
  return ds


#choose columns needed for calculations (features)
feature_columns = []
for header in ["vector", "date", "user", "source", "action"]:
    feature_columns.append(feature_column.numeric_column(header))

#create feature layer
feature_layer = tf.keras.layers.DenseFeatures(feature_columns)

#set batch size pipeline
batch_size = 32
train_ds = df_to_dataset(train, batch_size=batch_size)
val_ds = df_to_dataset(val, shuffle=False, batch_size=batch_size)
test_ds = df_to_dataset(test, shuffle=False, batch_size=batch_size)

#create compile and train model
model = tf.keras.Sequential([
  feature_layer,
  layers.Dense(128, activation='relu'),
  layers.Dense(128, activation='relu'),
  layers.Dense(1)
])

model.compile(optimizer='adam',
              loss=tf.keras.losses.BinaryCrossentropy(from_logits=True),
              metrics=['accuracy'])

model.fit(train_ds,
          validation_data=val_ds,
          epochs=5)

loss, accuracy = model.evaluate(test_ds)
print("Accuracy", accuracy)

The accuracy is between 50% and 60%, so I want to transform this neural network into a convolutional neural network in order to increase the accuracy. I think the key is to add more layers here:

#create compile and train model
model = tf.keras.Sequential([
  feature_layer,
  layers.Dense(128, activation='relu'),
  layers.Dense(128, activation='relu'),
  layers.Dense(1)
])

The question is: how do I transform this model into a convolutional neural network?

2 Answers

You just replace the dense hidden layers with convolution and pooling layers. One caveat: Conv1D expects a 3-D input of shape (batch, steps, channels), while the DenseFeatures layer outputs a flat (batch, features) tensor, so a Reshape layer is needed in between. The filter and kernel values below are only example values:

#create compile and train model
model = tf.keras.Sequential([
    feature_layer,
    layers.Reshape((-1, 1)),   #(batch, features) -> (batch, steps, channels)
    layers.Conv1D(filters=32, kernel_size=3, activation='relu'),
    layers.MaxPooling1D(pool_size=2),
    layers.Flatten(),
    layers.Dense(1)
])

You can use the tensorflow documentation on CNNs to piece together what arguments you should use and how to further customize your application.
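For example, here is a minimal sketch of how this model slots into the rest of your script, reusing the train_ds, val_ds, and test_ds pipelines and the same compile settings as in the question (the epoch count is illustrative, not tuned):

#same loss/metrics as the original script; Dense(1) still outputs logits,
#so from_logits=True is kept
model.compile(optimizer='adam',
              loss=tf.keras.losses.BinaryCrossentropy(from_logits=True),
              metrics=['accuracy'])

model.fit(train_ds, validation_data=val_ds, epochs=5)

loss, accuracy = model.evaluate(test_ds)
print("Accuracy", accuracy)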

Philip Ciunkiewicz

A CNN consists of:

  • Convolutional layers
  • Fully connected layers (the typical neural network, built from Dense layers).

So, the architecture of a simple CNN looks like this:

[Image: diagram of a simple CNN architecture: convolutional layers, then a Flatten layer, then fully connected layers]

After the convolutional layers are flattened, you can start adding Dense layers, just like you are doing in your code.

The CNN from the image above looks like this:

model = tf.keras.models.Sequential()
model.add(tf.keras.layers.InputLayer(input_shape=(28, 28, 1)))

# Adding Convolutional Layers here:
model.add(tf.keras.layers.Conv2D(filters=32, kernel_size=3, strides=1, activation='relu'))
model.add(tf.keras.layers.Conv2D(filters=32, kernel_size=2, strides=2, activation='relu'))
model.add(tf.keras.layers.Conv2D(filters=64, kernel_size=3, strides=1, activation='relu'))
model.add(tf.keras.layers.Conv2D(filters=64, kernel_size=2, strides=2, activation='relu'))

model.add(tf.keras.layers.Flatten())

# This is where you add the Fully-Connected layers.
model.add(tf.keras.layers.Dense(units=128, activation='relu'))
# Softmax for the 10 mutually exclusive output classes.
model.add(tf.keras.layers.Dense(units=10, activation='softmax'))

model.summary()
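And a minimal sketch of how this model could then be compiled and trained; loading MNIST via tf.keras.datasets is an assumption for illustration, since it matches the (28, 28, 1) input shape, and the hyperparameters are not tuned:

# Sparse integer labels pair with sparse_categorical_crossentropy.
model.compile(optimizer='adam',
              loss='sparse_categorical_crossentropy',
              metrics=['accuracy'])

# Illustrative data: MNIST digits match the (28, 28, 1) input above.
(x_train, y_train), _ = tf.keras.datasets.mnist.load_data()
x_train = x_train[..., None] / 255.0   # add channel dim, scale to [0, 1]

model.fit(x_train, y_train, epochs=5, validation_split=0.1)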

I found this great repository, which contains an example of building a simple, easy-to-understand, but powerful CNN with TensorFlow: https://github.com/kochlisGit/Tensorflow-State-of-the-Art-Neural-Networks/tree/master/MNIST-State-of-the-Art

LazyAnalyst