I have a good theoretical grounding in machine learning and neural networks, but little hands-on experience with the code. I'm using TensorFlow with Python to implement a multi-layer perceptron (MLP) that separates fraudulent transactions from normal ones in a labeled credit-card transaction dataset. I started from an MLP sample built with TensorFlow and adapted it to my data, but it fails to recognize a single fraudulent transaction, even after 1000 epochs. I'm confused, and I can't tell whether the problem lies in the training or in the output representation. (I print both accuracy and precision at the end; I care far more about precision, because the dataset is highly imbalanced, and accuracy comes out around 99% in such cases no matter what.)
I'd greatly appreciate any help, whether with the concept or with a problem in the code itself.
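For context on why I look at precision rather than accuracy, here is a minimal self-contained sketch (toy numbers, not my dataset) of how a do-nothing classifier still scores ~99% accuracy on imbalanced labels:

import numpy as np
from sklearn.metrics import accuracy_score, precision_score, recall_score

# Toy imbalanced labels: 990 normal (0), 10 fraud (1) -- hypothetical numbers.
y_true = np.array([0] * 990 + [1] * 10)
# A useless classifier that flags nothing as fraud:
y_pred = np.zeros_like(y_true)

print(accuracy_score(y_true, y_pred))   # 0.99 -- looks great, means nothing
print(precision_score(y_true, y_pred))  # 0.0 (sklearn warns: no predicted positives)
print(recall_score(y_true, y_pred))     # 0.0 -- not a single fraud caught

Below is my code and its corresponding output.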
import sys
import os.path
import pandas as pd
import numpy as np
import tensorflow as tf
import sklearn as sk
import sklearn.metrics  # needed so the sk.metrics.* calls below resolve
from datetime import datetime
from sklearn.model_selection import train_test_split
# Choose device from cmd line. Options: gpu or cpu
device_name = sys.argv[1]
if device_name == 'gpu':
    device_name = '/gpu:0'
else:
    device_name = '/cpu:0'
# NOTE: tf.device() on its own is a no-op; it only pins ops to a device when
# used as a context manager (with tf.device(device_name): ...).
tf.device(device_name)
print('Device used:' + str(device_name))
datasetCSVPath = sys.argv[2]
parentPath = os.path.abspath(os.path.join(datasetCSVPath, os.pardir))
print(datasetCSVPath)
print(parentPath)
print('Reading dataset...')
df = pd.read_csv(datasetCSVPath, index_col=False)
print('Adding Class inverse column...')
df = df.assign(classInverse = 1.0 - df['Class'])  # vectorized; iterrows() would be very slow here
print('')
print('Raw Data: \n------------------------------ \n')
print(df.head())
print('')
print('Whole dataset size: ' + str(len(df)))
print('')
print('Data Description\n------------------------------ \n')
print(df.describe())
print('')
feature_cols = ['Time'] + ['V%d' % i for i in range(1, 29)] + ['Amount']
X = pd.DataFrame(df, columns=feature_cols)
print('Features Vector:\n------------------------------ \n')
print(X.head())
print('')
Y = pd.DataFrame(df,columns=['Class','classInverse'])
print('Class Vector:\n------------------------------ \n')
print(Y.head())
print('')
print('Splitting the feature and class vectors into training and test sets with a 70/30 ratio...')
X_train, X_test, Y_train, Y_test = train_test_split(X,Y, test_size=0.3, shuffle=False)
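# Note: shuffle=False keeps the chronological order, so the test set is simply
# the final 30% of transactions.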
print('Train features set size: ' + str(len(X_train)))
print('Train Class size: ' + str(len(Y_train)))
print('Train fraud frequency: ')
print(Y_train['Class'].value_counts())
print('')
print('Test features size: ' + str(len(X_test)))
print('Test Class size: ' + str(len(Y_test)))
print('Test fraud frequency: ')
print(Y_test['Class'].value_counts())
# Parameters
learning_rate = 0.001
training_epochs = 100
batch_size = 100
display_step = 1
# Network Parameters
n_hidden_1 = 10 # 1st hidden layer size
n_hidden_2 = 10 # 2nd hidden layer size
n_input = 30 # Number of input features (Time, V1..V28, Amount)
n_classes = 2 # Number of classes to predict (fraud / normal)
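# With n_classes = 2 the label rows are [Class, classInverse] (built above),
# so argmax index 0 corresponds to fraud and index 1 to normal transactions.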
# tf Graph input
# x = tf.placeholder(tf.float32, [101, n_input])
# y = tf.placeholder(tf.float32)
x = tf.placeholder(tf.float32, [None, n_input])
y = tf.placeholder(tf.float32, [None, n_classes])
# Create model
def multilayer_perceptron(x, weights, biases):
    # Hidden layer with ReLU activation
    layer_1 = tf.add(tf.matmul(x, weights['h1']), biases['b1'])
    layer_1 = tf.nn.relu(layer_1)
    # Hidden layer with ReLU activation
    layer_2 = tf.add(tf.matmul(layer_1, weights['h2']), biases['b2'])
    layer_2 = tf.nn.relu(layer_2)
    # Output layer with linear activation
    out_layer = tf.matmul(layer_2, weights['out']) + biases['out']
    return out_layer
# Store layers weight & bias
weights = {
    'h1': tf.Variable(tf.random_normal([n_input, n_hidden_1])),
    'h2': tf.Variable(tf.random_normal([n_hidden_1, n_hidden_2])),
    'out': tf.Variable(tf.random_normal([n_hidden_2, n_classes]))
}
biases = {
    'b1': tf.Variable(tf.random_normal([n_hidden_1])),
    'b2': tf.Variable(tf.random_normal([n_hidden_2])),
    'out': tf.Variable(tf.random_normal([n_classes]))
}
# Construct model
pred = multilayer_perceptron(x, weights, biases)
# Define loss and optimizer
cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits_v2(logits=pred, labels=y))
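# softmax_cross_entropy_with_logits_v2 expects unscaled logits and label rows
# that sum to 1; the [Class, classInverse] pairs satisfy that.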
optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate).minimize(cost)
# Initializing the variables
init = tf.global_variables_initializer()
# Build a session; for GPU runs, allow_growth stops TensorFlow from grabbing
# all GPU memory up front. For CPU-only use, a plain tf.Session() works too.
def get_session():
    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    return tf.Session(config=config)
print('')
startTime = datetime.now()
print("Started at: ", startTime)
print('')
saver = tf.train.Saver()
# Launch the graph
with get_session() as sess:
    # TF checkpoints are several files (.index, .data-*), so a plain isfile()
    # check on "model.ckpt" never matches; checkpoint_exists handles this.
    if tf.train.checkpoint_exists(parentPath + "/model.ckpt"):
        saver.restore(sess, parentPath + "/model.ckpt")
    else:
        sess.run(init)
    # Training cycle
    for epoch in range(training_epochs):
        avg_cost = 0.
        total_batch = int(len(X_train)/batch_size)
        X_batches = np.array_split(X_train, total_batch)
        Y_batches = np.array_split(Y_train, total_batch)
        # Loop over all batches
        for i in range(total_batch):
            batch_x, batch_y = X_batches[i], Y_batches[i]
            # Run optimization op (backprop) and cost op (to get loss value)
            _, c = sess.run([optimizer, cost], feed_dict={x: batch_x,
                                                          y: batch_y})
            # Compute average loss
            avg_cost += c / total_batch
        # Display logs per epoch step
        if epoch % display_step == 0:
            print("Epoch:", '%04d' % (epoch+1), "cost=", "{:.9f}".format(avg_cost))
    save_path = saver.save(sess, parentPath + "/model.ckpt")
    print('')
    print('Optimization Finished:\n------------------------------ \n')
    print("Model saved in path: %s" % save_path)
    # Test model
    correct_prediction = tf.equal(tf.argmax(pred, 1), tf.argmax(y, 1))
    # Calculate accuracy
    accuracy = tf.reduce_mean(tf.cast(correct_prediction, "float"))
    print("Accuracy:", accuracy.eval({x: X_test, y: Y_test}))
    # Calculate Precision
    y_p = tf.argmax(pred, 1)
    val_accuracy, y_pred = sess.run([accuracy, y_p], feed_dict={x: X_test, y: Y_test})
    y_true = np.array(Y_test['classInverse'], dtype=np.int)
    np.savetxt(parentPath + '/y_true.txt', y_true, fmt='%d')
    np.savetxt(parentPath + '/y_pred.txt', y_pred, fmt='%d')
    print('y_predicted length:', y_pred.size)
    df_pred = pd.DataFrame(data=y_pred)
    print(df_pred[0].value_counts())
    print('')
    print('y_true labels length:', y_true.size)
    df_true = pd.DataFrame(data=y_true)
    print(df_true[0].value_counts())
    # print("Precision,0,macro:", sk.metrics.precision_score(y_true, y_pred, pos_label=0, average='macro'))
    # NOTE: recall_score defaults to pos_label=1, which here is classInverse,
    # i.e. the *normal* class -- not the fraud class.
    print("Recall", sk.metrics.recall_score(y_true, y_pred))
    print("\nconfusion_matrix:\n")
    print(sk.metrics.confusion_matrix(y_true, y_pred))
    print('')
    print("Time taken:", datetime.now() - startTime)
    global result
    result = tf.argmax(pred, 1).eval({x: X_test, y: Y_test})
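One thing I'm unsure about in the metrics above: since fraud sits at argmax index 0, sklearn's default pos_label=1 scores the normal class. A standalone sketch (toy arrays, not my data) of the difference:

import numpy as np
from sklearn.metrics import precision_score, recall_score, confusion_matrix

# Hypothetical stand-ins mirroring the run below: 0 = fraud, 1 = normal,
# matching the [Class, classInverse] column order fed through argmax.
y_true = np.array([1, 1, 1, 0, 1, 0])
y_pred = np.ones_like(y_true)  # a model that predicts "normal" for everything

print(recall_score(y_true, y_pred))                  # 1.0 -- normal-class recall
print(recall_score(y_true, y_pred, pos_label=0))     # 0.0 -- fraud recall
print(precision_score(y_true, y_pred, pos_label=0))  # 0.0 (warns: no 0s predicted)
print(confusion_matrix(y_true, y_pred))              # [[0 2]
                                                     #  [0 4]]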
Output:
E:\Karbala\CreditCard>python Trained.py cpu CC_All.csv
Device used:/cpu:0
CC_All.csv
E:\Karbala\CreditCard
Reading dataset...
Adding Class inverse column...
Raw Data:
------------------------------
Unnamed: 0 Time V1 V2 V3 V4 V5 \
0 1 0.0 -1.359807 -0.072781 2.536347 1.378155 -0.338321
1 2 0.0 1.191857 0.266151 0.166480 0.448154 0.060018
2 3 1.0 -1.358354 -1.340163 1.773209 0.379780 -0.503198
3 4 1.0 -0.966272 -0.185226 1.792993 -0.863291 -0.010309
4 5 2.0 -1.158233 0.877737 1.548718 0.403034 -0.407193
V6 V7 V8 ... V22 V23 V24 \
0 0.462388 0.239599 0.098698 ... 0.277838 -0.110474 0.066928
1 -0.082361 -0.078803 0.085102 ... -0.638672 0.101288 -0.339846
2 1.800499 0.791461 0.247676 ... 0.771679 0.909412 -0.689281
3 1.247203 0.237609 0.377436 ... 0.005274 -0.190321 -1.175575
4 0.095921 0.592941 -0.270533 ... 0.798278 -0.137458 0.141267
V25 V26 V27 V28 Amount Class classInverse
0 0.128539 -0.189115 0.133558 -0.021053 149.62 0 1.0
1 0.167170 0.125895 -0.008983 0.014724 2.69 0 1.0
2 -0.327642 -0.139097 -0.055353 -0.059752 378.66 0 1.0
3 0.647376 -0.221929 0.062723 0.061458 123.50 0 1.0
4 -0.206010 0.502292 0.219422 0.215153 69.99 0 1.0
[5 rows x 33 columns]
Whole dataset size: 284807
Data Description
------------------------------
Unnamed: 0 Time V1 V2 V3 \
count 284807.000000 284807.000000 2.848070e+05 2.848070e+05 2.848070e+05
mean 142404.000000 94813.859575 1.165980e-15 3.416908e-16 -1.373150e-15
std 82216.843396 47488.145955 1.958696e+00 1.651309e+00 1.516255e+00
min 1.000000 0.000000 -5.640751e+01 -7.271573e+01 -4.832559e+01
25% 71202.500000 54201.500000 -9.203734e-01 -5.985499e-01 -8.903648e-01
50% 142404.000000 84692.000000 1.810880e-02 6.548556e-02 1.798463e-01
75% 213605.500000 139320.500000 1.315642e+00 8.037239e-01 1.027196e+00
max 284807.000000 172792.000000 2.454930e+00 2.205773e+01 9.382558e+00
V4 V5 V6 V7 V8 \
count 2.848070e+05 2.848070e+05 2.848070e+05 2.848070e+05 2.848070e+05
mean 2.086869e-15 9.604066e-16 1.490107e-15 -5.556467e-16 1.177556e-16
std 1.415869e+00 1.380247e+00 1.332271e+00 1.237094e+00 1.194353e+00
min -5.683171e+00 -1.137433e+02 -2.616051e+01 -4.355724e+01 -7.321672e+01
25% -8.486401e-01 -6.915971e-01 -7.682956e-01 -5.540759e-01 -2.086297e-01
50% -1.984653e-02 -5.433583e-02 -2.741871e-01 4.010308e-02 2.235804e-02
75% 7.433413e-01 6.119264e-01 3.985649e-01 5.704361e-01 3.273459e-01
max 1.687534e+01 3.480167e+01 7.330163e+01 1.205895e+02 2.000721e+01
... V22 V23 V24 V25 \
count ... 2.848070e+05 2.848070e+05 2.848070e+05 2.848070e+05
mean ... -3.444850e-16 2.578648e-16 4.471968e-15 5.340915e-16
std ... 7.257016e-01 6.244603e-01 6.056471e-01 5.212781e-01
min ... -1.093314e+01 -4.480774e+01 -2.836627e+00 -1.029540e+01
25% ... -5.423504e-01 -1.618463e-01 -3.545861e-01 -3.171451e-01
50% ... 6.781943e-03 -1.119293e-02 4.097606e-02 1.659350e-02
75% ... 5.285536e-01 1.476421e-01 4.395266e-01 3.507156e-01
max ... 1.050309e+01 2.252841e+01 4.584549e+00 7.519589e+00
V26 V27 V28 Amount Class \
count 2.848070e+05 2.848070e+05 2.848070e+05 284807.000000 284807.000000
mean 1.687098e-15 -3.666453e-16 -1.220404e-16 88.349619 0.001727
std 4.822270e-01 4.036325e-01 3.300833e-01 250.120109 0.041527
min -2.604551e+00 -2.256568e+01 -1.543008e+01 0.000000 0.000000
25% -3.269839e-01 -7.083953e-02 -5.295979e-02 5.600000 0.000000
50% -5.213911e-02 1.342146e-03 1.124383e-02 22.000000 0.000000
75% 2.409522e-01 9.104512e-02 7.827995e-02 77.165000 0.000000
max 3.517346e+00 3.161220e+01 3.384781e+01 25691.160000 1.000000
classInverse
count 284807.000000
mean 0.998273
std 0.041527
min 0.000000
25% 1.000000
50% 1.000000
75% 1.000000
max 1.000000
[8 rows x 33 columns]
Features Vector:
------------------------------
Time V1 V2 V3 V4 V5 V6 V7 \
0 0.0 -1.359807 -0.072781 2.536347 1.378155 -0.338321 0.462388 0.239599
1 0.0 1.191857 0.266151 0.166480 0.448154 0.060018 -0.082361 -0.078803
2 1.0 -1.358354 -1.340163 1.773209 0.379780 -0.503198 1.800499 0.791461
3 1.0 -0.966272 -0.185226 1.792993 -0.863291 -0.010309 1.247203 0.237609
4 2.0 -1.158233 0.877737 1.548718 0.403034 -0.407193 0.095921 0.592941
V8 V9 ... V20 V21 V22 V23 \
0 0.098698 0.363787 ... 0.251412 -0.018307 0.277838 -0.110474
1 0.085102 -0.255425 ... -0.069083 -0.225775 -0.638672 0.101288
2 0.247676 -1.514654 ... 0.524980 0.247998 0.771679 0.909412
3 0.377436 -1.387024 ... -0.208038 -0.108300 0.005274 -0.190321
4 -0.270533 0.817739 ... 0.408542 -0.009431 0.798278 -0.137458
V24 V25 V26 V27 V28 Amount
0 0.066928 0.128539 -0.189115 0.133558 -0.021053 149.62
1 -0.339846 0.167170 0.125895 -0.008983 0.014724 2.69
2 -0.689281 -0.327642 -0.139097 -0.055353 -0.059752 378.66
3 -1.175575 0.647376 -0.221929 0.062723 0.061458 123.50
4 0.141267 -0.206010 0.502292 0.219422 0.215153 69.99
[5 rows x 30 columns]
Class Vector:
------------------------------
Class classInverse
0 0 1.0
1 0 1.0
2 0 1.0
3 0 1.0
4 0 1.0
Splitting Features and Class vectors into training and validation sets by 70/30 ratio...
Train features set size: 199364
Train Class size: 199364
Train fraud frequency:
0 198980
1 384
Name: Class, dtype: int64
Test features size: 85443
Test Class size: 85443
Test fraud frequency:
0 85335
1 108
Name: Class, dtype: int64
Started at: 2018-05-23 16:14:59.137501
2018-05-23 16:14:59.149501: I T:\src\github\tensorflow\tensorflow\core\platform\cpu_feature_guard.cc:140] Your CPU supports instructions that this TensorFlow binary was not compiled to use: AVX2
Epoch: 0001 cost= 15.756240782
Epoch: 0002 cost= 13.016147260
Epoch: 0003 cost= 12.134617616
Epoch: 0004 cost= 12.069568292
Epoch: 0005 cost= 11.135412584
Epoch: 0006 cost= 9.884679892
Epoch: 0007 cost= 8.383180135
Epoch: 0008 cost= 6.902679826
Epoch: 0009 cost= 6.774688792
Epoch: 0010 cost= 4.422019803
Epoch: 0011 cost= 1.476535059
Epoch: 0012 cost= 0.404204394
Epoch: 0013 cost= 0.226037890
Epoch: 0014 cost= 0.039981139
Epoch: 0015 cost= 0.026224809
Epoch: 0016 cost= 0.022262489
Epoch: 0017 cost= 0.021467826
Epoch: 0018 cost= 0.020389822
Epoch: 0019 cost= 0.019285675
Epoch: 0020 cost= 0.020210635
Epoch: 0021 cost= 0.019544568
Epoch: 0022 cost= 0.021737715
Epoch: 0023 cost= 0.022598267
Epoch: 0024 cost= 0.016776631
Epoch: 0025 cost= 0.016166327
Epoch: 0026 cost= 0.015632419
Epoch: 0027 cost= 0.019071494
Epoch: 0028 cost= 0.014936624
Epoch: 0029 cost= 0.016047536
Epoch: 0030 cost= 0.050594513
Epoch: 0031 cost= 0.016173221
Epoch: 0032 cost= 0.040230576
Epoch: 0033 cost= 0.015785579
Epoch: 0034 cost= 0.016168447
Epoch: 0035 cost= 0.016222776
Epoch: 0036 cost= 0.015788947
Epoch: 0037 cost= 0.015013909
Epoch: 0038 cost= 0.015142855
Epoch: 0039 cost= 0.015545011
Epoch: 0040 cost= 0.014925630
Epoch: 0041 cost= 0.023263962
Epoch: 0042 cost= 0.014892573
Epoch: 0043 cost= 0.015483862
Epoch: 0044 cost= 0.014753576
Epoch: 0045 cost= 0.015170122
Epoch: 0046 cost= 0.014410569
Epoch: 0047 cost= 0.014892267
Epoch: 0048 cost= 0.014679266
Epoch: 0049 cost= 0.015411067
Epoch: 0050 cost= 0.015130163
Epoch: 0051 cost= 0.018912251
Epoch: 0052 cost= 0.031489589
Epoch: 0053 cost= 0.016441514
Epoch: 0054 cost= 0.015934747
Epoch: 0055 cost= 0.015185592
Epoch: 0056 cost= 0.014962219
Epoch: 0057 cost= 0.014789902
Epoch: 0058 cost= 0.014183648
Epoch: 0059 cost= 0.014771792
Epoch: 0060 cost= 0.013984746
Epoch: 0061 cost= 0.013986725
Epoch: 0062 cost= 0.013990105
Epoch: 0063 cost= 0.013991248
Epoch: 0064 cost= 0.013991301
Epoch: 0065 cost= 0.014417812
Epoch: 0066 cost= 0.015127911
Epoch: 0067 cost= 0.014091956
Epoch: 0068 cost= 0.014594984
Epoch: 0069 cost= 0.014839828
Epoch: 0070 cost= 0.013982915
Epoch: 0071 cost= 0.013986001
Epoch: 0072 cost= 0.013989394
Epoch: 0073 cost= 0.026697119
Epoch: 0074 cost= 0.060450242
Epoch: 0075 cost= 0.015880147
Epoch: 0076 cost= 0.015730870
Epoch: 0077 cost= 0.015402611
Epoch: 0078 cost= 0.014916952
Epoch: 0079 cost= 0.014984660
Epoch: 0080 cost= 0.015820000
Epoch: 0081 cost= 0.014817713
Epoch: 0082 cost= 0.014412518
Epoch: 0083 cost= 0.015078428
Epoch: 0084 cost= 0.014607639
Epoch: 0085 cost= 0.014731142
Epoch: 0086 cost= 0.014768947
Epoch: 0087 cost= 0.014705300
Epoch: 0088 cost= 0.042353919
Epoch: 0089 cost= 0.014150210
Epoch: 0090 cost= 0.014066173
Epoch: 0091 cost= 0.014510406
Epoch: 0092 cost= 0.014101755
Epoch: 0093 cost= 0.014870159
Epoch: 0094 cost= 0.013983156
Epoch: 0095 cost= 0.013988206
Epoch: 0096 cost= 0.013990270
Epoch: 0097 cost= 0.013990550
Epoch: 0098 cost= 0.013990402
Epoch: 0099 cost= 0.021223181
Epoch: 0100 cost= 0.014299056
Optimization Finished:
------------------------------
Model saved in path: E:\Karbala\CreditCard/model.ckpt
Accuracy: 0.998736
y_predicted length: 85443
1 85443
Name: 0, dtype: int64
y_true labels length: 85443
1 85335
0 108
Name: 0, dtype: int64
Recall 1.0
confusion_matrix:
[[ 0 108]
[ 0 85335]]
Time taken: 0:02:10.352000