I have 3 very specific questions:
1. I am training a regression model on a small dataset (about 300 samples). From the 4th epoch onward the validation accuracy is 1.00, and the training accuracy ends at 0.9957, yet the loss is still large (around 33). I don't understand how the accuracy can be that high while the loss is also high. I am using the Adam optimizer with mean_absolute_error as the loss. (I added a manual MAE check after the training code at the end.)
2. When I scale the inputs, some values become negative even though none of my original values are negative. Is that reasonable? I also noticed that values which were the same (or very close) before scaling are no longer the same afterwards. (See the quick check right after the scaled output below.)
3. When I predict, I should scale the new data in the same way I scaled the training inputs, right? But how can I make sure of that, since, as I understand it, every value is scaled relative to all the other rows? (A sketch of what I think I should do follows the preprocessing code below.)
import numpy as np
import tensorflow as tf
from sklearn import preprocessing

array = SData.to_numpy()
array
array([[  6.25    ,   6.25    ,   6.25    , ...,   8.11521 ,  13.525349, 744.421033],
       [  6.25    ,   6.25    ,   6.25    , ...,   8.989118,  14.981864, 744.484697],
       [  6.25    ,   6.25    ,   6.25    , ...,   8.931293,  14.885489, 744.484629],
       ...,
       [  6.160831,   8.157965,   9.184461, ...,   6.170488,  10.284147, 938.598232],
       [  6.160831,   8.157965,   9.184461, ...,  12.417958,  20.696597, 938.291951],
       [  6.160831,   8.157965,   9.184461, ...,   6.007829,  10.013048, 938.103987]])
unscaled_inputs = array[:, :9]
targets = array[:, 9:]
unscaled_inputs

array([[ 6.25    ,  6.25    ,  6.25    , ...,  6.25    ,  6.25    ,  0.      ],
       [ 6.25    ,  6.25    ,  6.25    , ...,  6.25    ,  6.25    , 15.      ],
       [ 6.25    ,  6.25    ,  6.25    , ...,  6.25    ,  6.25    , 30.      ],
       ...,
       [ 6.160831,  8.157965,  9.184461, ...,  8.640023,  8.996907, 45.      ],
       [ 6.160831,  8.157965,  9.184461, ...,  8.640023,  8.996907, 60.      ],
       [ 6.160831,  8.157965,  9.184461, ...,  8.640023,  8.996907, 75.      ]])
scaled_inputs = preprocessing.scale(unscaled_inputs)
scaled_inputs
array([[ 0.64061068, -1.55811375, -1.96681483, ..., -0.96073795, -1.709721  , -1.46385011],
       [ 0.64061068, -1.55811375, -1.96681483, ..., -0.96073795, -1.709721  , -0.87831007],
       [ 0.64061068, -1.55811375, -1.96681483, ..., -0.96073795, -1.709721  , -0.29277002],
       ...,
       [ 0.35930701,  1.56499191,  1.66411229, ...,  0.76559569,  0.84111767,  0.29277002],
       [ 0.35930701,  1.56499191,  1.66411229, ...,  0.76559569,  0.84111767,  0.87831007],
       [ 0.35930701,  1.56499191,  1.66411229, ...,  0.76559569,  0.84111767,  1.46385011]])
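If I understand it correctly, preprocessing.scale standardizes each column to zero mean and unit variance, which is what I assume turns positive values negative and makes equal raw values end up different once they sit in different columns. This is a quick check I could run to confirm that, using the arrays above:

# standardize each column by hand: (x - column mean) / column std
manual_scaled = (unscaled_inputs - unscaled_inputs.mean(axis=0)) / unscaled_inputs.std(axis=0)

# should print True if preprocessing.scale is plain per-column standardization
print(np.allclose(manual_scaled, scaled_inputs))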
shuffled_indices = np.arange(scaled_inputs.shape[0])
np.random.shuffle(shuffled_indices)
shuffled_indices
array([257, 191, 37, 128, 72, 247, 161, 252, 140, 264, 258, 255, 278, 148, 231, 186, 31, 83, 230, 175, 121, 156, 151, 256, 192, 200, 66, 59, 199, 9, 223, 157, 214, 73, 92, 61, 60, 139, 47, 280, 202, 104, 110, 22, 39, 197, 81, 225, 69, 94, 284, 18, 113, 187, 267, 173, 91, 90, 111, 180, 144, 20, 287, 153, 131, 103, 268, 172, 260, 193, 141, 224, 179, 87, 106, 96, 274, 85, 89, 105, 84, 75, 15, 160, 52, 24, 126, 16, 235, 124, 44, 40, 249, 34, 63, 219, 11, 198, 149, 118, 277, 222, 238, 209, 127, 272, 184, 107, 5, 146, 169, 57, 116, 170, 82, 23, 207, 174, 188, 88, 206, 7, 36, 226, 86, 150, 276, 163, 62, 12, 253, 204, 45, 74, 210, 14, 108, 195, 196, 4, 109, 263, 241, 147, 78, 176, 33, 10, 232, 248, 42, 43, 50, 97, 270, 117, 254, 181, 201, 266, 182, 38, 211, 218, 212, 26, 239, 41, 55, 275, 77, 189, 30, 122, 80, 58, 271, 19, 119, 158, 154, 177, 53, 70, 265, 99, 205, 165, 250, 178, 49, 213, 136, 240, 6, 208, 25, 32, 217, 246, 285, 237, 3, 227, 155, 190, 259, 159, 269, 138, 167, 216, 234, 64, 281, 133, 137, 166, 2, 54, 112, 13, 65, 279, 114, 95, 100, 1, 125, 282, 185, 145, 102, 29, 135, 0, 101, 71, 164, 17, 28, 130, 68, 262, 56, 245, 129, 244, 236, 283, 67, 8, 79, 134, 35, 51, 120, 168, 194, 21, 27, 98, 251, 115, 273, 123, 233, 76, 286, 228, 243, 220, 162, 142, 229, 203, 152, 143, 221, 242, 171, 48, 93, 132, 183, 215, 261, 46])
shuffled_inputs = scaled_inputs[shuffled_indices]
shuffled_targets = targets[shuffled_indices]

# define the number of observations
observations_count = shuffled_inputs.shape[0]

# 80 / 10 / 10 rule
train_count = int(0.8 * observations_count)
validation_count = int(0.1 * observations_count)
test_count = observations_count - train_count - validation_count
train_inputs = shuffled_inputs[:train_count]
train_targets = shuffled_targets[:train_count]
validation_inputs = shuffled_inputs[train_count:train_count + validation_count]
validation_targets = shuffled_targets[train_count:train_count + validation_count]
test_inputs = shuffled_inputs[train_count + validation_count:]
test_targets = shuffled_targets[train_count + validation_count:]

np.savez('Sample_Data_Train', inputs=train_inputs, targets=train_targets)
np.savez('Sample_Data_Validation', inputs=validation_inputs, targets=validation_targets)
np.savez('Sample_Data_Test', inputs=test_inputs, targets=test_targets)
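To make question 3 concrete, this is roughly what I think I should be doing: fit the scaler once on the inputs (ideally only on the training portion), keep the fitted object, and reuse it to transform any new rows before predicting. A minimal sketch of that idea, assuming scikit-learn's StandardScaler; new_samples is just a placeholder for the rows I would want to predict:

from sklearn.preprocessing import StandardScaler

# fit the scaler once and keep it; fit_transform gives the same numbers as
# preprocessing.scale, but the fitted means/stds are stored in the object
scaler = StandardScaler()
scaled_inputs = scaler.fit_transform(unscaled_inputs)

# at prediction time, reuse the SAME fitted scaler on the new rows;
# new_samples is a placeholder for an array with the same 9 input columns
new_samples_scaled = scaler.transform(new_samples)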
npz=np.load(r"C:\Users\dai_k\OneDrive\Desktop\GRASSHOPPERS\Second semester\Thesis\samplenpz\Sample_Data_Train.npz")
Processed_train_inputs=npz['inputs'].astype(np.float)
processed_train_targets=npz['targets'].astype(np.float)
npz1=np.load(r"C:\Users\dai_k\OneDrive\Desktop\GRASSHOPPERS\Second semester\Thesis\samplenpz\Sample_Data_Validation.npz")
processed_validation_inputs=npz1['inputs'].astype(np.float)
processed_validation_targets=npz1['targets'].astype(np.float)
npz2=np.load(r"C:\Users\dai_k\OneDrive\Desktop\GRASSHOPPERS\Second semester\Thesis\samplenpz\Sample_Data_Test.npz")
processed_test_inputs=npz2['inputs'].astype(np.float)
processed_test_targets=npz2['targets'].astype(np.float)
output_size = 8
hidden_layer_size = 100  # START WITH ANY WIDTH - this is a hyperparameter
model = tf.keras.Sequential([
    tf.keras.layers.Dense(hidden_layer_size, activation='relu'),
    tf.keras.layers.Dense(hidden_layer_size, activation='relu'),
    tf.keras.layers.Dense(output_size, activation='relu')
])
model.compile(optimizer='adam', loss='mean_absolute_error', metrics=['accuracy'])
batch_size = 8
max_epochs = 30
early_stopping = tf.keras.callbacks.EarlyStopping()
model.fit(processed_train_inputs,
          processed_train_targets,
          batch_size=batch_size,
          epochs=max_epochs,
          callbacks=[early_stopping],
          validation_data=(processed_validation_inputs, processed_validation_targets),
          verbose=2)
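To relate the loss value back to question 1, this is how I would double-check the validation mean absolute error by hand after training (a minimal sketch using the arrays defined above):

# predictions on the validation set, shape (n_validation, 8)
val_predictions = model.predict(processed_validation_inputs)

# mean absolute error computed directly; this should be close to the reported
# val_loss, and it is expressed in the same units as the targets
manual_val_mae = np.mean(np.abs(val_predictions - processed_validation_targets))
print(manual_val_mae)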