I am trying to define a custom loss function in TensorFlow that penalizes false positives and false negatives, based on the answer from this post. I have to modify the code that calculates specificity and recall quite a bit, because I have a multiclass classification problem and the post addresses only binary classification. I am training with my images stored in ImageDataGenerator objects, in case it matters.
What the loss function does is as follows:
- Convert the logits in y_pred and the one-hot encoded classes in y_true to a sparse numerical vector (e.g. [0, 2, 1, 1]) for each batch (a toy example of this step follows the list).
- Instantiate counters for the true positives, true negatives, false positives, and false negatives (TPx, TNx, FPx, FNx, where x is 0, 1, or 2 depending on the class). The gargantuan if and elif statements basically count each spot in the confusion matrix, because a 3x3 confusion matrix is significantly more complicated than a 2x2 one. It then simply adds up the per-class counts (TP_g, TN_g, FP_g, FN_g) to get the totals.
- Convert the summed counts to TensorFlow tensors (I stole that part from the aforementioned post).
- Calculate specificity and recall, then subtract a weighted sum from 1.0 to return the total loss for the batch.
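For example, here is a toy illustration of the first step with made-up values (not my real data); the conversion is just an argmax over the class axis:

import numpy as np

#hypothetical batch of 4 one-hot labels for my 3 classes
y_true_onehot = np.array([[1, 0, 0],
                          [0, 0, 1],
                          [0, 1, 0],
                          [0, 1, 0]])
print(np.argmax(y_true_onehot, axis=1))  #prints [0 2 1 1]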
Here is the loss function that I've defined:
import numpy as np
import tensorflow
from tensorflow.keras import backend as K

def myLossFcn(y_true, y_pred, recall_weight, spec_weight):
    #benign == 0
    #hyperplastic == 1
    #neoplastic == 2

    y_true = np.argmax(y_true, axis=1)
    y_pred = np.argmax(y_pred, axis=1)
    y_true = tensorflow.cast(y_true, tensorflow.float32)
    y_pred = tensorflow.cast(y_pred, tensorflow.float32)

    print('y_true:', y_true)
    print('y_pred:', y_pred)

    #true positives for all classes
    TP0 = 0
    TP1 = 0
    TP2 = 0
    for i in range(len(y_true)):
        if y_true[i] == 0 and y_pred[i] == 0:
            TP0 += 1  #benign true positive
        elif y_true[i] == 1 and y_pred[i] == 1:
            TP1 += 1  #hyperplastic true positive
        elif y_true[i] == 2 and y_pred[i] == 2:
            TP2 += 1  #neoplastic true positive
    TP_g = TP0 + TP1 + TP2  #num true positives total (per batch)

    #true negatives for all classes
    TN0 = 0
    TN1 = 0
    TN2 = 0
    for i in range(len(y_true)):
        if (y_true[i] == 1 and y_pred[i] == 1) or (y_true[i] == 1 and y_pred[i] == 2) or (y_true[i] == 2 and y_pred[i] == 1) or (y_true[i] == 2 and y_pred[i] == 2):
            TN0 += 1
        elif (y_true[i] == 0 and y_pred[i] == 0) or (y_true[i] == 0 and y_pred[i] == 2) or (y_true[i] == 2 and y_pred[i] == 0) or (y_true[i] == 2 and y_pred[i] == 2):
            TN1 += 1
        elif (y_true[i] == 0 and y_pred[i] == 0) or (y_true[i] == 0 and y_pred[i] == 1) or (y_true[i] == 1 and y_pred[i] == 0) or (y_true[i] == 1 and y_pred[i] == 1):
            TN2 += 1
    TN_g = TN0 + TN1 + TN2

    #false positives for all classes
    FP0 = 0
    FP1 = 0
    FP2 = 0
    for i in range(len(y_true)):
        if (y_true[i] == 0 and y_pred[i] == 1) or (y_true[i] == 0 and y_pred[i] == 2):
            FP0 += 1
        elif (y_true[i] == 1 and y_pred[i] == 0) or (y_true[i] == 1 and y_pred[i] == 2):
            FP1 += 1
        elif (y_true[i] == 0 and y_pred[i] == 2) or (y_true[i] == 1 and y_pred[i] == 2):
            FP2 += 1
    FP_g = FP0 + FP1 + FP2

    #false negatives for all classes
    FN0 = 0
    FN1 = 0
    FN2 = 0
    for i in range(len(y_true)):
        if (y_true[i] == 0 and y_pred[i] == 1) or (y_true[i] == 0 and y_pred[i] == 2):
            FN0 += 1
        elif (y_true[i] == 1 and y_pred[i] == 0) or (y_true[i] == 1 and y_pred[i] == 2):
            FN1 += 1
        elif (y_true[i] == 0 and y_pred[i] == 1) or (y_true[i] == 1 and y_pred[i] == 2):
            FN2 += 1
    FN_g = FN0 + FN1 + FN2

    #Converted as Keras Tensors
    TP_g = K.sum(K.variable(TP_g))
    TN_g = K.sum(K.variable(TN_g))
    FP_g = K.sum(K.variable(FP_g))
    FN_g = K.sum(K.variable(FN_g))

    print(TP_g)
    print(TN_g)
    print(FP_g)
    print(FN_g)

    specificity = TN_g / (TN_g + FP_g + K.epsilon())
    recall = TP_g / (TP_g + FN_g + K.epsilon())
    print('spec:', specificity)
    print('recall:', recall)

    loss = 1.0 - (recall_weight*recall + spec_weight*specificity)
    print('loss:', loss)

    return tensorflow.constant(loss)
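As an aside, I suspect the four counting loops above could be replaced with tf.math.confusion_matrix, although I have not tested whether doing so changes the error. A rough sketch of what I mean (my own untested idea, not from the original post):

#untested alternative to the four counting loops above
cm = tensorflow.math.confusion_matrix(y_true, y_pred, num_classes=3)  #rows = true class, columns = predicted class
TP_per_class = tensorflow.linalg.diag_part(cm)                        #diagonal entries are the true positives
FP_per_class = tensorflow.reduce_sum(cm, axis=0) - TP_per_class       #column sums minus the diagonal
FN_per_class = tensorflow.reduce_sum(cm, axis=1) - TP_per_class       #row sums minus the diagonal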
Following the previous post, I instantiate a function wrapper to pass in the weights for specificity and recall, then start training:
def custom_loss(recall_weight, spec_weight):
    def recall_spec_loss(y_true, y_pred):
        return myLossFcn(y_true, y_pred, recall_weight, spec_weight)
    return recall_spec_loss
model = tensorflow.keras.applications.resnet50.ResNet50(weights=None,
                                                        input_shape=(100, 100, 1),
                                                        pooling='max',
                                                        classes=3)

loss = custom_loss(recall_weight=0.9, spec_weight=0.1)

model.compile(optimizer=hyperparameters['optimizer'],
              loss=loss,
              metrics=['accuracy', tensorflow.keras.metrics.FalseNegatives()],
              run_eagerly=True)

history = model.fit(train_set,
                    epochs=50,
                    callbacks=[model_checkpoint],
                    validation_data=val_set,
                    verbose=2)
When I run my code, I get the following error back:
ValueError: No gradients provided for any variable: [FOR BREVITY I WILL NOT COPY+PASTE ALL OF THE GRADIENT NAMES THAT IT LISTED]
I will also post the output that I receive and the traceback up to that error message:
Found 625 images belonging to 3 classes.
Found 376 images belonging to 3 classes.
Found 252 images belonging to 3 classes.
Epoch 1/50
y_true: tf.Tensor([0. 2. 1. 0.], shape=(4,), dtype=float32)
y_pred: tf.Tensor([0. 0. 0. 0.], shape=(4,), dtype=float32)
tf.Tensor(2.0, shape=(), dtype=float32)
tf.Tensor(4.0, shape=(), dtype=float32)
tf.Tensor(1.0, shape=(), dtype=float32)
tf.Tensor(1.0, shape=(), dtype=float32)
spec: tf.Tensor(0.8, shape=(), dtype=float32)
recall: tf.Tensor(0.6666667, shape=(), dtype=float32)
loss: tf.Tensor(0.32, shape=(), dtype=float32)
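Checking by hand, those counts do reproduce the printed values, so the forward computation appears to do what I intended: specificity = 4 / (4 + 1) = 0.8, recall = 2 / (2 + 1) ≈ 0.667, and loss = 1.0 - (0.9 × 0.667 + 0.1 × 0.8) = 0.32.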
Traceback (most recent call last):
File "/home/d/dsussman/dsherman/endo_git_v2/justin_method.py", line 253, in <module>
verbose=2)
File "/home/d/dsussman/dsherman/.conda/envs/myNewEnv/lib/python3.7/site-packages/tensorflow/python/keras/engine/training.py", line 1178, in fit
tmp_logs = self.train_function(iterator)
File "/home/d/dsussman/dsherman/.conda/envs/myNewEnv/lib/python3.7/site-packages/tensorflow/python/keras/engine/training.py", line 850, in train_function
return step_function(self, iterator)
File "/home/d/dsussman/dsherman/.conda/envs/myNewEnv/lib/python3.7/site-packages/tensorflow/python/keras/engine/training.py", line 840, in step_function
outputs = model.distribute_strategy.run(run_step, args=(data,))
File "/home/d/dsussman/dsherman/.conda/envs/myNewEnv/lib/python3.7/site-packages/tensorflow/python/distribute/distribute_lib.py", line 1285, in run
return self._extended.call_for_each_replica(fn, args=args, kwargs=kwargs)
File "/home/d/dsussman/dsherman/.conda/envs/myNewEnv/lib/python3.7/site-packages/tensorflow/python/distribute/distribute_lib.py", line 2833, in call_for_each_replica
return self._call_for_each_replica(fn, args, kwargs)
File "/home/d/dsussman/dsherman/.conda/envs/myNewEnv/lib/python3.7/site-packages/tensorflow/python/distribute/distribute_lib.py", line 3608, in _call_for_each_replica
return fn(*args, **kwargs)
File "/home/d/dsussman/dsherman/.conda/envs/myNewEnv/lib/python3.7/site-packages/tensorflow/python/autograph/impl/api.py", line 597, in wrapper
return func(*args, **kwargs)
File "/home/d/dsussman/dsherman/.conda/envs/myNewEnv/lib/python3.7/site-packages/tensorflow/python/keras/engine/training.py", line 833, in run_step
outputs = model.train_step(data)
File "/home/d/dsussman/dsherman/.conda/envs/myNewEnv/lib/python3.7/site-packages/tensorflow/python/keras/engine/training.py", line 794, in train_step
self.optimizer.minimize(loss, self.trainable_variables, tape=tape)
File "/home/d/dsussman/dsherman/.conda/envs/myNewEnv/lib/python3.7/site-packages/tensorflow/python/keras/optimizer_v2/optimizer_v2.py", line 530, in minimize
return self.apply_gradients(grads_and_vars, name=name)
File "/home/d/dsussman/dsherman/.conda/envs/myNewEnv/lib/python3.7/site-packages/tensorflow/python/keras/optimizer_v2/optimizer_v2.py", line 630, in apply_gradients
grads_and_vars = optimizer_utils.filter_empty_gradients(grads_and_vars)
File "/home/d/dsussman/dsherman/.conda/envs/myNewEnv/lib/python3.7/site-packages/tensorflow/python/keras/optimizer_v2/utils.py", line 76, in filter_empty_gradients
([v.name for _, v in grads_and_vars],))
ValueError: No gradients provided for any variable:
I have been looking online quite a bit, to no avail. I have ensured that all my variables are tensors, to the best of my ability, as mentioned in this post, and I have looked at this post, but I don't really understand what the solution meant by:
Keep in mind that the python function you write (custom_loss) is called to generate and compile a C function. The compiled function is what is called during training. When your python custom_loss function is called, the arguments are tensor objects that don't have data attached to them. The K.eval call will fail, as will the K.shape call.
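To see whether that idea could be related to my problem, I tried a minimal toy example of my own (not from that post). It does show that once a value is routed through NumPy, the tape has no path back to the variables:

import numpy as np
import tensorflow

x = tensorflow.Variable([0.2, 0.8])
with tensorflow.GradientTape() as tape:
    hard = tensorflow.constant(float(np.argmax(x.numpy())))  #value leaves the graph here
    loss = hard * 2.0
print(tape.gradient(loss, x))  #prints None, i.e. no gradients provided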
I am also not even sure that the second post is relevant, but it is all I could find on the internet. I am hoping the solution is as simple as forgetting to do something really obvious, or changing something easy, but for the life of me I can't figure out what is going wrong.
Any help is greatly appreciated.
EDIT
I have updated my loss function so that all the intermediate values are TensorFlow tensors of dtype float32, and I am still receiving the same error:
def myLossFcn(y_true, y_pred, recall_weight, spec_weight):
    #benign == 0
    #hyperplastic == 1
    #neoplastic == 2

    print('y_true:', y_true)
    print('y_pred:', y_pred)

    tp = tensorflow.keras.metrics.TruePositives()
    tp.update_state(y_pred, y_true)
    TP_g = tp.result()

    tn = tensorflow.metrics.TrueNegatives()
    tn.update_state(y_pred, y_true)
    TN_g = tn.result()

    fp = tensorflow.keras.metrics.FalsePositives()
    fp.update_state(y_pred, y_true)
    FP_g = fp.result()

    fn = tensorflow.keras.metrics.FalseNegatives()
    fn.update_state(y_pred, y_true)
    FN_g = fn.result()

    print(TP_g)
    print(TN_g)
    print(FP_g)
    print(FN_g)

    #Converted as Keras Tensors
    TP_g = K.sum(K.variable(TP_g))
    TN_g = K.sum(K.variable(TN_g))
    FP_g = K.sum(K.variable(FP_g))
    FN_g = K.sum(K.variable(FN_g))

    print(TP_g)
    print(TN_g)
    print(FP_g)
    print(FN_g)

    specificity = TN_g / (TN_g + FP_g + K.epsilon())
    recall = TP_g / (TP_g + FN_g + K.epsilon())
    print('spec:', specificity)
    print('recall:', recall)

    loss = 1.0 - (recall_weight*recall + spec_weight*specificity)
    print('loss:', loss)

    return tensorflow.constant(loss)  #probably not a tensorflow scalar atm
I am printing the metrics twice to see if the K.sum(K.variable(METRIC)) conversion affects anything.
Here is the output:
tf.Tensor(8.0, shape=(), dtype=float32)
tf.Tensor(4.0, shape=(), dtype=float32)
tf.Tensor(0.0, shape=(), dtype=float32)
tf.Tensor(0.0, shape=(), dtype=float32)
tf.Tensor(8.0, shape=(), dtype=float32)
spec: tf.Tensor(0.0, shape=(), dtype=float32)
recall: tf.Tensor(0.33333334, shape=(), dtype=float32)
loss: tf.Tensor(0.7, shape=(), dtype=float32)
Traceback (most recent call last):
File "/home/d/dsussman/dsherman/endo_git_v2/justin_method.py", line 282, in <module>
verbose=2)
File "/home/d/dsussman/dsherman/.conda/envs/myNewEnv/lib/python3.7/site-packages/tensorflow/python/keras/engine/training.py", line 1178, in fit
tmp_logs = self.train_function(iterator)
File "/home/d/dsussman/dsherman/.conda/envs/myNewEnv/lib/python3.7/site-packages/tensorflow/python/keras/engine/training.py", line 850, in train_function
return step_function(self, iterator)
File "/home/d/dsussman/dsherman/.conda/envs/myNewEnv/lib/python3.7/site-packages/tensorflow/python/keras/engine/training.py", line 840, in step_function
outputs = model.distribute_strategy.run(run_step, args=(data,))
File "/home/d/dsussman/dsherman/.conda/envs/myNewEnv/lib/python3.7/site-packages/tensorflow/python/distribute/distribute_lib.py", line 1285, in run
return self._extended.call_for_each_replica(fn, args=args, kwargs=kwargs)
File "/home/d/dsussman/dsherman/.conda/envs/myNewEnv/lib/python3.7/site-packages/tensorflow/python/distribute/distribute_lib.py", line 2833, in call_for_each_replica
return self._call_for_each_replica(fn, args, kwargs)
File "/home/d/dsussman/dsherman/.conda/envs/myNewEnv/lib/python3.7/site-packages/tensorflow/python/distribute/distribute_lib.py", line 3608, in _call_for_each_replica
return fn(*args, **kwargs)
File "/home/d/dsussman/dsherman/.conda/envs/myNewEnv/lib/python3.7/site-packages/tensorflow/python/autograph/impl/api.py", line 597, in wrapper
return func(*args, **kwargs)
File "/home/d/dsussman/dsherman/.conda/envs/myNewEnv/lib/python3.7/site-packages/tensorflow/python/keras/engine/training.py", line 833, in run_step
outputs = model.train_step(data)
File "/home/d/dsussman/dsherman/.conda/envs/myNewEnv/lib/python3.7/site-packages/tensorflow/python/keras/engine/training.py", line 794, in train_step
self.optimizer.minimize(loss, self.trainable_variables, tape=tape)
File "/home/d/dsussman/dsherman/.conda/envs/myNewEnv/lib/python3.7/site-packages/tensorflow/python/keras/optimizer_v2/optimizer_v2.py", line 530, in minimize
return self.apply_gradients(grads_and_vars, name=name)
File "/home/d/dsussman/dsherman/.conda/envs/myNewEnv/lib/python3.7/site-packages/tensorflow/python/keras/optimizer_v2/optimizer_v2.py", line 630, in apply_gradients
grads_and_vars = optimizer_utils.filter_empty_gradients(grads_and_vars)
File "/home/d/dsussman/dsherman/.conda/envs/myNewEnv/lib/python3.7/site-packages/tensorflow/python/keras/optimizer_v2/utils.py", line 76, in filter_empty_gradients
([v.name for _, v in grads_and_vars],))
ValueError: No gradients provided for any variable: