I followed a guide to build a facial recognition app. Everything runs fine except the model training step, where the code runs into a problem. Any suggestions?
@tf.function
def train_step(batch):
    """Run one optimisation step of the siamese model on a single batch.

    Args:
        batch: Indexable batch. ``batch[:2]`` (anchor and positive/negative
            image) is passed to the model and ``batch[2]`` is used as the
            label.

    Returns:
        The loss tensor produced by ``binary_cross_loss`` for this batch.
    """
    # A plain ``print`` inside a ``tf.function`` only runs while the function
    # is being traced, so it shows a symbolic ``Tensor(...)`` instead of a
    # value. ``tf.print`` is executed as part of the graph on every call.
    # The optimizer's own step counter replaces the old local counter, which
    # was reset to 0 on each call.
    # NOTE(review): assumes ``opt`` is a Keras optimizer exposing
    # ``iterations`` -- confirm against where ``opt`` is created.
    tf.print(
        "\n\n----------------------- train_num -----------------------\n",
        opt.iterations,
    )

    # Record all of our operations
    with tf.GradientTape() as tape:
        # Get anchor and positive/negative image
        X = batch[:2]
        # Get label
        y = batch[2]

        # Forward pass
        yhat = siamese_model(X, training=True)
        # Calculate loss
        loss = binary_cross_loss(y, yhat)

    # Calculate gradients
    grad = tape.gradient(loss, siamese_model.trainable_variables)

    # Calculate updated weights and apply to siamese model
    opt.apply_gradients(zip(grad, siamese_model.trainable_variables))

    tf.print("\n--------------------- Loss ---------------------\n", loss)

    # Return loss
    return loss
###### Training loop ######
print("Num GPUs Available: ", len(tf.config.list_physical_devices('GPU')))


def train(train_data, EPOCHS):
    """Train for ``EPOCHS`` epochs, calling ``train_step`` on every batch.

    Args:
        train_data: Sized iterable of batches; each batch is passed to
            ``train_step`` unchanged.
        EPOCHS: Number of epochs to run (epochs are numbered from 1).

    A checkpoint is saved after every 10th epoch.
    """
    # Loop through epochs
    for epoch in range(1, EPOCHS + 1):
        print(f'\n Epoch {epoch}/{EPOCHS}')
        progbar = Progbar(len(train_data))

        # Loop through each batch
        for idx, batch in enumerate(train_data):
            print("--------------------- Enumarted data ---------------------\n", batch)
            # Run train step here
            train_step(batch)
            progbar.update(idx + 1)

        # Save checkpoints
        if epoch % 10 == 0:
            checkpoint.save(file_prefix=checkpoint_prefix)


EPOCHS = 50

# The device scope has to surround the call: it applies to ops executed while
# the context is active, so wrapping only the function definition has no
# effect on where training runs.
with tf.device(tf.DeviceSpec(device_type="GPU", device_index=0)):
    train(train_data, EPOCHS)
The result I get is:
2022-04-08 01:59:30.620152: I tensorflow/compiler/mlir/mlir_graph_optimization_pass.cc:116] None of the MLIR optimization passes are enabled (registered 2)
Epoch 1/50
Tensor("binary_crossentropy/weighted_loss/value:0", shape=(), dtype=float32)
Tensor("binary_crossentropy/weighted_loss/value:0", shape=(), dtype=float32)
2022-04-08 01:59:32.955948: I tensorflow/stream_executor/platform/default/dso_loader.cc:49] Successfully opened dynamic library cublas64_11.dll
2022-04-08 01:59:33.751812: I tensorflow/stream_executor/platform/default/dso_loader.cc:49] Successfully opened dynamic library cublasLt64_11.dll
2022-04-08 01:59:33.756303: I tensorflow/stream_executor/platform/default/dso_loader.cc:49] Successfully opened dynamic library cudnn64_8.dll
Process finished with exit code -1073740791 (0xC0000409)
Youtube :
Github of code :
Link to github with full tutorial
The code is copied from the source and works up to the training step mentioned above. I also used the GPU for the training task and tried different batch sizes and different input sizes. As far as I know, the error is raised because of a computation error that the Python interpreter encounters.
- Works on Ubuntu (with the data paths changed)