TensorFlow not working on GPU, raises an error after setting up CUDA and all the .dll files

Question

I have a problem getting tensorflow-gpu working. I use two CNN models, one for object detection and the second for classification. In the project, I need opencv, keras, imageai and tensorflow at the same time. I can't get them all working together because of their dependencies (tensorflow and imageai need different versions of keras and h5py), except if I use tensorflow-gpu==2.4.0; that is the only way to make them work together. But for that version of tensorflow, I can't get the GPU working because there is always one missing .dll (cusolver64_10.dll), which does not come with a basic CUDA 11 or CUDA 10 installation. I added all the cuDNN libraries and tried just renaming cusolver64_11.dll to cusolver64_10.dll, but then the program starts throwing errors I have never seen before. HERE you can find the example I took and the model I am using, and below you can see the usage of the models and the errors I get after renaming that .dll.

import os
 
import cv2
import keras
import numpy as np
from imageai.Detection import ObjectDetection

# Working directory at start-up; load_models() builds the detector weights path from it.
execution_path = os.getcwd()
# Classifier output index -> label text drawn next to each box.
classes = dict(enumerate(("cat", "dog")))
# Hard-coded frame total, used as the denominator of the progress bar.
frames_number = 7498
# `i` counts the frames read so far; `l` is the progress-bar total.
i, l = 0, frames_number
# Print iterations progress
def printProgressBar (iteration, total, prefix = '', suffix = '', decimals = 1, length = 100, fill = '█', printEnd = "\r"):
    """Write a single-line text progress bar to stdout.

    Args:
        iteration: Current step count.
        total: Step count that corresponds to 100%.
        prefix: Text printed before the bar.
        suffix: Text printed after the percentage.
        decimals: Number of decimal places shown in the percentage.
        length: Width of the bar in characters.
        fill: Character used for the completed part of the bar.
        printEnd: Line terminator; the default carriage return makes the
            next call overwrite the same line.
    """
    # Percentage first, formatted with the requested precision.
    pct_text = f"{100 * (iteration / float(total)):.{decimals}f}"
    done_cells = int(length * iteration // total)
    gauge = fill * done_cells + '-' * (length - done_cells)
    print(f'\r{prefix} |{gauge}| {pct_text}% {suffix}', end = printEnd)
    # On the final step emit a newline so later output starts on a fresh line.
    if iteration == total:
        print()

def predict_image(imagein, classifier):
    """Classify one image crop and return the winning class with its score.

    Args:
        imagein: Image as a NumPy array, as produced by OpenCV / ImageAI.
            Assumed to be (height, width, 3) and non-empty — TODO confirm
            against the detector's extracted-object arrays.
        classifier: Object exposing ``predict(batch)``; here the Keras model
            returned by ``load_models``.

    Returns:
        Tuple ``(predict, score)`` where ``predict`` is the arg-max index of
        the classifier output and ``score`` is the output value at that index.
    """
    # np.resize does NOT rescale an image: it only repeats or truncates the
    # flattened pixel buffer to fit the new shape, which scrambles the
    # picture. cv2.resize interpolates to a real 64x64 thumbnail.
    predict_modified = cv2.resize(imagein, (64, 64))
    # Scale pixel values from 0..255 down to 0..1.
    # NOTE(review): OpenCV frames are BGR; if the classifier was trained on
    # RGB images a cv2.cvtColor step may also be needed — confirm.
    predict_modified = predict_modified / 255
    # Add the leading batch axis: (64, 64, 3) -> (1, 64, 64, 3).
    predict_modified = np.expand_dims(predict_modified, axis=0)
    result = classifier.predict(predict_modified)
    predict = np.argmax(result)
    return predict, result[0][predict]


def load_models():
    """Load the RetinaNet object detector and the Keras classifier.

    Both models are read from the ``models`` directory under
    ``execution_path``. Paths are built from separate components so no
    platform-specific separator is embedded in a string literal.

    Returns:
        Tuple ``(detector, classifier)``: the configured ImageAI
        ``ObjectDetection`` instance and the loaded Keras model.
    """
    models_dir = os.path.join(execution_path, "models")

    detector = ObjectDetection()
    detector.setModelTypeAsRetinaNet()
    detector.setModelPath(os.path.join(
        models_dir, "resnet50_coco_best_v2.1.0.h5"))
    # "flash" is passed as ImageAI's detection-speed setting.
    detector.loadModel("flash")

    classifier = keras.models.load_model(os.path.join(models_dir, "my_model"))
    return detector, classifier


def detect_objects(imagein):
    """Run the module-level ``detector`` on one frame, keeping cats and dogs.

    Args:
        imagein: Frame as a NumPy array (``input_type='array'``).

    Returns:
        Tuple ``(detections, detected_objects_image_array)``: the list of
        detection dicts (each has a ``"box_points"`` entry used by
        ``draw_boxes``) and the list of cropped object images, in the same
        order.
    """
    # ImageAI expects the dict produced by CustomObjects(), which it indexes
    # by class name. A plain list fails inside the library and surfaces as
    # its generic "Ensure you specified correct input image..." ValueError.
    wanted_objects = detector.CustomObjects(cat=True, dog=True)
    # The first returned item (the annotated frame) is not used; boxes are
    # drawn later by draw_boxes.
    _, detections, detected_objects_image_array = detector.detectObjectsFromImage(
        input_image=imagein,
        input_type='array',
        output_type='array',
        extract_detected_objects=True,
        minimum_percentage_probability=50,
        display_box=False,
        display_object_name=False,
        display_percentage_probability=False,
        custom_objects=wanted_objects,
    )
    return detections, detected_objects_image_array

def draw_boxes(frame, detections, images):
    """Classify every detected crop and annotate ``frame`` with box and label.

    Args:
        frame: Image the rectangles and captions are drawn on.
        detections: Sequence of dicts, each with a ``"box_points"`` entry
            indexed 0..3 as the two corner coordinates.
        images: Crops matching ``detections`` index for index.

    Returns:
        The annotated frame.
    """
    label_font = cv2.FONT_HERSHEY_PLAIN
    for idx, detection in enumerate(detections):
        crop = images[idx]
        box_points = detection["box_points"]
        # Uses the module-level classifier loaded at start-up.
        predict, acc = predict_image(crop, classifier)

        top_left = (box_points[0], box_points[1])
        bottom_right = (box_points[2], box_points[3])
        frame = cv2.rectangle(frame, top_left, bottom_right, (255, 0, 0), 2)

        # Caption sits 15 px above the box's top-left corner.
        score_text = str(round(acc, 2))
        caption_origin = (box_points[0], box_points[1]-15)
        frame = cv2.putText(frame, f"{classes[predict]} {score_text}%", caption_origin, label_font, 1, (255, 0,0), 1, cv2.LINE_AA )
    return frame

# Load both models once, before any frame is read.
detector, classifier = load_models()

# Build the path from components so it does not depend on the "\\" separator.
cap = cv2.VideoCapture(os.path.join("videos", "dogsNcats.mp4"))
try:
    if not cap.isOpened():
        print("Error opening video stream or file!")

    printProgressBar(0, l, prefix = 'Progress:', suffix = 'Complete', length = 50)

    while cap.isOpened():
        ret, frame = cap.read()
        # A failed read ends the loop.
        if not ret:
            break

        i += 1
        printProgressBar(i, l, prefix = 'Progress:', suffix = 'Complete', length = 50)
        # Frames before number 150 are counted but not run through the models.
        if i < 150:
            continue

        detections, images = detect_objects(frame)
        frame = draw_boxes(frame, detections, images)
        cv2.imshow("Frame", frame)
        # Pressing 'q' stops playback early.
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
finally:
    # Release the capture and close windows even if detection or drawing
    # raises, so the video handle and GUI windows are not left behind.
    cap.release()
    cv2.destroyAllWindows()

PS C:\Users\jokam\OneDrive\Documents\Projects\Python\10lines_obj_detec> & C:/Users/jokam/.conda/envs/cuda-tf/python.exe c:/Users/jokam/OneDrive/Documents/Projects/Python/10lines_obj_detec/main.py
2021-09-09 19:31:15.557525: I tensorflow/stream_executor/platform/default/dso_loader.cc:49] Successfully opened dynamic library cudart64_110.dll
2021-09-09 19:31:17.007163: I tensorflow/compiler/jit/xla_cpu_device.cc:41] Not creating XLA devices, tf_xla_enable_xla_devices not set
2021-09-09 19:31:17.008282: I tensorflow/stream_executor/platform/default/dso_loader.cc:49] Successfully opened dynamic library nvcuda.dll
2021-09-09 19:31:18.174734: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1720] Found device 0 with properties: 
pciBusID: 0000:01:00.0 name: NVIDIA GeForce RTX 3050 Ti Laptop GPU computeCapability: 8.6
coreClock: 1.485GHz coreCount: 20 deviceMemorySize: 4.00GiB deviceMemoryBandwidth: 178.84GiB/s
2021-09-09 19:31:18.175393: I tensorflow/stream_executor/platform/default/dso_loader.cc:49] Successfully opened dynamic library cudart64_110.dll
2021-09-09 19:31:18.521378: I tensorflow/stream_executor/platform/default/dso_loader.cc:49] Successfully opened dynamic library cublas64_11.dll
2021-09-09 19:31:18.521779: I tensorflow/stream_executor/platform/default/dso_loader.cc:49] Successfully opened dynamic library cublasLt64_11.dll
2021-09-09 19:31:18.832838: I tensorflow/stream_executor/platform/default/dso_loader.cc:49] Successfully opened dynamic library cufft64_10.dll
2021-09-09 19:31:18.856149: I tensorflow/stream_executor/platform/default/dso_loader.cc:49] Successfully opened dynamic library curand64_10.dll
2021-09-09 19:31:19.034908: I tensorflow/stream_executor/platform/default/dso_loader.cc:49] Successfully opened dynamic library cusolver64_10.dll
2021-09-09 19:31:19.201189: I tensorflow/stream_executor/platform/default/dso_loader.cc:49] Successfully opened dynamic library cusparse64_11.dll
2021-09-09 19:31:19.216492: I tensorflow/stream_executor/platform/default/dso_loader.cc:49] Successfully opened dynamic library cudnn64_8.dll
2021-09-09 19:31:19.216826: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1862] Adding visible gpu devices: 0
2021-09-09 19:31:19.217244: I tensorflow/core/platform/cpu_feature_guard.cc:142] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.
2021-09-09 19:31:19.218702: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1720] Found device 0 with properties:
pciBusID: 0000:01:00.0 name: NVIDIA GeForce RTX 3050 Ti Laptop GPU computeCapability: 8.6
coreClock: 1.485GHz coreCount: 20 deviceMemorySize: 4.00GiB deviceMemoryBandwidth: 178.84GiB/s
2021-09-09 19:31:19.219060: I tensorflow/stream_executor/platform/default/dso_loader.cc:49] Successfully opened dynamic library cudart64_110.dll
2021-09-09 19:31:19.219195: I tensorflow/stream_executor/platform/default/dso_loader.cc:49] Successfully opened dynamic library cublas64_11.dll
2021-09-09 19:31:19.219305: I tensorflow/stream_executor/platform/default/dso_loader.cc:49] Successfully opened dynamic library cublasLt64_11.dll
2021-09-09 19:31:19.219471: I tensorflow/stream_executor/platform/default/dso_loader.cc:49] Successfully opened dynamic library cufft64_10.dll
2021-09-09 19:31:19.219674: I tensorflow/stream_executor/platform/default/dso_loader.cc:49] Successfully opened dynamic library curand64_10.dll
2021-09-09 19:31:19.219899: I tensorflow/stream_executor/platform/default/dso_loader.cc:49] Successfully opened dynamic library cusolver64_10.dll
2021-09-09 19:31:19.220039: I tensorflow/stream_executor/platform/default/dso_loader.cc:49] Successfully opened dynamic library cusparse64_11.dll
2021-09-09 19:31:19.220173: I tensorflow/stream_executor/platform/default/dso_loader.cc:49] Successfully opened dynamic library cudnn64_8.dll
2021-09-09 19:31:19.220369: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1862] Adding visible gpu devices: 0
2021-09-09 19:31:19.538541: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1261] Device interconnect StreamExecutor with strength 1 edge matrix:
2021-09-09 19:31:19.538900: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1267]      0 
2021-09-09 19:31:19.539319: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1280] 0:   N
2021-09-09 19:31:19.539728: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1406] Created TensorFlow device (/job:localhost/replica:0/task:0/device:GPU:0 with 2899 MB memory) -> physical GPU (device: 0, name: NVIDIA GeForce RTX 3050 Ti Laptop GPU, pci bus id: 0000:01:00.0, compute capability: 8.6)
2021-09-09 19:31:19.540493: I tensorflow/compiler/jit/xla_gpu_device.cc:99] Not creating XLA devices, tf_xla_enable_xla_devices not set
WARNING:tensorflow:No training configuration found in the save file, so the model was *not* compiled. Compile it manually.
test1ess: |█-------------------------------------------------| 2.0% Complete
test2
test21
2021-09-09 19:31:24.004676: I tensorflow/compiler/mlir/mlir_graph_optimization_pass.cc:116] None of the MLIR optimization passes are enabled (registered 2)
2021-09-09 19:31:35.295180: I tensorflow/stream_executor/platform/default/dso_loader.cc:49] Successfully opened dynamic library cudnn64_8.dll
2021-09-09 19:31:36.982379: I tensorflow/stream_executor/platform/default/dso_loader.cc:49] Successfully opened dynamic library cublas64_11.dll
2021-09-09 19:31:37.014772: E tensorflow/stream_executor/cuda/cuda_blas.cc:226] failed to create cublas handle: CUBLAS_STATUS_NOT_INITIALIZED
2021-09-09 19:31:37.015080: W tensorflow/core/framework/op_kernel.cc:1763] OP_REQUIRES failed at conv_ops.cc:1106 : Not found: No algorithm worked!
Traceback (most recent call last):
  File "C:\Users\jokam\.conda\envs\cuda-tf\lib\site-packages\imageai\Detection\__init__.py", line 314, in detectObjectsFromImage
    boxes, scores, labels = model.predict_on_batch(np.expand_dims(detected_copy, axis=0))
  File "C:\Users\jokam\.conda\envs\cuda-tf\lib\site-packages\tensorflow\python\keras\engine\training.py", line 1820, in predict_on_batch
    outputs = self.predict_function(iterator)
  File "C:\Users\jokam\.conda\envs\cuda-tf\lib\site-packages\tensorflow\python\eager\def_function.py", line 828, in __call__
    result = self._call(*args, **kwds)
  File "C:\Users\jokam\.conda\envs\cuda-tf\lib\site-packages\tensorflow\python\eager\def_function.py", line 895, in _call
    filtered_flat_args, self._concrete_stateful_fn.captured_inputs)  # pylint: disable=protected-access
  File "C:\Users\jokam\.conda\envs\cuda-tf\lib\site-packages\tensorflow\python\eager\function.py", line 1919, in _call_flat
    ctx, args, cancellation_manager=cancellation_manager))
  File "C:\Users\jokam\.conda\envs\cuda-tf\lib\site-packages\tensorflow\python\eager\function.py", line 560, in call
    ctx=ctx)
  File "C:\Users\jokam\.conda\envs\cuda-tf\lib\site-packages\tensorflow\python\eager\execute.py", line 60, in quick_execute
    inputs, attrs, num_outputs)
tensorflow.python.framework.errors_impl.NotFoundError: 2 root error(s) found.
  (0) Not found:  No algorithm worked!
         [[node retinanet-bbox/conv1/Conv2D (defined at C:\Users\jokam\.conda\envs\cuda-tf\lib\site-packages\imageai\Detection\__init__.py:314) ]]
         [[retinanet-bbox/filtered_detections/map/while/body/_1/retinanet-bbox/filtered_detections/map/while/GatherV2_162/_36]]
  (1) Not found:  No algorithm worked!
         [[node retinanet-bbox/conv1/Conv2D (defined at C:\Users\jokam\.conda\envs\cuda-tf\lib\site-packages\imageai\Detection\__init__.py:314) ]]
0 successful operations.
0 derived errors ignored. [Op:__inference_predict_function_16232]

Function call stack:
predict_function -> predict_function


During handling of the above exception, another exception occurred:

Traceback (most recent call last):
  File "c:/Users/jokam/OneDrive/Documents/Projects/Python/10lines_obj_detec/main.py", line 73, in <module>
    detections, images = detect_objects(frame)
  File "c:/Users/jokam/OneDrive/Documents/Projects/Python/10lines_obj_detec/main.py", line 44, in detect_objects
    extract_detected_objects=True, custom_objects=['cat', 'dog'])
  File "C:\Users\jokam\.conda\envs\cuda-tf\lib\site-packages\imageai\Detection\__init__.py", line 393, in detectObjectsFromImage
    "Ensure you specified correct input image, input type, output type and/or output image path ")
ValueError: Ensure you specified correct input image, input type, output type and/or output image path

Firstly `Move to C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.2\bin` and then `Rename file cusolver64_11.dll To cusolver64_10.dll `. Take a look at similar issue [here](https://stackoverflow.com/a/65608751/14290244). Thanks! — , Sep 22 '21 at 01:59
Read the question please, I wrote that I tried that already, and then this error appears. — Josip Maričević, Sep 22 '21 at 09:12

TensorFlow not working on GPU, raises an error after setting up CUDA and all the .dll files

0 Answers0