openCV with cuda, not using GPU

Question

I'm running Debian 12 with a GeForce RTX 4080 graphics card. I've installed OpenCV with CUDA support to accelerate facial recognition. However, when I run the code, it seems to be using the CPU instead of the GPU.


$ uname -a ; cat /etc/issue
Linux TEST 6.1.0-10-amd64 #1 SMP PREEMPT_DYNAMIC Debian 6.1.38-1 (2023-07-14) x86_64 GNU/Linux
Debian GNU/Linux 12 \n \l

$ nvcc --version
nvcc: NVIDIA (R) Cuda compiler driver
Copyright (c) 2005-2022 NVIDIA Corporation
Built on Wed_Sep_21_10:33:58_PDT_2022
Cuda compilation tools, release 11.8, V11.8.89
Build cuda_11.8.r11.8/compiler.31833905_0

$ lspci -v | grep VGA
01:00.0 VGA compatible controller: NVIDIA Corporation AD103 [GeForce RTX 4080] (rev a1) (prog-if 00 [VGA controller])

$ nvidia-smi
Thu Aug  3 06:27:20 2023
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 525.125.06   Driver Version: 525.125.06   CUDA Version: 12.0     |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|                               |                      |               MIG M. |
|===============================+======================+======================|
|   0  NVIDIA GeForce ...  On   | 00000000:01:00.0 Off |                  N/A |
| 30%   30C    P8     7W / 320W |     70MiB / 16376MiB |      0%      Default |
|                               |                      |                  N/A |
+-------------------------------+----------------------+----------------------+

+-----------------------------------------------------------------------------+
| Processes:                                                                  |
|  GPU   GI   CI        PID   Type   Process name                  GPU Memory |
|        ID   ID                                                   Usage      |
|=============================================================================|
|    0   N/A  N/A      1500      G   /usr/lib/xorg/Xorg                 56MiB |
|    0   N/A  N/A      1694      G   /usr/bin/gnome-shell               11MiB |
+-----------------------------------------------------------------------------+

$ python -c "import cv2, re; cv_info = [re.sub('\s+', ' ', ci.strip()) for ci in cv2.getBuildInformation().strip().split('\n') if len(ci) > 0 and re.search(r'(nvidia*:?)|(cuda*:)|(cudnn*:)', ci.lower()) is not None]; print(cv_info)"
['NVIDIA CUDA: YES (ver 11.8, CUFFT CUBLAS FAST_MATH)', 'NVIDIA GPU arch: 75', 'NVIDIA PTX archs:', 'cuDNN: NO']

Here is my code:

import tensorflow as tf
from tensorflow import keras
import numpy as np
import cv2
from keras.models import load_model
import numpy as np
from io import BytesIO
import requests
import multiprocessing
import os

# Set CUDA_VISIBLE_DEVICES to "0" for this process; presumably intended to pin
# CUDA-based libraries to the first GPU.
# NOTE(review): this runs after `import tensorflow` / `import cv2` above --
# confirm it takes effect before either library initialises CUDA.
os.environ["CUDA_VISIBLE_DEVICES"] = "0"



def face_detection(channel):
    """Detect and classify faces on one RTSP camera channel until 'q' is pressed.

    Opens the camera stream for ``channel``, finds faces in every frame with a
    Haar cascade, classifies each face crop with the Keras model loaded from
    ``keras_model.h5`` and shows the annotated frame in a window named after
    the channel. The loop ends when 'q' is pressed in the window or when a
    frame can no longer be read from the stream.

    Args:
        channel: Channel identifier inserted into the RTSP URL path
            (``/Streaming/Channels/{channel}``) and into the window title.
    """
    # NOTE(review): cv2.CascadeClassifier is the regular cascade API, not the
    # cv2.cuda one, so detection presumably runs on the CPU even with a
    # CUDA-enabled OpenCV build -- confirm against the OpenCV documentation.
    facedetect = cv2.CascadeClassifier(cv2.data.haarcascades + 'haarcascade_frontalface_default.xml')

    # NOTE(review): credentials are hard-coded in the source; consider loading
    # them from the environment or a config file instead.
    ip_address = '192.168.1.2:1024'
    username = 'admin'
    password = 'Admin12345'
    rtsp_url = f'rtsp://{username}:{password}@{ip_address}/Streaming/Channels/{channel}'
    cap = cv2.VideoCapture(rtsp_url)

    try:
        # Named constants instead of the magic property ids 3 and 4.
        cap.set(cv2.CAP_PROP_FRAME_WIDTH, 640)
        cap.set(cv2.CAP_PROP_FRAME_HEIGHT, 480)
        font = cv2.FONT_HERSHEY_COMPLEX

        model = load_model('keras_model.h5')

        nume_clase = ["name1", "name2", "name3", "name4", "name5"]

        while True:
            success, imgOrignal = cap.read()
            if not success or imgOrignal is None:
                # The stream could not be opened or has dropped; passing None
                # on to detectMultiScale would raise, so stop cleanly instead.
                print(f"Channel {channel}: could not read a frame, stopping.")
                break

            faces = facedetect.detectMultiScale(imgOrignal, 1.3, 5)
            for x, y, w, h in faces:
                # Rows span the box height, columns span the box width.
                crop_img = imgOrignal[y:y + h, x:x + w]
                img = cv2.resize(crop_img, (224, 224))
                # Add a leading batch dimension of 1 for model.predict.
                img = img.reshape(1, 224, 224, 3)
                prediction = model.predict(img)
                classIndex = np.argmax(prediction, axis=1)[0]
                probabilityValue = np.amax(prediction) * 100

                # Outline around the face plus a filled label bar above it
                # (a negative thickness draws a filled rectangle).
                cv2.rectangle(imgOrignal, (x, y), (x + w, y + h), (0, 255, 0), 2)
                cv2.rectangle(imgOrignal, (x, y - 40), (x + w, y), (0, 255, 0), -2)
                cv2.putText(imgOrignal, str(nume_clase[classIndex]), (x, y - 10), font, 0.75, (255, 255, 255), 1, cv2.LINE_AA)
                # The probability is drawn at a fixed position, so with several
                # faces in one frame the values overwrite each other.
                cv2.putText(imgOrignal, str(round(probabilityValue, 2)) + "%", (180, 75), font, 0.75, (255, 0, 0), 2, cv2.LINE_AA)

            cv2.imshow(f"Result - Channel {channel}", imgOrignal)
            k = cv2.waitKey(1)
            if k == ord('q'):
                break
    finally:
        # Always free the capture and close the windows, even if model
        # loading or per-frame processing raises.
        cap.release()
        cv2.destroyAllWindows()

if __name__ == '__main__':
    # Camera channels to watch; every channel gets its own worker process.
    channels = [
        '101', '201', '301', '401', '501',
        '601', '701', '801', '901', '1001',
        '1101', '1201', '1301', '1401', '1501',
    ]

    # Create all workers up front, then start them all, then wait for each
    # of them to finish.
    processes = [
        multiprocessing.Process(target=face_detection, args=(channel,))
        for channel in channels
    ]

    for worker in processes:
        worker.start()

    for worker in processes:
        worker.join()

I think you will have to use `cv::cuda::CascadeClassifier`. I don't know how to use it in Python. See https://stackoverflow.com/questions/61225139/cv2-cuda-cascadeclassifier-in-python for an example (I didn't read it). — Micka, Aug 03 '23 at 11:18
First of all, you can check whether TensorFlow is able to see your GPU with this line: `print("GPU Available:", tf.config.list_physical_devices('GPU'))` — Yunus Temurlenk, Aug 03 '23 at 11:30
You wrote: "I've installed openCV with CUDA support". Did you build cv2 from source? As far as I know, you can't install cv2 with GPU support; you must build it from source. — gilad eini, Aug 11 '23 at 19:33

openCV with cuda, not using GPU

0 Answers