I'm running Debian 12 with a GeForce RTX 4080 graphics card. I've installed OpenCV with CUDA support to accelerate facial recognition. However, when I run the code, it appears to use the CPU instead of the GPU.
$ uname -a ; cat /etc/issue
Linux TEST 6.1.0-10-amd64 #1 SMP PREEMPT_DYNAMIC Debian 6.1.38-1 (2023-07-14) x86_64 GNU/Linux
Debian GNU/Linux 12 \n \l
$ nvcc --version
nvcc: NVIDIA (R) Cuda compiler driver
Copyright (c) 2005-2022 NVIDIA Corporation
Built on Wed_Sep_21_10:33:58_PDT_2022
Cuda compilation tools, release 11.8, V11.8.89
Build cuda_11.8.r11.8/compiler.31833905_0
$ lspci -v | grep VGA
01:00.0 VGA compatible controller: NVIDIA Corporation AD103 [GeForce RTX 4080] (rev a1) (prog-if 00 [VGA controller])
$ nvidia-smi
Thu Aug 3 06:27:20 2023
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 525.125.06 Driver Version: 525.125.06 CUDA Version: 12.0 |
|-------------------------------+----------------------+----------------------+
| GPU Name Persistence-M| Bus-Id Disp.A | Volatile Uncorr. ECC |
| Fan Temp Perf Pwr:Usage/Cap| Memory-Usage | GPU-Util Compute M. |
| | | MIG M. |
|===============================+======================+======================|
| 0 NVIDIA GeForce ... On | 00000000:01:00.0 Off | N/A |
| 30% 30C P8 7W / 320W | 70MiB / 16376MiB | 0% Default |
| | | N/A |
+-------------------------------+----------------------+----------------------+
+-----------------------------------------------------------------------------+
| Processes: |
| GPU GI CI PID Type Process name GPU Memory |
| ID ID Usage |
|=============================================================================|
| 0 N/A N/A 1500 G /usr/lib/xorg/Xorg 56MiB |
| 0 N/A N/A 1694 G /usr/bin/gnome-shell 11MiB |
+-----------------------------------------------------------------------------+
$ python -c "import cv2, re; cv_info = [re.sub('\s+', ' ', ci.strip()) for ci in cv2.getBuildInformation().strip().split('\n') if len(ci) > 0 and re.search(r'(nvidia*:?)|(cuda*:)|(cudnn*:)', ci.lower()) is not None]; print(cv_info)"
['NVIDIA CUDA: YES (ver 11.8, CUFFT CUBLAS FAST_MATH)', 'NVIDIA GPU arch: 75', 'NVIDIA PTX archs:', 'cuDNN: NO']
Here is my code:
import tensorflow as tf
from tensorflow import keras
import numpy as np
import cv2
from keras.models import load_model
import numpy as np
from io import BytesIO
import requests
import multiprocessing
import os
# Expose only CUDA device 0 to this process (and to the worker processes that
# inherit its environment).
# NOTE(review): this runs after tensorflow and cv2 have already been imported
# above; confirm the variable is still set early enough to take effect.
os.environ["CUDA_VISIBLE_DEVICES"] = "0"
def face_detection(channel):
    """Detect and classify faces on one RTSP camera channel.

    Opens the RTSP stream for ``channel``, finds faces in every frame with a
    Haar cascade, classifies each face crop with the Keras model loaded from
    ``keras_model.h5`` and shows the annotated frame in a window titled with
    the channel. The loop ends when 'q' is pressed in the window or when the
    stream stops delivering frames; the capture and the windows are released
    in either case, including when an exception is raised.

    Args:
        channel: Channel identifier interpolated into the RTSP URL path
            (``/Streaming/Channels/{channel}``).

    NOTE(review): ``cv2.CascadeClassifier`` looks like the CPU implementation
    of the cascade detector; GPU detection would presumably have to go through
    the ``cv2.cuda`` module instead -- confirm against the OpenCV build in use.
    """
    facedetect = cv2.CascadeClassifier(cv2.data.haarcascades + 'haarcascade_frontalface_default.xml')

    # NOTE(review): camera credentials are hard-coded here; consider moving
    # them to configuration or environment variables.
    ip_address = '192.168.1.2:1024'
    username = 'admin'
    password = 'Admin12345'
    rtsp_url = f'rtsp://{username}:{password}@{ip_address}/Streaming/Channels/{channel}'

    cap = cv2.VideoCapture(rtsp_url)
    # Same property ids as the literal 3 and 4, spelled by name.
    cap.set(cv2.CAP_PROP_FRAME_WIDTH, 640)
    cap.set(cv2.CAP_PROP_FRAME_HEIGHT, 480)

    font = cv2.FONT_HERSHEY_COMPLEX
    model = load_model('keras_model.h5')
    nume_clase = ["name1", "name2", "name3", "name4", "name5"]
    window_name = f"Result - Channel {channel}"

    try:
        while True:
            success, frame = cap.read()
            if not success:
                # No frame was delivered (stream not opened, dropped or
                # ended); stop instead of passing None to the detector.
                break

            faces = facedetect.detectMultiScale(frame, 1.3, 5)
            for x, y, w, h in faces:
                # Rows span the box height and columns span the box width.
                crop_img = frame[y:y + h, x:x + w]
                img = cv2.resize(crop_img, (224, 224))
                # NOTE(review): pixels are fed to the model unscaled; confirm
                # this matches the preprocessing the model was trained with.
                img = img.reshape(1, 224, 224, 3)

                prediction = model.predict(img)
                classIndex = np.argmax(prediction, axis=1)[0]
                probabilityValue = np.amax(prediction) * 100

                # Outline around the face plus a filled label bar above it.
                cv2.rectangle(frame, (x, y), (x + w, y + h), (0, 255, 0), 2)
                cv2.rectangle(frame, (x, y - 40), (x + w, y), (0, 255, 0), cv2.FILLED)
                cv2.putText(frame, str(nume_clase[classIndex]), (x, y - 10), font, 0.75, (255, 255, 255), 1, cv2.LINE_AA)
                cv2.putText(frame, str(round(probabilityValue, 2)) + "%", (180, 75), font, 0.75, (255, 0, 0), 2, cv2.LINE_AA)

            cv2.imshow(window_name, frame)
            if cv2.waitKey(1) == ord('q'):
                break
    finally:
        # Always free the capture and close the windows, even on error.
        cap.release()
        cv2.destroyAllWindows()
if __name__ == '__main__':
    # Camera channels to monitor; each one gets its own worker process.
    channels = ['101','201','301','401','501','601','701','801','901','1001','1101','1201','1301','1401','1501']

    # Build every worker first, then start them all, then wait for all of
    # them to finish.
    processes = [
        multiprocessing.Process(target=face_detection, args=(channel_id,))
        for channel_id in channels
    ]
    for worker in processes:
        worker.start()
    for worker in processes:
        worker.join()