I'm doing a very basic image acquisition from PySpin, feeding it to a neural net, and finally displaying it. Each of those steps runs in its own process (I assumed this would make everything faster). I have run some benchmarks with my code. It seems like I'm mostly bottlenecked by the image acquisition process.
import multiprocessing as mp
import sys
import time
from multiprocessing import Process, Queue, Value
from queue import Empty

import cv2
import PySpin
from detectron2 import model_zoo
from detectron2.config import get_cfg
from detectron2.data import MetadataCatalog
from detectron2.engine import DefaultPredictor
from detectron2.utils.video_visualizer import VideoVisualizer
def init_camera(cam_idx):
    """Initialize the PySpin camera at index *cam_idx*.

    Returns a ``(cam, system)`` tuple; the caller owns both and must call
    ``cam.DeInit()`` / ``system.ReleaseInstance()`` when done.

    Exits the interpreter with status 1 when no camera is attached.
    """
    system = PySpin.System.GetInstance()
    cam_list = system.GetCameras()
    if cam_list.GetSize() == 0:
        # No camera attached: release everything before bailing out.
        cam_list.Clear()
        system.ReleaseInstance()
        print('Not enough cameras!')
        sys.exit(1)
    cam = cam_list.GetByIndex(cam_idx)
    cam.Init()
    # Clear the list now that we hold our own reference to `cam`; the
    # original leaked this, which prevents a clean system release later.
    cam_list.Clear()
    return cam, system
def acquire_images(cam_idx, img_queue, stop_thread):
    """Continuously grab frames from camera *cam_idx* and push them to *img_queue*.

    Frames are converted to BGR8 and put on the queue as numpy arrays.
    Runs until ``stop_thread.value`` becomes truthy, then tears the camera down.
    """
    cam, _ = init_camera(cam_idx)
    try:
        # Switch the camera to continuous acquisition mode.
        nodemap = cam.GetNodeMap()
        node_acquisition_mode = PySpin.CEnumerationPtr(nodemap.GetNode('AcquisitionMode'))
        node_continuous = node_acquisition_mode.GetEntryByName('Continuous')
        node_acquisition_mode.SetIntValue(node_continuous.GetValue())
        cam.BeginAcquisition()

        processor = PySpin.ImageProcessor()
        # NOTE(review): HQ_LINEAR demosaicing is CPU-heavy and runs per frame
        # in this process; if acquisition is the bottleneck, a cheaper
        # algorithm (or converting in the consumer) is worth benchmarking.
        processor.SetColorProcessing(PySpin.SPINNAKER_COLOR_PROCESSING_ALGORITHM_HQ_LINEAR)

        while not stop_thread.value:
            try:
                image_result = cam.GetNextImage(1000)  # 1000 ms timeout
            except PySpin.SpinnakerException:
                # Timeout (or transient grab error): re-check the stop flag
                # instead of letting the exception kill this process.
                continue
            try:
                if image_result.IsIncomplete():
                    print('Image incomplete with image status %d ...' % image_result.GetImageStatus())
                else:
                    image_converted = processor.Convert(image_result, PySpin.PixelFormat_BGR8)
                    img_queue.put(image_converted.GetNDArray())
            finally:
                # Always return the buffer to the driver; the original only
                # released complete frames, leaking incomplete ones.
                image_result.Release()
        cam.EndAcquisition()
    finally:
        # Tear down the camera even if acquisition raised.
        cam.DeInit()
        del cam
def process_images(img_queue, processed_img_queue, cfg_file, cfg_weights, stop_thread):
    """Run detectron2 inference on frames from *img_queue*.

    Each frame is downscaled to 10% per side, passed through the predictor,
    and the visualized result (a VisImage) is put on *processed_img_queue*.
    Runs until ``stop_thread.value`` becomes truthy.
    """
    cfg = get_cfg()
    cfg.MODEL.DEVICE = "cuda"
    cfg.merge_from_file(cfg_file)
    cfg.MODEL.ROI_HEADS.SCORE_THRESH_TEST = 0.8
    cfg.MODEL.WEIGHTS = cfg_weights
    predictor = DefaultPredictor(cfg)
    v = VideoVisualizer(MetadataCatalog.get(cfg.DATASETS.TRAIN[0]))

    while not stop_thread.value:
        # Blocking get with a timeout instead of busy-polling empty():
        # the original spun a CPU core at 100% while waiting for frames.
        try:
            image_high = img_queue.get(timeout=0.1)
        except Empty:
            continue
        # Downscale to 10% per side before inference to keep the model fast.
        width = int(image_high.shape[1] * 10 / 100)
        height = int(image_high.shape[0] * 10 / 100)
        image_data = cv2.resize(image_high, dsize=(width, height), interpolation=cv2.INTER_CUBIC)
        outputs = predictor(image_data)
        if stop_thread.value:
            break
        # detectron2 visualizers expect RGB; the frame is BGR, hence [::-1].
        img = v.draw_instance_predictions(image_data[:, :, ::-1], outputs["instances"].to("cpu"))
        processed_img_queue.put(img)
def display_images(processed_img_queue, stop_thread):
    """Display processed frames in an OpenCV window with an FPS overlay.

    Sets ``stop_thread.value = True`` when the user presses ESC (or on
    Ctrl-C), signalling the other processes to shut down.
    """
    cv2.namedWindow("Segments Tracking", cv2.WINDOW_NORMAL)
    fps = 0
    frame_count = 0
    start_time = time.time()
    try:
        while not stop_thread.value:
            # Blocking get with a timeout instead of busy-polling empty():
            # the original spun a CPU core at 100% between frames.
            try:
                img = processed_img_queue.get(timeout=0.1)
            except Empty:
                continue
            frame_count += 1
            elapsed_time = time.time() - start_time
            if elapsed_time > 1:
                # Refresh the FPS estimate roughly once per second.
                fps = frame_count / elapsed_time
                frame_count = 0
                start_time = time.time()
            # VisImage is RGB; convert to BGR for imshow. copy() is needed
            # because putText cannot write into a negative-stride view.
            img_with_fps = img.get_image()[:, :, ::-1].copy()
            cv2.putText(img_with_fps, f"FPS: {fps:.2f}", (10, 30), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)
            cv2.imshow("Segments Tracking", img_with_fps)
            key = cv2.waitKey(1)
            if key == 27 or stop_thread.value:  # 27 == ESC
                stop_thread.value = True
                print("End Stream")
                break
    except KeyboardInterrupt:
        stop_thread.value = True
        print("Keyboard exception. End Stream")
    cv2.destroyAllWindows()
    print("Finish ending acquisition")
def main():
    """Wire up the acquire -> infer -> display pipeline across three processes.

    Returns True on a clean shutdown.
    """
    stop_thread = Value('b', False)  # shared boolean shutdown flag
    img_queue = Queue()
    processed_img_queue = Queue()
    cfg_file = model_zoo.get_config_file("COCO-Keypoints/keypoint_rcnn_R_50_FPN_3x.yaml")
    cfg_weights = model_zoo.get_checkpoint_url("COCO-Keypoints/keypoint_rcnn_R_50_FPN_3x.yaml")

    acquire_process = Process(target=acquire_images, args=(0, img_queue, stop_thread))
    process_process = Process(target=process_images, args=(img_queue, processed_img_queue, cfg_file, cfg_weights, stop_thread))
    display_process = Process(target=display_images, args=(processed_img_queue, stop_thread))

    acquire_process.start()
    process_process.start()
    display_process.start()

    # The display process owns shutdown: it sets stop_thread on ESC/Ctrl-C.
    display_process.join()
    stop_thread.value = True

    # Drain both queues before joining the producers. A Process that has put
    # items on a multiprocessing.Queue will not exit until its feeder thread
    # has flushed them, so join() can deadlock on unread frames.
    for q in (img_queue, processed_img_queue):
        try:
            while True:
                q.get_nowait()
        except Empty:
            pass

    acquire_process.join()
    process_process.join()
    return True
if __name__ == '__main__':
    # 'spawn' start method: each worker gets a fresh interpreter, which is
    # required for CUDA and safest with camera SDK handles.
    mp.set_start_method('spawn')
    sys.exit(0 if main() else 1)
From my benchmarks: Display Images: 0.0180 seconds Acquire Images: 0.1407 seconds Process Images: 0.1084 seconds
The FLIR camera I'm working with runs at 60 Hz, so I should be able to acquire a frame at least every 0.0167 seconds (1/60).
I started with writing everything in the main process, but eventually made it into parallel processes assuming it would be faster. This is still not sufficient. What should I do to at least make image acquisition be at 60 Hz for raw image display?