
I am currently using a custom-trained YOLOv5 model to run object detection inference on live YouTube videos. The problem is that the videos are streamed at 30 FPS, and I don't want to run detection on every frame; I only want to process every nth frame.

I looked at the LoadStreams() class in the official YOLOv5 repo, but I cannot figure out how to make it capture only every 10th frame:

https://github.com/ultralytics/yolov5/blob/574ceedfc5f171a89417175bfb14fda6a2646603/utils/dataloaders.py#L301

class LoadStreams:
    # YOLOv5 streamloader, i.e. `python detect.py --source 'rtsp://example.com/media.mp4'  # RTSP, RTMP, HTTP streams`
    def __init__(self, sources='streams.txt', img_size=640, stride=32, auto=True):
        self.mode = 'stream'
        self.img_size = img_size
        self.stride = stride

        if os.path.isfile(sources):
            with open(sources) as f:
                sources = [x.strip() for x in f.read().strip().splitlines() if len(x.strip())]
        else:
            sources = [sources]

        n = len(sources)
        self.imgs, self.fps, self.frames, self.threads = [None] * n, [0] * n, [0] * n, [None] * n
        self.sources = [clean_str(x) for x in sources]  # clean source names for later
        self.auto = auto
        for i, s in enumerate(sources):  # index, source
            # Start thread to read frames from video stream
            st = f'{i + 1}/{n}: {s}... '
            if urlparse(s).hostname in ('www.youtube.com', 'youtube.com', 'youtu.be'):  # if source is YouTube video
                check_requirements(('pafy', 'youtube_dl==2020.12.2'))
                import pafy
                s = pafy.new(s).getbest(preftype="mp4").url  # YouTube URL
            s = eval(s) if s.isnumeric() else s  # i.e. s = '0' local webcam
            if s == 0:
                assert not is_colab(), '--source 0 webcam unsupported on Colab. Rerun command in a local environment.'
                assert not is_kaggle(), '--source 0 webcam unsupported on Kaggle. Rerun command in a local environment.'
            cap = cv2.VideoCapture(s)
            assert cap.isOpened(), f'{st}Failed to open {s}'
            w = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
            h = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
            fps = cap.get(cv2.CAP_PROP_FPS)  # warning: may return 0 or nan
            self.frames[i] = max(int(cap.get(cv2.CAP_PROP_FRAME_COUNT)), 0) or float('inf')  # infinite stream fallback
            self.fps[i] = max((fps if math.isfinite(fps) else 0) % 100, 0) or 30  # 30 FPS fallback

            _, self.imgs[i] = cap.read()  # guarantee first frame
            self.threads[i] = Thread(target=self.update, args=([i, cap, s]), daemon=True)
            LOGGER.info(f"{st} Success ({self.frames[i]} frames {w}x{h} at {self.fps[i]:.2f} FPS)")
            self.threads[i].start()
        LOGGER.info('')  # newline

        # check for common shapes
        s = np.stack([letterbox(x, self.img_size, stride=self.stride, auto=self.auto)[0].shape for x in self.imgs])
        self.rect = np.unique(s, axis=0).shape[0] == 1  # rect inference if all shapes equal
        if not self.rect:
            LOGGER.warning('WARNING: Stream shapes differ. For optimal performance supply similarly-shaped streams.')

    def update(self, i, cap, stream):
        # Read stream `i` frames in daemon thread
        n, f, read = 0, self.frames[i], 1  # frame number, frame array, inference every 'read' frame
        while cap.isOpened() and n < f:
            n += 1
            # _, self.imgs[index] = cap.read()
            cap.grab()
            if n % read == 0:
                success, im = cap.retrieve()
                if success:
                    self.imgs[i] = im
                else:
                    LOGGER.warning('WARNING: Video stream unresponsive, please check your IP camera connection.')
                    self.imgs[i] = np.zeros_like(self.imgs[i])
                    cap.open(stream)  # re-open stream if signal was lost
            time.sleep(0.0)  # wait time

    def __iter__(self):
        self.count = -1
        return self

    def __next__(self):
        self.count += 1
        if not all(x.is_alive() for x in self.threads) or cv2.waitKey(1) == ord('q'):  # q to quit
            cv2.destroyAllWindows()
            raise StopIteration

        # Letterbox
        img0 = self.imgs.copy()
        img = [letterbox(x, self.img_size, stride=self.stride, auto=self.rect and self.auto)[0] for x in img0]

        # Stack
        img = np.stack(img, 0)

        # Convert
        img = img[..., ::-1].transpose((0, 3, 1, 2))  # BGR to RGB, BHWC to BCHW
        img = np.ascontiguousarray(img)

        return self.sources, img, img0, None, ''

    def __len__(self):
        return len(self.sources)  # 1E12 frames = 32 streams at 30 FPS for 30 years
Julia Meshcheryakova

4 Answers


Welcome to the Stack Overflow community. You can change the read parameter in this line of the code to control how many frames are skipped. The current value is 1; change it to 10 and the loader will keep only every 10th frame.

n, f, read = 0, self.frames[i], 1  # frame number, frame array, inference every 'read' frame

Change it to:

n, f, read = 0, self.frames[i], 10  # frame number, frame array, inference every 'read' frame
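
For context, here is a minimal sketch of how that change sits inside the update() method (identical to the stock loader except for the read value; the unresponsive-stream handling is omitted for brevity):

def update(self, i, cap, stream):
    # Read frames from stream `i` in a daemon thread
    n, f, read = 0, self.frames[i], 10  # retrieve only every 10th grabbed frame
    while cap.isOpened() and n < f:
        n += 1
        cap.grab()  # grab() is cheap; the frame is only decoded by retrieve()
        if n % read == 0:
            success, im = cap.retrieve()
            if success:
                self.imgs[i] = im
        time.sleep(0.0)  # yield to other threads

This way the stream is still drained at full speed (every frame is grabbed so the buffer does not fall behind), but only every 10th frame is decoded and handed to inference.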
Prashant Maurya

Control the video frame-rate stride by setting vid_stride in detect.py.
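
Assuming you are on a recent YOLOv5 release that already has the --vid-stride flag (the weights file and YouTube URL below are placeholders), the call would look something like this:

python detect.py --weights best.pt --source 'https://youtu.be/...' --vid-stride 10  # run inference on every 10th frame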

Nim.th

If you can display every image without running object detection on it, I would suggest showing every frame and scoring a frame only once every n seconds, like this:

last_recorded_time_score_frame = time.time()
while True:
    curr_time = time.time()
    ret, frame = cap.read()
    if not ret:
        break

    if curr_time - last_recorded_time_score_frame >= 2.0:  # score a frame only every 2 seconds
        self.cords = self.model.score_frame(frame)  # run your object detection here
        last_recorded_time_score_frame = curr_time

    cv2.imshow(self.window_name, frame)  # show every frame, scored or not
    c = cv2.waitKey(1)
    if c == 27:  # ESC to quit
        self.finish = True
        break

This is my code for detecting vehicles with YOLOv5, but you can surely adapt it. I was also struggling with low FPS and lagging, and this simple trick solved it.

Tyler2P

Just a note on the two existing answers: the solution that Prashant Maurya presents was the old way of skipping frames in YOLOv5. N jacob's is the updated solution; it was introduced here and is also valid for YOLOv8.
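
For reference, a minimal sketch of how the same stride can be passed through the Ultralytics YOLOv8 Python API (assuming a recent ultralytics package; the weights file and YouTube URL are placeholders):

from ultralytics import YOLO

model = YOLO('yolov8n.pt')  # or your custom-trained weights
# vid_stride=10 tells the predictor to decode and process only every 10th frame
for result in model.predict(source='https://youtu.be/...', stream=True, vid_stride=10):
    pass  # handle the detections for each processed frame here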

Mike B