6

I am trying to run object detection on a video, which I load with

cap = cv2.VideoCapture("video3.mp4")

and, after the processing step, I want to display the video with real-time object detection using

# Detect objects in every frame and show the annotated frames in a window.
while True:
    ret, image_np = cap.read()
    # ret is False once the video ends or a frame cannot be read; image_np is
    # then not a usable frame, so stop before handing it to the model.
    if not ret:
        break

    # Expand dimensions since the model expects images to have shape: [1, None, None, 3]
    image_np_expanded = np.expand_dims(image_np, axis=0)
    # Actual detection.
    output_dict = run_inference_for_single_image(image_np_expanded, detection_graph)
    # Visualization of the results of a detection.
    vis_util.visualize_boxes_and_labels_on_image_array(
        image_np,
        output_dict['detection_boxes'],
        output_dict['detection_classes'],
        output_dict['detection_scores'],
        category_index,
        instance_masks=output_dict.get('detection_masks'),
        use_normalized_coordinates=True,
        line_thickness=8)

    cv2.imshow('object detection', cv2.resize(image_np, (800, 600)))
    # Wait up to 25 ms for a key press; pressing 'q' ends playback.
    if cv2.waitKey(25) & 0xFF == ord('q'):
        break

# Release the capture and close the display window however the loop ended.
cap.release()
cv2.destroyAllWindows()

However, Colab reports that cv2.imshow() is disabled and says to use cv2_imshow() instead. But cv2_imshow() only renders still images, one output per frame. I want the output to play as a video, as it did when I used cv2.imshow(). Please help me solve this. Thanks in advance.

My full code is attached

import numpy as np
import os
import six.moves.urllib as urllib
import sys
import tarfile
import tensorflow as tf
import zipfile

from distutils.version import StrictVersion
from collections import defaultdict
from io import StringIO
from matplotlib import pyplot as plt
from PIL import Image

import cv2
from google.colab.patches import cv2_imshow

# Open the input video; frames are pulled from this capture in the main loop.
cap = cv2.VideoCapture("video3.mp4")

# Put the parent directory on the import path, presumably so that the
# `object_detection` package imported below can be found -- confirm layout.
sys.path.append("..")
from object_detection.utils import ops as utils_ops

# Fail fast when the installed TensorFlow is older than 1.12.0.
if StrictVersion(tf.__version__) < StrictVersion('1.12.0'):
    raise ImportError('Please upgrade your TensorFlow installation to v1.12.*.')

from object_detection.utils import label_map_util

from object_detection.utils import visualization_utils as vis_util

# Load the serialized (frozen) model into its own graph.
# NOTE(review): PATH_TO_FROZEN_GRAPH and PATH_TO_LABELS are not defined in the
# code shown here; they must be set before this point.
detection_graph = tf.Graph()
with detection_graph.as_default():
    od_graph_def = tf.GraphDef()
    with tf.gfile.GFile(PATH_TO_FROZEN_GRAPH, 'rb') as fid:
        serialized_graph = fid.read()
        od_graph_def.ParseFromString(serialized_graph)
        # name='' imports the nodes without a name prefix, so tensors can be
        # looked up later by their plain names (e.g. 'image_tensor:0').
        tf.import_graph_def(od_graph_def, name='')


# Category lookup built from the label map file; it is passed to the
# visualization call when drawing the detections.
category_index = label_map_util.create_category_index_from_labelmap(PATH_TO_LABELS, use_display_name=True)


def load_image_into_numpy_array(image):
    """Convert an image object into a ``(height, width, 3)`` uint8 array.

    Args:
        image: Object exposing ``size`` as ``(width, height)`` and
            ``getdata()`` yielding one 3-value pixel per position
            (presumably a PIL image in RGB mode -- confirm with callers).

    Returns:
        numpy.ndarray of dtype ``uint8`` with shape ``(height, width, 3)``.
    """
    (im_width, im_height) = image.size
    # getdata() is a flat pixel sequence, so it is reshaped to rows x columns.
    return np.array(image.getdata()).reshape(
        (im_height, im_width, 3)).astype(np.uint8)


# Directory holding the sample images, and the paths of image3.jpg .. image7.jpg
# inside it.
PATH_TO_TEST_IMAGES_DIR = 'test_images'
TEST_IMAGE_PATHS = list(map(
    lambda index: os.path.join(PATH_TO_TEST_IMAGES_DIR, 'image{}.jpg'.format(index)),
    range(3, 8),
))

# NOTE(review): not referenced by the code shown here; presumably a display
# size for plotted images -- confirm before relying on it.
IMAGE_SIZE = (12, 8)

def run_inference_for_single_image(image, graph):
    """Run the detection graph on one image batch and return its detections.

    Args:
        image: Array fed to the graph's ``image_tensor:0`` input. The caller
            in this file passes a frame with a leading batch axis added;
            ``image.shape[1]`` and ``image.shape[2]`` are passed to the mask
            reframing helper as the image dimensions (assumes a
            ``[1, height, width, 3]`` layout -- confirm).
        graph: ``tf.Graph`` containing the imported detection model.

    Returns:
        dict holding the first batch entry of each fetched output:
        ``num_detections`` (int), ``detection_classes`` (int64 array),
        ``detection_boxes``, ``detection_scores`` and, when the graph
        provides it, ``detection_masks``.
    """
    with graph.as_default():
        # NOTE(review): a new session is opened on every call; when this is
        # called once per video frame that setup is repeated for each frame.
        with tf.Session() as sess:
            # Collect every tensor name in the graph so that only the outputs
            # this model actually provides are requested.
            ops = tf.get_default_graph().get_operations()
            all_tensor_names = {output.name for op in ops for output in op.outputs}
            tensor_dict = {}
            for key in [
                    'num_detections', 'detection_boxes', 'detection_scores',
                    'detection_classes', 'detection_masks'
            ]:
                tensor_name = key + ':0'
                if tensor_name in all_tensor_names:
                    tensor_dict[key] = tf.get_default_graph().get_tensor_by_name(
                        tensor_name)
            if 'detection_masks' in tensor_dict:
                # Drop the batch axis; the processing below is per image.
                detection_boxes = tf.squeeze(tensor_dict['detection_boxes'], [0])
                detection_masks = tf.squeeze(tensor_dict['detection_masks'], [0])
                # Keep only the first num_detections boxes and masks, then
                # reframe the masks from box coordinates to image coordinates.
                real_num_detection = tf.cast(tensor_dict['num_detections'][0], tf.int32)
                detection_boxes = tf.slice(detection_boxes, [0, 0], [real_num_detection, -1])
                detection_masks = tf.slice(detection_masks, [0, 0, 0], [real_num_detection, -1, -1])
                detection_masks_reframed = utils_ops.reframe_box_masks_to_image_masks(
                    detection_masks, detection_boxes, image.shape[1], image.shape[2])
                # Threshold at 0.5 to obtain 0/1 uint8 masks.
                detection_masks_reframed = tf.cast(
                    tf.greater(detection_masks_reframed, 0.5), tf.uint8)
                # Add the batch axis back so every output is indexed the same way.
                tensor_dict['detection_masks'] = tf.expand_dims(
                    detection_masks_reframed, 0)
            image_tensor = tf.get_default_graph().get_tensor_by_name('image_tensor:0')

            # Run inference.
            output_dict = sess.run(tensor_dict,
                                   feed_dict={image_tensor: image})

            # Take the first batch entry of every output and convert the
            # count and class ids to integer types.
            output_dict['num_detections'] = int(output_dict['num_detections'][0])
            output_dict['detection_classes'] = output_dict[
                'detection_classes'][0].astype(np.int64)
            output_dict['detection_boxes'] = output_dict['detection_boxes'][0]
            output_dict['detection_scores'] = output_dict['detection_scores'][0]
            if 'detection_masks' in output_dict:
                output_dict['detection_masks'] = output_dict['detection_masks'][0]
    return output_dict

# Run detection on every frame of the video and display each annotated frame.
while True:
    ret, image_np = cap.read()
    # ret is False once the video ends or a frame cannot be read; image_np is
    # then not a usable frame, so stop before handing it to the model.
    if not ret:
        break

    # The model expects a leading batch axis: [1, None, None, 3].
    image_np_expanded = np.expand_dims(image_np, axis=0)

    output_dict = run_inference_for_single_image(image_np_expanded, detection_graph)

    # The return value is ignored and image_np is displayed afterwards, so the
    # boxes and labels are presumably drawn onto image_np in place.
    vis_util.visualize_boxes_and_labels_on_image_array(
        image_np,
        output_dict['detection_boxes'],
        output_dict['detection_classes'],
        output_dict['detection_scores'],
        category_index,
        instance_masks=output_dict.get('detection_masks'),
        use_normalized_coordinates=True,
        line_thickness=8)

    # Display must happen inside the loop so that every frame is shown.
    cv2_imshow(image_np)
    if cv2.waitKey(25) & 0xFF == ord('q'):
        cv2.destroyAllWindows()
        break

# Free the video file handle once playback has finished or was interrupted.
cap.release()
Christoph Rackwitz
  • 11,317
  • 4
  • 27
  • 36
lahiru idangoda
  • 61
  • 1
  • 1
  • 2
  • Possible duplicate of [Image is not displaying in Google Colab while using imshow()](https://stackoverflow.com/questions/55288657/image-is-not-displaying-in-google-colab-while-using-imshow) – Bob Smith Jul 18 '19 at 15:04
  • of the currently given three answers, one just repeats to use `cv2_imshow` given by colab, which OP already knows, and the other two just embed video files in the HTML, which wasn't the question. the question is: how to repeatedly show images, and have them be displayed successively, in the same place, in a colab notebook. a proper solution requires `IPython` calls. – Christoph Rackwitz Jul 31 '22 at 17:29

3 Answers3

15

To display images in Google Colab, use cv2_imshow from google.colab.patches in place of cv2.imshow:

from google.colab.patches import cv2_imshow
cv2_imshow(img)
Rich Hildebrand
  • 1,607
  • 17
  • 15
  • Is there some documentation for the function? should the image be floats or ints? – Ahmad Moussa Jul 23 '20 at 16:01
  • You can probably use the opencv docs. As near as I can tell it works the same. https://docs.opencv.org/2.4/modules/highgui/doc/user_interface.html?highlight=imshow#imshow – Rich Hildebrand Jul 27 '20 at 17:00
1

This Colab notebook shows a method for viewing videos in notebooks:

import io
import base64
from IPython.display import HTML

def playvideo(filename):
    """Return an HTML video player with the file embedded as base64 data.

    Args:
        filename: Path of the video file to embed; it is served with the
            ``video/mp4`` MIME type.

    Returns:
        IPython.display.HTML object wrapping a ``<video>`` element.
    """
    # Read-only binary mode is sufficient here, and the context manager
    # closes the file as soon as its contents have been read.
    with open(filename, 'rb') as video_file:
        video = video_file.read()
    encoded = base64.b64encode(video)
    return HTML(data='''<video alt="test" controls>
                    <source src="data:video/mp4;base64,{0}" type="video/mp4"/>
                 </video>'''.format(encoded.decode('ascii')))

Then to watch the video use playvideo('./Megamind.mp4').

Also, remember to put %pylab notebook at the beginning of the notebook, which often helps with this type of problem.

Ilon
  • 89
  • 6
-1

Example showing you how to deal with video in Colab:

#define helper function to display videos
import io 
from IPython.display import HTML
from base64 import b64encode
def show_video(file_name, width=640):
  """Return an HTML video player with the file embedded as a base64 data URL.

  Args:
    file_name: Path of the video file to embed; it is served with the
      ``video/mp4`` MIME type.
    width: Value for the ``width`` attribute of the ``<video>`` element.

  Returns:
    IPython.display.HTML object wrapping a ``<video>`` element.
  """
  # The context manager closes the file as soon as its contents are read.
  with open(file_name, 'rb') as video_file:
    mp4 = video_file.read()
  data_url = "data:video/mp4;base64," + b64encode(mp4).decode()
  return HTML("""
  <video width="{0}" controls>
        <source src="{1}" type="video/mp4">
  </video>
  """.format(width, data_url))
# Convert the resulting video from AVI to MP4, the format show_video embeds.
import os
# NOTE(review): path_video is not used in the lines shown; ffmpeg below is
# given the bare file name after changing into outputs/.
path_video = os.path.join("outputs","tracker.avi")
# The lines starting with % and ! are IPython notebook syntax (a magic and a
# shell escape), so this cell only runs inside a notebook.
%cd outputs/
# -y overwrites an existing output.mp4; -loglevel panic silences ffmpeg's
# normal log output.
!ffmpeg -y -loglevel panic -i tracker.avi output.mp4
%cd ..

# Display the converted object-tracking video inline, 960 wide.
path_output = os.path.join("outputs","output.mp4")
show_video(path_output, width=960)
Christoph Rackwitz
  • 11,317
  • 4
  • 27
  • 36
Mohamed TOUATI
  • 372
  • 2
  • 4