I'm trying to simply encode and decode frames captured from the webcam. I want to be able to send them over TCP eventually, but at the moment I'm having trouble getting this to work just locally.
My code takes a frame from the webcam, encodes it, decodes it again, and displays the original and the decoded image in separate windows. The two images look like this:
Here's the code:
import struct
import cv2
import socket
import av
import time
import os
class PerfTimer:
    def __init__(self, name):
        self.name = name

    def __enter__(self):
        self.start_time = time.perf_counter()

    def __exit__(self, type, value, traceback):
        end_time = time.perf_counter()
        print(f"'{self.name}' taken:", end_time - self.start_time, "seconds.")
os.environ['AV_PYTHON_AVISYNTH'] = 'C:/ffmpeg/bin'
socket_enabled = False
sock = None
if socket_enabled:
    sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
    print("Connecting to server...")
    sock.connect(('127.0.0.1', 8000))
# Set up video capture.
print("Opening web cam...")
cap = cv2.VideoCapture(0, cv2.CAP_DSHOW)
cap.set(cv2.CAP_PROP_FRAME_WIDTH, 800)
cap.set(cv2.CAP_PROP_FRAME_HEIGHT, 600)
# Initialize the encoder.
encoder = av.CodecContext.create('h264', 'w')
encoder.width = 800
encoder.height = 600
encoder.pix_fmt = 'yuv420p'
encoder.bit_rate = 5000
# Initialize the decoder.
decoder = av.CodecContext.create('h264', 'r')
decoder.width = 800
decoder.height = 600
decoder.pix_fmt = 'yuv420p'
decoder.bit_rate = 5000
print("Streaming...")
while cap.isOpened():
    # Capture a frame from the camera.
    ret, orig_frame = cap.read()
    if not ret:
        break
    cv2.imshow('Source Video', orig_frame)

    # Convert to YUV.
    img_yuv = cv2.cvtColor(orig_frame, cv2.COLOR_BGR2YUV_I420)

    # Create a video frame object from the NumPy array.
    video_frame = av.VideoFrame.from_ndarray(img_yuv, format='yuv420p')

    with PerfTimer("Encoding") as p:
        encoded_frames = encoder.encode(video_frame)

    # Sometimes the encoder buffers the input and returns no packets, so skip this frame.
    if len(encoded_frames) == 0:
        continue

    print(f"Decoding {len(encoded_frames)} frames...")

    for frame in encoded_frames:
        encoded_frame_bytes = bytes(frame)

        if socket_enabled:
            # Prefix the encoded frame with its size in bytes.
            size = struct.pack('<L', len(encoded_frame_bytes))
            sock.sendall(size + encoded_frame_bytes)

        # Step 1: Create the packet from the encoded frame.
        packet = av.packet.Packet(frame)

        # Step 2: Decode the packet.
        decoded_frames = decoder.decode(packet)

        for decoded_frame in decoded_frames:
            # Step 3: Convert from the encoder's pixel format back to BGR for display.
            display_frame = cv2.cvtColor(decoded_frame.to_ndarray(format='yuv420p'), cv2.COLOR_YUV2BGR_I420)

            # Step 4: Display the frame in a window.
            cv2.imshow('Decoded Video', display_frame)

    if cv2.waitKey(1) & 0xFF == ord('q'):
        break
# Release everything.
cap.release()
if sock is not None:
    sock.close()
cv2.destroyAllWindows()
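For reference, the receiving end I'm planning for the TCP part would read the 4-byte little-endian length prefix, read that many bytes, wrap them in a packet and decode. This is just an untested sketch that mirrors the framing above; recv_exact is a helper I made up, and the port and window name are placeholders:

import socket
import struct

import av
import cv2

def recv_exact(conn, n):
    # Read exactly n bytes from the socket, or return None if the peer closed it.
    data = b''
    while len(data) < n:
        chunk = conn.recv(n - len(data))
        if not chunk:
            return None
        data += chunk
    return data

server = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
server.bind(('0.0.0.0', 8000))
server.listen(1)
conn, addr = server.accept()

decoder = av.CodecContext.create('h264', 'r')

while True:
    # Read the 4-byte little-endian size, then the encoded packet itself.
    header = recv_exact(conn, 4)
    if header is None:
        break
    size = struct.unpack('<L', header)[0]
    payload = recv_exact(conn, size)
    if payload is None:
        break

    # Decode and display, same as the local loop above.
    packet = av.packet.Packet(payload)
    for decoded_frame in decoder.decode(packet):
        bgr = cv2.cvtColor(decoded_frame.to_ndarray(format='yuv420p'), cv2.COLOR_YUV2BGR_I420)
        cv2.imshow('Received Video', bgr)

    if cv2.waitKey(1) & 0xFF == ord('q'):
        break

conn.close()
server.close()
cv2.destroyAllWindows()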