I am training a deep learning model on a dataset of 100GB of video data. I am trying to convert it all into frames before applying Haar Cascade on each frame to crop the faces out. But im trying to find the fastest way to do so.
NOTE: There are 5 folds each with 2 parts in them. Each part has 6 folders with 3 different categories of data(0, 5, 10) in different video formats(mp4, MOV, mov). There are 144 video files in total and each is about 700MB in size.
EDIT: I am running into an issue where the processing of the video to frames is not waiting for its work to be done before going on to the next file
Error Message: It immediately throws an error for each file instead of waiting to convert the frames of the current file then moving on to the next file.
Error!!!
Reading from /Volumes/HDD/Data/Fold4_part2/44/5.mov
Category:5
Writing to /Volumes/HDD/Data/Fold4_part2/44
Number of frames: 7353
Converting video..
Error!!!
Reading from /Volumes/HDD/Data/Fold4_part2/45/0.mp4
Category:0
Writing to /Volumes/HDD/Data/Fold4_part2/45
Number of frames: 7716
Converting video..
Code:
import cv2
import time
import os
path_HDD = "/Volumes/HDD/Data"
def files(path):
"""
Function to get the files and add them to a list
Args:
path: path of the file
Not sure what is DS_Store but I do not need it
"""
for root, directories, files in os.walk(path, topdown=False):
for name in files:
file_path = os.path.join(root, name)
if (name == ".DS_Store"):
continue
else:
category = name.split(".")[0]
# Category returns the video category
try:
print("Reading from " + file_path)
print("Category:" + category)
print("Writing to " + root)
video_to_frames(file_path, category, root)
except:
print("Error!!!")
def video_to_frames(input_loc, label,output_loc):
"""Function to extract frames from input video file
and save them as separate frames in an output directory.
Args:
input_loc: Input video file.
output_loc: Output directory to save the frames.
Returns:
None
"""
# Log the time
time_start = time.time()
# Start capturing the feed
cap = cv2.VideoCapture(input_loc)
# Find the number of frames
video_length = int(cap.get(cv2.CAP_PROP_FRAME_COUNT)) - 1
print ("Number of frames: ", video_length)
count = 0
print ("Converting video..\n")
# Start converting the video
while cap.isOpened():
# Extract the frame
ret, frame = cap.read()
# Write the results back to output location.
cv2.imwrite(output_loc + "/" + label + "/%#05d.jpg" % (count+1), frame)
count = count + 1
# If there are no more frames left
if (count > (video_length-1)):
# Log the time again
time_end = time.time()
# Release the feed
cap.release()
# Print stats
print ("Done extracting frames.\n%d frames extracted" % count)
print ("It took %d seconds forconversion." % (time_end-time_start))
break
if __name__=="__main__":
files(path_HDD)