Problem:
I've got around 10,000 images to compare with each other. My current program compares around 60 image pairs every second, but at that speed it would take nearly 9 days of runtime to finish. I've tried using C++, but the final code took nearly 3x as long as the Python one.
Question:
Is there any faster or more efficient way to compare images? I'm fine with using other languages and other libraries.
Code:
from PIL import Image
from PIL import ImageChops
import math, operator
from functools import reduce
import os
def rmsdiff(image_1, image_2):
    """Return a histogram-based difference score for two images.

    The absolute per-pixel difference of the two images is histogrammed.
    Each bin contributes ``(bin_index % 256) * count ** 2`` to a running
    total, which is divided by the pixel count of ``image_1``
    (width * height) before the square root is taken.

    NOTE(review): a textbook root-mean-square would weight each bin as
    ``count * (bin_index % 256) ** 2``. The weighting is left untouched here
    because changing it would alter the scores that the script below
    compares against its ``value < 12`` threshold; confirm which is intended.
    """
    histogram = ImageChops.difference(image_1, image_2).histogram()
    width, height = image_1.size
    # Multi-band histograms are concatenated, hence the modulo to recover
    # the 0-255 difference value of each bin.
    weighted_total = sum(
        (index % 256) * count ** 2 for index, count in enumerate(histogram)
    )
    return math.sqrt(weighted_total / (float(width) * height))
# Find similar photos: every image is shrunk to a 16x16 thumbnail and each
# unordered pair of thumbnails is scored with rmsdiff().
current = 0  # name of the file being processed; printed if the run is interrupted
try:
    dire = "C:\\Users\\Nikola\\Downloads\\photos"
    photos = os.listdir(dire)

    # To resume an interrupted run, set this to the file name to restart from.
    # Previously `start` was only assigned when a listing entry matched, so a
    # run without a match failed with NameError; fall back to the beginning.
    resume_from = ""
    start = photos.index(resume_from) if resume_from in photos else 0

    # Decode and shrink every image exactly once. Re-opening and resizing the
    # second image for every pair made the file I/O quadratic in the number of
    # photos; 16x16 thumbnails are small enough to keep all of them in memory.
    thumbnails = []
    for name in photos[start:]:
        if "." not in name:
            continue  # entries without an extension are skipped
        current = name
        try:
            with Image.open(os.path.join(dire, name)) as image:
                thumbnails.append((name, image.resize((16, 16))))
        except OSError:
            # Pillow reports unreadable or non-image files with an OSError
            # subclass; skip them instead of aborting the whole run.
            print(f'Skipping unreadable file: {name}')

    for position, (name_1, image_1) in enumerate(thumbnails):
        print(f'Image: {name_1}')
        for name_2, image_2 in thumbnails[position + 1:]:
            current = name_2
            try:
                value = rmsdiff(image_1, image_2)
            except ValueError:
                # Pillow cannot difference this pair (e.g. the two images
                # have different modes). Catching only ValueError keeps
                # Ctrl+C working, unlike the former bare `except`.
                continue
            if value < 12:
                # Report the matching file; the first image's name was
                # already printed on the "Image:" line above.
                print(f'Similar Image: {name_2}')
except KeyboardInterrupt:
    print()
    print(current)