OK, so I think I've found a solution to your problem. I'm not going to say that it is the most efficient solution, but it should work.
First, I modified your code a bit: colours that are in `colors_dict` are sorted first, and then colours are sorted by their count. That way, when a colour that is not in `colors_dict` appears with a count percentage of less than 1%, the program automatically ignores the rest of the colours, so it doesn't iterate through all of them.
Then I added two functions: `color_bounds(color, bound)` and `check_bounds(bounds, colors)`.
`color_bounds` takes a colour and a range and returns the bounds of the colours that are similar to the given colour. For example, with a range of 2, the function would return the decimal RGB equivalents of C93AE0 (upper bound) and C536DC (lower bound) for the colour C738DE.
`check_bounds` then takes those bounds and checks whether any of the more frequent (already used) colours fall within them; if so, the new colour is not added.
import pandas as pd
from PIL import Image
from collections import Counter
def color_bounds(color, bound):
    """Return the per-channel range of colours considered similar to *color*.

    Args:
        color: Colour as a hex string in "RRGGBB" form (no leading "#").
        bound: Distance added to and subtracted from every channel value.

    Returns:
        A tuple ``(upper_bounds, lower_bounds)`` of two lists holding the
        decimal upper and lower limits of the red, green and blue channels.
        Upper limits are capped at 255 and lower limits are floored at 0.

    Raises:
        ValueError: If a channel of *color* is not valid hexadecimal.
    """
    channels = int(color[:2], 16), int(color[2:4], 16), int(color[4:], 16)

    # Clamp directly instead of stepping towards the limit one unit at a time.
    # To get hex strings instead of integers, format each limit with
    # "{:02X}".format(value); check_bounds would then have to convert them
    # back to decimal before comparing.
    upper_bounds = [min(value + bound, 255) for value in channels]
    lower_bounds = [max(value - bound, 0) for value in channels]
    return (upper_bounds, lower_bounds)
def check_bounds(bounds, colors):
    """Report whether none of *colors* lies inside *bounds*.

    Args:
        bounds: ``(upper_bounds, lower_bounds)`` pair of per-channel decimal
            limits, in the shape returned by ``color_bounds``.
        colors: Iterable of "RRGGBB" hex strings to test against the limits.

    Returns:
        ``False`` as soon as one colour has all three channels within the
        inclusive limits, ``True`` otherwise (including when *colors* is
        empty).

    Raises:
        ValueError: If a channel of one of the colours is not valid
            hexadecimal.
    """
    upper_bounds, lower_bounds = bounds[0], bounds[1]
    for color in colors:
        channels = int(color[:2], 16), int(color[2:4], 16), int(color[4:], 16)
        # A colour only counts as similar when every channel is in range.
        if all(
            lower <= value <= upper
            for value, lower, upper in zip(channels, lower_bounds, upper_bounds)
        ):
            return False
    return True
# Hex colour code -> human-readable colour name.
colors_dict = {"000000": "Black", "FFFFFF": "White"} #<------------ huge dictionary of colors this is just one example

# Normalise to RGB so that every pixel is a 3-tuple. A JPEG can also be
# grayscale (pixels are plain ints) or CMYK (4-tuples), neither of which fits
# the 6-digit "RRGGBB" codes built below. The context manager closes the file
# once the pixel data has been copied into the converted image.
with Image.open("image.jpg") as source:
    img = source.convert("RGB")
size = w, h = img.size
data = img.load()

# Collect one upper-case "RRGGBB" string per pixel, scanning column by column.
colors = []
for x in range(w):
    for y in range(h):
        colors.append("{:02X}{:02X}{:02X}".format(*data[x, y]))

total = w * h

# Parallel result lists; they become the columns of the exported table.
color_hex = []
color_count = []
color_percent = []
df = pd.DataFrame()
def key(i):
    """Build the sort key for one ``(hex_color, count)`` pair.

    Args:
        i: Pair whose first item is a hex colour code and whose second item
            is the number of pixels with that colour.

    Returns:
        A tuple ``(color_name, count)`` where ``color_name`` is the entry of
        ``colors_dict`` for the colour, or ``""`` when the colour has no
        entry.
    """
    # dict.get replaces the bare ``except:``, which also hid unrelated errors.
    return colors_dict.get(i[0], ""), i[1]
colors = Counter(colors).items()
for color, count in sorted(colors, key=key, reverse=True):
    # Keep this a float: the majority of colours cover less than 1%.
    percent = count / total * 100
    if percent <= 1:
        if color in colors_dict:
            # Named colours are ordered by the sort key ahead of unnamed
            # ones whatever their count, so a rare named colour must not
            # end the scan before the frequent colours are reached.
            continue
        # Unnamed colours come in descending count order, so none of the
        # remaining ones can exceed 1% either.
        break

    # Ignore colours that are similar to a colour that was already accepted.
    # A larger bound treats more colours as similar (fewer rows in the
    # output); a smaller bound treats fewer colours as similar.
    bounds = color_bounds(color, 16)
    if check_bounds(bounds, color_hex):
        color_hex.append(color)
        color_count.append(count)
        color_percent.append(percent)

df['color'] = color_hex
df['count'] = color_count
df['percent'] = color_percent
df['color_name'] = df['color'].map(colors_dict)
df.to_excel(r'export_dataframe.xlsx',
            index=False, header=True)
print('done')
Given a little more time I could make the code much more efficient, but for now I think I've answered your question. Please tell me if this was helpful :D
PS: You should be able to adjust the bound passed to `color_bounds` to include more or fewer colours.
PPS: I left in the code for `color_bounds` to convert the bounds back to hex. If you want to do that, you will just need to add a function to `check_bounds` that converts them back into decimal RGB values.