I'm writing an OCR app (for Hebrew script).
The first stage of the app is thresholding.
This is what my original image looks like:
And this is what it looks like after thresholding:
As you can see, it is mostly fine, but the "crowns" or "decorations" on the letters sometimes disappear like in this word:
That becomes:
The problem is that after I apply RGB2GRAY to the original image, the black crowns are not dark enough, so they turn white during thresholding, even though one can easily see that they "should" be black. The question is: how do I tell the algorithm to detect them?
My current thresholding code uses Otsu + local (adaptive) thresholding. This is the code:
def apply_threshold(img, is_cropped=False, *, block_divisors=(3, 5, 25), offset=10):
    """Binarize an image with a global Otsu threshold OR-ed with adaptive thresholds.

    The image is converted to grayscale and thresholded globally with Otsu's
    method.  Unless the input is already two-valued, the result is then
    combined (bitwise OR) with several Gaussian adaptive thresholds whose
    window sizes are derived from the smaller image dimension.

    The OR rests on the assumption that a letter pixel is black (0) under
    every threshold, whereas a background pixel may come out black under some
    thresholds and white (255) under others; OR-ing therefore keeps letters
    black and pushes the background to white.

    Args:
        img: Input image, passed to ``rgb2gray``.
            NOTE(review): the ``uint8`` cast below assumes ``rgb2gray``
            returns intensities on a 0-255 scale — confirm against the
            ``rgb2gray`` in use.
        is_cropped: Accepted for backward compatibility; it does not affect
            the result.
        block_divisors: The adaptive window for each entry is
            ``min(image height, image width) / divisor``, rounded to an odd
            integer of at least 3.
        offset: Constant subtracted from the Gaussian-weighted local mean
            (the ``C`` argument of ``cv2.adaptiveThreshold``).

    Returns:
        The binarized single-channel ``uint8`` image (values 0 and 255).
    """
    # Otsu's method needs an 8-bit image, so cast once up front and reuse the
    # same array for every thresholding call below.
    gray_img = rgb2gray(img).astype('uint8')
    _, binary_img = cv2.threshold(gray_img, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)

    if len(np.unique(img)) == 2:
        # Input is already two-valued: the global threshold is sufficient.
        return binary_img

    min_dim = min(binary_img.shape)
    for divisor in block_divisors:
        # cv2.adaptiveThreshold requires an odd block size greater than 1;
        # clamp so that small images do not produce a window of 0 or 1.
        block_size = max(3, int(min_dim / divisor))
        if block_size % 2 == 0:
            block_size += 1
        local_img = cv2.adaptiveThreshold(
            gray_img,
            255,
            cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
            cv2.THRESH_BINARY,
            block_size,
            offset,
        )
        binary_img = binary_img | local_img
    return binary_img
Any creative idea will be appreciated!