I want to segment the lines and the individual characters of the text in an image that looks like this:
For line segmentation, I have done the following (based on this answer):
import cv2
import numpy as np
from matplotlib import pyplot as plt
# Line segmentation of a text image using a horizontal projection profile.
#
# NOTE: no deskewing is applied; the row-wise projection below only separates
# lines when the text is roughly horizontal.

# A row whose mean intensity (0-255, after inverse thresholding) is at or
# below this value is treated as blank.
ROW_INK_THRESHOLD = 2

# Lines whose top edges are closer than this many pixels are merged, so that
# marks sitting above or below a line of text stay attached to it.
MIN_LINE_GAP = 18


def find_line_bounds(hist, threshold=ROW_INK_THRESHOLD):
    """Locate the top and bottom rows of each text line in a row profile.

    Args:
        hist: Sequence with one value per image row (mean ink intensity).
        threshold: Rows with a value at or below this count as blank.

    Returns:
        A tuple ``(uppers, lowers)`` of equal-length lists. ``uppers[i]`` is
        the blank row directly above line ``i`` and ``lowers[i]`` is the last
        row of line ``i`` that contains ink. When ink touches the first or
        last row, that border row is used as the bound so the two lists
        always pair up.
    """
    uppers = []
    lowers = []
    if len(hist) == 0:
        return uppers, lowers

    # Ink on the very first row: there is no blank-to-ink transition to
    # detect, so open the first line at the top border.
    if hist[0] > threshold:
        uppers.append(0)

    for y in range(len(hist) - 1):
        if hist[y] <= threshold < hist[y + 1]:
            uppers.append(y)
        elif hist[y] > threshold >= hist[y + 1]:
            lowers.append(y)

    # Ink on the very last row: close the final line at the bottom border.
    if hist[-1] > threshold:
        lowers.append(len(hist) - 1)

    return uppers, lowers


def merge_close_lines(uppers, lowers, min_gap=MIN_LINE_GAP):
    """Merge detected lines whose top edges are less than ``min_gap`` apart.

    Each top edge is compared with the top edge detected immediately before
    it (even if that one was itself merged away), so a chain of closely
    spaced bands collapses into a single line.

    Args:
        uppers: Top rows of the detected lines, in ascending order.
        lowers: Bottom rows of the detected lines, paired with ``uppers``.
        min_gap: Minimum distance in pixels between two top edges for the
            lines to be kept separate.

    Returns:
        A tuple ``(merged_uppers, merged_lowers)`` of new lists; the inputs
        are not modified.
    """
    merged_uppers = []
    merged_lowers = []
    previous_upper = None
    for upper, lower in zip(uppers, lowers):
        if previous_upper is not None and upper - previous_upper < min_gap:
            # Too close to the band above: extend that line downwards
            # instead of starting a new one.
            merged_lowers[-1] = lower
        else:
            merged_uppers.append(upper)
            merged_lowers.append(lower)
        previous_upper = upper
    return merged_uppers, merged_lowers


def main():
    """Split "1.png" into one image per text line and save the results.

    Writes ``<top row>.JPG`` for every detected line and
    ``resultofwholeimage.png`` with the line boundaries drawn on the
    thresholded image.

    Raises:
        FileNotFoundError: If "1.png" cannot be read.
    """
    img = cv2.imread("1.png")
    if img is None:
        # cv2.imread signals failure by returning None instead of raising.
        raise FileNotFoundError("could not read image '1.png'")

    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)

    # Inverse binary threshold so that ink is white (non-zero). With
    # THRESH_OTSU the level is chosen automatically; 127 is only a placeholder.
    _, threshed = cv2.threshold(
        gray, 127, 255, cv2.THRESH_BINARY_INV | cv2.THRESH_OTSU
    )

    # Mean intensity of every row: the horizontal projection profile.
    hist = cv2.reduce(threshed, 1, cv2.REDUCE_AVG).reshape(-1)

    uppers, lowers = find_line_bounds(hist)
    # Merge bands so marks above and below a line stay with that line.
    uppers, lowers = merge_close_lines(uppers, lowers)

    width = img.shape[1]
    clean = cv2.cvtColor(threshed, cv2.COLOR_GRAY2BGR)
    annotated = clean.copy()

    for upper, lower in zip(uppers, lowers):
        # Crop from the clean image so the guide lines do not end up in the
        # line images; lower + 1 keeps the last row that contains ink.
        cv2.imwrite(str(upper) + '.JPG', clean[upper:lower + 1])
        cv2.line(annotated, (0, upper), (width, upper), (255, 0, 0), 1)
        cv2.line(annotated, (0, lower), (width, lower), (0, 255, 0), 1)

    cv2.imwrite("resultofwholeimage.png", annotated)


if __name__ == "__main__":
    main()
The image below shows the resulting line boundaries drawn on the thresholded image.
Now, however, I want to segment the individual characters. Each character should be segmented together with the consonant that is above or below it; for example, the first character from the right should be segmented like this: Single character segmentation
Is such a method possible?