Segmenting boxes into different images

Question

I am trying to read information from documents whose data fields allow only one letter/digit per box. I managed to segment the box arrays for the respective data fields; however, I am having trouble segmenting the individual boxes within these box arrays.

a box array

I have tried the cv2.approxPolyDP and cv2.HoughLines functions; however, both give unacceptable results. The Sudoku question at one point relies on the fact that the vertical/horizontal grid lines are much longer than the individual digits. In my case the characters sometimes spill out of the boxes and almost always touch them.

This function is not able to detect small boxes individually:

def detect_boxes(img):
    """Collect the contours of ``img`` that approximate to a quadrilateral.

    The image is converted to grayscale, sharpened with ``kernel`` (a name
    that must be defined outside this function), inverse-thresholded at 180
    and traced with ``cv2.findContours``. A contour is accepted when
    ``cv2.approxPolyDP`` (tolerance: 1% of the contour perimeter) reduces it
    to exactly four vertices.

    For every accepted contour the function prints the contour's array shape
    and its width/height extent, fills it in red on a preview copy of ``img``
    and displays that preview with ``cv2_imshow``. ``img`` itself is not
    modified.

    Args:
        img: BGR image (it is passed to ``cv2.cvtColor`` with
            ``cv2.COLOR_BGR2GRAY``).

    Returns:
        list: the accepted contours, in the order ``cv2.findContours``
        produced them.
    """
    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    sharp_img = cv2.filter2D(gray, -1, kernel)
    _, thresh = cv2.threshold(sharp_img, 180, 255, cv2.THRESH_BINARY_INV)

    # findContours returns (image, contours, hierarchy) on OpenCV 3.x but
    # (contours, hierarchy) on 2.4 and 4.x; index -2 is the contour list on all.
    contours = cv2.findContours(
        thresh, cv2.RETR_LIST, cv2.CHAIN_APPROX_SIMPLE)[-2]

    # Draw on a copy so the caller's image is not painted over.
    preview = img.copy()
    boxes = []
    for cnt in contours:
        approx = cv2.approxPolyDP(cnt, 0.01 * cv2.arcLength(cnt, True), True)
        if len(approx) != 4:
            continue
        boxes.append(cnt)

        # Contours are shaped (N, 1, 2); flatten to (N, 2) to get x/y extents.
        points = cnt.reshape(-1, 2)
        extent = points.max(axis=0) - points.min(axis=0)
        print(cnt.shape)
        print(extent[0], extent[1])

        cv2.drawContours(preview, [cnt], 0, (0, 0, 255), -1)
        cv2_imshow(preview)
    return boxes

The approxPolyDP result is:

approxPolyDP result

The other function is:

def det_box(img):
    """Detect straight lines in ``img`` with the Hough transform and preview them.

    The image is converted to grayscale, run through ``cv2.Canny`` (thresholds
    50/150, aperture 3) and the edge map is displayed with ``cv2_imshow``.
    ``cv2.HoughLines`` is then applied with a 1 px / 1 degree resolution and
    an accumulator threshold of 200. Every detected line is drawn in red on a
    preview copy of ``img``, which is displayed as well. ``img`` itself is
    not modified.

    Args:
        img: BGR image (it is passed to ``cv2.cvtColor`` with
            ``cv2.COLOR_BGR2GRAY``).

    Returns:
        The raw result of ``cv2.HoughLines``: an array of ``(rho, theta)``
        pairs, or ``None`` when no line reaches the threshold.
    """
    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    edges = cv2.Canny(gray, 50, 150, apertureSize=3)
    cv2_imshow(edges)
    lines = cv2.HoughLines(edges, 1, np.pi / 180, 200)

    # Draw on a copy so the caller's image is not painted over.
    preview = img.copy()
    if lines is not None:
        # Half-length of each drawn segment: the image diagonal is enough for
        # the segment to cross the whole image, whatever its size.
        reach = int(np.hypot(*img.shape[:2])) + 1
        # The result is shaped (N, 1, 2) on OpenCV 3+ and (1, N, 2) on 2.4;
        # flattening to (N, 2) visits every line on both (lines[0] would
        # yield only the first line on 3+).
        for rho, theta in lines.reshape(-1, 2):
            a = np.cos(theta)
            b = np.sin(theta)
            # (x0, y0) is the point of the line closest to the origin.
            x0 = a * rho
            y0 = b * rho
            x1 = int(x0 + reach * (-b))
            y1 = int(y0 + reach * a)
            x2 = int(x0 - reach * (-b))
            y2 = int(y0 - reach * a)
            cv2.line(preview, (x1, y1), (x2, y2), (0, 0, 255), 2)
    cv2_imshow(preview)
    return lines

The HoughLines result is:

HoughLines result

I was trying to get box points/contours of each individual small box in order. Any help will be appreciated. Even removing the horizontal and vertical lines in the boxes will be helpful.

If the documents with the unfilled boxes are known to you in advance, you could e.g. use matchTemplate or sift/surf with the empty document as template, and just hardcode the position of each box in relation to the template, or subtract the template from the document to get only the hand drawn parts — HugoRune, May 26 '19 at 19:47
@HugoRune I thought of that as a last resort. However, I managed to find a solution without it. — REVOLUTION, May 28 '19 at 06:17

score 1 · Accepted Answer · answered May 28 '19 at 06:11

Took me some time but I figured it out myself.

Actual image:

# Work on a single-channel image: a 2-D input is used as is, anything else
# is converted from BGR to grayscale.
gray = img if img.ndim == 2 else cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)

# 3x3 sharpening kernel: centre weight 9, every neighbour -1.
kernel = np.full((3, 3), -1)
kernel[1, 1] = 9

# Sharpen first; `gray` is inverted only after the sharpened copy is taken.
sharp_img = cv2.filter2D(np.asarray(gray), -1, kernel)
gray = cv2.bitwise_not(gray)

# Inverse binary threshold: pixels above 200 become 0, the rest 255.
ret, bw = cv2.threshold(sharp_img, 200, 255, cv2.THRESH_BINARY_INV)

#### HORIZONTAL TRANSFORMATIONS #######
# Sobel-style 3x3 kernels. Both are defined here; only the horizontal one is
# applied in this step, the vertical one is used further down.
hz_kernel = np.array([
    [1, 2, 1],
    [0, 0, 0],
    [-1, -2, -1],
])
vert_kernel = np.array([
    [-1, 0, 1],
    [-2, 0, 2],
    [-1, 0, 1],
])

# Filter the binary image, then dilate twice and erode four times with a
# 1x5 (one row, five columns) structuring element.
row_element = np.ones((1, 5))
hz_img = cv2.filter2D(np.asarray(bw), -1, hz_kernel)
dilated = cv2.dilate(hz_img, row_element, iterations=2)
hz_img = cv2.erode(dilated, row_element, iterations=4)

print('after hz sobel->')
cv2_imshow(hz_img)

After horizontal sobel filter:

# Keep only the horizontal components that span (almost) the full image width.
# findContours returns (image, contours, hierarchy) on OpenCV 3.x but
# (contours, hierarchy) on 2.4 and 4.x; the last two items are the same on all.
contours, hierarchy = cv2.findContours(
        hz_img,
        cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)[-2:]

# Start from an all-pass mask (255 everywhere) and black out every contour
# whose bounding box is more than 10 px narrower than the image.
mask = np.ones(img.shape[:2], dtype="uint8") * 255
for cnt in contours:
    x, y, w, h = cv2.boundingRect(cnt)
    if w < (img.shape[1] - 10):
        cv2.drawContours(mask, [cnt], -1, 0, -1)

hz_lines = cv2.bitwise_and(hz_img, hz_img, mask=mask)

# Shown unconditionally, like the other intermediate results; the previous
# `if i == 0:` guard referenced a name that is not defined in this script.
print("after removing noise")
cv2_imshow(hz_lines)

After horizontal noise removal:

######## VERTICAL TRANSFORMATIONS #########
# Filter the binary image with the vertical kernel, then dilate once and
# erode once with a 3x1 (three rows, one column) structuring element.
column_element = np.ones((3, 1))
vert_img = cv2.filter2D(np.asarray(bw), -1, vert_kernel)
dilated = cv2.dilate(vert_img, column_element, iterations=1)
vert_img = cv2.erode(dilated, column_element, iterations=1)

print("after vertical soble->")
cv2_imshow(vert_img)

After vertical sobel filter:

# Keep only the thin vertical components that span (almost) the full height.
# findContours returns (image, contours, hierarchy) on OpenCV 3.x but
# (contours, hierarchy) on 2.4 and 4.x; index -2 is the contour list on all.
vert_contours = cv2.findContours(
        vert_img,
        cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)[-2]

# Start from an all-pass mask (255 everywhere) and black out every contour
# whose bounding box is more than 10 px shorter than the image or wider
# than 5 px.
vert_mask = np.ones(img.shape[:2], dtype="uint8") * 255
for cnt in vert_contours:
    x, y, w, h = cv2.boundingRect(cnt)
    if h < vert_img.shape[0] - 10 or w > 5:
        cv2.drawContours(vert_mask, [cnt], -1, 0, -1)

vert_lines = cv2.bitwise_and(vert_img, vert_img, mask=vert_mask)

print('after removing noise ->')
cv2_imshow(vert_lines)

After vertical noise removal:

####### COMBINATION ##########
# Merge the two line images: a pixel is set in the result when it is set in
# the vertical lines, the horizontal lines, or both.
boxes_array = cv2.bitwise_or(
    vert_lines,
    hz_lines,
)

print("box array")
cv2_imshow(boxes_array)

Bitwise or of results:

# Dilate three times, erode three times with the same 7x7 element, then
# invert the result so that set and unset pixels swap.
square_element = np.ones((7, 7))
dilated = cv2.dilate(boxes_array, square_element, iterations=3)
closed = cv2.erode(dilated, square_element, iterations=3)
eroded = cv2.bitwise_not(closed)

print("dilated and inverted->")
cv2_imshow(eroded)

After dilation, erosion and inversion:

# Finally find the contours and find the bounding boxes.
# findContours returns (image, contours, hierarchy) on OpenCV 3.x but
# (contours, hierarchy) on 2.4 and 4.x; index -2 is the contour list on all.
contours = cv2.findContours(
        eroded,
        cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)[-2]
# Walk the contours in the reverse of the order findContours produced them.
# NOTE(review): presumably this yields the boxes in reading order - confirm,
# or sort `boxes` by x/y explicitly if a guaranteed order is needed.
contours = contours[::-1]
boxes = []
for cnt in contours:
    rect = cv2.boundingRect(cnt)
    x, y, w, h = rect
    # Skip regions whose width/height ratio is below 0.6 in either direction.
    if w / h < 0.6 or h / w < 0.6:
        continue
    boxes.append(rect)
    # Crop the box from the input image and display the crop
    # (the original displayed the undefined name `num`).
    num_img = img[y:y + h, x:x + w]
    cv2_imshow(num_img)

A box after cropping:

Segmenting boxes into different images

1 Answer