0

I have written code to detect columns in a table image. I followed these steps:

  1. Remove horizontal rows from table image
  2. Invert Image
  3. Apply dilation with a kernel of (25,4) for 2 iterations.
  4. Apply erosion on a dilated image with a kernel of (2,6) for 3 iterations.
  5. Calculate the column-wise sum of the resulting image, then look for valleys in that profile one window at a time.
  6. Calculate the window size as 35% of image width.
  7. Calculate the horizontal threshold in order to consider valleys as valleys within window regions. (with variation threshold of minima-maxima of 0.1 and gaussian sigma = 15)
  8. Smoothen the window region with gaussian sigma = 15 and find valleys which are below the horizontal threshold by setting height = horizontal threshold calculated above.
  9. Now we have a list of all possible valleys (column coordinates) with false positives.

Then I remove false positives from the above list:

  1. Columns that are crossing the text regions (threshold of crossing more than 2 text regions)
  2. Combine multiple columns present in big columnar space into only one column

The problem is:

  1. If the spacing between words inside a table cell is wide enough to be detected as a column gap, a column is drawn there:

extra column is drawn in description

  2. If a column is empty, or does not contain enough values to produce troughs, the column is not drawn:

column is not drawn between check number and description

I need a solution for these 2 problems. Is there any other way to do this task more accurately?

import cv2
from google.colab.patches import cv2_imshow
import os
from google.colab import files

import numpy as np 
import matplotlib.pyplot as plt
from itertools import groupby
from operator import itemgetter

from scipy.ndimage.filters import gaussian_filter1d

from scipy.signal import find_peaks
from cv2 import imwrite
import pandas as pd

# Stage 1: detect the horizontal ruling lines of the table and paint them
# white, then save the result as "row_removed_1.jpg".
image = cv2.imread("table_with_rows.png") #handle corner cases with jpeg images
if image is None:
    # cv2.imread reports an unreadable/missing file by returning None.
    raise FileNotFoundError("could not read table_with_rows.png")

# `image` receives the green debug contours; `img` is the clean output.
# They must be separate arrays, otherwise contour pixels that fall outside the
# whitened rectangle leak into the saved file.
img = image.copy()

#Convert to grayscale
gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
#Threshold the grayscale image
# NOTE: THRESH_OTSU selects the threshold automatically, so the 220 passed
# here is not the value that is actually applied.
thresh = cv2.threshold(gray, 220, 255, cv2.THRESH_BINARY_INV | cv2.THRESH_OTSU)[1]
print(thresh.shape)

# Remove horizontal
# Opening with a 20x1 (width x height) kernel keeps only long horizontal runs.
horizontal_kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (20, 1))
detected_lines = cv2.morphologyEx(thresh, cv2.MORPH_OPEN, horizontal_kernel, iterations=2)

cnts = cv2.findContours(detected_lines, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
# findContours returns 2 or 3 values depending on the OpenCV version.
cnts = cnts[0] if len(cnts) == 2 else cnts[1]
for c in cnts:
    cv2.drawContours(image, [c], -1, (0, 255, 0), 2)
    x, y, w, h = cv2.boundingRect(c)
    # Whiten the line plus a 2 px margin above/below and 1 px left/right.
    # The start indices are clamped at 0: a negative slice start would count
    # from the far edge of the image and select nothing, leaving lines that
    # touch the top/left border in place.
    top = max(y - 2, 0)
    left = max(x - 1, 0)
    img[top:y + h + 2, left:x + w + 1] = 255

# NOTE(review): JPEG is lossy; the file name is kept because the next stage
# reads exactly this path.
cv2.imwrite("row_removed_1.jpg", img)
cv2_imshow(image)

# Stage 2: smear the remaining content with morphology and build a normalised
# column-wise intensity profile (`img_col_sum`) plus the search window width
# (`window_size`) used by the valley search.
img = cv2.imread("row_removed_1.jpg", 0)  # flag 0 -> single-channel grayscale
if img is None:
    # cv2.imread reports an unreadable/missing file by returning None.
    raise FileNotFoundError("could not read row_removed_1.jpg")
h, w = img.shape
# Valleys are searched in windows that are 35% of the image width.
window_size = int(w*0.35)

# Invert the image (presumably so that dark text becomes the bright
# foreground that dilation grows -- confirm against the input images).
inv_img = 255-img
# Kernel of 25 rows x 2 columns, applied twice.
dilation_kernel = np.ones((25,2),np.uint8)
dilation = cv2.dilate(inv_img,dilation_kernel,iterations = 2)
inv_dilation=cv2.bitwise_not(dilation)
# Kernel of 2 rows x 8 columns, applied three times.
erosion_kernel=np.ones((2,8),np.uint8)
erosion = cv2.erode(dilation,erosion_kernel,iterations = 3)
cv2_imshow(erosion)


#Taking column-wise sum of pixel values in image
col_totals = np.sum(erosion, axis=0, dtype=np.float64)

#Normalising the values of img col sum
# The maximum is taken ONCE, before any value is scaled. Recomputing it while
# the list is being rewritten would divide the entries after the true maximum
# by a different (smaller) number and distort the profile.
peak_total = col_totals.max()
if peak_total > 0:
    # An all-zero profile is left as zeros instead of dividing by zero.
    col_totals /= peak_total
# Kept as a plain list of floats, as the later stages slice and len() it.
img_col_sum = col_totals.tolist()

print('window_size :::',window_size)

print('*********** Image col sum graph after normalization ***********')
#Plotting the graph of columnwise sum
plt.plot(img_col_sum)
plt.show()

# Stage 3: slide overlapping windows over the normalised column profile
# (`img_col_sum`) and collect candidate column x-coordinates in `peaks`.
#
# Every window is `window_size + window_overlap` samples long and, except for
# the first one, starts `deviation` samples before its nominal position.
# Because the windows overlap, the same valley may be reported by more than
# one window; `peaks` is neither de-duplicated nor sorted here.
window_overlap = 300
deviation = min(window_overlap, window_size)
peaks = []

# range() rejects a step of 0, which is what window_size becomes for images
# narrower than 3 px, so the step is floored at 1.
for window_start in range(0, len(img_col_sum), max(window_size, 1)):
    i = 0 if window_start == 0 else window_start - deviation  ## version2
    window_end = i + window_size + window_overlap
    print('i===', i)
    window_val = img_col_sum[i:window_end]

    # Smooth once; the same curve is used for the extrema, the threshold and
    # the final valley search.
    ysmoothed_15 = gaussian_filter1d(window_val, sigma=15)
    #Plotting the smoothed graph (to pick the dips in graph)
    plt.plot(ysmoothed_15)
    plt.title('with sigma 15')
    plt.show()

    # Local minima are the peaks of the negated curve.
    min_peaks_15, _ = find_peaks(-1*ysmoothed_15)
    max_peaks_15, _ = find_peaks(ysmoothed_15)

    # One figure showing the curve together with its minima and maxima.
    plt.plot(ysmoothed_15)
    plt.plot(min_peaks_15, ysmoothed_15[min_peaks_15], "x", label='min - x')
    plt.plot(max_peaks_15, ysmoothed_15[max_peaks_15], "o", label='max - o')
    plt.legend()
    plt.show()

    print('..... minimas for window .....')
    print(ysmoothed_15[min_peaks_15])
    # Despite its name this is the LOWEST local minimum (0 when there is none).
    max_minima_val = np.min(ysmoothed_15[min_peaks_15]) if min_peaks_15.size else 0
    print(max_minima_val)

    print('..... maximas for window .....')
    print(ysmoothed_15[max_peaks_15])
    # Highest local maximum (1 when there is none).
    max_maxima_val = np.max(ysmoothed_15[max_peaks_15]) if max_peaks_15.size else 1
    print(max_maxima_val)

    diff = max_maxima_val - max_minima_val
    print('difference between minima and maxima point for the range of {0} - {1} ===> {2}'.format(i,window_end,diff))

    #################### . calculate plateau ###########################
    print('checking plateau............................!!!!!!')
    print(i)
    # A window whose first two raw samples are equal is treated as starting
    # on a plateau, and the window start itself is recorded as a column.
    # The size check avoids indexing an empty array for a 1-sample window.
    gradient = np.sign(np.diff(window_val))
    if gradient.size and gradient[0] == 0:
        peaks.append(i)
        print('column will be drawn at :::: ', i)

    # if difference between minima and maxima is greater than certain threshold then only there is variation which can be considered as column
    if diff > 0.1:
        # Threshold sits two thirds of the way up from the lowest minimum to
        # the highest maximum.
        height = max_minima_val + diff/1.5
        print('Threshold for the range of {0} - {1} ===> {2}'.format(i,window_end,height))

        # Negating both the curve and the threshold makes find_peaks keep only
        # the minima whose smoothed value is at or below `height`.
        win_peaks_20, _ = find_peaks(-1*ysmoothed_15, height=-1*height)
        plt.plot(ysmoothed_15)
        plt.plot(win_peaks_20, ysmoothed_15[win_peaks_20], "x")
        plt.title('with sigma 15')
        plt.show()

        # Convert window-relative indices to image x-coordinates.
        x_cord_win_peaks = (win_peaks_20 + i).tolist()
        peaks.extend(x_cord_win_peaks)

# for loop of window is completed

print('troughs by taking winodws',peaks)


########## after this false positive is removed through ocr response
ForamJ
  • 146
  • 1
  • 5
  • Your question is too long and more detailed. You can share a [minimal code snippet](https://stackoverflow.com/help/minimal-reproducible-example) and more specify the question. – Yunus Temurlenk Apr 27 '20 at 08:21
  • Thank you @YunusTemurlenk I have added code to detect columns in table image. – ForamJ Apr 27 '20 at 11:03

0 Answers0