1

I am trying to read the digits on a gas meter using Python and OpenCV. I have located the digits, as shown here:

enter image description here

I then try to run pytesseract on the extracted black and white numbers as seen here

enter image description here enter image description here enter image description here enter image description here enter image description here enter image description here

But I can't get pytesseract to recognize the numbers. I have tried the other page segmentation modes without any luck, so I suspect the problem is in the preprocessing. Any suggestions?

My code:

# Locate candidate regions on a gas-meter photo by contour area / polygon
# complexity, then run Tesseract on each cropped, thresholded region.
img = cv2.imread("gasmeter.jpg")
# cv2.imread signals failure by returning None rather than raising; fail
# here with a clear message instead of an AttributeError on img.shape below.
if img is None:
    raise FileNotFoundError("Could not read image: gasmeter.jpg")

# Resize image
scale_percent = 40
width = int(img.shape[1] * scale_percent / 100)
height = int(img.shape[0] * scale_percent / 100)
dim = (width, height)
resized_img = cv2.resize(img, dim, interpolation=cv2.INTER_AREA)

grayscale = cv_funcs.get_grayscale(resized_img)
thresh = cv_funcs.thresholding(grayscale)

contours, hierarchy = cv2.findContours(thresh, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)

# Keep only contours whose area falls in one of two hand-tuned bands and
# whose polygon approximation has a bounded number of vertices.
# NOTE: despite the name, this list is filtered, not ordered; contours stay
# in the order cv2.findContours returned them.
sorted_contours = []
for cnt in contours:
    area = cv2.contourArea(cnt)
    # Approximate the contour by a polygon (tolerance: 1% of its perimeter)
    # so the vertex count can be used as a rough shape filter.
    approx = cv2.approxPolyDP(cnt, 0.01 * cv2.arcLength(cnt, True), True)
    n_vertices = len(approx)

    in_small_band = 840.0 < area < 1000.0 and 4 < n_vertices < 12
    in_large_band = 6000.0 < area < 6500.0 and n_vertices < 12
    # The two area bands are disjoint, so a contour is appended at most once.
    if in_small_band or in_large_band:
        sorted_contours.append(cnt)

# Debug overlays. drawContours draws onto the image passed in, so the
# copies below hold the result and no extra binding is needed.
all_contours_img = resized_img.copy()
correct_contours_img = resized_img.copy()
cv2.drawContours(all_contours_img, contours, -1, (0, 255, 0), 3)
cv2.drawContours(correct_contours_img, sorted_contours, -1, (0, 255, 0), 3)

print("Finding numbers:")
bounding_rect_img = resized_img.copy()
# Find bounding rectangle
for i, cnt in enumerate(sorted_contours):
    x, y, w, h = cv2.boundingRect(cnt)
    cv2.rectangle(bounding_rect_img, (x, y), (x + w, y + h), (0, 255, 0), 2)
    # Find number
    crop_img = thresh[y:y + h, x:x + w]
    invert_img = cv2.bitwise_not(crop_img)
    erode_img = cv_funcs.erode(invert_img)
    # NOTE(review): the dilated image is computed but never used; display,
    # file output and OCR below all use the eroded image. Confirm which one
    # is intended before changing the OCR input.
    dilated_img = cv_funcs.dilate(erode_img)
    cv2.imshow(str(i), erode_img)
    cv2.imwrite(str(i) + '.jpg', erode_img)
    # --psm 10 asks Tesseract to treat the image as a single character;
    # "digits" selects its digits-only config.
    text = pytesseract.image_to_string(erode_img, config='--psm 10 digits')
    print(text)

# HighGUI windows are only painted while the event loop runs inside
# cv2.waitKey; without it the imshow calls above never show anything.
cv2.waitKey(0)
cv2.destroyAllWindows()
tobiasrj
  • 43
  • 5

0 Answers0