I'm struggling with a project that takes an image of a fairly clear font (from a label, for example), reads the "text region", and outputs it as a string using OCR (Tesseract, for instance).
I've made quite some progress: I added various global filters to get a fairly clear result, but I'm struggling to find a method for filtering out just the text. After that, the text has to be rotated to be as horizontal as possible, and then the easy part should be cropping it.
Are there any leads on how to do that without using training data and without overcomplicating the system, since I only use a Raspberry Pi to do the computing?
Thanks for helping! Here's what I've come up with so far:
Original Image(Captured from PiCamera):
Adaptive thresh after shadow removal:
Global thresh after shadow removal:
Here's the code:
# import the necessary packages
from PIL import Image
import pytesseract
import argparse
import cv2
import os
import picamera
import time
import numpy as np
#preprocess = "tresh"
# Remaining: text cropping and rotating:
import math
import json
from collections import defaultdict
from scipy.ndimage.filters import rank_filter
def dilate(ary, N, iterations):
    """Dilate using an NxN '+' sign shape. ary is np.uint8.

    The '+' is applied as two passes: one with a kernel whose middle row is
    set, then one with a kernel whose middle column is set.

    Args:
        ary: np.uint8 image; it is floor-divided by 255 before dilating, so
            a 0/255 image becomes 0/1.
        N: side length of the square kernel.
        iterations: number of dilation iterations for each pass.

    Returns:
        The dilated image, on the same scale as ``ary // 255``.
    """
    # Floor division keeps the index an int and the image uint8 on both
    # Python 2 and Python 3 ('/' yields floats on Python 3).
    middle = (N - 1) // 2
    scaled = ary // 255

    row_kernel = np.zeros((N, N), dtype=np.uint8)
    row_kernel[middle, :] = 1
    dilated_image = cv2.dilate(scaled, row_kernel, iterations=iterations)

    col_kernel = np.zeros((N, N), dtype=np.uint8)
    col_kernel[:, middle] = 1
    dilated_image = cv2.dilate(dilated_image, col_kernel, iterations=iterations)
    return dilated_image
def props_for_contours(contours, ary):
    """Describe each contour by its bounding box and its count of set pixels.

    Returns one dict per contour with inclusive corner coordinates
    ('x1', 'y1', 'x2', 'y2') and 'sum': the total of ``ary`` inside the
    filled contour, divided by 255.
    """
    def describe(contour):
        left, top, width, height = cv2.boundingRect(contour)
        # Rasterise the filled contour so it can be used as a mask on ary.
        mask = np.zeros(ary.shape)
        cv2.drawContours(mask, [contour], 0, 255, -1)
        return {
            'x1': left,
            'y1': top,
            'x2': left + width - 1,
            'y2': top + height - 1,
            'sum': np.sum(ary * (mask > 0))/255
        }

    return [describe(contour) for contour in contours]
def union_crops(crop1, crop2):
    """Return the smallest (x1, y1, x2, y2) rect containing both rects."""
    left_a, top_a, right_a, bottom_a = crop1
    left_b, top_b, right_b, bottom_b = crop2
    left = min(left_a, left_b)
    top = min(top_a, top_b)
    right = max(right_a, right_b)
    bottom = max(bottom_a, bottom_b)
    return left, top, right, bottom
def intersect_crops(crop1, crop2):
    """Intersect two (x1, y1, x2, y2) rects.

    When the rects do not overlap, the returned rect has x2 < x1 and/or
    y2 < y1.
    """
    left_a, top_a, right_a, bottom_a = crop1
    left_b, top_b, right_b, bottom_b = crop2
    left = max(left_a, left_b)
    top = max(top_a, top_b)
    right = min(right_a, right_b)
    bottom = min(bottom_a, bottom_b)
    return left, top, right, bottom
def crop_area(crop):
    """Return the area of an (x1, y1, x2, y2) rect; inverted rects give 0."""
    left, top, right, bottom = crop
    width = max(0, right - left)
    height = max(0, bottom - top)
    return width * height
def find_border_components(contours, ary):
    """Collect contours whose bounding box covers more than half the image.

    Returns a list of (index, x1, y1, x2, y2) tuples, where index is the
    contour's position in ``contours`` and the corners are inclusive.
    """
    half_image_area = 0.5 * (ary.shape[0] * ary.shape[1])
    found = []
    for index, contour in enumerate(contours):
        left, top, width, height = cv2.boundingRect(contour)
        if width * height <= half_image_area:
            continue
        found.append((index, left, top, left + width - 1, top + height - 1))
    return found
def angle_from_right(deg):
    """Return how far deg is from the nearest multiple of 90 degrees."""
    above = deg % 90
    below = 90 - above
    return above if above <= below else below
def remove_border(contour, ary):
    """Remove everything outside a border contour.

    Args:
        contour: the border contour.
        ary: the image to mask.

    Returns:
        The element-wise minimum of ``ary`` and a mask that is 255 inside
        the border shape and 0 outside it and along its 4px outline.
    """
    # Use a rotated rectangle (should be a good approximation of a border).
    # If it's far from a right angle, it's probably two sides of a border and
    # we should use the bounding box instead.
    c_im = np.zeros(ary.shape)
    r = cv2.minAreaRect(contour)
    degs = r[2]
    if angle_from_right(degs) <= 10.0:
        # cv2.boxPoints (OpenCV >= 3) replaced cv2.cv.BoxPoints (OpenCV 2.x).
        box_points = getattr(cv2, 'boxPoints', None)
        if box_points is None:
            box_points = cv2.cv.BoxPoints
        # np.int0 was an alias of np.intp and is gone in NumPy 2.
        box = np.array(box_points(r)).astype(np.intp)
        cv2.drawContours(c_im, [box], 0, 255, -1)
        cv2.drawContours(c_im, [box], 0, 0, 4)
    else:
        # boundingRect returns (x, y, width, height), not two corners.
        x, y, w, h = cv2.boundingRect(contour)
        top_left = (x, y)
        bottom_right = (x + w - 1, y + h - 1)
        cv2.rectangle(c_im, top_left, bottom_right, 255, -1)
        cv2.rectangle(c_im, top_left, bottom_right, 0, 4)
    return np.minimum(c_im, ary)
def find_components(edges, max_components=16):
    """Dilate the image until there are just a few connected components.

    Args:
        edges: np.uint8 edge image, passed to ``dilate``.
        max_components: stop as soon as the contour count is at most this.

    Returns:
        The contours of the first dilation that satisfies the limit.

    Raises:
        ValueError: if max_components is less than 1.
    """
    if max_components < 1:
        raise ValueError('max_components must be at least 1')
    # Perform increasingly aggressive dilation until there are just a few
    # connected components. The first attempt uses 2 iterations.
    n = 1
    while True:
        n += 1
        dilated_image = dilate(edges, N=3, iterations=n)
        # findContours returns (contours, hierarchy) on OpenCV 2.x/4.x and
        # (image, contours, hierarchy) on 3.x; [-2] is the contours in both.
        contours = cv2.findContours(
            dilated_image, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)[-2]
        if len(contours) <= max_components:
            return contours
def find_optimal_components_subset(contours, edges):
    """Find a crop which strikes a good balance of coverage/compactness.

    Starts from the contour holding the most set pixels and greedily unions
    in further contours while that improves the score.

    Args:
        contours: non-empty sequence of contours.
        edges: edge image with values 0/255.

    Returns:
        An (x1, y1, x2, y2) tuple.

    Raises:
        IndexError: if contours is empty.
    """
    def f1_score(prec, recall):
        # Guard the 0/0 case (nothing covered and the crop spans the image).
        denom = prec + recall
        return 2.0 * prec * recall / denom if denom > 0 else 0.0

    c_info = props_for_contours(contours, edges)
    c_info.sort(key=lambda x: -x['sum'])
    # Float totals keep the fractions below from truncating on Python 2.
    total = np.sum(edges) / 255.0
    area = edges.shape[0] * edges.shape[1]

    c = c_info.pop(0)
    crop = c['x1'], c['y1'], c['x2'], c['y2']
    covered_sum = c['sum']

    while covered_sum < total:
        changed = False
        recall = 1.0 * covered_sum / total
        prec = 1 - 1.0 * crop_area(crop) / area
        f1 = f1_score(prec, recall)
        for i, c in enumerate(c_info):
            this_crop = c['x1'], c['y1'], c['x2'], c['y2']
            new_crop = union_crops(crop, this_crop)
            new_sum = covered_sum + c['sum']
            new_recall = 1.0 * new_sum / total
            new_prec = 1 - 1.0 * crop_area(new_crop) / area
            new_f1 = f1_score(new_prec, new_recall)

            # Add this crop if it improves f1 score,
            # _or_ it adds 25% of the remaining pixels for <15% crop expansion.
            # ^^^ very ad-hoc! make this smoother
            remaining_frac = 1.0 * c['sum'] / (total - covered_sum)
            old_area = crop_area(crop)
            new_area = crop_area(new_crop)
            if old_area > 0:
                new_area_frac = 1.0 * new_area / old_area - 1
            else:
                # A zero-area crop: any growth is unbounded in relative terms.
                new_area_frac = 0.0 if new_area == 0 else float('inf')
            if new_f1 > f1 or (
                    remaining_frac > 0.25 and new_area_frac < 0.15):
                print('%d %s -> %s / %s (%s), %s -> %s / %s (%s), %s -> %s' % (
                    i, covered_sum, new_sum, total, remaining_frac,
                    crop_area(crop), crop_area(new_crop), area, new_area_frac,
                    f1, new_f1))
                crop = new_crop
                covered_sum = new_sum
                del c_info[i]
                changed = True
                # c_info was mutated; restart the scan with fresh scores.
                break

        if not changed:
            break

    return crop
def pad_crop(crop, contours, edges, border_contour, pad_px=15):
    """Slightly expand the crop to get full contours.

    The crop is padded once by pad_px, then grown to include any contours
    it currently intersects, but it will not expand past a border.

    Args:
        crop: (x1, y1, x2, y2) rect to expand.
        contours: contours the crop may grow to include.
        edges: edge image; its shape gives the default bounds.
        border_contour: optional border contour; when given, the crop stays
            5px inside its bounding box.
        pad_px: padding applied once to each side of the initial crop.

    Returns:
        The expanded (x1, y1, x2, y2) tuple.
    """
    # shape is (rows, cols) == (height, width): x is bounded by shape[1].
    bx1, by1 = 0, 0
    bx2, by2 = edges.shape[1] - 1, edges.shape[0] - 1
    if border_contour is not None and len(border_contour) > 0:
        c = props_for_contours([border_contour], edges)[0]
        bx1, by1, bx2, by2 = c['x1'] + 5, c['y1'] + 5, c['x2'] - 5, c['y2'] - 5

    def crop_in_border(rect, pad):
        """Pad rect by pad pixels per side, clamped to the border bounds."""
        x1, y1, x2, y2 = rect
        return (max(x1 - pad, bx1), max(y1 - pad, by1),
                min(x2 + pad, bx2), min(y2 + pad, by2))

    crop = crop_in_border(crop, pad_px)

    # Contour properties do not depend on the crop; compute them once.
    c_info = props_for_contours(contours, edges)
    changed = True
    # Growing the crop can make it touch further contours, so repeat until
    # a full pass changes nothing.
    while changed:
        changed = False
        for c in c_info:
            this_crop = c['x1'], c['y1'], c['x2'], c['y2']
            this_area = crop_area(this_crop)
            int_area = crop_area(intersect_crops(crop, this_crop))
            new_crop = crop_in_border(union_crops(crop, this_crop), 0)
            # Only partially-covered contours trigger an expansion.
            if 0 < int_area < this_area and crop != new_crop:
                print('%s -> %s' % (str(crop), str(new_crop)))
                changed = True
                crop = new_crop

    return crop
def downscale_image(im, max_dim=2048):
    """Shrink im until its longest dimension is <= max_dim.

    Args:
        im: image object exposing ``size`` as (width, height) and ``resize``.
        max_dim: largest allowed side length, in pixels.

    Returns:
        (scale, new_image), where scale <= 1. When im is already small
        enough, scale is 1.0 and new_image is im itself.
    """
    a, b = im.size
    longest = max(a, b)
    if longest <= max_dim:
        return 1.0, im

    scale = 1.0 * max_dim / longest
    # Image.ANTIALIAS was removed in Pillow 10; LANCZOS is the same filter.
    resample = getattr(Image, 'LANCZOS', None)
    if resample is None:
        resample = Image.ANTIALIAS
    # Never let an extreme aspect ratio truncate a side down to 0 pixels.
    new_size = (max(1, int(a * scale)), max(1, int(b * scale)))
    new_im = im.resize(new_size, resample)
    return scale, new_im
def process_image(inputImg):
    """Crop an image file down to its detected text region.

    Opens inputImg, downscales it, finds the text components on a Canny edge
    map, crops the original image to them, and saves the result as
    'Cropted_and_rotated_image.jpg'. No rotation is applied here.

    Args:
        inputImg: path (or file object) accepted by ``Image.open``.

    Returns:
        The cropped PIL image, or None when no text components are found.
    """
    opnImg = Image.open(inputImg)
    scale, im = downscale_image(opnImg)

    edges = cv2.Canny(np.asarray(im), 100, 200)

    # TODO: dilate image _before_ finding a border. This is crazy sensitive!
    # findContours returns (contours, hierarchy) on OpenCV 2.x/4.x and
    # (image, contours, hierarchy) on 3.x; [-2] is the contours in both.
    # A copy is passed because older OpenCV versions modify the input.
    contours = cv2.findContours(
        edges.copy(), cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)[-2]
    borders = find_border_components(contours, edges)
    # Each entry is (index, x1, y1, x2, y2); smallest bounding box first.
    borders.sort(key=lambda b: (b[3] - b[1]) * (b[4] - b[2]))

    border_contour = None
    if len(borders):
        border_contour = contours[borders[0][0]]
        edges = remove_border(border_contour, edges)

    edges = 255 * (edges > 0).astype(np.uint8)

    # Remove ~1px borders using a rank filter.
    maxed_rows = rank_filter(edges, -4, size=(1, 20))
    maxed_cols = rank_filter(edges, -4, size=(20, 1))
    debordered = np.minimum(np.minimum(edges, maxed_rows), maxed_cols)
    edges = debordered

    contours = find_components(edges)
    if len(contours) == 0:
        print('%s -> (no text!)' % inputImg)
        return None

    crop = find_optimal_components_subset(contours, edges)
    crop = pad_crop(crop, contours, edges, border_contour)

    crop = [int(x / scale) for x in crop]  # upscale to the original image size.

    # Debug drawing, disabled:
    #draw = ImageDraw.Draw(im)
    #c_info = props_for_contours(contours, edges)
    #for c in c_info:
    #    this_crop = c['x1'], c['y1'], c['x2'], c['y2']
    #    draw.rectangle(this_crop, outline='blue')
    #draw.rectangle(crop, outline='red')
    #im.save(out_path)
    #draw.text((50, 50), path, fill='red')
    #orig_im.save(out_path)
    #im.show()

    text_im = opnImg.crop(crop)
    text_im.save('Cropted_and_rotated_image.jpg')
    return text_im
    # Disabled batch-mode tail:
    #text_im.save(out_path)
    #print '%s -> %s' % (path, out_path)
# Camera capturing stuff:
# The context manager releases the camera even if the capture fails.
with picamera.PiCamera() as myCamera:
    myCamera.vflip = True
    myCamera.hflip = True
    # Optional preview, disabled:
    #myCamera.start_preview()
    #time.sleep(6)
    #myCamera.stop_preview()
    myCamera.capture("Captured_Image.png")
# End capturing procedure

# NOTE: the pipeline below reads the stored example, not the fresh capture.
imgAddr = '/home/pi/My_examples/Mechanical_display_converter/Example1.jpg'
#imgAddr = "Captured_Image.png"

# construct the argument parse and parse the arguments (disabled)
#ap = argparse.ArgumentParser()
#ap.add_argument("-i", "--image", required=True,
#    help="path to input image to be OCR'd")
#ap.add_argument("-p", "--preprocess", type=str, default="thresh",
#    help="type of preprocessing to be done")
#args = vars(ap.parse_args())

# load the example image and convert it to grayscale
img = cv2.imread(imgAddr)
if img is None:
    # cv2.imread signals failure by returning None instead of raising.
    raise IOError('Could not read image: %s' % imgAddr)
gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
cv2.imshow('Step1_gray_filter', gray)

# Earlier preprocessing experiments, disabled:
## check to see if we should apply thresholding to preprocess the
## image
#if args["preprocess"] == "thresh":
#    gray = cv2.threshold(gray, 0, 255,
#        cv2.THRESH_BINARY | cv2.THRESH_OTSU)[1]
## make a check to see if median blurring should be done to remove
## noise
#elif args["preprocess"] == "blur":
#    gray = cv2.medianBlur(gray, 3)
#if preprocess == "thresh":
#    gray = cv2.threshold(gray, 150, 255,
#        cv2.THRESH_BINARY | cv2.THRESH_OTSU)[1]
## make a check to see if median blurring should be done to remove
## noise
#elif preprocess == "blur":
#    gray = cv2.medianBlur(gray, 3)

# Shadow removal, one colour plane at a time: estimate the background by
# dilating and median-blurring the plane, then keep the inverted difference.
rgb_planes = cv2.split(img)
result_planes = []
result_norm_planes = []
for plane in rgb_planes:
    dilated_img = cv2.dilate(plane, np.ones((7, 7), np.uint8))
    bg_img = cv2.medianBlur(dilated_img, 21)
    diff_img = 255 - cv2.absdiff(plane, bg_img)
    # dst is a required positional argument of cv2.normalize; pass None to
    # have OpenCV allocate the output.
    norm_img = cv2.normalize(diff_img, None, alpha=0, beta=255,
                             norm_type=cv2.NORM_MINMAX, dtype=cv2.CV_8UC1)
    result_planes.append(diff_img)
    result_norm_planes.append(norm_img)
result = cv2.merge(result_planes)
result_norm = cv2.merge(result_norm_planes)
cv2.imshow('shadows_out.png', result)
cv2.imshow('shadows_out_norm.png', result_norm)

grayUnShadowedImg = cv2.cvtColor(result, cv2.COLOR_BGR2GRAY)
cv2.imshow('Shadow_Gray_CVT', grayUnShadowedImg)

# Global threshold on the shadow-free image.
ret, threshUnShadowedImg = cv2.threshold(grayUnShadowedImg, 200, 255, cv2.THRESH_BINARY)
cv2.imshow('unShadowed_Thresh_filtering', threshUnShadowedImg)
#v2.imwrite('unShadowed_Thresh_filtering.jpg', threshUnShadowedImg)
#croptedunShadowedImg = process_image('unShadowed_Thresh_filtering.jpg')

# Adaptive (Gaussian) threshold on the shadow-free image.
adptThreshUnShadowedImg = cv2.adaptiveThreshold(grayUnShadowedImg, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C, cv2.THRESH_BINARY, 115, 1)
cv2.imshow('unShadowed_Adaptive_Thresh_filtering', adptThreshUnShadowedImg)

# Blur + Otsu experiment, disabled:
#blurFImg = cv2.GaussianBlur(adptThreshUnShadowedImg,(25,25), 0)
#ret, f3Img = cv2.threshold(blurFImg,0,255,cv2.THRESH_BINARY+cv2.THRESH_OTSU)
#cv2.imshow('f3Img', f3Img )

#OCR Stage (disabled):
## write the grayscale image to disk as a temporary file so we can
## apply OCR to it
#filename = "{}.png".format(os.getpid())
#cv2.imwrite(filename, threshImg)
## load the image as a PIL/Pillow image, apply OCR, and then delete
## the temporary file
#text = pytesseract.image_to_string(Image.open(filename))
#os.remove(filename)
#print("\n" + text)

cv2.waitKey(0)
cv2.destroyAllWindows()
I tried this source out as well, but it doesn't seem to work and is not that easy to understand: