I want to deskew an image using. To do that I wrote (admittedly with lots of help) a program that:
- transforms image to be a easier to compute (thresh, dilation, etc.)
- draws contours around all objects
- computes four extreme points around the text contours (ignoring anything with a margin)
- draws a rectangle around that area using cv2.minAreaRect
The idea was that cv2.minAreaRect returns the angle as well, which I could use to deskew the image. However, in my case it's –90°.
You can see a sample input image .
You can see the result I get
.
I tested the program on a “clean” image (MS Word Screenshot rotaten ≈ 30° in Gimp) and it gave an identical result.
My code:
import numpy as np
import cv2
import itertools
img = cv2.imread('zuo.png')
imgray = cv2.cvtColor(img,cv2.COLOR_BGR2GRAY)
ret,thresh = cv2.threshold(imgray,64,255,0)
############
kernel = np.ones((2,2),np.uint8)
img_e = cv2.dilate(thresh,kernel,iterations = 1)
# cv2.imwrite("out_eroded.png", img_e)
# http://docs.opencv.org/3.0-beta/doc/py_tutorials/py_imgproc/py_morphological_ops/py_morphological_ops.html
# img_e = thresh
############
imgbw, contours, hierarchy = cv2.findContours(img_e,cv2.RETR_TREE,cv2.CHAIN_APPROX_SIMPLE)
# imgbw, contours, hierarchy = cv2.findContours(thresh,cv2.RETR_EXTERNAL,cv2.CHAIN_APPROX_SIMPLE)
margin_distance = 25
def flatten(arr, n = 1):
# print(arr)
ret = list(itertools.chain.from_iterable(arr))
# print(ret)
if n != 1:
return flatten(ret, n - 1)
else:
return ret
# print(list(flatten([[1,2,3],[4,5,6], [7], [8,9]])))
def get_min_max_values(cs, im_y, im_x):
# print(flatten(cs), 1)
# print(im_y, im_x)
min_y = im_y - margin_distance
min_x = im_x - margin_distance
max_y = margin_distance
max_x = margin_distance
for lvl1 in cs:
for lvl2 in lvl1:
x, y = lvl2[0]
# x = im_x - x
# y = im_y - y
max_y = max(y, max_y) if y + margin_distance < im_y else max_y
max_x = max(x, max_x) if x + margin_distance < im_x else max_x
min_y = min(y, min_y) if y > margin_distance else min_y
min_x = min(x, min_x) if x > margin_distance else min_x
return ((min_y, min_x), (min_y, max_x), (max_y, min_x), (max_y, max_x))
new_rect = get_min_max_values(contours, len(img), len(img[0]))
new_rect = list(map(lambda x: list(x)[::-1], list(new_rect)))
print(new_rect)
rect = cv2.minAreaRect(np.int0(new_rect))
# print(rect)
print(rect)
box = cv2.boxPoints(rect)
box = np.int0(box)
img_out = cv2.drawContours(img, [box], -1, (0,0,255), 5) # -1 = wszystkie kontury
img_out = cv2.drawContours(img, contours, -1, (0,255,0), 3)
cv2.imwrite("out.png", img_out)
Why isn't the rectangle skewed to match the text? I don't see any artifacts that would justify that.