How can I detect text area in an image?

Question

This code is from "Detect text area in an image using python and opencv":

This is my picture:

import cv2
from google.colab.patches import cv2_imshow

# Load image, grayscale, Gaussian blur, adaptive threshold
image_path = '/content/drive/MyDrive/project/test/R1/a_5.jpg'
image = cv2.imread(image_path)
if image is None:
    # cv2.imread() does not raise on a missing or unreadable file; it returns
    # None, which would otherwise surface as a cryptic error in cvtColor().
    raise OSError('Could not read image: {}'.format(image_path))
gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
blur = cv2.GaussianBlur(gray, (5, 5), 0)
# Inverted binary output (block size 17, constant 23): pixels darker than their
# local neighbourhood become 255 -- presumably the text strokes on a lighter
# background.
thresh = cv2.adaptiveThreshold(
    blur, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C, cv2.THRESH_BINARY_INV, 17, 23
)

# Dilate to combine adjacent text contours
kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (5, 5))
dilate = cv2.dilate(thresh, kernel, iterations=1)

# Find contours, highlight text areas, and extract ROIs
cnts = cv2.findContours(dilate, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
# findContours() returns (contours, hierarchy) in OpenCV 2.x/4.x but
# (image, contours, hierarchy) in OpenCV 3.x; pick the contour list either way.
cnts = cnts[0] if len(cnts) == 2 else cnts[1]

ROI_number = 0  # only used by the optional ROI export below
for c in cnts:
    # Skip everything that is not a large blob.
    if cv2.contourArea(c) <= 10000:
        continue
    x, y, w, h = cv2.boundingRect(c)
    cv2.rectangle(image, (x, y), (x + w, y + h), (36, 255, 12), 3)
    # Optional: uncomment to save each highlighted region to its own file.
    # ROI = image[y:y+h, x:x+w]
    # cv2.imwrite('ROI_{}.png'.format(ROI_number), ROI)
    # ROI_number += 1

# Show the intermediate masks and the annotated result.
cv2_imshow(thresh)
cv2_imshow(dilate)
cv2_imshow(image)
cv2.waitKey()

The result is

But I want it to cover the whole word, like this:

Please help me complete this!

TanjiroLL · Accepted Answer · 2023-04-22T19:11:45.207

First, you need to lower the contour-area threshold (from 10000 to 1000) so that the last letter is not filtered out. Second, we combine all the rectangles into one by taking the minimum of xmin, ymin and the maximum of xmax, ymax:

# Merge every sufficiently large contour into one bounding box.
# Expects `cnts`, `image`, `thresh` and `dilate` from the question's code.
min_area = 1000  # lowered from 10000 so small parts (the last letter) are kept
boxes = [cv2.boundingRect(c) for c in cnts if cv2.contourArea(c) > min_area]

if boxes:
    # The union of all boxes is spanned by the extreme corners.
    xmin = min(x for x, y, w, h in boxes)
    ymin = min(y for x, y, w, h in boxes)
    xmax = max(x + w for x, y, w, h in boxes)
    ymax = max(y + h for x, y, w, h in boxes)
    cv2.rectangle(image, (xmin, ymin), (xmax, ymax), (36, 255, 12), 3)
    # Optional: uncomment to save the merged region.
    # ROI = image[ymin:ymax, xmin:xmax]
    # cv2.imwrite('ROI.png', ROI)
else:
    # Without this guard the extremes would stay undefined and
    # cv2.rectangle() would fail.
    print('No contour with area > {} found; nothing to draw.'.format(min_area))

cv2_imshow(thresh)
cv2_imshow(dilate)
cv2_imshow(image)
cv2.waitKey()

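Update for two words: we combine only the rectangles whose vertical centers are close to each other, so each line of text gets its own box. Note that you may need to tune `thresh1` (minimum contour area) and `thresh2` (maximum vertical distance) to get good results: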

# Draw one bounding box per line of text by grouping contour boxes on y.
# Expects `cnts`, `image`, `thresh` and `dilate` from the question's code.
thresh1 = 50   # minimum contour area for a contour to be considered
thresh2 = 100  # max vertical distance between box centers within one group

# Bounding boxes (x, y, w, h) of the kept contours, ordered top to bottom.
rectangles = [cv2.boundingRect(c) for c in cnts if cv2.contourArea(c) > thresh1]
rectangles.sort(key=lambda r: r[1])

groups = []  # one merged box per group, stored as [xmin, ymin, xmax, ymax]
yc0 = None   # vertical center of the box that opened the current group
for x, y, w, h in rectangles:
    yc = y + h // 2
    if yc0 is None or abs(yc0 - yc) > thresh2:
        # Too far from the current group (or the very first box): new group.
        groups.append([x, y, x + w, y + h])
        yc0 = yc
    else:
        # Close enough: grow the current group's box to include this one.
        box = groups[-1]
        box[0], box[1] = min(x, box[0]), min(y, box[1])
        box[2], box[3] = max(x + w, box[2]), max(y + h, box[3])

# With no qualifying contour `groups` is empty and nothing is drawn.
for xmin, ymin, xmax, ymax in groups:
    cv2.rectangle(image, (xmin, ymin), (xmax, ymax), (36, 255, 12), 3)


cv2_imshow(thresh)
cv2_imshow(dilate)
cv2_imshow(image)
cv2.waitKey()

this answer amounts to `cv.convexHull`. -- giving each word its own bounding box requires you to decide how far apart lines may be to still be considered one word. — Christoph Rackwitz, Apr 22 '23 at 14:40
`convexHull` is for connected lines, not for disconnected lines. — TanjiroLL, Apr 22 '23 at 18:45

How can I detect text area in an image?

1 Answers1