Preserving character order during image processing with OpenCV

Question

In the code below, I pre-process the image and then use findContours to extract each character as a separate image.

    // Pipeline: load -> grayscale -> inverted binary threshold -> dilation ->
    // external contours -> crop each contour's bounding box and save it as a JPEG.
    Mat inImage = Imgcodecs.imread("CaptureMultiLines.jpg", Imgcodecs.CV_LOAD_IMAGE_COLOR);
    Mat destination = new Mat(inImage.rows(), inImage.cols(), inImage.type());
    ImageProcUtils.showImage("initial", inImage);

    // convert to grayscale
    Imgproc.cvtColor(inImage, destination, Imgproc.COLOR_BGR2GRAY);
    ImageProcUtils.showImage("grayscaleimage", destination);

    Mat binImg = new Mat(inImage.rows(), inImage.cols(), inImage.type());

    // binarize the image (inverted, fixed threshold of 127)
    // NOTE(review): the returned value `thresh` is not read anywhere in this snippet.
    double thresh = Imgproc.threshold(destination, binImg, 127, 255, Imgproc.THRESH_BINARY_INV);
    ImageProcUtils.showImage("Binary Image", binImg);

    // dilation with a 2-row x 1-column kernel of ones
    Mat dilMat = new Mat(inImage.rows(), inImage.cols(), inImage.type());
    Mat kernel = Mat.ones(2,1, CvType.CV_8U); // able to extract character
    Imgproc.dilate(binImg, dilMat, kernel);

    ImageProcUtils.showImage("Dilated Image", dilMat);

    // find contours (outermost only) on a copy of the dilated image
    // NOTE(review): the list is consumed below in the order findContours
    // returns it; nothing here sorts it by row or column.
    List<MatOfPoint> contours = new ArrayList<MatOfPoint>(); 
    Mat hierarchy = new Mat();
    Imgproc.findContours(dilMat.clone(), contours, hierarchy, Imgproc.RETR_EXTERNAL, Imgproc.CHAIN_APPROX_SIMPLE);
    System.out.println();

    ImageProcUtils.showImage("Contours", dilMat);

    CharacterSegmentation inst = new CharacterSegmentation();        

    // Draw every contour directly onto dilMat.
    // NOTE(review): dilMat is also the Mat the character crops are taken from
    // below, so the drawn outlines are part of what gets saved.
    for (int j = 0; j < contours.size(); j++) {
         Imgproc.drawContours(dilMat, contours, j, new Scalar(255,0,0));
    }

    // NOTE(review): `drawing` is never used in this snippet.
    Mat drawing = Mat.zeros( dilMat.size(), CvType.CV_8UC3 );
    // NOTE(review): `submat` is only declared inside the loop below, so this
    // statement references an undeclared variable as written; `centroid` is
    // also never read.
    Point centroid = inst.massCenterMatOfPoint2f(submat);

    // Crop, dump and save one image per contour.
    for( int i = 0; i< contours.size(); i++ )
    {

        Rect box = Imgproc.boundingRect(contours.get(i));

        // submat() is a view into dilMat restricted to the bounding box
        Mat submat = dilMat.submat(box);

        int[] flattenedArr = inst.flattenAnImage(submat);
        ImageProcUtils.printArray("Contour.."+i, flattenedArr);

        // The file name carries only the contour index i, so it reflects the
        // order of `contours`, not the character's row/column in the image.
        Imgcodecs.imwrite("character-Line"+ i +".jpg", submat);
    }

Below is the image that I am using,

I am able to extract the characters successfully, but I am not able to preserve their order. I want to name each character according to its position in the image. For example, F should be named Character-0-0, meaning it occurred in the first row and first column. Similarly, B should be named Character-2-7.

I know that several similar threads exist, such as:

"Trying to segment characters and save it in order to image files. But contours are being drawn in a different order?"; "OpenCV findContours are not in order"; "How can I sort contours from left to right and top to bottom?"

Even so, I still don't know how to implement this.

I also tried the Python code below, but still had no luck:

import cv2
import numpy as np
# Two-pass segmentation attempt:
#   1. dilate with a very wide kernel, find contours, sort them by y;
#   2. for each resulting region, resize/threshold/dilate again and find
#      contours inside it.
image = cv2.imread("D:\\Users\\Downloads\\CaptureMultiLines.jpg")
cv2.imshow('orig',image)
# image = cv2.resize(image_original,None,fx=4, fy=4, interpolation = cv2.INTER_CUBIC)

#grayscale
gray = cv2.cvtColor(image,cv2.COLOR_BGR2GRAY)
# original_resized = cv2.resize(gray, (0,0), fx=.2, fy=.2)
cv2.imshow('gray',gray)
cv2.waitKey(0)

#Remove Salt and pepper noise
# NOTE(review): `saltpep` (and `blured` below) are only displayed; the
# threshold further down is applied to `gray`, not to either of them.
saltpep = cv2.fastNlMeansDenoising(gray,None,9,13)
# original_resized = cv2.resize(saltpep, (0,0), fx=.2, fy=.2)
cv2.imshow('Grayscale',saltpep)
cv2.waitKey(0)

#blur
blured = cv2.blur(saltpep,(3,3))
# original_resized = cv2.resize(blured, (0,0), fx=.2, fy=.2)
cv2.imshow('blured',blured)
cv2.waitKey(0)

#binary (inverted, fixed threshold of 127)
ret,thresh = cv2.threshold(gray,127,255,cv2.THRESH_BINARY_INV)
# original_resized = cv2.resize(thresh, (0,0), fx=.2, fy=.2)
cv2.imshow('Threshold',thresh)
cv2.waitKey(0)

#dilation with a 5-row x 100-column kernel
# (presumably wide enough to merge the characters of one text line into a
# single blob - confirm against the input image)
kernel = np.ones((5,100), np.uint8)
img_dilation = cv2.dilate(thresh, kernel, iterations=1)
# original_resized = cv2.resize(img_dilation, (0,0), fx=.2, fy=.2)
cv2.imshow('dilated',img_dilation)
cv2.waitKey(0)

#find contours
# NOTE(review): unpacking three values matches the OpenCV 3.x return shape.
im2,ctrs, hier = cv2.findContours(img_dilation.copy(), cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)

#sort contours by the y of their bounding box (index 1 of x, y, w, h)
sorted_ctrs = sorted(ctrs, key=lambda ctr: cv2.boundingRect(ctr)[1])

for i, ctr in enumerate(sorted_ctrs):

    # Get bounding box
    x, y, w, h = cv2.boundingRect(ctr)

    # Getting ROI (a NumPy slice of `image`, i.e. still 3-channel colour)
    roi = image[y:y+h, x:x+w]

# #   show ROI
    cv2.imshow('segment no:' +str(i),roi)
    cv2.waitKey(0)

    # NOTE(review): this draws onto `image` before `roi` is resized below;
    # since `roi` is a slice of `image`, the rectangle may show up in the crop.
    cv2.rectangle(image,(x,y),( x + w, y + h ),(90,0,255),2)
    cv2.waitKey(0)

    # Upscale the region 4x, then threshold it.
    # NOTE(review): `im` comes from the colour image, so `thresh_1` and
    # `img_dilation_1` are not single-channel when passed to findContours.
    im = cv2.resize(roi,None,fx=4, fy=4, interpolation = cv2.INTER_CUBIC)
    ret_1,thresh_1 = cv2.threshold(im,127,255,cv2.THRESH_BINARY_INV)
    # original_resized = cv2.resize(thresh, (0,0), fx=.2, fy=.2)
    cv2.imshow('Threshold_1',thresh_1)
    cv2.waitKey(0)

    #dilation
    kernel_1 = np.ones((5,5), np.uint8)
    img_dilation_1 = cv2.dilate(thresh_1, kernel_1, iterations=1)
    # original_resized = cv2.resize(img_dilation, (0,0), fx=.2, fy=.2)
    cv2.imshow('dilatedn_loop_1',img_dilation_1)
    cv2.waitKey(0)

    #find contours
    im,ctrs_1, hier = cv2.findContours(img_dilation_1.copy(), cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)

    #sort contours
    # NOTE(review): index 1 is the y coordinate; ordering characters left to
    # right within a region would need index 0 (x).
    sorted_ctrs_1 = sorted(ctrs_1, key=lambda ctr: cv2.boundingRect(ctr)[1])

    for j, ctr_1 in enumerate(sorted_ctrs_1):

        # Get bounding box
        x_1, y_1, w_1, h_1 = cv2.boundingRect(ctr_1)

        # Getting ROI
        # NOTE(review): these coordinates were found in the 4x-resized crop
        # but are used to slice the full-size `image`.
        roi_1 = image[y_1:y_1+h_1, x_1:x_1+w_1]

        # #   show ROI
        # NOTE(review): `i` and `j` are ints from enumerate(); adding them to
        # a str raises TypeError. `j` is also appended twice.
        cv2.imshow('Line no: ' + i + "Column no : " + j  +str(j),roi_1)
        cv2.waitKey(0)


# original_resized = cv2.resize(image, (0,0), fx=.2, fy=.2)
# cv2.imshow('marked areas',original_resized)
cv2.imshow('marked areas',image)
cv2.waitKey(0)

Ishara Madhawa · Accepted Answer · 2018-06-04T13:10:54.237

Try this:

import cv2
import numpy as np
# Segment a multi-line text image into characters in reading order:
#   1. merge each text line into one blob with a wide dilation and sort the
#      blobs top-to-bottom;
#   2. inside each line, find the character contours and sort them
#      left-to-right.
# Every intermediate image is shown in a window; press a key to continue.


def _sorted_boxes(binary, axis):
    """Return bounding boxes of the external contours of *binary*, sorted.

    binary -- single-channel 8-bit image; non-zero pixels are foreground.
    axis   -- index into the (x, y, w, h) box used as sort key
              (0 = left-to-right, 1 = top-to-bottom).
    """
    # findContours returns (image, contours, hierarchy) on OpenCV 3.x but
    # (contours, hierarchy) on 2.4 and 4.x; the contour list is always the
    # second-to-last element. A copy is passed because older OpenCV builds
    # modify the input image.
    contours = cv2.findContours(binary.copy(), cv2.RETR_EXTERNAL,
                                cv2.CHAIN_APPROX_SIMPLE)[-2]
    # Compute each bounding box once instead of once for sorting and once
    # more for cropping.
    boxes = [cv2.boundingRect(contour) for contour in contours]
    return sorted(boxes, key=lambda box: box[axis])


image = cv2.imread("D:\\Users\\Downloads\\CaptureMultiLines.jpg")
if image is None:
    # imread reports a missing/unreadable file by returning None.
    raise IOError("could not read the input image")
cv2.imshow('orig',image)

#grayscale
gray = cv2.cvtColor(image,cv2.COLOR_BGR2GRAY)
cv2.imshow('gray',gray)
cv2.waitKey(0)

#Remove Salt and pepper noise
# (shown for inspection only; the threshold below works on `gray`)
saltpep = cv2.fastNlMeansDenoising(gray,None,9,13)
cv2.imshow('Grayscale',saltpep)
cv2.waitKey(0)

#blur (shown for inspection only)
blured = cv2.blur(saltpep,(3,3))
cv2.imshow('blured',blured)
cv2.waitKey(0)

#binary: dark text becomes white foreground
ret,thresh = cv2.threshold(gray,127,255,cv2.THRESH_BINARY_INV)
cv2.imshow('Threshold',thresh)
cv2.waitKey(0)

#dilation: the 5x100 kernel smears horizontally so that each text line
#becomes a single connected blob
kernel = np.ones((5,100), np.uint8)
img_dilation = cv2.dilate(thresh, kernel, iterations=1)
cv2.imshow('dilated',img_dilation)
cv2.waitKey(0)

# Text lines, top to bottom.
for i, (x, y, w, h) in enumerate(_sorted_boxes(img_dilation, 1)):

    # Getting ROI
    roi = image[y:y+h, x:x+w]

    #   show ROI
    cv2.imshow('segment no:' +str(i),roi)
    cv2.waitKey(0)

    # Upscale the line, then convert to grayscale BEFORE thresholding so the
    # result is a true single-channel 0/255 mask (findContours requires
    # CV_8UC1, and the character crops stay strictly binary).
    im = cv2.resize(roi,None,fx=4, fy=4, interpolation = cv2.INTER_CUBIC)
    im_gray = cv2.cvtColor(im, cv2.COLOR_BGR2GRAY)
    ret_1,thresh_1 = cv2.threshold(im_gray,127,255,cv2.THRESH_BINARY_INV)
    cv2.imshow('Threshold_1',thresh_1)
    cv2.waitKey(0)

    # Characters of this line, left to right.
    for j, (x_1, y_1, w_1, h_1) in enumerate(_sorted_boxes(thresh_1, 0)):

        # Getting ROI
        roi_1 = thresh_1[y_1:y_1+h_1, x_1:x_1+w_1]

        #   show ROI
        cv2.imshow('Line no: ' + str(i) + "Column no : " +str(j),roi_1)
        cv2.waitKey(0)


cv2.imshow('marked areas',image)
cv2.waitKey(0)

score 0 · Answer 2 · answered Jun 03 '18 at 11:42

0

This is the code I have used for doing the same thing, but using python:

def _raster_scan_key(ctr):
    """Return x + y * image_width for the contour's bounding box."""
    # boundingRect yields (x, y, w, h); only the top-left corner is needed.
    left, top = cv2.boundingRect(ctr)[:2]
    return left + top * image.shape[1]


sorted_contours = sorted(contours, key=_raster_scan_key)

Hope this will be helpful. This will sort contours according to the x and y coordinates of the character.

answered Jun 03 '18 at 11:42

Ishara Madhawa

3,549
5
24
42

I tried the below code, but no luck `Collections.sort(contourList, new Comparator() { // @Override public int compare(MatOfPoint arg0, MatOfPoint arg1) { Rect r1 = Imgproc.boundingRect(arg0); Rect r2 = Imgproc.boundingRect(arg1); double area1 = r1.area(); double area2 = r2.area(); int retVal = (r1.height < r2.height) ? 1 : 0; return retVal; } })` – Gladiator Jun 03 '18 at 12:42
I have edited the question to add corresponding python code along with the Sorting Contours as per your suggestion, but no luck. Can you please help me – Gladiator Jun 03 '18 at 14:43
Could you get the disordered images using python code? – Ishara Madhawa Jun 03 '18 at 15:09
I'm still getting a disordered character images even with the python code – Gladiator Jun 03 '18 at 17:07
Yes, I checked. You can't use my line of code because it isn't very accurate: its results depend on the height of each character. The best thing you can do is first segment the image into lines, then sort the contours in each line by their x coordinates. – Ishara Madhawa Jun 03 '18 at 17:11
Updated the python code to extract each row and then sort contours by X. But facing the below error. Can you please help me? `error: (-210) [Start]FindContours supports only CV_8UC1 images when mode != CV_RETR_FLOODFILL otherwise supports CV_32SC1 images only in function cvStartFindContours_Impl` – Gladiator Jun 04 '18 at 02:00
I posted a new answer, check that out. – Ishara Madhawa Jun 05 '18 at 02:24
It was perfect. But I am not able to detect the spaces between words, so I cannot reconstruct a meaningful line. Can you please suggest an approach? – Gladiator Jun 05 '18 at 09:47
Yes. After segmenting into sentences you should segment each sentence into words. I tried now. I can provide the solution if you post a new question, since posting a different answer here is a kind of a mess. – Ishara Madhawa Jun 05 '18 at 11:06

Preserving character order during image processing with OpenCV

2 Answers