17

I am using OpenCV to prepare images for OCR from an iPhone camera, and I have been having trouble getting the results I need for an accurate OCR scan. Here is the code I am using now.

    // Collapse the colour frame to a single grey channel, in place.
    cv::cvtColor(cvImage, cvImage, CV_BGR2GRAY);
    // NOTE(review): the third argument is the aperture size; OpenCV documents it as odd and
    // greater than 1, so 0 looks invalid here — confirm against the medianBlur reference.
    cv::medianBlur(cvImage, cvImage, 0);
    // Mean-based adaptive threshold: max value 255, 5-pixel neighbourhood, constant 4 subtracted.
    // NOTE(review): a 5-pixel neighbourhood may be small relative to the glyph size — confirm.
    cv::adaptiveThreshold(cvImage, cvImage, 255, CV_ADAPTIVE_THRESH_MEAN_C, CV_THRESH_BINARY, 5, 4);

This method takes a bit too long and does not give me good results. enter image description here enter image description here

Any suggestions on how I could make this more effective? The images are coming from an iPhone camera.

After using Andrey's suggestion:

enter image description here

    // Bridge the UIImage into an OpenCV matrix (cvMatFromUIImage is defined outside this snippet).
    cv::Mat cvImage = [self cvMatFromUIImage:image];
    cv::Mat res;
    // Reduce to a single channel before the block statistics are computed.
    cv::cvtColor(cvImage, cvImage, CV_RGB2GRAY);
    // Switch to 32-bit float and scale by 1/255.
    cvImage.convertTo(cvImage,CV_32FC1,1.0/255.0);
    // NOTE(review): `res` is only filled in if the callee takes its second argument by
    // reference — confirm against the CalcBlockMeanVariance signature in use.
    CalcBlockMeanVariance(cvImage,res);
    // Invert the value returned in `res`, then add it to the image.
    res=1.0-res;
    res=cvImage+res;
    // Binarise: values above 0.85 become 1, everything else 0.
    cv::threshold(res,res, 0.85, 1, cv::THRESH_BINARY);
    // Halve both dimensions of the result.
    cv::resize(res, res, cv::Size(res.cols/2,res.rows/2));
    // NOTE(review): `cvImage`, not `res`, is converted back here, so the thresholded result
    // computed above is not what ends up in `image` — confirm this is intended.
    image = [self UIImageFromCVMat:cvImage];

Method:

void CalcBlockMeanVariance(cv::Mat Img,cv::Mat Res,float blockSide=21) // blockSide - the parameter (set greater for larger font on image)
{
    cv::Mat I;
    Img.convertTo(I,CV_32FC1);
    Res=cv::Mat::zeros(Img.rows/blockSide,Img.cols/blockSide,CV_32FC1);
    cv::Mat inpaintmask;
    cv::Mat patch;
    cv::Mat smallImg;
    cv::Scalar m,s;

    for(int i=0;i<Img.rows-blockSide;i+=blockSide)
    {
        for (int j=0;j<Img.cols-blockSide;j+=blockSide)
        {
             patch=I(cv::Rect(j,i,blockSide,blockSide));
            cv::meanStdDev(patch,m,s);
            if(s[0]>0.01) // Thresholding parameter (set smaller for lower contrast image)
            {
                Res.at<float>(i/blockSide,j/blockSide)=m[0];
            }else
            {
                Res.at<float>(i/blockSide,j/blockSide)=0;
            }
        }
    }

    cv::resize(I,smallImg,Res.size());

    cv::threshold(Res,inpaintmask,0.02,1.0,cv::THRESH_BINARY);

    cv::Mat inpainted;
    smallImg.convertTo(smallImg,CV_8UC1,255);

    inpaintmask.convertTo(inpaintmask,CV_8UC1);
    inpaint(smallImg, inpaintmask, inpainted, 5, cv::INPAINT_TELEA);

    cv::resize(inpainted,Res,Img.size());
    Res.convertTo(Res,CV_32FC1,1.0/255.0);

}

Any idea why I am getting this result? The OCR results are pretty good, but would be better if I could get an image similar to the one you got. I am developing for iOS if that matters. I had to use cvtColor because the method expects a single channel image.

  • 2
    Isn't that third param the radius of the convolution mask? Must be odd, and non-zero. – danh Mar 02 '14 at 00:33
  • Yeah, you're right let me go check out what the default is and try that. EDIT: Tried a few and hardly changed the results, anything else? –  Mar 02 '14 at 00:35
  • change blocksize parameter of adaptive threshold to some higher values, like 25 etc. – Abid Rahman K Mar 02 '14 at 06:55

3 Answers

19

Here is my result: enter image description here

Here is the code:

#include <iostream>
#include <vector>
#include <stdio.h>
#include <stdarg.h>
#include "opencv2/opencv.hpp"
#include "fstream"
#include "iostream"
using namespace std;
using namespace cv;

//-----------------------------------------------------------------------------------------------------
// 
//-----------------------------------------------------------------------------------------------------
void CalcBlockMeanVariance(Mat& Img,Mat& Res,float blockSide=21) // blockSide - the parameter (set greater for larger font on image)
{
    Mat I;
    Img.convertTo(I,CV_32FC1);
    Res=Mat::zeros(Img.rows/blockSide,Img.cols/blockSide,CV_32FC1);
    Mat inpaintmask;
    Mat patch;
    Mat smallImg;
    Scalar m,s;

    for(int i=0;i<Img.rows-blockSide;i+=blockSide)
    {       
        for (int j=0;j<Img.cols-blockSide;j+=blockSide)
        {
            patch=I(Range(i,i+blockSide+1),Range(j,j+blockSide+1));
            cv::meanStdDev(patch,m,s);
            if(s[0]>0.01) // Thresholding parameter (set smaller for lower contrast image)
            {
                Res.at<float>(i/blockSide,j/blockSide)=m[0];
            }else
            {
                Res.at<float>(i/blockSide,j/blockSide)=0;
            }           
        }
    }

    cv::resize(I,smallImg,Res.size());

    cv::threshold(Res,inpaintmask,0.02,1.0,cv::THRESH_BINARY);

    Mat inpainted;
    smallImg.convertTo(smallImg,CV_8UC1,255);

    inpaintmask.convertTo(inpaintmask,CV_8UC1);
    inpaint(smallImg, inpaintmask, inpainted, 5, INPAINT_TELEA);

    cv::resize(inpainted,Res,Img.size());
    Res.convertTo(Res,CV_32FC1,1.0/255.0);

}
//-----------------------------------------------------------------------------------------------------
// 
//-----------------------------------------------------------------------------------------------------
int main( int argc, char** argv )
{
    namedWindow("Img");
    namedWindow("Edges");
    //Mat Img=imread("D:\\ImagesForTest\\BookPage.JPG",0);
    Mat Img=imread("Test2.JPG",0);
    Mat res;
    Img.convertTo(Img,CV_32FC1,1.0/255.0);
    CalcBlockMeanVariance(Img,res); 
    res=1.0-res;
    res=Img+res;
    imshow("Img",Img);
    cv::threshold(res,res,0.85,1,cv::THRESH_BINARY);
    cv::resize(res,res,cv::Size(res.cols/2,res.rows/2));
    imwrite("result.jpg",res*255);
    imshow("Edges",res);
    waitKey(0);

    return 0;
}

And Python port:

import cv2 as cv
import numpy as np 

#-----------------------------------------------------------------------------------------------------
# 
#-----------------------------------------------------------------------------------------------------
def CalcBlockMeanVariance(Img,blockSide=21): # blockSide - the parameter (set greater for larger font on image)
    """Estimate the page background of a greyscale image.

    The image is walked tile by tile. Tiles whose standard deviation exceeds a
    small threshold are marked; a thumbnail with one pixel per tile is then
    inpainted at the marked positions from the remaining tiles and scaled back
    up to the input size.

    Parameters
    ----------
    Img : 2-D array; it is divided by 255 here, so values are expected in
        0..255 (assumption based on that scaling).
    blockSide : int, tile edge in pixels (set greater for larger font on image).

    Returns
    -------
    float32 array with the same height and width as ``Img``, scaled by 1/255.
    """
    I=np.float32(Img)/255.0
    blockRows=Img.shape[0]//blockSide
    blockCols=Img.shape[1]//blockSide
    # np.float32 instead of the np.float alias, which NumPy no longer provides.
    Res=np.zeros((blockRows,blockCols),dtype=np.float32)

    # Iterating over tile indices visits every full tile, including the last
    # row/column when the image size is an exact multiple of blockSide.
    for bi in range(blockRows):
        for bj in range(blockCols):
            i=bi*blockSide
            j=bj*blockSide
            # The window overlaps the next tile by one pixel; slicing clips it
            # at the image border.
            patch=I[i:i+blockSide+1,j:j+blockSide+1]
            m,s=cv.meanStdDev(patch)
            # meanStdDev returns 1x1 arrays for one channel; take the scalars.
            if s[0][0]>0.001: # Thresholding parameter (set smaller for lower contrast image)
                Res[bi,bj]=m[0][0]

    # Thumbnail with one pixel per tile (cv.resize takes (width, height)).
    smallImg=cv.resize(I,(Res.shape[1],Res.shape[0]))
    # Non-zero mask entries are the thumbnail pixels that get inpainted.
    _,inpaintmask=cv.threshold(Res,0.02,1.0,cv.THRESH_BINARY)
    smallImg=np.uint8(smallImg*255)

    inpaintmask=np.uint8(inpaintmask)
    inpainted=cv.inpaint(smallImg, inpaintmask, 5, cv.INPAINT_TELEA)
    Res=cv.resize(inpainted,(Img.shape[1],Img.shape[0]))
    Res=np.float32(Res)/255
    return Res

#-----------------------------------------------------------------------------------------------------
# 
#-----------------------------------------------------------------------------------------------------

# Demo driver: load a page photo as greyscale, remove the estimated background,
# binarise, save to "result.jpg" and show input and output until a key is pressed.
cv.namedWindow("Img")
cv.namedWindow("Edges")
# Flag 0 asks imread for a single-channel greyscale image.
Img=cv.imread("F:\\ImagesForTest\\BookPage.JPG",0)
if Img is None:
    # imread returns None instead of raising when the file cannot be read.
    raise SystemExit("Could not read the input image")
res=CalcBlockMeanVariance(Img)
# Invert the estimate and add it to the [0, 1]-scaled image; the sum is
# thresholded below.
res=1.0-res
Img=np.float32(Img)/255
res=Img+res
cv.imshow("Img",Img)
_,res=cv.threshold(res,0.85,1,cv.THRESH_BINARY)
# Halve both dimensions (cv.resize takes (width, height)).
res=cv.resize(res,( int(res.shape[1]/2),int(res.shape[0]/2) ))
# res holds 0/1 floats; scale to 0..255 for the file on disk.
cv.imwrite("result.jpg",res*255)
cv.imshow("Edges",res)
cv.waitKey(0)
Andrey Smorodov
  • 10,649
  • 2
  • 35
  • 42
  • 13
    perhaps you should add more explanation for your method and code. – flowfree Mar 02 '14 at 12:24
  • The code mainly based on the paper: https://www.google.ru/url?sa=t&rct=j&q=&esrc=s&source=web&cd=1&cad=rja&ved=0CDQQFjAA&url=http%3A%2F%2Fwww.xrce.xerox.com%2Fcontent%2Fdownload%2F6708%2F51560%2Ffile%2FBinarising-camera-images-for-OCR.pdf&ei=CSoTU_vHH-Sz4AS00YGoDA&usg=AFQjCNEZ19Uj6AmM_untvJRSjQOw3cns-A&sig2=PojmFSuKC5Dw-XqwG6bbnQ&bvm=bv.62286460,d.bGE – Andrey Smorodov Mar 02 '14 at 12:57
  • Thanks man, I'll try to implement this in Obj-C then get back to you. –  Mar 02 '14 at 18:58
  • So I only have one line of code giving me problems and if you could help me fix it that'd be great. `patch=I(NSMakeRange(i,i+blockSide+1),NSMakeRange(j,j+blockSide+1));` No matching function for call to object of type 'cv::Mat' –  Mar 02 '14 at 19:33
  • 1
    Take a look here: http://stackoverflow.com/questions/12781874/smoothing-in-a-particular-area-of-image You can use cv::Rect for extracting patch (take care with rows and cols it not the same order as width and height). – Andrey Smorodov Mar 02 '14 at 19:40
  • 1
    You can replace this line with: patch=I(cv::Rect(j,i,blockSide,blockSide)); – Andrey Smorodov Mar 02 '14 at 19:47
  • Thanks got that fixed, it get an error on the CalcBlockMeanVariance method, do I need to convert it to RGB2GRAY before it? Thats the only way I was able to get it to work, I am also getting some funky results I'll update my questions with my code if you could take a look that would be great. –  Mar 02 '14 at 22:08
  • 2
    Yes, the image must be converted to grayscale. I didn't make this because Mat Img=imread("Test2.JPG",0); loads image in grayscale. – Andrey Smorodov Mar 03 '14 at 06:51
  • 2
    I'm not an expert in iOS programming, but your output image is color image, so I think you miss color to gray conversion somewhere. Check image types with debugger in runtime. It may be also problem with output format image. May be you need convert the result back to BGR before displaying. – Andrey Smorodov Mar 03 '14 at 07:02
  • 1
    Ups! I remember similar problem in other topic. It seems the problem with conversion between Mat and UIImage. UIImage assumes integer type of image element. So convert Mat type to something like CV_8UC3 (don't forget scaling to range 0-255), then convert it to UIImage. – Andrey Smorodov Mar 03 '14 at 07:08
  • Thanks for the response that conversion is giving me a solid black image. `cvImage.convertTo(cvImage,CV_8UC3,1.0/255.0);` Any other suggestions? –  Mar 03 '14 at 07:24
  • 1
    Remove 1.0/255.0 scale factor. – Andrey Smorodov Mar 03 '14 at 07:25
  • Ok so after that OCR is giving me pretty accurate results, still a black box. Which is fine but I'd really like to see the image so I can try to improve upon it. –  Mar 03 '14 at 07:28
  • 1
    Try set scale factor to 255.0. ( cvImage.convertTo(cvImage,CV_8UC3,255.0); ) – Andrey Smorodov Mar 03 '14 at 07:31
  • That worked, however converting it back I think reset it to gray scale, that is all I am getting which is fine! OCR results are quite good thanks for your help. Any other suggestions are still welcome! –  Mar 03 '14 at 07:34
  • Nice code, +1, but facing similar issue here http://stackoverflow.com/questions/22660153/opencv-code-removes-text-color-also-with-the-backgraound-color/22660358?noredirect=1#comment34518024_22660358 – Dilip Manek Apr 01 '14 at 06:09
  • I guess you get green image because of OpenCV <-> iOS convertion method. As I remember (I have not any experience with iOS programming, but I've seen one of implementations of such convertors) it works with integer valued 3-channel images. So you need to do proper conversions. And this method works with 1-channel image, but you show color image. Try to convert output to 3-channel integer valued image before converting to iOS image. – Andrey Smorodov Apr 01 '14 at 13:51
  • Could you provide a way to do this operation with an inverse threshold for white text on a dark BG? Thanks. – Clip May 02 '14 at 19:07
  • The simpliest solution is to invert your image before operation, then invert it after. – Andrey Smorodov May 03 '14 at 17:32
  • The loop in CalcBlockMeanVariance will ignore the regions from last column and last row. I edited your answer. – lmiguelmh Jul 17 '15 at 21:37
  • @AndreySmorodov, have created a new question about your code.. Hope you got time :) http://stackoverflow.com/questions/34191953/binarize-image-return-compile-error – clarkk Dec 10 '15 at 01:00
  • @AndreySmorodov The link you shared to the paper is dead if you might take a look! – Ahmed Hegazy Jan 17 '19 at 15:01
9

JAVA CODE: A long time has passed since this question was asked, but I've rewritten this code from C++ to Java in case someone needs it (I needed it for developing an app in Android Studio).

/**
 * Binarises a bitmap for OCR.
 *
 * The bitmap is converted to a floating-point matrix scaled by 1/255, the
 * value returned by {@code CalcBlockMeanVariance} is inverted and added to
 * it, and the sum is thresholded at 0.85. The result is written back into
 * the bitmap that was passed in.
 *
 * @param bitmap image to process; it is overwritten with the result
 * @return the same {@code bitmap} instance, now holding the thresholded image
 */
public Bitmap Thresholding(Bitmap bitmap)
{
    final Mat source = new Mat();
    Utils.bitmapToMat(bitmap, source);
    source.convertTo(source, CvType.CV_32FC1, 1.0 / 255.0);

    // Must run before the colour conversion below: it receives the matrix
    // exactly as produced from the bitmap.
    final Mat combined = CalcBlockMeanVariance(source, 21);

    // Drop the alpha channel of the source, and invert the returned matrix
    // (1 - value); these two steps touch different matrices.
    Imgproc.cvtColor( source, source, Imgproc.COLOR_BGRA2BGR);
    Core.subtract(new MatOfDouble(1.0), combined, combined);

    Core.add(source, combined, combined);

    // Values above 0.85 become 1, everything else 0.
    Imgproc.threshold(combined, combined, 0.85, 1, Imgproc.THRESH_BINARY);

    // Back to 8-bit, scaled to 0..255, for the bitmap conversion.
    combined.convertTo(combined, CvType.CV_8UC1, 255.0);
    Utils.matToBitmap(combined, bitmap);

    return bitmap;
}

/**
 * Estimates the background of an image for later removal.
 *
 * The image is cut into {@code blockSide} x {@code blockSide} tiles. Tiles
 * whose standard deviation (first channel) exceeds 0.01 are marked; a
 * thumbnail with one pixel per tile is then inpainted at the marked
 * positions from the remaining tiles and scaled back up to the input size.
 *
 * @param Img       input image; the thresholds are tuned for values scaled
 *                  to [0, 1] (assumption — the caller applies the 1/255 scale)
 * @param blockSide tile edge in pixels (set greater for larger font on image)
 * @return a new CV_32F-depth matrix with the size of {@code Img}, scaled by 1/255
 * @throws IllegalArgumentException if {@code blockSide} is not positive
 */
public Mat CalcBlockMeanVariance (Mat Img, int blockSide)
{
    if (blockSide <= 0)
        throw new IllegalArgumentException("blockSide must be positive");

    Mat I = new Mat();
    Mat inpaintmask = new Mat();
    Mat smallImg = new Mat();
    MatOfDouble mean = new MatOfDouble();
    MatOfDouble stddev = new MatOfDouble();

    Img.convertTo(I, CvType.CV_32FC1);
    // One cell per tile; stays 0 for tiles that are not marked.
    Mat ResMat = Mat.zeros(Img.rows() / blockSide, Img.cols() / blockSide, CvType.CV_32FC1);

    // Inclusive bound: the last full tile (i + blockSide == rows) must be
    // visited too, otherwise the final row/column of tiles is never marked
    // when the image size is an exact multiple of the tile size.
    for (int i = 0; i + blockSide <= Img.rows(); i += blockSide)
    {
        for (int j = 0; j + blockSide <= Img.cols(); j += blockSide)
        {
            Mat patch = new Mat(I, new Rect(j, i, blockSide, blockSide));
            Core.meanStdDev(patch, mean, stddev);

            // Thresholding parameter (set smaller for lower contrast image)
            if (stddev.get(0,0)[0] > 0.01)
                ResMat.put(i / blockSide, j / blockSide, mean.get(0,0)[0]);
            else
                ResMat.put(i / blockSide, j / blockSide, 0);
        }
    }

    // Thumbnail with one pixel per tile.
    Imgproc.resize(I, smallImg, ResMat.size());
    // Non-zero mask entries are the thumbnail pixels that get inpainted.
    Imgproc.threshold(ResMat, inpaintmask, 0.02, 1.0, Imgproc.THRESH_BINARY);

    Mat inpainted = new Mat();
    // Drop the alpha channel and go to 8-bit (0..255) before inpainting.
    Imgproc.cvtColor(smallImg, smallImg, Imgproc.COLOR_RGBA2BGR);
    smallImg.convertTo(smallImg, CvType.CV_8UC1, 255.0);

    inpaintmask.convertTo(inpaintmask, CvType.CV_8UC1);
    Photo.inpaint(smallImg, inpaintmask, inpainted, 5, Photo.INPAINT_TELEA);

    // Back to full resolution and to floating point scaled by 1/255.
    Imgproc.resize(inpainted, ResMat, Img.size());
    ResMat.convertTo(ResMat, CvType.CV_32FC1, 1.0 / 255.0);

    return ResMat;
}
Dainius Šaltenis
  • 1,644
  • 16
  • 29
  • what version of openCV did you use? When I'm trying to run your snippet my app crashing with Fatal signal 11 (SIGSEGV) error. Do you know why is this happening? – Andrey Mohyla Jan 24 '16 at 03:04
  • 1
    I am using 2.4.8 version of OpenCV. You should search that error code in google for some clues, because code for me works without errors. If you find what causes the error, write that in the comments. – Dainius Šaltenis Jan 24 '16 at 10:02
  • 1
    So I've changed OpenCV to 2.4.8 and now everything works fine) I can't find a line which causes the crash because this error refers to C++ openCV library. – Andrey Mohyla Jan 24 '16 at 13:36
2

Because the lighting is almost uniform and the foreground is easily distinguished from the background, I think thresholding directly (using Otsu's method) is good enough for OCR. (The result is almost the same as @Andrey's answer in the text regions.)

enter image description here


OpenCV 3 Code in Python:

#!/usr/bin/python3
# 2018.01.17 16:41:20 CST
import cv2
import numpy as np

# Load the photo; imread returns None instead of raising when the file cannot be read.
img = cv2.imread("ocr.jpg")
if img is None:
    raise SystemExit("Could not read ocr.jpg")

# Convert the loaded image itself to greyscale (no smoothing step is applied here).
gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
# With THRESH_OTSU set, the returned `th` is the threshold that was used.
th, threshed = cv2.threshold(gray,127,255, cv2.THRESH_BINARY|cv2.THRESH_OTSU)
print(th)

cv2.imwrite("res.png", threshed)
Kinght 金
  • 17,681
  • 4
  • 60
  • 74