I need to recognize handwriting written with a soft pen. Using OpenCV (different thresholding methods, bilateral filtering, etc.), I get quite good results extracting the text from the paper. But I also get artifacts from folding:
I cannot change the way the paper is handled or photographed before it is processed. After thresholding, the same paper looks like this:
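For reference, my preprocessing is roughly along these lines (a minimal sketch only; the filter and threshold parameters here are placeholders, not the values I actually use):

#include "opencv2/opencv.hpp"
using namespace cv;

// Rough sketch of the preprocessing: grayscale -> bilateral filter -> adaptive threshold.
// All parameter values are placeholders, not the ones used for the images above.
Mat preprocess(const Mat& src) {
    Mat gray, smooth, bin;
    cvtColor(src, gray, CV_BGR2GRAY);
    bilateralFilter(gray, smooth, 9, 75, 75);            // edge-preserving smoothing
    adaptiveThreshold(smooth, bin, 255,
                      ADAPTIVE_THRESH_GAUSSIAN_C,
                      THRESH_BINARY_INV, 21, 10);        // dark ink -> white foreground
    return bin;
}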
I want to remove these artifacts. The biggest trouble for me is when a character like "T" happens to sit on such a line: the horizontal stroke of the "T" can blend right into it.
What I do now: I can detect standalone lines. If a connected component is only a few pixels tall and very wide, I eliminate it.
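The check itself is nothing fancy, something like this (a sketch; the size limits are made up for illustration):

#include "opencv2/opencv.hpp"
using namespace std;
using namespace cv;

// Sketch of the "standalone line" filter: drop connected components that are only a few
// pixels tall but very wide. The size limits below are illustrative, not my real values.
void removeFoldLines(Mat& bin) {                          // bin: binary image, ink = white
    vector<vector<Point> > contours;
    Mat tmp = bin.clone();                                // findContours modifies its input
    findContours(tmp, contours, CV_RETR_EXTERNAL, CV_CHAIN_APPROX_SIMPLE);
    for (size_t k = 0; k < contours.size(); k++) {
        Rect r = boundingRect(contours[k]);
        if (r.height <= 3 && r.width > 10 * r.height)     // a few pixels tall, very wide
            drawContours(bin, contours, (int)k, Scalar(0), CV_FILLED); // erase the component
    }
}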
I have been reading a lot about shadow elimination (because I assume the problem is a shadow). But those methods all expect a different context, such as a surveillance video feed or an image with a colored background.
Any ideas?
UPDATE:
I have been working on ideas based on similar work: http://ivrgwww.epfl.ch/alumni/fredemba/papers/FFICPR06.pdf
The code below builds a rough shadow estimate from per-pixel B/R and G/R color ratios and then blends its value channel with the source image's V channel in HSV space.
Test input
Output of test code:
Source code:
#include "opencv2/opencv.hpp"
using namespace std;
using namespace cv;
int filt1_trackbar=13;
int filt2_trackbar=49;
int filt3_trackbar=6;
int main( int argc, char** argv ) {
Mat src, shadow;
src = imread( argv[1], 1 );
if( !src.data ) {
return -1;
}
Mat histImage1( src.rows, src.cols, CV_8UC3, Scalar(127,127,127) );
Mat histImage2( src.rows, src.cols, CV_8UC3, Scalar(127,127,127) );
int cn = src.channels();
uint8_t* pixelPtr = (uint8_t*)src.data;
for(int i=0 ; i< src.rows;i++) {
for(int j=0 ; j< src.cols;j++) {
Scalar_<uint8_t> bgrPixel;
bgrPixel.val[0] = pixelPtr[i*src.cols*cn + j*cn + 0]; // B
bgrPixel.val[1] = pixelPtr[i*src.cols*cn + j*cn + 1]; // G
bgrPixel.val[2] = pixelPtr[i*src.cols*cn + j*cn + 2]; // R
if(bgrPixel.val[2] !=0 ) { // avoid division by zero
float a= 100.0*(((float)bgrPixel.val[0] / (float)bgrPixel.val[2])); // B/R
float b= 100.0*(((float)bgrPixel.val[1] / (float)bgrPixel.val[2])); // G/R
if(!isinf(a) && !isinf(b)) {
histImage1.at<Vec3b>(i,j)=Vec3b(a,a,a);
histImage2.at<Vec3b>(i,j)=Vec3b(b,b,b);
}
}
}
}
addWeighted(histImage1, 2.0, histImage2, -1.0, 0, shadow);
Mat hsv1,hsv2;
cvtColor(shadow, hsv1, CV_BGR2HSV);
cvtColor(src, hsv2, CV_BGR2HSV);
vector<Mat> channels1;
vector<Mat> channels2;
split(hsv1, channels1);
split(hsv2, channels2);
addWeighted(channels1[2], 0.5, channels2[2], 0.5, 0, channels1[2]);
insertChannel(channels1[2],hsv2,2);
Mat unshadow;
cvtColor(hsv2,unshadow, CV_HSV2BGR);
namedWindow( "src", WINDOW_NORMAL);
namedWindow( "shadow", WINDOW_NORMAL);
namedWindow( "unshadow", WINDOW_NORMAL);
imshow("src", src);
imshow("shadow", shadow);
imshow("unshadow", unshadow);
imwrite("shadow.png", shadow);
imwrite("unshadow.png", unshadow);
waitKey(0);
return 0;
}
It did improve the image, but not enough in my opinion. I was impressed it worked at all on such a grayscale scene. Maybe someone can spot something wrong?