4

I am writing a .NET wrapper for Tesseract OCR, and I get much better results when I feed it a grayscale image instead of an RGB image.

So I searched the web for a C# solution to convert an RGB image to a grayscale image, and I found this code.

This performs 3 operations to increase the accuracy of tesseract.

  1. Resize the image
  2. Convert it into a grayscale image
  3. Remove noise from the image

Now this converted image gives almost 90% accurate results.

//Resize

/// <summary>
/// Scales <paramref name="bmp"/> to <paramref name="newWidth"/> x
/// <paramref name="newHeight"/> using bilinear interpolation, then passes the
/// result through <c>SetGrayscale</c> and <c>RemoveNoise</c>.
/// </summary>
/// <param name="bmp">Source image. It is only read; it is neither modified nor disposed.</param>
/// <param name="newWidth">Width of the output in pixels; must be positive.</param>
/// <param name="newHeight">Height of the output in pixels; must be positive.</param>
/// <returns>A new bitmap that the caller owns and should dispose.</returns>
/// <exception cref="ArgumentNullException"><paramref name="bmp"/> is null.</exception>
/// <exception cref="ArgumentOutOfRangeException">A requested dimension is zero or negative.</exception>
public Bitmap Resize(Bitmap bmp, int newWidth, int newHeight)
{
    if (bmp == null)
    {
        throw new ArgumentNullException("bmp");
    }
    if (newWidth <= 0)
    {
        throw new ArgumentOutOfRangeException("newWidth", newWidth, "Width must be positive.");
    }
    if (newHeight <= 0)
    {
        throw new ArgumentOutOfRangeException("newHeight", newHeight, "Height must be positive.");
    }

    // SetPixel does not work on indexed (palette-based) formats, so the target
    // falls back to 32bpp ARGB when the source is indexed.
    System.Drawing.Imaging.PixelFormat targetFormat = bmp.PixelFormat;
    if ((targetFormat & System.Drawing.Imaging.PixelFormat.Indexed) != 0)
    {
        targetFormat = System.Drawing.Imaging.PixelFormat.Format32bppArgb;
    }

    // Width/Height are read once; the loops below would otherwise query them per pixel.
    int srcWidth = bmp.Width;
    int srcHeight = bmp.Height;
    double widthFactor = (double)srcWidth / newWidth;
    double heightFactor = (double)srcHeight / newHeight;

    Bitmap current = new Bitmap(newWidth, newHeight, targetFormat);
    try
    {
        for (int x = 0; x < newWidth; ++x)
        {
            // Everything that depends only on the column is computed once per column.
            double srcX = x * widthFactor;
            int left = (int)Math.Floor(srcX);
            int right = Math.Min(left + 1, srcWidth - 1);   // clamp at the right edge
            double fx = srcX - left;

            for (int y = 0; y < newHeight; ++y)
            {
                double srcY = y * heightFactor;
                int top = (int)Math.Floor(srcY);
                int bottom = Math.Min(top + 1, srcHeight - 1);   // clamp at the bottom edge
                double fy = srcY - top;

                Color topLeft = bmp.GetPixel(left, top);
                Color topRight = bmp.GetPixel(right, top);
                Color bottomLeft = bmp.GetPixel(left, bottom);
                Color bottomRight = bmp.GetPixel(right, bottom);

                byte red = BilinearChannel(topLeft.R, topRight.R, bottomLeft.R, bottomRight.R, fx, fy);
                byte green = BilinearChannel(topLeft.G, topRight.G, bottomLeft.G, bottomRight.G, fx, fy);
                byte blue = BilinearChannel(topLeft.B, topRight.B, bottomLeft.B, bottomRight.B, fx, fy);

                // The output is always written fully opaque.
                current.SetPixel(x, y, System.Drawing.Color.FromArgb(255, red, green, blue));
            }
        }

        // Each stage may hand back either the same instance or a fresh one.
        // When it is a fresh one, the bitmap it replaces is released right away
        // instead of being left for the finalizer.
        Bitmap gray = SetGrayscale(current);
        if (!ReferenceEquals(gray, current))
        {
            current.Dispose();
            current = gray;
        }

        Bitmap cleaned = RemoveNoise(current);
        if (!ReferenceEquals(cleaned, current))
        {
            current.Dispose();
            current = cleaned;
        }

        return current;
    }
    catch
    {
        // Do not leak the working bitmap when any stage fails.
        current.Dispose();
        throw;
    }
}

// Bilinearly blends one colour channel of the four neighbouring source pixels.
// The blend stays in double precision and is rounded once at the end, so no
// precision is lost in the intermediate horizontal blends.
private static byte BilinearChannel(byte topLeft, byte topRight, byte bottomLeft, byte bottomRight, double fx, double fy)
{
    double topRow = (1.0 - fx) * topLeft + fx * topRight;
    double bottomRow = (1.0 - fx) * bottomLeft + fx * bottomRight;
    double blended = (1.0 - fy) * topRow + fy * bottomRow;
    return (byte)Math.Max(0.0, Math.Min(255.0, Math.Round(blended)));
}

//SetGrayscale
/// <summary>
/// Returns a grayscale copy of <paramref name="img"/>. Each pixel's gray level is
/// the weighted sum 0.299 R + 0.587 G + 0.114 B, rounded to the nearest integer.
/// </summary>
/// <param name="img">Source image. It is cloned and left untouched.</param>
/// <returns>A new bitmap that the caller owns and should dispose.</returns>
/// <exception cref="ArgumentNullException"><paramref name="img"/> is null.</exception>
public Bitmap SetGrayscale(Bitmap img)
{
    if (img == null)
    {
        throw new ArgumentNullException("img");
    }

    // One clone is enough: it is both the working copy and the return value.
    Bitmap gray = (Bitmap)img.Clone();
    try
    {
        int width = gray.Width;
        int height = gray.Height;

        for (int x = 0; x < width; x++)
        {
            for (int y = 0; y < height; y++)
            {
                Color c = gray.GetPixel(x, y);

                // Rounded rather than truncated, so floating-point error in the
                // weighted sum cannot turn pure white into 254.
                byte level = (byte)Math.Round(.299 * c.R + .587 * c.G + .114 * c.B);

                gray.SetPixel(x, y, Color.FromArgb(level, level, level));
            }
        }

        return gray;
    }
    catch
    {
        // Release the copy if a pixel operation fails.
        gray.Dispose();
        throw;
    }
}

//RemoveNoise
/// <summary>
/// Pushes clearly dark pixels to black and clearly light pixels to white, in place.
/// A pixel becomes black when R, G and B are all below the threshold (162) and
/// white when all three are above it. Any other pixel (a channel equal to the
/// threshold, or channels on both sides of it) is left unchanged.
/// </summary>
/// <param name="bmap">Image to clean. It is modified directly.</param>
/// <returns>The same instance that was passed in.</returns>
/// <exception cref="ArgumentNullException"><paramref name="bmap"/> is null.</exception>
public Bitmap RemoveNoise(Bitmap bmap)
{
    if (bmap == null)
    {
        throw new ArgumentNullException("bmap");
    }

    const int Threshold = 162;

    int width = bmap.Width;
    int height = bmap.Height;

    // The two conditions are mutually exclusive, so one scan gives the same
    // result as a separate "black" scan followed by a "white" scan.
    for (int x = 0; x < width; x++)
    {
        for (int y = 0; y < height; y++)
        {
            Color pixel = bmap.GetPixel(x, y);

            if (pixel.R < Threshold && pixel.G < Threshold && pixel.B < Threshold)
            {
                bmap.SetPixel(x, y, Color.Black);
            }
            else if (pixel.R > Threshold && pixel.G > Threshold && pixel.B > Threshold)
            {
                bmap.SetPixel(x, y, Color.White);
            }
        }
    }

    return bmap;
}

But the problem is that the conversion takes a lot of time.

So I moved the logic of the SetGrayscale(Bitmap bmap) and RemoveNoise(Bitmap bmap) functions inside the Resize() method to avoid looping over the image repeatedly,

but it did not solve my problem.

Community
  • 1
  • 1

2 Answers

4

The Bitmap class's GetPixel() and SetPixel() methods are notoriously slow for multiple read/writes. A much faster way to access and set individual pixels in a bitmap is to lock it first.

There's a good example here on how to do that, with a nice class LockedBitmap to wrap around the stranger Marshaling code.

Essentially what it does is use the LockBits() method in the Bitmap class, passing a rectangle for the region of the bitmap you want to lock, and then copy those pixels from its unmanaged memory location to a managed one for easier access.

Here's an example on how you would use that example class with your SetGrayscale() method:

/// <summary>
/// Returns a grayscale copy of <paramref name="img"/>, reading and writing pixels
/// through a <c>LockedBitmap</c> wrapper instead of calling Bitmap.GetPixel and
/// Bitmap.SetPixel directly. Each pixel's gray level is the weighted sum
/// 0.299 R + 0.587 G + 0.114 B, rounded to the nearest integer.
/// </summary>
/// <param name="img">Source image. It is cloned and left untouched.</param>
/// <returns>The bitmap held by the wrapper, i.e. the converted clone.</returns>
/// <exception cref="ArgumentNullException"><paramref name="img"/> is null.</exception>
public Bitmap SetGrayscale(Bitmap img)
{
    if (img == null)
    {
        throw new ArgumentNullException("img");
    }

    // Image.Clone() is declared to return object, so the cast is required.
    // NOTE(review): assumes the LockedBitmap constructor takes a Bitmap - confirm against that class.
    LockedBitmap lockedBmp = new LockedBitmap((Bitmap)img.Clone());
    lockedBmp.LockBits(); // lock the bits for faster access
    try
    {
        for (int x = 0; x < lockedBmp.Width; x++)
        {
            for (int y = 0; y < lockedBmp.Height; y++)
            {
                Color c = lockedBmp.GetPixel(x, y);

                // Rounded rather than truncated, so floating-point error in the
                // weighted sum cannot turn pure white into 254.
                byte level = (byte)Math.Round(.299 * c.R + .587 * c.G + .114 * c.B);

                lockedBmp.SetPixel(x, y, Color.FromArgb(level, level, level));
            }
        }
    }
    finally
    {
        // Always release the lock, even when a pixel operation throws.
        lockedBmp.UnlockBits();
    }

    return lockedBmp.Bitmap; // already a clone of the input; no second copy needed
}

This wrapper class has saved me a ridiculous amount of time in bitmap processing. Once you've implemented this in all your methods, preferably only calling LockBits() once, then I'm sure your application's performance will improve tremendously.


I also see that you're cloning the images a lot. This probably doesn't take up as much time as the SetPixel()/GetPixel() thing, but its time can still be significant especially with larger images.

PC Luddite
  • 5,883
  • 6
  • 23
  • 39
1

The easiest way would be to redraw the image onto itself using DrawImage and passing a suitable ColorMatrix. Google for ColorMatrix and gray scale and you'll find a ton of examples, this one for example: http://www.codeproject.com/Articles/3772/ColorMatrix-Basics-Simple-Image-Color-Adjustment

Dan Byström
  • 9,067
  • 5
  • 38
  • 68