I am planning to use Tesseract OCR in a Visual Studio Windows or web application to read TIFF/PDF images. I tried the code below, but it is not able to read handwritten TIFF images properly; I get junk characters. My ultimate aim is to determine whether each question (4, 5, 6, 7A, 7B, 7C, 7D, 7E, 7F) is selected or not, and what the comments are (refer to my sample TIFF).
Please suggest how to read these images. The code I have used:
reference dll c:\Samples\packages\Tesseract.2.3.0.0\lib\net40\Tesseract.dll
webform1.aspx.cs
/// <summary>
/// Click handler for the submit control: runs Tesseract OCR over the uploaded
/// image and shows the recognized text together with the mean confidence.
/// </summary>
/// <param name="sender">Event source (not used by this handler).</param>
/// <param name="e">Event data (not used by this handler).</param>
protected void submitFile_ServerClick(object sender, EventArgs e)
{
    var upload = imageFile.PostedFile;
    if (upload == null || upload.ContentLength <= 0)
    {
        // Nothing (or an empty file) was posted: leave both panels as they are.
        return;
    }

    // No error handling on purpose for now: any failure below simply propagates.
    string tessDataDir = Server.MapPath(@"~/tessdata");

    // Pix is built from a Bitmap because the upload is only available as a stream.
    using (var ocrEngine = new TesseractEngine(tessDataDir, "eng", EngineMode.Default))
    using (var bitmap = new System.Drawing.Bitmap(upload.InputStream))
    using (var pixImage = PixConverter.ToPix(bitmap))
    using (var ocrPage = ocrEngine.Process(pixImage))
    {
        // Confidence is read before the text, matching the original call order.
        float confidence = ocrPage.GetMeanConfidence();
        meanConfidenceLabel.InnerText = string.Format("{0:P}", confidence);
        resultText.InnerText = ocrPage.GetText();
    }

    // Switch the page from the upload form to the result view.
    inputPanel.Visible = false;
    resultPanel.Visible = true;
}
/// <summary>
/// Click handler for the restart control: hides the result view and shows
/// the upload form again.
/// </summary>
/// <param name="sender">Event source (not used by this handler).</param>
/// <param name="e">Event data (not used by this handler).</param>
protected void restartButton_ServerClick(object sender, EventArgs e)
{
    // Bring the input form back and hide the previous OCR result.
    inputPanel.Visible = true;
    resultPanel.Visible = false;
}