I am trying to find the coordinates of text in scanned images. A scanned image contains a lot of text; I need to convert that image data to text and then get the coordinates of each piece of text. The coordinates are bounding boxes: the X and Y position, height, and width of the region where the text is located.
I am using the Microsoft OCR client library (ProjectOxford Vision):
using Microsoft.ProjectOxford.Vision;
using Microsoft.ProjectOxford.Vision.Contract;
using System;
using System.Collections.Generic;
using System.IO;
using System.Threading.Tasks;
namespace TextExtraction
{
    /// <summary>
    /// Console program that sends an image file to the Vision handwriting
    /// recognition endpoint and prints the recognised text, either one entry
    /// per line or one entry per word.
    /// </summary>
    class Program
    {
        const string ApiKey = "<<Key>>";
        const string ApiLocation =
            "https://westcentralus.api.cognitive.microsoft.com/vision/v1.0";

        // Image analysed when no path is passed on the command line.
        const string DefaultImagePath = @"C:\Users\abhis\Desktop\image.jpg";

        // Upper bound on status polls so a stuck operation cannot hang the program.
        const int MaxPollAttempts = 10;

        // Pause between two consecutive status polls.
        static readonly TimeSpan PollDelay = TimeSpan.FromSeconds(1);

        /// <summary>
        /// Entry point. Analyses the image given as the first command-line
        /// argument, or <see cref="DefaultImagePath"/> when none is supplied,
        /// then waits for Enter before exiting.
        /// </summary>
        /// <param name="args">Optional: args[0] is the path of the image to analyse.</param>
        static void Main(string[] args)
        {
            string imgToAnalyze = (args != null && args.Length > 0)
                ? args[0]
                : DefaultImagePath;

            HandwritingExtraction(imgToAnalyze, false);
            Console.ReadLine();
        }

        /// <summary>
        /// Writes every entry of <paramref name="res"/> on its own console line,
        /// then blocks until the user presses Enter.
        /// </summary>
        /// <param name="res">Strings to print; a null array prints nothing.</param>
        public static void PrintResults(string[] res)
        {
            if (res != null)
            {
                foreach (string r in res)
                {
                    Console.WriteLine(r);
                }
            }

            // NOTE: Main also waits on Console.ReadLine after this returns, so
            // the user is prompted twice. Kept for backward compatibility.
            Console.ReadLine();
        }

        /// <summary>
        /// Synchronous wrapper: runs <see cref="HandwritingExtractionCore"/> to
        /// completion and prints what it returns.
        /// </summary>
        /// <param name="fname">Path of the image file to analyse.</param>
        /// <param name="wrds">True to print one word per line, false to print whole text lines.</param>
        public static void HandwritingExtraction(string fname, bool wrds)
        {
            // Blocking here is acceptable only because this is the synchronous
            // console entry path. Failures surface wrapped in AggregateException.
            Task.Run(async () =>
            {
                string[] res = await HandwritingExtractionCore(fname, wrds);
                PrintResults(res);
            }).Wait();
        }

        /// <summary>
        /// Uploads the image at <paramref name="fname"/> for handwriting
        /// recognition, polls until the operation leaves the NotStarted/Running
        /// states (or the poll budget is used up) and extracts the text.
        /// </summary>
        /// <param name="fname">Path of the image file to analyse.</param>
        /// <param name="wrds">True to return individual words, false to return whole lines.</param>
        /// <returns>
        /// The recognised text. Never null: an empty array is returned when the
        /// file does not exist or the operation produced no lines.
        /// </returns>
        public static async Task<string[]> HandwritingExtractionCore(string fname, bool wrds)
        {
            if (!File.Exists(fname))
            {
                // Previously null was returned here, which crashed PrintResults.
                return new string[0];
            }

            VisionServiceClient client = new VisionServiceClient(ApiKey, ApiLocation);

            using (Stream stream = File.OpenRead(fname))
            {
                HandwritingRecognitionOperation op =
                    await client.CreateHandwritingRecognitionOperationAsync(stream);

                HandwritingRecognitionOperationResult res =
                    await client.GetHandwritingRecognitionOperationResultAsync(op);

                // Recognition is a long-running operation on the service side:
                // the first query may still report NotStarted/Running, so keep
                // asking (with a pause) until it finishes or we give up.
                int attempts = 0;
                while (res != null
                    && (res.Status == HandwritingRecognitionOperationStatus.NotStarted
                        || res.Status == HandwritingRecognitionOperationStatus.Running)
                    && attempts++ < MaxPollAttempts)
                {
                    await Task.Delay(PollDelay);
                    res = await client.GetHandwritingRecognitionOperationResultAsync(op);
                }

                return GetExtracted(res, wrds);
            }
        }

        /// <summary>
        /// Flattens a recognition result into strings: one entry per word when
        /// <paramref name="wrds"/> is true, otherwise one entry per text line
        /// (its words joined by single spaces).
        /// </summary>
        /// <param name="res">Result returned by the recognition service.</param>
        /// <param name="wrds">True for word granularity, false for line granularity.</param>
        /// <returns>
        /// The extracted strings; empty when the result, its RecognitionResult
        /// or its Lines collection is null.
        /// </returns>
        public static string[] GetExtracted(HandwritingRecognitionOperationResult res, bool wrds)
        {
            List<string> items = new List<string>();

            // A failed or unfinished operation presumably carries no
            // recognition payload; treat that as "no text" instead of crashing.
            if (res == null || res.RecognitionResult == null || res.RecognitionResult.Lines == null)
            {
                return items.ToArray();
            }

            foreach (HandwritingTextLine l in res.RecognitionResult.Lines)
            {
                if (wrds)
                {
                    items.AddRange(GetWords(l));
                }
                else
                {
                    items.Add(GetLineAsString(l));
                }
            }

            return items.ToArray();
        }

        /// <summary>
        /// Collects the text of every word in <paramref name="line"/>, in order.
        /// </summary>
        /// <param name="line">A recognised text line.</param>
        /// <returns>The word texts; empty when the line or its Words collection is null.</returns>
        public static List<string> GetWords(HandwritingTextLine line)
        {
            List<string> words = new List<string>();
            if (line == null || line.Words == null)
            {
                return words;
            }

            foreach (HandwritingTextWord w in line.Words)
            {
                words.Add(w.Text);
            }

            return words;
        }

        /// <summary>
        /// Joins the words of <paramref name="line"/> with single spaces.
        /// </summary>
        /// <param name="line">A recognised text line.</param>
        /// <returns>The joined text, or an empty string when the line has no words.</returns>
        public static string GetLineAsString(HandwritingTextLine line)
        {
            // string.Join already yields "" for an empty sequence, so no
            // separate emptiness check is needed.
            return string.Join(" ", GetWords(line));
        }
    }
}
Expected output: the text together with its respective coordinates (x, y, height, width).
JSON output:
{ "status": "Succeeded", "succeeded": true, "failed": false, "finished": true, "recognitionResults": [ { "page": 1, "clockwiseOrientation": 359.62, "width": 505, "height": 399, "unit": "pixel", "lines": [ { "boundingBox": [ 224, 58, 380, 57, 381, 74, 225, 75 ], "text": "GOVERNMENT OF INDIA", "words": [ { "boundingBox": [ 229, 59, 321, 58, 320, 75, 229, 75 ], "text": "GOVERNMENT" }, { "boundingBox": [ 324, 58, 341, 58, 341, 75, 323, 75 ], "text": "OF" }, { "boundingBox": [ 344, 58, 381, 58, 381, 75, 344, 75 ], "text": "INDIA" } ] }, { "boundingBox": [ 211, 159, 429, 160, 428, 180, 210, 178 ], "text": "FH faPet/ DOB: 27/07/1982", "words": [ { "boundingBox": [ 225, 160, 243, 160, 243, 179, 225, 179 ], "text": "FH" }, { "boundingBox": [ 247, 160, 286, 160, 286, 179, 247, 179 ], "text": "faPet/" }, { "boundingBox": [ 290, 160, 333, 160, 333, 179, 290, 179 ], "text": "DOB:" }, { "boundingBox": [ 337, 160, 428, 162, 428, 180, 337, 179 ], "text": "27/07/1982" } ] }, { "boundingBox": [ 209, 192, 313, 190, 314, 208, 210, 210 ], "text": "you / MALE", "words": [ { "boundingBox": [ 214, 192, 247, 192, 246, 209, 214, 210 ], "text": "you" }, { "boundingBox": [ 254, 192, 260, 192, 260, 209, 254, 209 ], "text": "/" }, { "boundingBox": [ 264, 192, 314, 192, 313, 208, 263, 209 ], "text": "MALE" } ] }, { "boundingBox": [ 201, 314, 351, 313, 352, 330, 202, 331 ], "text": "66 66 6666 6666", "words": [ { "boundingBox": [ 204, 315, 225, 314, 225, 330, 204, 331 ], "text": "66" }, { "boundingBox": [ 229, 314, 251, 314, 251, 330, 229, 330 ], "text": "66" }, { "boundingBox": [ 255, 314, 301, 314, 301, 330, 255, 330 ], "text": "6666" }, { "boundingBox": [ 307, 314, 352, 314, 351, 331, 306, 330 ], "text": "6666" } ] } ] } ] }