When extracting text from a PDF, exponents (superscripts) are not kept inline with the text they belong to; they end up on a separate line. How can I resolve this?
// Extracts text from a range of pages of the PDF at fileLocation and stores it
// in `text`, with each page's text followed by a blank-line separator ("\n\n").
string text = string.Empty;
using (PdfReader reader = new PdfReader(fileLocation))
{
    // Accumulate page text in a StringBuilder rather than with `text +=`, so
    // the already-extracted text is not re-copied on every page.
    System.Text.StringBuilder extracted = new System.Text.StringBuilder();

    // Single-slot filter array; slot 0 is overwritten with the current page's
    // region before each extraction.
    RenderFilter[] filter = new RenderFilter[1];

    // NOTE(review): the loop starts at page 2 and uses `<`, so the first page and
    // page `NumberOfPages` are never read. Confirm that skipping both is intended.
    for (int page = 2; page < reader.NumberOfPages; page++)
    {
        RectangleJ mediaBox = reader.GetPageSize(page);

        // Region built from the page size with Bottom raised by 60 and Top lowered
        // by 140 (presumably to leave out footer/header areas; verify).
        // NOTE(review): confirm the argument order RectangleJ expects
        // (corner coordinates vs. x, y, width, height).
        filter[0] = new RegionTextRenderFilter(
            new RectangleJ(mediaBox.Left, mediaBox.Bottom + 60, mediaBox.Right, mediaBox.Top - 140));

        // A fresh strategy per page, restricted to the region above.
        // NOTE(review): the location-based strategy presumably groups text by
        // baseline, which would explain raised (superscript) text being emitted
        // as a separate line; confirm against the library documentation.
        ITextExtractionStrategy strategy =
            new FilteredTextRenderListener(new LocationTextExtractionStrategy(), filter);

        extracted.Append(PdfTextExtractor.GetTextFromPage(reader, page, strategy));
        extracted.Append("\n\n");
    }

    text = extracted.ToString();
}
If the line of text in the PDF is:
$2.9 \times 10^{-4}\ \mathrm{m}^{3}$
The result after the text has been extracted is:
-4 3
2.9 x 10 m
But it should keep the exponents inline with their bases, e.g. 2.9 x 10^-4 m^3