I have two PDF libraries; I read every document in each one and parse specific information from it. One library processes without issues. The other library returns only the footer of each page, as follows: Page 1 of 6Page 2 of 6Page 3 of 6Page4 of 6..... The library that works has one document with multiple pages.
The following is the PDF reader code I am using. Has anyone experienced this behavior before? What is different between the documents, and how should I handle the case where only the footer is returned?
/// <summary>
/// Extracts the text of every page of a PDF file and returns it as a single string.
/// </summary>
/// <param name="fileName">Path of the PDF file to read.</param>
/// <returns>
/// The extracted text of all pages, concatenated in page order with no separator
/// inserted between pages, or an empty string when the file does not exist.
/// </returns>
/// <remarks>
/// NOTE(review): if only page footers come back for some documents, the page bodies
/// are presumably not stored as extractable text (for example, scanned page images);
/// confirm against the documents themselves, since this method cannot recover such content.
/// </remarks>
static string ReadPdfFile(string fileName)
{
    StringBuilder text = new StringBuilder();

    // A missing file is not an error here: the caller simply gets an empty result.
    if (!File.Exists(fileName))
    {
        return text.ToString();
    }

    Console.Error.WriteLine("in: " + fileName);

    PdfReader pdfReader = new PdfReader(fileName);
    try
    {
        int pageCount = pdfReader.NumberOfPages;
        for (int page = 1; page <= pageCount; page++)
        {
            // A new strategy instance is used for each page.
            ITextExtractionStrategy strategy = new SimpleTextExtractionStrategy();

            // The extracted value is already a .NET string, so it is appended as-is.
            // Round-tripping it through Encoding.Default would replace every character
            // outside the system code page with '?'.
            text.Append(PdfTextExtractor.GetTextFromPage(pdfReader, page, strategy));
        }
    }
    finally
    {
        // Close the reader even when extraction throws, so the file is not left open.
        pdfReader.Close();
    }

    return text.ToString();
}