0

pdfbox issue

I used pdfbox to extract text from PDF to my richtextbox.

I don't know what the problem is: some PDFs are extracted fine, but others throw an exception. The exception is:

Object reference not set to an instance of an object.

Here's my code:

using org.pdfbox.pdmodel;
using org.pdfbox.util;

/// <summary>
/// Click handler: asks the user for a PDF file, extracts its text with PDFBox
/// and shows the text in <c>richtextBox1</c> and the file name in <c>textBox1</c>.
/// Any exception raised while loading or extracting is shown in a message box.
/// </summary>
/// <param name="sender">The control that raised the event (unused).</param>
/// <param name="e">Event data (unused).</param>
private void pdfButton_Click(object sender, EventArgs e)
{
    // The dialog is disposable; release it as soon as the handler finishes.
    using (OpenFileDialog openFD = new OpenFileDialog())
    {
        openFD.FileName = "";
        openFD.InitialDirectory = "C:\\";
        openFD.Filter = "All PDF Files|*.PDF";
        openFD.Title = "Browse all PDF files";

        if (openFD.ShowDialog() != DialogResult.OK)
        {
            return;
        }

        try
        {
            // The property is FileName (capital N); C# is case-sensitive.
            pdf_filename = Path.GetFileNameWithoutExtension(openFD.FileName);
            PDDocument pdfFile = PDDocument.load(openFD.FileName);
            try
            {
                PDFTextStripper pdfStripper = new PDFTextStripper();
                richtextBox1.Text = pdfStripper.getText(pdfFile);
                textBox1.Text = Path.GetFileName(openFD.FileName);
            }
            finally
            {
                // Close the document even when extraction fails so the file is released.
                pdfFile.close();
            }
        }
        catch (Exception error)
        {
            MessageBox.Show(error.ToString());
        }
    }
}
John Saunders
  • 160,644
  • 26
  • 247
  • 397
jomsk1e
  • 3,585
  • 7
  • 34
  • 59
  • 2
    At which line do you get the exception? – tomsv Aug 31 '12 at 09:29
  • It seems that pdfFile or pdfStripper is null. At which line does the exception occur? – codeteq Aug 31 '12 at 09:31
  • The exception was caught by the catch statement – jomsk1e Aug 31 '12 at 09:34
  • What these people want to know is after which line does it jump to the catch? – Terry Aug 31 '12 at 09:38
  • If I remove the try and catch statement the exception occurs on this line: pdfRT.Text = pdfStripper.getText(pdfFile); – jomsk1e Aug 31 '12 at 09:39
  • 1
    Have you debugged this? When the code hits the breakpoint on the line you just gave us, check whether all objects in that line are instantiated (pdfRT, pdfStripper and pdfFile) – Terry Aug 31 '12 at 09:42
  • Welcome to Stack Overflow! Almost all cases of NullReferenceException are the same. Please see "[What is a NullReferenceException in .NET?](http://stackoverflow.com/questions/4660142/what-is-a-nullreferenceexception-in-net)" for some hints. – John Saunders Sep 03 '12 at 04:48

1 Answer

0

I fixed the issue using iTextSharp. On my co-worker's advice, I replaced PDFBox with iTextSharp.

If someone has the same issue as me, here's the working code:

using iTextSharp.text.pdf;
using iTextSharp.text.pdf.parser;


/// <summary>
/// Click handler: asks the user for a PDF file, extracts its text through
/// <c>ReadPdf</c> and shows the text in <c>richtextBox1</c> and the file name
/// in <c>textBox1</c>. Any exception raised while reading is shown in a message box.
/// </summary>
/// <param name="sender">The control that raised the event (unused).</param>
/// <param name="e">Event data (unused).</param>
private void pdfButton_Click(object sender, EventArgs e)
{
    // The dialog is disposable; release it as soon as the handler finishes.
    using (OpenFileDialog openFD = new OpenFileDialog())
    {
        openFD.FileName = "";
        openFD.InitialDirectory = "C:\\";
        openFD.Filter = "All PDF Files|*.PDF";
        openFD.Title = "Browse all PDF files";

        if (openFD.ShowDialog() != DialogResult.OK)
        {
            return;
        }

        try
        {
            // The property is FileName (capital N); C# is case-sensitive.
            pdf_filename = Path.GetFileNameWithoutExtension(openFD.FileName);

            richtextBox1.Text = ReadPdf(openFD.FileName);
            textBox1.Text = Path.GetFileName(openFD.FileName);
        }
        catch (Exception error)
        {
            MessageBox.Show(error.ToString());
        }
    }
}


/// <summary>
/// Extracts the text of every page of a PDF file using iTextSharp and returns
/// it as one string, with the pages concatenated in order.
/// </summary>
/// <param name="filename">Path of the PDF file to read.</param>
/// <returns>
/// The concatenated text of pages 1 through <c>NumberOfPages</c>, or
/// <see cref="string.Empty"/> when the file does not exist.
/// </returns>
private string ReadPdf(string filename)
{
    if (!File.Exists(filename))
    {
        return string.Empty;
    }

    PdfReader reader = new PdfReader(filename);
    try
    {
        // Accumulate in a builder instead of re-allocating a string per page.
        System.Text.StringBuilder text = new System.Text.StringBuilder();

        for (int page = 1; page <= reader.NumberOfPages; page++)
        {
            text.Append(PdfTextExtractor.GetTextFromPage(reader, page));
        }

        return text.ToString();
    }
    finally
    {
        // Close the reader even when extraction throws so the file is released.
        reader.Close();
    }
}
jomsk1e
  • 3,585
  • 7
  • 34
  • 59