1

I am trying to create a process in .NET that converts a PDF, including all of its pages and attachments, to PNGs. I am evaluating libraries and came across PDFiumSharp, but it is not working for me. Here is my code:

string Inputfile = "input.pdf";
string OutputFolder = "Output";

string fileName = Path.GetFileNameWithoutExtension(Inputfile);

// Make sure the destination folder exists before any page is saved; saving into a
// missing folder fails. CreateDirectory is a no-op when the folder is already there.
Directory.CreateDirectory(OutputFolder);

// Render every page of the input PDF to its own PNG named "<input name>_<page index>.png".
using (PdfDocument doc = new PdfDocument(Inputfile))
{
   for (int i = 0; i < doc.Pages.Count; i++)
   {
      var page = doc.Pages[i];
      // The bitmap is sized from the page's Width/Height, truncated to whole numbers by the casts.
      using (var bitmap = new PDFiumBitmap((int)page.Width, (int)page.Height, false))
      {
         page.Render(bitmap);
         var targetFile = Path.Combine(OutputFolder, $"{fileName}_{i}.png");
         bitmap.Save(targetFile);
      }
   }
}

When I run this code, I get this exception:

screenshot of exception

Does anyone know how to fix this? Also does PDFiumSharp support extracting PDF attachments? If not, does anyone have any other ideas on how to achieve my goal?

1 Answers1

0

PDFiumSharp does not appear to support extracting PDF attachments. To achieve your goal, you could look at another library that supports both extracting PDF attachments and converting PDFs to PNGs.

Disclosure: I am an employee of the company behind the LEADTOOLS PDF SDK, which you can try out via these two NuGet packages: https://www.nuget.org/packages/Leadtools.Pdf/

https://www.nuget.org/packages/Leadtools.Document.Sdk/

Here is some code that will convert a PDF + all attachments in the PDF to separate PNGs in an output directory:

SetLicense();
cache = new FileCache { CacheDirectory = "cache" };

// Directory.CreateDirectory is a no-op when the directory already exists,
// so no separate existence check is needed.
Directory.CreateDirectory(OutputDir);

// Load the PDF with its embedded files exposed as attachments rather than as pages.
using var document = DocumentFactory.LoadFromFile("attachments.pdf", new LoadDocumentOptions { Cache = cache, LoadAttachmentsMode = DocumentLoadAttachmentsMode.AsAttachments });

// Everything that should be converted: the owner PDF (if it has pages) plus each attachment.
List<LEADDocument> documents = new List<LEADDocument>();
// Attachment documents are tracked separately so they can be disposed afterwards;
// the owner document is already disposed by the using declaration above.
List<LEADDocument> attachmentDocuments = new List<LEADDocument>();

try
{
   if (document.Pages.Count > 0)
      documents.Add(document);

   foreach (var attachment in document.Attachments)
   {
      var attachmentDocument = document.LoadDocumentAttachment(new LoadAttachmentOptions { AttachmentNumber = attachment.AttachmentNumber });
      attachmentDocuments.Add(attachmentDocument);
      documents.Add(attachmentDocument);
   }

   ConvertDocuments(documents, RasterImageFormat.Png);
}
finally
{
   // Release each loaded attachment document even when conversion throws.
   foreach (var attachmentDocument in attachmentDocuments)
      attachmentDocument?.Dispose();
}

And the ConvertDocuments method:

/// <summary>
/// Runs one document-converter job per document, writing each result as a raster
/// image of the requested format into <c>OutputDir</c>.
/// </summary>
/// <param name="documents">The documents to convert, processed in enumeration order.</param>
/// <param name="imageFormat">The raster format used for the output and for its file extension.</param>
static void ConvertDocuments(IEnumerable<LEADDocument> documents, RasterImageFormat imageFormat)
{
   using var converter = new DocumentConverter();
   using var ocrEngine = OcrEngineManager.CreateEngine(OcrEngineType.LEAD);
   ocrEngine.Startup(null, null, null, null);
   converter.SetOcrEngineInstance(ocrEngine, false);
   converter.SetDocumentWriterInstance(new DocumentWriter());

   foreach (var document in documents)
   {
      // Documents without a name fall back to a generic base name.
      string baseName = document.Name;
      if (string.IsNullOrEmpty(baseName))
         baseName = "Attachment";

      // Start with "<name>.<ext>"; while that file exists, try "<name>(1).<ext>", "<name>(2).<ext>", ...
      string targetPath = Path.Combine(OutputDir, $"{baseName}.{RasterCodecs.GetExtension(imageFormat)}");
      for (int suffix = 1; File.Exists(targetPath); suffix++)
      {
         targetPath = Path.Combine(OutputDir, $"{baseName}({suffix}).{RasterCodecs.GetExtension(imageFormat)}");
      }

      var jobData = new DocumentConverterJobData();
      jobData.Document = document;
      jobData.Cache = cache;
      jobData.DocumentFormat = DocumentFormat.User;
      jobData.RasterImageFormat = imageFormat;
      jobData.RasterImageBitsPerPixel = 0;
      jobData.OutputDocumentFileName = targetPath;

      converter.Jobs.RunJob(converter.Jobs.CreateJob(jobData));
   }
}
hcham1
  • 1,799
  • 2
  • 16
  • 27