Convert PDF to Image Batch

Question

I am working on a solution where I can convert pdf files to images. I am using the following example from codeproject: http://www.codeproject.com/Articles/317700/Convert-a-PDF-into-a-series-of-images-using-Csharp?msg=4134859#xx4134859xx

now I tried with the following code to generate from more then 1000 pdf files new images:

using Cyotek.GhostScript;
using Cyotek.GhostScript.PdfConversion;
using System;
using System.Collections.Generic;
using System.Drawing;
using System.IO;
using System.Linq;
using System.Text;
using System.Threading.Tasks;

namespace RefClass_PDF2Image
{
    class Program
    {
        static void Main(string[] args)
        {
            string outputPath = Properties.Settings.Default.outputPath;
            string pdfPath = Properties.Settings.Default.pdfPath;

            if (!Directory.Exists(outputPath))
            {
                Console.WriteLine("Der angegebene Pfad " + outputPath + " für den Export wurde nicht gefunden. Bitte ändern Sie den Pfad (outputPath) in der App.Config Datei.");
                return;
            }
            else
            {
                Console.WriteLine("Output Pfad: " + outputPath + " gefunden.");
            }

            if (!Directory.Exists(pdfPath))
            {
                Console.WriteLine("Der angegebene Pfad " + pdfPath + " zu den PDF Zeichnungen wurde nicht gefunden. Bitte ändern Sie den Pfad (pdfPath) in der App.Config Datei.");
                return;
            }
            else
            {
                Console.WriteLine("PDF Pfad: " + pdfPath + " gefunden.");
            }


            Pdf2ImageSettings settings = GetPDFSettings();

            DateTime start = DateTime.Now;
            TimeSpan span;

            Console.WriteLine("");
            Console.WriteLine("Extraktion der PDF Zeichnungen wird gestartet: " + start.ToShortTimeString());
            Console.WriteLine("");

            DirectoryInfo diretoryInfo = new DirectoryInfo(pdfPath);
            DirectoryInfo[] directories = diretoryInfo.GetDirectories();

            Console.WriteLine("");
            Console.WriteLine("Es wurden " + directories.Length + " verschiedende Verzeichnisse gefunden.");
            Console.WriteLine("");

            List<string> filenamesPDF = Directory.GetFiles(pdfPath, "*.pdf*", SearchOption.AllDirectories).Select(x => Path.GetFullPath(x)).ToList();
            List<string> filenamesOutput = Directory.GetFiles(outputPath, "*.*", SearchOption.AllDirectories).Select(x => Path.GetFullPath(x)).ToList();

            Console.WriteLine("");
            Console.WriteLine("Es wurden " + filenamesPDF.Count + " verschiedende PDF Zeichnungen gefunden.");
            Console.WriteLine("");

            List<string> newFileNames = new List<string>();
            int cutLength = pdfPath.Length;


            for (int i = 0; i < filenamesPDF.Count; i++)
            {
                string temp = filenamesPDF[i].Remove(0, cutLength);
                temp = outputPath + temp;
                temp = temp.Replace("pdf", "jpg");
                newFileNames.Add(temp);
            }

            for (int i = 0; i < filenamesPDF.Count; i++)
            {
                FileInfo fi = new FileInfo(newFileNames[i]);
                if (!fi.Exists)
                {
                    if (!Directory.Exists(fi.DirectoryName))
                    {
                        Directory.CreateDirectory(fi.DirectoryName);
                    }

                    Bitmap firstPage = new Pdf2Image(filenamesPDF[i], settings).GetImage();
                    firstPage.Save(newFileNames[i], System.Drawing.Imaging.ImageFormat.Jpeg);
                    firstPage.Dispose();
                }

                //if (i % 20 == 0)
                //{
                //  GC.Collect();
                //  GC.WaitForPendingFinalizers();
                //}
            }


            Console.ReadLine();
        }

        private static Pdf2ImageSettings GetPDFSettings()
        {
            Pdf2ImageSettings settings;
            settings = new Pdf2ImageSettings();
            settings.AntiAliasMode = AntiAliasMode.Medium;
            settings.Dpi = 150;
            settings.GridFitMode = GridFitMode.Topological;
            settings.ImageFormat = ImageFormat.Png24;
            settings.TrimMode = PdfTrimMode.CropBox;
            return settings;
        }
    }
}

unfortunately, I always get in the Pdf2Image.cs an out of memory exception. here the code:

public Bitmap GetImage(int pageNumber)
{
  Bitmap result;
  string workFile;

  //if (pageNumber < 1 || pageNumber > this.PageCount)
  //    throw new ArgumentException("Page number is out of bounds", "pageNumber");

  if (pageNumber < 1)
      throw new ArgumentException("Page number is out of bounds", "pageNumber");

  workFile = Path.GetTempFileName();

  try
  {
    this.ConvertPdfPageToImage(workFile, pageNumber);
    using (FileStream stream = new FileStream(workFile, FileMode.Open, FileAccess.Read))
    {
        result = new Bitmap(stream); // --->>> here is the out of memory exception
        stream.Close();
        stream.Dispose();
    }

  }
  finally
  {
    File.Delete(workFile);
  }

  return result;
}

how can I fix that to avoid this exception?

thanks for any help, tro

yes, that's what I do: firstPage.Dispose(); – tro Jun 28 '13 at 10:47 — tro, Jun 28 '13 at 10:47

score 3 · Accepted Answer · answered Jun 28 '13 at 12:35

Don't know if this is worth it for you, but it appears that you can do what you want without having a Bitmap in the middle. PdfToImage has this code in it:

public void ConvertPdfPageToImage(string outputFileName, int pageNumber)
{
  if (pageNumber < 1 || pageNumber > this.PageCount)
    throw new ArgumentException("Page number is out of bounds", "pageNumber");

  using (GhostScriptAPI api = new GhostScriptAPI())
    api.Execute(this.GetConversionArguments(this._pdfFileName, outputFileName, pageNumber, this.PdfPassword, this.Settings));
}

which writes a file for you where you want it. Why not just call that method directly instead of reading the image back in and writing it back out?

great! that was exactly what I'm looking for! thumps up – tro Jun 28 '13 at 14:51 — tro, Jun 28 '13 at 14:51

score 2 · Answer 2 · answered Jun 28 '13 at 10:20

This might not be answering your question directly, but could still be useful: Imagemagick provides a simple way of creating images from pdfs in batch mode

Single pdf file to many jogs:

convert -geometry 1024x768 -density 200 -colorspace RGB test.pdf +adjoin test_%0d.jpg

or if you want to process many pdf files:

mogrify -format jpg -alpha off -density 150 -quality 80 -resize 768 -unsharp 1.5 *.pdf

(The settings should obviously be adapted to your needs :) )

To do this programmatically in C# you could use the .NET imagemagick wrapper http://imagemagick.codeplex.com

score 2 · Answer 3 · answered Jun 28 '13 at 10:43

2

Add using for your resulted bitmap

using (FileStream stream = new FileStream(workFile, FileMode.Open, FileAccess.Read))
using (Bitmap result = new Bitmap(stream))
{
...
}

answered Jun 28 '13 at 10:43

Adam

31
3

This solution looks more elegant than just dispose. – Malhotra Jun 28 '13 at 14:01
**Using** wraps the enclosed block in a try/finally that calls **Dispose** in the finally block. This ensures that **Dispose** will be called even if an exception occurs. [link](http://stackoverflow.com/questions/10984336/net-using-using-blocks-vs-calling-dispose) – Adam Jun 29 '13 at 02:13

Convert PDF to Image Batch

3 Answers3