0

i am trying to extract image from pdf using this code

#region ExtractImagesFromPDF
    public static void ExtractImagesFromPDF(string sourcePdf, string outputPath)
    {
        // NOTE:  This will only get the first image it finds per page.
        PdfReader pdf = new PdfReader(sourcePdf);
        RandomAccessFileOrArray raf = new iTextSharp.text.pdf.RandomAccessFileOrArray(sourcePdf);

        try
        {
            for (int pageNumber = 1; pageNumber <= pdf.NumberOfPages; pageNumber++)
            {
                PdfDictionary pg = pdf.GetPageN(pageNumber);
                PdfDictionary res =
                    (PdfDictionary)PdfReader.GetPdfObject(pg.Get(PdfName.RESOURCES));
                PdfDictionary xobj =
                    (PdfDictionary)PdfReader.GetPdfObject(res.Get(PdfName.XOBJECT));
                if (xobj != null)
                {
                    foreach (PdfName name in xobj.Keys)
                    {
                        PdfObject obj = xobj.Get(name);
                        if (obj.IsIndirect())
                        {
                            PdfDictionary tg = (PdfDictionary)PdfReader.GetPdfObject(obj);
                            PdfName type =
                                (PdfName)PdfReader.GetPdfObject(tg.Get(PdfName.SUBTYPE));
                            if (PdfName.IMAGE.Equals(type))
                            {

                                int XrefIndex = Convert.ToInt32(((PRIndirectReference)obj).Number.ToString(System.Globalization.CultureInfo.InvariantCulture));
                                PdfObject pdfObj = pdf.GetPdfObject(XrefIndex);
                                PdfStream pdfStrem = (PdfStream)pdfObj;
                                byte[] bytes = PdfReader.GetStreamBytesRaw((PRStream)pdfStrem);

                                if ((bytes != null))
                                {
                                    using (System.IO.MemoryStream memStream = new System.IO.MemoryStream(bytes))
                                    {
                                        memStream.Position = 0;
                                        System.Drawing.Image img = System.Drawing.Image.FromStream(memStream);
                                        // must save the file while stream is open.
                                        if (!Directory.Exists(outputPath))
                                            Directory.CreateDirectory(outputPath);

                                        string path = Path.Combine(outputPath, String.Format(@"{0}.jpg", pageNumber));
                                        System.Drawing.Imaging.EncoderParameters parms = new System.Drawing.Imaging.EncoderParameters(1);
                                        parms.Param[0] = new System.Drawing.Imaging.EncoderParameter(System.Drawing.Imaging.Encoder.Compression, 0);
                                        // GetImageEncoder is found below this method
                                        System.Drawing.Imaging.ImageCodecInfo jpegEncoder = GetImageEncoder("JPEG");
                                        img.Save(path, jpegEncoder, parms);
                                        break;

                                    }
                                }
                            }
                        }
                    }
                }
            }
        }

        catch
        {
            throw;
        }
        finally
        {
            pdf.Close();
        }


    }
    #endregion

its all going right but the line

System.Drawing.Image img = System.Drawing.Image.FromStream(memStream);

giving an error “Parameter not valid”

i cant getting whats the problem is the stream is not a image stream then why iTextSharp reading it as image. please anyone help me out

  • 1
    *is the stream is not a image stream then why iTextSharp reading it as image.* - iTextSharp is not reading it as image. It merely returns you stream contents **your code** believes to be some images `System.Drawing.Imaging` can parse. Why don't you use iTextSharp's content parser to extract images for you? – mkl Feb 26 '15 at 11:48
  • Generally speaking. when you [copy code from somewhere](http://stackoverflow.com/questions/802269/extract-images-using-itextsharp), you should probably not copy from the question and instead use one of the answers. Further, you should read every answer, specifically [the one that says that the code makes a giant and usually incorrect assumption about the images](http://stackoverflow.com/a/804392/231316) in the document. – Chris Haas Feb 26 '15 at 14:37

0 Answers0