6

The code snippet below returns a corrupt PDF document; however, if I return mergedDocument instead, it always returns a valid PDF. mergedDocument is based on a PDF file I created using Word, whereas completedDocument is entirely programmatically generated. The code "works" in that it throws no exceptions. Why is iTextSharp creating a corrupt PDF?

// For every event item: fill in and flatten that item's PDF template, then append
// the resulting pages to a single combined document that is returned as a byte array.
byte[] completedDocument = null;            
using (MemoryStream streamCompleted = new MemoryStream())
{
    using (Document document = new Document())
    {                    
        // PdfCopy writes the combined output into streamCompleted.
        PdfCopy copy = new PdfCopy(document, streamCompleted);
        document.Open();
        copy.Open();                    

        foreach (var item in eventItems)
        {
            byte[] mergedDocument = null;
            // The template is looked up by the item's OrganisationID token.
            PdfReader reader = new PdfReader(pdfTemplates[item.DataTokens[NotifyTokenType.OrganisationID]]);
            using (MemoryStream streamTemplate = new MemoryStream())
            {
                using (PdfStamper stamper = new PdfStamper(reader, streamTemplate))
                {
                    // Only set tokens whose key matches a form field name in the template.
                    foreach (var token in item.DataTokens)
                    {
                        if (stamper.AcroFields.Fields.Any(fld => fld.Key == token.Key.ToString()))
                        {
                            stamper.AcroFields.SetField(token.Key.ToString(), token.Value);
                        }
                    }
                    stamper.FormFlattening = true;
                    // Keep streamTemplate open after the stamper is disposed; it is read just below.
                    stamper.Writer.CloseStream = false;
                }

                // Copy the stamped template out of the stream (the stamper has been disposed by now).
                mergedDocument = new byte[streamTemplate.Length];
                streamTemplate.Position = 0;
                streamTemplate.Read(mergedDocument, 0, (int)streamTemplate.Length);
            }
            // Re-open the filled-in template so its pages can be imported into the combined output.
            reader = new PdfReader(mergedDocument);

            for (int i = 1; i <= reader.NumberOfPages; i++)
            {
                document.SetPageSize(PageSize.A4);
                copy.AddPage(copy.GetImportedPage(reader, i));
            }
        }
        // NOTE(review): these bytes are read while `document` and `copy` are still open
        // (the using block only ends below), so anything the writer emits on close would
        // be missing from this array - likely the cause of the corrupt PDF; confirm.
        completedDocument = new byte[streamCompleted.Length];
        streamCompleted.Position = 0;
        streamCompleted.Read(completedDocument, 0, (int)streamCompleted.Length);
    }                
}
return completedDocument;
Ben Robinson
  • 21,601
  • 5
  • 62
  • 79

2 Answers

23

You need to close the document and copy objects to flush the PDF writing buffer. This, however, causes some problems when trying to read the stream into an array. The fix for that is to use the ToArray() method of the MemoryStream which still works on closed streams. The changes I made have comments on them.

        // For every event item: fill in and flatten that item's PDF template, then
        // append the resulting pages to one combined PDF returned as a byte array.
        byte[] completedDocument = null;
        using (MemoryStream streamCompleted = new MemoryStream())
        {
            using (Document document = new Document())
            {
                // PdfCopy writes the combined output into streamCompleted.
                PdfCopy copy = new PdfCopy(document, streamCompleted);
                document.Open();
                copy.Open();

                foreach (var item in eventItems)
                {
                    byte[] mergedDocument = null;
                    // The template is looked up by the item's OrganisationID token.
                    PdfReader reader = new PdfReader(pdfTemplates[item.DataTokens[NotifyTokenType.OrganisationID]]);
                    using (MemoryStream streamTemplate = new MemoryStream())
                    {
                        using (PdfStamper stamper = new PdfStamper(reader, streamTemplate))
                        {
                            foreach (var token in item.DataTokens)
                            {
                                // Only set tokens that correspond to a form field in the template.
                                string fieldName = token.Key.ToString();
                                if (stamper.AcroFields.Fields.ContainsKey(fieldName))
                                {
                                    stamper.AcroFields.SetField(fieldName, token.Value);
                                }
                            }
                            stamper.FormFlattening = true;
                            stamper.Writer.CloseStream = false;
                        }
                        //Copy the stream's bytes (the stamper has been disposed, so its output is complete)
                        mergedDocument = streamTemplate.ToArray();
                    }
                    // Re-open the filled-in template so its pages can be imported into the combined output.
                    reader = new PdfReader(mergedDocument);

                    for (int i = 1; i <= reader.NumberOfPages; i++)
                    {
                        document.SetPageSize(PageSize.A4);
                        copy.AddPage(copy.GetImportedPage(reader, i));
                    }
                }

                //Close the document and the copy once, after ALL items have been added.
                //Closing inside the loop would end the output after the first item and
                //make AddPage fail for every subsequent item.
                document.Close();
                copy.Close();

                //ToArray() can operate on closed streams
                completedDocument = streamCompleted.ToArray();
            }
        }
        return completedDocument;
Chris Haas
  • 53,986
  • 12
  • 141
  • 274
  • I would upvote this 1000 times if I could. I was having difficulty merging images and pdfs into a single pdf document, this helped to shed some light on the correct process. – jtiger Apr 10 '15 at 20:22
  • Thank you, putting memory stream to array solved my problem with getting back empty PDF file! – Taurib Oct 26 '17 at 11:56
0

Also, make sure your HTML doesn't contain an <hr> tag when converting HTML to PDF:

// Replace double quotes with single quotes and strip both spellings of the <hr> tag from the editor text.
hdnEditorText.Value.Replace("\"", "'").Replace("<hr />", "").Replace("<hr/>", "")
panky sharma
  • 2,029
  • 28
  • 45