I realize I'm pretty late to the party, but after reading the comments from @BrunoLowagie, I wanted to see if I could put something together myself that uses the examples from his linked sample chapter. It's probably overkill, but I put together some code that merges multiple PDFs into a single file that I posted on the Code Review SE site (the post, VB.NET - Error Handling in Generic Class for PDF Merge, contains the full class code). It only merges PDF files right now, but I'm planning on adding methods for additional functionality later.
The "master" method (towards the end of the Class
block in the linked post, and also posted below for reference) handles the actual merging of the PDF files, but the multiple overloads provide a number of options for how to define the list of original files. So far, I've included the following features:
- The methods return a
System.IO.FileInfo
object if the merge is successful.
- Provide a
System.IO.DirectoryInfo
object or a System.String
identifying a path and it will collect all PDF files in that directory (including sub-directories if specified) to merge.
- Provide a
List(Of System.String)
or a List(Of System.IO.FileInfo)
specifying the PDFs you want to merge.
- Identify how the PDFs should be sorted before the merge (especially useful if you use one of the
MergeAll
methods to get all PDF files in a directory).
- If the specified output PDF file already exists, you can specify whether or not you want to overwrite it. (I'm considering adding the "ability" to automatically adjust the output PDF file's name if it already exists).
Warning
and Error
properties provide a way to get feedback in the calling method, whether or not the merge is successful.
Once the code is in place, it can be used like this:
Dim PDFDir As New IO.DirectoryInfo("C:\Test Data\PDF\")
Dim ResultFile As IO.FileInfo = Nothing
Dim Merger As New PDFManipulator
ResultFile = Merger.MergeAll(PDFDir, "C:\Test Data\PDF\Merged.pdf", True, PDFManipulator.PDFMergeSortOrder.FileName, True)
Here is the "master" method. As I said, it's probably overkill (and I'm still tweaking it some), but I wanted to do my best to try to make it work as effectively as possible. Obviously it requires a Reference to the itextsharp.dll
for access to the library's functions.
I've commented out the references to the Error
and Warning
properties of the class for this post to help reduce any confusion.
Public Function Merge(ByVal PDFFiles As List(Of System.IO.FileInfo), ByVal OutputFileName As String, ByVal OverwriteExistingPDF As Boolean, ByVal SortOrder As PDFMergeSortOrder) As System.IO.FileInfo
Dim ResultFile As System.IO.FileInfo = Nothing
Dim ContinueMerge As Boolean = True
If OverwriteExistingPDF Then
If System.IO.File.Exists(OutputFileName) Then
Try
System.IO.File.Delete(OutputFileName)
Catch ex As Exception
ContinueMerge = False
'If Errors Is Nothing Then
' Errors = New List(Of String)
'End If
'Errors.Add("Could not delete existing output file.")
Throw
End Try
End If
End If
If ContinueMerge Then
Dim OutputPDF As iTextSharp.text.Document = Nothing
Dim Copier As iTextSharp.text.pdf.PdfCopy = Nothing
Dim PDFStream As System.IO.FileStream = Nothing
Dim SortedList As New List(Of System.IO.FileInfo)
Try
Select Case SortOrder
Case PDFMergeSortOrder.Original
SortedList = PDFFiles
Case PDFMergeSortOrder.FileDate
SortedList = PDFFiles.OrderBy(Function(f As System.IO.FileInfo) f.LastWriteTime).ToList
Case PDFMergeSortOrder.FileName
SortedList = PDFFiles.OrderBy(Function(f As System.IO.FileInfo) f.Name).ToList
Case PDFMergeSortOrder.FileNameWithDirectory
SortedList = PDFFiles.OrderBy(Function(f As System.IO.FileInfo) f.FullName).ToList
End Select
If Not IO.Directory.Exists(New IO.FileInfo(OutputFileName).DirectoryName) Then
Try
IO.Directory.CreateDirectory(New IO.FileInfo(OutputFileName).DirectoryName)
Catch ex As Exception
ContinueMerge = False
'If Errors Is Nothing Then
' Errors = New List(Of String)
'End If
'Errors.Add("Could not create output directory.")
Throw
End Try
End If
If ContinueMerge Then
OutputPDF = New iTextSharp.text.Document
PDFStream = New System.IO.FileStream(OutputFileName, System.IO.FileMode.OpenOrCreate)
Copier = New iTextSharp.text.pdf.PdfCopy(OutputPDF, PDFStream)
OutputPDF.Open()
For Each PDF As System.IO.FileInfo In SortedList
If ContinueMerge Then
Dim InputReader As iTextSharp.text.pdf.PdfReader = Nothing
Try
InputReader = New iTextSharp.text.pdf.PdfReader(PDF.FullName)
For page As Integer = 1 To InputReader.NumberOfPages
Copier.AddPage(Copier.GetImportedPage(InputReader, page))
Next page
If InputReader.IsRebuilt Then
'If Warnings Is Nothing Then
' Warnings = New List(Of String)
'End If
'Warnings.Add("Damaged PDF: " & PDF.FullName & " repaired and successfully merged into output file.")
End If
Catch InvalidEx As iTextSharp.text.exceptions.InvalidPdfException
'Skip this file
'If Errors Is Nothing Then
' Errors = New List(Of String)
'End If
'Errors.Add("Invalid PDF: " & PDF.FullName & " not merged into output file.")
Catch FormatEx As iTextSharp.text.pdf.BadPdfFormatException
'Skip this file
'If Errors Is Nothing Then
' Errors = New List(Of String)
'End If
'Errors.Add("Bad PDF Format: " & PDF.FullName & " not merged into output file.")
Catch PassworddEx As iTextSharp.text.exceptions.BadPasswordException
'Skip this file
'If Errors Is Nothing Then
' Errors = New List(Of String)
'End If
'Errors.Add("Password-protected PDF: " & PDF.FullName & " not merged into output file.")
Catch OtherEx As Exception
ContinueMerge = False
Finally
If Not InputReader Is Nothing Then
InputReader.Close()
InputReader.Dispose()
End If
End Try
End If
Next PDF
End If
Catch ex As iTextSharp.text.pdf.PdfException
ResultFile = Nothing
ContinueMerge = False
'If Errors Is Nothing Then
' Errors = New List(Of String)
'End If
'Errors.Add("iTextSharp Error: " & ex.Message)
If System.IO.File.Exists(OutputFileName) Then
If Not OutputPDF Is Nothing Then
OutputPDF.Close()
OutputPDF.Dispose()
End If
If Not PDFStream Is Nothing Then
PDFStream.Close()
PDFStream.Dispose()
End If
If Not Copier Is Nothing Then
Copier.Close()
Copier.Dispose()
End If
System.IO.File.Delete(OutputFileName)
End If
Throw
Catch other As Exception
ResultFile = Nothing
ContinueMerge = False
'If Errors Is Nothing Then
' Errors = New List(Of String)
'End If
'Errors.Add("General Error: " & other.Message)
If System.IO.File.Exists(OutputFileName) Then
If Not OutputPDF Is Nothing Then
OutputPDF.Close()
OutputPDF.Dispose()
End If
If Not PDFStream Is Nothing Then
PDFStream.Close()
PDFStream.Dispose()
End If
If Not Copier Is Nothing Then
Copier.Close()
Copier.Dispose()
End If
System.IO.File.Delete(OutputFileName)
End If
Throw
Finally
If Not OutputPDF Is Nothing Then
OutputPDF.Close()
OutputPDF.Dispose()
End If
If Not PDFStream Is Nothing Then
PDFStream.Close()
PDFStream.Dispose()
End If
If Not Copier Is Nothing Then
Copier.Close()
Copier.Dispose()
End If
If System.IO.File.Exists(OutputFileName) Then
If ContinueMerge Then
ResultFile = New System.IO.FileInfo(OutputFileName)
If ResultFile.Length <= 0 Then
ResultFile = Nothing
Try
System.IO.File.Delete(OutputFileName)
Catch ex As Exception
Throw
End Try
End If
Else
ResultFile = Nothing
Try
System.IO.File.Delete(OutputFileName)
Catch ex As Exception
Throw
End Try
End If
Else
ResultFile = Nothing
End If
End Try
End If
Return ResultFile
End Function