I've been working on an easy way to convert PDF to PDF/A. In the end, I convert every page of the original PDF to an image and then recreate the PDF using only those images.
This way I don't have to worry about fonts, forms, or any other configuration.
/**
 * Rebuilds a PDF as an image-only document.
 *
 * <p>Each page of {@code pdfFile} is rendered to an RGB bitmap at 300 DPI, encoded as PNG,
 * and drawn onto a fresh page of a new document. The image is scaled uniformly to fit the
 * new page's crop box and anchored at the crop box's lower-left corner. The result is saved
 * next to the input as {@code <absolute input path>.PDFA.pdf}.
 *
 * <p>Failures are not propagated: any exception is logged at {@code SEVERE} and the method
 * returns normally.
 *
 * <p>NOTE(review): output pages are created with {@code new PDPage()}, i.e. the library's
 * default page size, not the size of the source page — confirm this is intended.
 * NOTE(review): nothing here adds XMP metadata or an output intent, so the result is
 * presumably not PDF/A conformant on its own — verify with a PDF/A validator.
 *
 * @param pdfFile the PDF to convert
 */
public void usingImages(File pdfFile) {
    final float renderDpi = 300;
    // A single try-with-resources closes docOut first, then docIn, exactly like the nested form.
    try (PDDocument docIn = PDDocument.load(pdfFile);
         PDDocument docOut = new PDDocument()) {
        PDFRenderer pdfRenderer = new PDFRenderer(docIn);
        final int pageCount = docIn.getNumberOfPages();
        for (int pageIx = 0; pageIx < pageCount; ++pageIx) {
            // Rasterize the source page and encode it as PNG (lossless).
            BufferedImage bim = pdfRenderer.renderImageWithDPI(pageIx, renderDpi, ImageType.RGB);
            ByteArrayOutputStream baos = new ByteArrayOutputStream();
            ImageIO.write(bim, "png", baos);
            byte[] pngBytes = baos.toByteArray();

            // Create the matching output page.
            PDPage page = new PDPage();
            docOut.addPage(page);

            // Name the image after the page index (the original used the PDPage object's toString()).
            PDImageXObject pdImage =
                    PDImageXObject.createFromByteArray(docOut, pngBytes, "Pagina_" + pageIx);

            try (PDPageContentStream contentStream = new PDPageContentStream(
                    docOut, page, PDPageContentStream.AppendMode.APPEND, true, true)) {
                // Uniform scale so the whole image fits inside the crop box
                // (approach inspired by http://stackoverflow.com/a/22318681/535646).
                float boxWidth = page.getCropBox().getWidth();
                float boxHeight = page.getCropBox().getHeight();
                float scale = Math.min(boxWidth / pdImage.getWidth(), boxHeight / pdImage.getHeight());
                contentStream.drawImage(
                        pdImage,
                        page.getCropBox().getLowerLeftX(),
                        page.getCropBox().getLowerLeftY(),
                        pdImage.getWidth() * scale,
                        pdImage.getHeight() * scale);
            }
        }
        docOut.save(new File(pdfFile.getAbsolutePath() + ".PDFA.pdf"));
    } catch (Exception ex) {
        // Best-effort conversion: report which file failed instead of logging a null message.
        Logger.getLogger(PDFtoPDFA.class.getName())
                .log(Level.SEVERE, "Failed to convert " + pdfFile + " to an image-only PDF", ex);
    }
}