I have a PDF with four pages. Two images on the first page, one on the second, and one on the third. When I retrieve the height of the image on the second page or the fourth, I get a negative value. I tried taking the absolute value as a quick fix, but the Y position of the image was still slightly off. Also, the height and positioning on page three were fine.
Update: So far, this only seems to be a problem with PDFs created in Google Docs.
My code to extract the PDF images was taken from this thread: "Using iText 7, what's the proper way to export a Flate encoded image?".
This is how I access the height
// Pick the i-th entry from the extracted images and read the image's CTM
// (current transformation matrix) from the render info stored on that entry.
var currentPDFImageInfo = extractedImages[i];
var currentPDFImageMatrix = currentPDFImageInfo.RenderInfo.GetImageCtm();
// NOTE(review): the surrounding text talks about the height, but this line reads
// Matrix.I11 into a width variable. Presumably the height is read the same way
// from Matrix.I22 (not shown here) -- confirm. A negative value there would
// presumably mean the image is drawn with a vertically flipped CTM -- TODO confirm.
float pdfImageWidth = currentPDFImageMatrix.Get(iText.Kernel.Geom.Matrix.I11);
How I retrieve the PDF image data
/// <summary>
/// Opens the PDF at <paramref name="filePath"/> and walks its pages in order,
/// collecting one entry for every image reported while the page content is processed.
/// </summary>
/// <param name="filePath">Path of the PDF file handed to <c>PdfReader</c>.</param>
/// <returns>The listener's <c>ImageInfoList</c> once every page has been processed.</returns>
public static List<PDFImageInfo> ExtractImagesFromPDF(string filePath)
{
    // Reader and Document are members declared outside this snippet;
    // this method assigns them and does not close them.
    Reader = new PdfReader(filePath);
    Document = new PdfDocument(Reader);

    var listener = new ImageRenderListener();
    var processor = new PdfCanvasProcessor(listener);

    // PDF page numbers are 1-based.
    int pageNumber = 1;
    while (pageNumber <= Document.GetNumberOfPages())
    {
        // Tell the listener which page the upcoming image events belong to.
        listener.CurrentPageNumber = pageNumber;

        var page = Document.GetPage(pageNumber);
        processor.ProcessPageContent(page);

        pageNumber++;
    }

    return listener.ImageInfoList;
}
And of course the Strategy class
/// <summary>
/// Content-stream listener that records every readable image it is notified about,
/// tagged with the page number set in <see cref="CurrentPageNumber"/>.
/// </summary>
public class ImageRenderListener : IEventListener
{
    /// <summary>
    /// Handles a single parser event. Only image render events are of interest;
    /// anything else is ignored.
    /// </summary>
    /// <param name="data">Event payload supplied by the canvas processor.</param>
    /// <param name="type">Kind of event that occurred.</param>
    public void EventOccurred(IEventData data, EventType type)
    {
        // Defensive type check: even with the event filter below, never assume the payload type.
        if (!(data is ImageRenderInfo imageData))
        {
            return;
        }

        try
        {
            if (imageData.GetImage() == null)
            {
                Console.WriteLine("Image could not be read.");
                return;
            }

            ImageInfoList.Add(new PDFImageInfo(CurrentPageNumber, imageData));
        }
        catch (Exception ex)
        {
            // Best effort: one unreadable image must not abort extraction of the others.
            Console.WriteLine("Image could not be read: {0}.", ex.Message);
        }
    }

    /// <summary>
    /// Declares which events this listener wants to receive.
    /// </summary>
    /// <returns>
    /// A set containing only <c>EventType.RENDER_IMAGE</c>. Returning <c>null</c> here would
    /// subscribe the listener to every event type, although only image events are used.
    /// </returns>
    public ICollection<EventType> GetSupportedEvents()
    {
        return new HashSet<EventType> { EventType.RENDER_IMAGE };
    }

    /// <summary>Page number attached to images recorded from now on; set by the caller before each page.</summary>
    public int CurrentPageNumber { get; set; }

    /// <summary>Images collected so far, in the order their events arrived.</summary>
    public List<PDFImageInfo> ImageInfoList { get; set; } = new List<PDFImageInfo>();
}