loading matplotlib object into reportlab

Question

I'm trying to load a matplotlib object into reportlab. Here is my code:

from reportlab.pdfgen import canvas
from reportlab.lib.utils import ImageReader
from reportlab.platypus import Paragraph, SimpleDocTemplate, Spacer, Image
from matplotlib import pyplot as plt

def __get_img_data():
    """
    Return the binary PNG data of the current pyplot figure.

    The figure is written to a uniquely named temporary ``.png`` file,
    read back into memory, and the temporary files are removed again.

    Returns:
        The raw bytes of the saved PNG image.
    """
    # NamedTemporaryFile only reserves a unique name here; close the handle
    # right away so it is not leaked and the file can be removed later.
    img_file = NamedTemporaryFile(delete=False)
    img_file.close()
    png_path = img_file.name + '.png'
    try:
        # Name the .png target explicitly rather than relying on savefig
        # to append the extension to an extension-less file name.
        plt.savefig(png_path)
        with open(png_path, 'rb') as png_file:
            return png_file.read()
    finally:
        # Remove both files even when saving or reading failed.
        for path in (img_file.name, png_path):
            if os.path.exists(path):
                os.remove(path)

def get_plot():
    """
    Plot, capture the image bytes of the plot, and close the figure.

    Returns:
        The binary image data produced by ``__get_img_data``.
    """
    # HERE I PLOT SOME STUFF
    try:
        return __get_img_data()
    finally:
        # Close the figure even when capturing the image fails, so an
        # error does not leave the figure open in pyplot.
        plt.close()

class NumberedCanvas(canvas.Canvas):
    """Canvas subclass handed to ``doc.build`` as ``canvasmaker``."""

    def __init__(self):
        """Placeholder constructor: performs no initialisation."""

class ReportTemplate:
    """Sketch of the report builder that embeds the plot image."""

    def __init__(self):
        """Placeholder constructor: performs no initialisation."""

    def _header_footer(self, canvas, doc):
        """Placeholder page callback; used for first and later pages."""

    def get_data(self):
        """
        Assemble the report elements, including the plot, and build it.

        The plot bytes are written to ``/tmp/x.png`` so that ``Image`` can
        be given a file path; the file is removed again before the
        document is built.
        """
        elements = ['hello']
        ## HERE I WANT TO ADD THE IMAGE
        png_bytes = get_plot()
        with open('/tmp/x.png', 'wb') as png_out:
            png_out.write(png_bytes)
        plot_image = Image('/tmp/x.png', width=usable_width, height=usable_width)
        elements.append(plot_image)
        os.remove('/tmp/x.png')
        ######
        doc.build(
            elements,
            onFirstPage=self._header_footer,
            onLaterPages=self._header_footer,
            canvasmaker=NumberedCanvas,
        )
        # blah blah
        return obj

My goal is to insert the plot image into the report. This works fine, but I do not want to write to a temporary file. I tried installing PIL because I've read that some people do it with PIL's image library, but as soon as I install PIL, another part of my code breaks due to incompatible Pillow versions.

score 15 · Accepted Answer · edited May 23 '17 at 11:47

pdfrw documentation sucks

The sole reason the pdfrw example discussed in the first answer to this question is a bit klunky is because the pdfrw documentation sucks badly. Due to the sucky doc, that example's author @Larry-Meyn used the vectorpdf extension for rst2pdf as a starting point, and that extension is not really documented either, and has to deal with the quirks of rst2pdf as well as pdfrw (and is more general than you need, in that it can let rst2pdf display an arbitrary rectangle from an arbitrary page of a preexisting PDF). It's amazing that Larry managed to make it work at all, and my hat's off to him.

I am perfectly qualified to say this, because I am the author of pdfrw and made a few contributions to rst2pdf, including that vectorpdf extension.

But you probably want to use pdfrw anyway

I wasn't really paying attention to stackoverflow until a month ago, and pdfrw itself languished for a few years, but I'm here now, and I think it would behoove you to take another look at pdfrw, even though the documentation still sucks.

Why? Because if you output to a png file, your image will be rasterized, and if you use pdfrw, it will remain in vector format, which means that it will look nice at any scale.

So I modified your answer's png example

Your png example wasn't quite a complete program -- the parameters to doc.build weren't defined, styles wasn't defined, it was missing a few imports, etc. But it was close enough to garner some intent and get it working.

Edit -- I just noticed that this example was actually a modified version of Larry's example, so that example is still very valuable because it's a bit more full-featured than this in some ways.

After I fixed those issues and got some output, I added an option to be able to use png or pdf, so you can see the difference. The program below will create two different PDF files, and you can compare the results for yourself.

import cStringIO
import io

from matplotlib import pyplot as plt
from pdfrw import PdfReader, PdfDict
from pdfrw.buildxobj import pagexobj
from pdfrw.toreportlab import makerl
from reportlab.lib.enums import TA_CENTER, TA_LEFT, TA_RIGHT
from reportlab.lib.styles import getSampleStyleSheet
from reportlab.lib.units import inch
from reportlab.lib.utils import ImageReader
from reportlab.pdfgen import canvas
from reportlab.platypus import Paragraph, SimpleDocTemplate, Spacer, Image, Flowable

# Sample stylesheet shared by the report code below.
styles = getSampleStyleSheet()
# Paragraph style looked up from that stylesheet under the 'Normal' key.
style = styles['Normal']

def form_xo_reader(imgdata):
    """
    Read a one-page PDF and return ``pagexobj`` applied to its page.

    Args:
        imgdata: PDF source handed directly to ``PdfReader``.

    Raises:
        ValueError: If the PDF does not contain exactly one page (raised
            by the tuple unpacking).
    """
    pages = PdfReader(imgdata).pages
    (only_page,) = pages
    return pagexobj(only_page)


class PdfImage(Flowable):
    """
    Flowable that draws either an embedded PDF page or a raster image.

    ``img_data`` may be a pdfrw ``PdfDict`` (such as the value returned by
    ``form_xo_reader``), which is drawn as a form, or any other object,
    which is passed to ``canv.drawImage`` (for example an ``ImageReader``).
    """

    def __init__(self, img_data, width=200, height=200):
        """
        Store the image and the size it should be drawn at.

        Args:
            img_data: The image to draw (see the class docstring).
            width: Width of the drawn image, in canvas units.
            height: Height of the drawn image, in canvas units.
        """
        self.img_width = width
        self.img_height = height
        self.img_data = img_data

    def wrap(self, width, height):
        """Return the fixed (width, height) of the image.

        The ``width`` and ``height`` arguments are ignored.
        """
        return self.img_width, self.img_height

    def drawOn(self, canv, x, y, _sW=0):
        """
        Draw the image on ``canv`` with its lower-left corner at (x, y).

        Args:
            canv: Canvas to draw on.
            x: Horizontal position before alignment is applied.
            y: Vertical position.
            _sW: Spare horizontal space; when positive and ``hAlign`` is
                set, ``x`` is shifted to centre or right-align the image.

        Raises:
            ValueError: If ``hAlign`` is set to an unsupported value.
        """
        if _sW > 0 and hasattr(self, 'hAlign'):
            a = self.hAlign
            if a in ('CENTER', 'CENTRE', TA_CENTER):
                x += 0.5*_sW
            elif a in ('RIGHT', TA_RIGHT):
                x += _sW
            elif a not in ('LEFT', TA_LEFT):
                raise ValueError("Bad hAlign value " + str(a))
        canv.saveState()
        try:
            img = self.img_data
            if isinstance(img, PdfDict):
                # float() keeps the scale factors fractional on Python 2
                # even when both operands are integers.
                xscale = float(self.img_width) / float(img.BBox[2])
                yscale = float(self.img_height) / float(img.BBox[3])
                canv.translate(x, y)
                canv.scale(xscale, yscale)
                canv.doForm(makerl(canv, img))
            else:
                canv.drawImage(img, x, y, self.img_width, self.img_height)
        finally:
            # Always undo saveState so a drawing error cannot leave the
            # canvas graphics state unbalanced.
            canv.restoreState()

def make_report(outfn, use_pdfrw):
    """
    Build a demo PDF with a matplotlib plot between filler paragraphs.

    Args:
        outfn: Name of the PDF file to create.
        use_pdfrw: If true, the plot is saved as PDF and read with
            ``form_xo_reader``; otherwise it is saved as PNG and read
            with ``ImageReader``.
    """
    fig = plt.figure(figsize=(4, 3))
    try:
        plt.plot([1,2,3,4],[1,4,9,26])
        plt.ylabel('some numbers')
        # io.BytesIO exists on Python 2.6+ and Python 3, whereas the
        # cStringIO module is only available on Python 2.
        imgdata = io.BytesIO()
        fig.savefig(imgdata, format='pdf' if use_pdfrw else 'png')
    finally:
        # pyplot keeps a figure alive until it is closed; release it so
        # repeated calls do not accumulate open figures.
        plt.close(fig)
    imgdata.seek(0)
    reader = form_xo_reader if use_pdfrw else ImageReader
    image = reader(imgdata)

    doc = SimpleDocTemplate(outfn)
    style = styles["Normal"]
    img = PdfImage(image, width=200, height=200)

    def filler():
        # One run of ten numbered paragraphs, each followed by a small gap.
        flowables = []
        for i in range(10):
            flowables.append(Paragraph("Paragraph number %s. " % i, style))
            flowables.append(Spacer(1, 0.2*inch))
        return flowables

    # Top spacer, paragraphs, the plot, then the same paragraphs again.
    story = [Spacer(0, inch)] + filler() + [img] + filler()
    doc.build(story)

# Generate both variants so the results can be compared: the first goes
# through PNG, the second embeds the plot as PDF via pdfrw.
make_report("hello_png.pdf", False)
make_report("hello_pdf.pdf", True)

What are the downsides to this approach?

The first obvious downside is that there is now a requirement for pdfrw, but that's available from PyPI.

The next downside is that if you are putting a lot of matplotlib plots into a document, I think this technique will replicate resources such as fonts, because I don't believe that reportlab is smart enough to notice the duplicates.

I believe this problem can be solved by outputting all your plots to different pages of a single PDF. I haven't actually tried that with matplotlib, but pdfrw is perfectly capable of converting each page of an existing pdf to a separate flowable.

So if you have a lot of plots and it's making your final PDF too big, you could look into that, or just try one of the PDF optimizers out there and see if it helps. In any case, that's a different problem for a different day.

That's a pretty good point. The plot axis labels are rendered as text proving that you're right - pdfrw does keep the actual information in the pdf. I was not sure about this. Thanks — max, Aug 17 '15 at 19:43
yep, pdfrw docs suck. Creating PDF in Python is in a sorry state ... — oz123, Aug 13 '16 at 08:28
This is nice, what a difference in quality. Thanks for sharing. — Tunn, Jan 09 '18 at 16:06
Many thanks to Patrick for his pdfrw code, his examples (here and for pdfrw) and his kind words about my somewhat complicated code. I noticed his comment about possibly using matplotlib PdfPages to reduce PDF size when inserting multiple plots, so I added a new answer using this idea, based on the example given here. See: https://stackoverflow.com/a/50765404/1902611 — Larry Meyn, Jun 08 '18 at 20:22
Thanks for the code! I'm having some trouble while trying to set the position of an image with the method drawOn. What should I pass to its first argument? — Murilo Sitonio, May 23 '19 at 20:27
drawOn parameters are in points (1/72 inch), and the origin is normally the lower left corner of the document. x values increase from left to right, and y values increase from bottom to top. — Patrick Maupin, May 23 '19 at 21:09
@PatrickMaupin thanks for your answer! But I'm having problems with the canv argument. I made a more complete answer here: https://stackoverflow.com/questions/56293130/how-to-set-the-position-of-an-image-in-reportlab-using-pdfrw If you could help I will be immensely grateful! — Murilo Sitonio, May 24 '19 at 12:56
Tiny note for users of a recent python: import StringIO needs to be replaced with import io, the call to the Image is then via imgdata = io.BytesIO() — Rriskit, Jan 19 '21 at 16:31

score 2 · Answer 2 · edited May 23 '17 at 12:34

I found 2 solutions:

1: using a package called pdfrw: Is there a matplotlib flowable for ReportLab?

2: a simpler cleaner way:

class PdfImage(Flowable):
    """
    Flowable that draws an image at a fixed size via ``canv.drawImage``.

    ``img_data`` is passed to ``drawImage`` unchanged (for example an
    ``ImageReader`` wrapping PNG data).
    """

    def __init__(self, img_data, width=200, height=200):
        """
        Store the image and the size it should be drawn at.

        Args:
            img_data: The image object handed to ``canv.drawImage``.
            width: Width of the drawn image, in canvas units.
            height: Height of the drawn image, in canvas units.
        """
        self.img_width = width
        self.img_height = height
        self.img_data = img_data

    def wrap(self, width, height):
        """Return the fixed (width, height) of the image.

        The ``width`` and ``height`` arguments are ignored.
        """
        return self.img_width, self.img_height

    def drawOn(self, canv, x, y, _sW=0):
        """
        Draw the image on ``canv`` with its lower-left corner at (x, y).

        Args:
            canv: Canvas to draw on.
            x: Horizontal position before alignment is applied.
            y: Vertical position.
            _sW: Spare horizontal space; when positive and ``hAlign`` is
                set, ``x`` is shifted to centre or right-align the image.

        Raises:
            ValueError: If ``hAlign`` is set to an unsupported value.
        """
        if _sW > 0 and hasattr(self, 'hAlign'):
            a = self.hAlign
            if a in ('CENTER', 'CENTRE', TA_CENTER):
                x += 0.5*_sW
            elif a in ('RIGHT', TA_RIGHT):
                x += _sW
            elif a not in ('LEFT', TA_LEFT):
                raise ValueError("Bad hAlign value " + str(a))
        canv.saveState()
        try:
            canv.drawImage(self.img_data, x, y, self.img_width, self.img_height)
        finally:
            # Always undo saveState so a drawing error cannot leave the
            # canvas graphics state unbalanced.
            canv.restoreState()


def make_report():
    """
    Build ``hello.pdf`` with a PNG matplotlib plot between paragraphs.

    The plot is rendered into an in-memory buffer, wrapped in an
    ``ImageReader`` and placed in the story as a ``PdfImage``.
    """
    fig = plt.figure(figsize=(4, 3))
    try:
        plt.plot([1,2,3,4],[1,4,9,26])
        plt.ylabel('some numbers')
        # io.BytesIO exists on Python 2.6+ and Python 3, whereas the
        # cStringIO module is only available on Python 2.
        imgdata = io.BytesIO()
        fig.savefig(imgdata, format='png')
    finally:
        # pyplot keeps a figure alive until it is closed; release it so
        # repeated calls do not accumulate open figures.
        plt.close(fig)
    imgdata.seek(0)
    image = ImageReader(imgdata)

    doc = SimpleDocTemplate("hello.pdf")
    style = styles["Normal"]
    img = PdfImage(image, width=200, height=200)

    def filler():
        # One run of ten numbered paragraphs, each followed by a small gap.
        flowables = []
        for i in range(10):
            flowables.append(Paragraph("Paragraph number %s. " % i, style))
            flowables.append(Spacer(1, 0.2*inch))
        return flowables

    # Top spacer, paragraphs, the plot, then the same paragraphs again.
    story = [Spacer(0, inch)] + filler() + [img] + filler()

    # NOTE(review): myFirstPage, myLaterPages and PageNumCanvas are not
    # defined in this snippet; the surrounding program must provide them.
    doc.build(story, onFirstPage=myFirstPage, onLaterPages=myLaterPages, canvasmaker=PageNumCanvas)

loading matplotlib object into reportlab

2 Answers

pdfrw documentation sucks

But you probably want to use pdfrw anyway

So I modified your answer's png example

What are the downsides to this approach?

Linked