How to add background image in pdf using Pymupdf module in python

Question

I am trying to add a background image to a PDF using PyMuPDF, but it creates a layer between the PDF content and the image, as you can see in the output.

How can I bypass (remove) the layer between the PDF and the background image? Please help me regarding this.

This is how I am adding the background image in the pdf here:

import fitz

# Input PDF, output PDF and the image to place behind the page content.
pdf_name = '3_giberish template.pdf'
# NOTE(review): the original snippet never defined the output path (nor the
# `input_file` it re-opened); this is a placeholder -- set it as needed.
output_file_path = 'output.pdf'
background_img_filename = 'background.png'

# Open the document once; re-opening it under the same name would only
# leak the first handle.
doc = fitz.open(pdf_name)
# open page first
page = doc.load_page(0)
# insert background image to the full page: use the page's own rectangle
# instead of a hard-coded size so any page format is covered
full_img_rect = page.rect
# overlay=False puts the image behind the existing page content
page.insert_image(full_img_rect, filename=background_img_filename, overlay=False)
# save doc
doc.save(output_file_path, garbage=4, deflate=True, clean=True)
doc.close()
print("completed")

please get output link from here: https://imgur.com/a/9uryHYP — Prabhat, Nov 18 '21 at 08:53
Do you mean you want only the text to appear on 'background.png', not the white background? — manaclan, Nov 18 '21 at 09:26
Hi @manaclan thanks for response, you are right , this is what I want exactly. — Prabhat, Nov 18 '21 at 17:59

score 0 · Answer 1 · answered Nov 19 '21 at 05:44

Hi this answer might not be optimal but I think it would help you.
First you need to convert the PDF page to an RGBA image. Then every white pixel is made transparent, so that it disappears when the page is overlaid onto the background.
After that, if the background is smaller than the text image, scale it up so that the text fits inside it.
Finally put the text image on top of the background and export it to pdf.
Below are the images I tested.
text.jpg

background.jpg res.png

"""
pip install opencv-python
pip install pymupdf
pip install Pillow
"""

import fitz
import cv2
import numpy as np
from PIL import Image


def pix2np(pix):
    """Convert a pixmap-like object into a 3-channel BGR ``uint8`` array.

    Args:
        pix: Object exposing ``samples`` (a bytes-like buffer holding
            ``h * w * n`` bytes), ``h``, ``w`` and ``n`` (components per
            pixel), e.g. the result of ``page.get_pixmap()``.

    Returns:
        A C-contiguous ``numpy.ndarray`` of shape ``(h, w, 3)``. Any
        components beyond the first three (such as alpha) are dropped.
    """
    im = np.frombuffer(pix.samples, dtype=np.uint8).reshape(pix.h, pix.w, pix.n)
    if pix.n < 3:
        # Fewer than three components: indexing channels [2, 1, 0] would
        # raise IndexError. Treat the first component as gray and replicate
        # it so callers still get a 3-channel image.
        return np.ascontiguousarray(np.repeat(im[..., :1], 3, axis=2))
    # assumes the first three components are R, G, B -- reorder to B, G, R
    im = np.ascontiguousarray(im[..., [2, 1, 0]])  # rgb to bgr
    return im

def resize(img, scale_percent):
    """Return ``img`` scaled on both axes by ``scale_percent`` percent.

    Args:
        img: Image array whose first two axes are (height, width).
        scale_percent: Scale factor expressed as a percentage: 100 keeps
            the size, 50 halves each side, 200 doubles each side.

    Returns:
        The image resampled with ``cv2.resize`` using ``cv2.INTER_AREA``.

    Raises:
        ValueError: If ``scale_percent`` is not positive.
    """
    if scale_percent <= 0:
        raise ValueError(
            "scale_percent must be positive, got {!r}".format(scale_percent)
        )
    # Truncation can yield 0 for tiny scales, which cv2.resize rejects;
    # never go below one pixel per side.
    width = max(1, int(img.shape[1] * scale_percent / 100))
    height = max(1, int(img.shape[0] * scale_percent / 100))
    dim = (width, height)  # cv2 expects (width, height)

    # resize image
    return cv2.resize(img, dim, interpolation=cv2.INTER_AREA)

# Composite every page of 'text.pdf' onto 'background.jpg': the page is
# rendered, cropped to its dark content, near-white pixels are made
# transparent, and the result is centred on the background. Each page is
# written to '<page_num>_res.pdf'; 'res.png' holds the most recent page.

# Pixels whose B, G and R values are all at or above this count as white.
WHITE_THRESHOLD = 250

# Read the background once; every page blends into its own copy.
background_src = cv2.imread('background.jpg')
if background_src is None:
    # cv2.imread reports a missing/unreadable file by returning None.
    raise FileNotFoundError("could not read 'background.jpg'")
background_src = cv2.cvtColor(background_src, cv2.COLOR_BGR2BGRA)

doc = fitz.open('text.pdf')
# fitz to opencv image
# https://study.marearts.com/2020/04/pdf-to-opencv-as-page-by-page-using.html
for page_num, page in enumerate(doc.pages()):
    mat = fitz.Matrix(1, 1)
    pix = page.get_pixmap(matrix=mat)
    im = pix2np(pix)

    # white border removed and keep the text
    # https://stackoverflow.com/a/49907762/7828101
    gray = cv2.cvtColor(im, cv2.COLOR_BGR2GRAY)
    gray = 255 * (gray < 128).astype(np.uint8)  # invert the text to white
    coords = cv2.findNonZero(gray)  # all non-zero points (text)
    if coords is None:
        # Blank page: there is nothing to crop to, keep the whole page.
        rect = im
    else:
        x, y, w, h = cv2.boundingRect(coords)  # minimum spanning bounding box
        rect = im[y:y + h, x:x + w]  # crop the original (colour) image

    # convert white background to transparent background: alpha becomes 0
    # wherever all three colour channels are near-white
    new_img = cv2.cvtColor(rect, cv2.COLOR_BGR2BGRA)
    is_white = np.all(new_img[:, :, :3] >= WHITE_THRESHOLD, axis=2)
    new_img[is_white, 3] = 0

    # paste result image to background
    background = background_src.copy()
    text_h, text_w = new_img.shape[:2]
    bg_h, bg_w = background.shape[:2]
    # Enlarge the background (keeping its aspect ratio) until the text fits.
    scale = max(text_h / bg_h, text_w / bg_w)
    if scale > 1:
        # Round up and clamp so truncation can never leave the background
        # one pixel smaller than the text image.
        target_w = max(text_w, int(np.ceil(bg_w * scale)))
        target_h = max(text_h, int(np.ceil(bg_h * scale)))
        # INTER_LINEAR because this is an enlargement.
        background = cv2.resize(background, (target_w, target_h),
                                interpolation=cv2.INTER_LINEAR)
        bg_h, bg_w = background.shape[:2]

    # top-left corner that centres the text image on the background
    y_position = (bg_h - text_h) // 2
    x_position = (bg_w - text_w) // 2

    # Merge two image
    # https://stackoverflow.com/a/14102014/7828101
    alpha_s = new_img[:, :, 3:4] / 255.0  # text weight, broadcast over B, G, R
    alpha_l = 1.0 - alpha_s               # background weight
    region = background[y_position:y_position + text_h,
                        x_position:x_position + text_w, :3]
    # The right-hand side is fully evaluated before being written back
    # into the view of `background`.
    region[:] = alpha_s * new_img[:, :, :3] + alpha_l * region

    cv2.imwrite('res.png', background)  # overwritten on every page
    rgb = cv2.cvtColor(background, cv2.COLOR_BGRA2RGB)
    im_pil = Image.fromarray(rgb)
    im_pil.save('{}_res.pdf'.format(page_num))

How to add background image in pdf using Pymupdf module in python

1 Answers1