Hi, this answer might not be optimal, but I think it should help you.
First, you need to convert the PDF page to an RGBA image. Then every white pixel is made transparent, so that it disappears when the image is overlaid onto the background.
After that, if the background is smaller than the text image, scale the background up so that the text fits inside it.
Finally put the text image on top of the background and export it to pdf.
Below are the images I tested.
text.jpg

background.jpg
res.png

"""
pip install opencv-python
pip install pymupdf
pip install Pillow
"""
import fitz
import cv2
import numpy as np
from PIL import Image
def pix2np(pix):
    """Convert a pixmap-like object into a contiguous BGR ``uint8`` array.

    Args:
        pix: Object exposing ``samples`` (raw pixel bytes), ``h``, ``w`` and
            ``n`` (bytes per pixel). The caller in this file passes the
            result of ``page.get_pixmap()`` from PyMuPDF.

    Returns:
        A new, writable ``numpy.ndarray`` of shape ``(pix.h, pix.w, 3)`` in
        OpenCV's B, G, R channel order.

    Raises:
        ValueError: If ``len(pix.samples)`` is not ``pix.h * pix.w * pix.n``
            (raised by ``reshape``).
    """
    im = np.frombuffer(pix.samples, dtype=np.uint8).reshape(pix.h, pix.w, pix.n)
    if pix.n < 3:
        # Fewer than three channels means gray (optionally followed by
        # alpha): replicate the luminance channel so downstream BGR code
        # still works instead of failing on the [2, 1, 0] index below.
        return np.ascontiguousarray(np.repeat(im[..., :1], 3, axis=-1))
    # Reverse the first three channels (RGB -> BGR); any further channel,
    # e.g. alpha, is dropped.
    # NOTE(review): assumes the first three channels are R, G, B -- a CMYK
    # pixmap would be misread; confirm the colorspace at the call site.
    return np.ascontiguousarray(im[..., [2, 1, 0]])
def resize(img, scale_percent):
    """Return ``img`` scaled uniformly by ``scale_percent``.

    Args:
        img: Image array whose first two axes are (height, width).
        scale_percent: Scale factor expressed as a percentage; ``100`` keeps
            the size, ``50`` halves each side, ``200`` doubles it. Note that
            this is a percentage, not a ratio.

    Returns:
        The resized image as produced by ``cv2.resize``.

    Raises:
        ValueError: If ``scale_percent`` is not positive.
    """
    if scale_percent <= 0:
        raise ValueError(
            'scale_percent must be positive, got {!r}'.format(scale_percent))
    # int() truncates, so a small image with a small percentage could end up
    # with a 0-pixel side, which cv2.resize rejects; keep at least one pixel.
    width = max(1, int(img.shape[1] * scale_percent / 100))
    height = max(1, int(img.shape[0] * scale_percent / 100))
    # INTER_AREA is the recommended filter for shrinking, but behaves like
    # nearest-neighbour when enlarging, so switch to INTER_LINEAR for that.
    if scale_percent < 100:
        interpolation = cv2.INTER_AREA
    else:
        interpolation = cv2.INTER_LINEAR
    return cv2.resize(img, (width, height), interpolation=interpolation)
TEXT_PDF = 'text.pdf'
BACKGROUND_IMAGE = 'background.jpg'
# Gray levels below this count as text when locating the text bounding box.
DARK_THRESHOLD = 128
# A pixel whose B, G and R values are all at least this is treated as white.
WHITE_THRESHOLD = 250


def crop_to_text(im, dark_threshold=DARK_THRESHOLD):
    """Crop a BGR image to the bounding box of its dark (text) pixels.

    Returns ``im`` unchanged when it contains no pixel darker than
    ``dark_threshold`` (e.g. a blank page), because there is nothing to
    compute a bounding box from.
    """
    # white border removed and keep the text
    # https://stackoverflow.com/a/49907762/7828101
    gray = cv2.cvtColor(im, cv2.COLOR_BGR2GRAY)
    ink = 255 * (gray < dark_threshold).astype(np.uint8)  # text becomes white
    coords = cv2.findNonZero(ink)
    if coords is None:
        return im
    x, y, w, h = cv2.boundingRect(coords)
    # Crop the original image, not the thresholded mask.
    return im[y:y + h, x:x + w]


def white_to_transparent(bgr, threshold=WHITE_THRESHOLD):
    """Return a BGRA copy of ``bgr`` in which near-white pixels are transparent.

    A pixel becomes fully transparent (alpha 0) when all three colour
    channels are ``>= threshold``; every other pixel is fully opaque.
    """
    alpha = np.full(bgr.shape[:2], 255, dtype=bgr.dtype)
    alpha[np.all(bgr[..., :3] >= threshold, axis=-1)] = 0
    return np.dstack((bgr[..., :3], alpha))


def fitted_size(bg_height, bg_width, min_height, min_width):
    """Return the (height, width) the background needs so the text fits.

    The size is unchanged when the background is already large enough.
    Otherwise it is the smallest enlargement that keeps the aspect ratio
    and is at least ``min_height`` x ``min_width``. Integer arithmetic with
    ceiling division guarantees the result is never a pixel too small.
    """
    if bg_height >= min_height and bg_width >= min_width:
        return bg_height, bg_width
    # Compare min_height/bg_height with min_width/bg_width by
    # cross-multiplying, so the larger scale factor wins without any float
    # rounding.
    if min_height * bg_width >= min_width * bg_height:
        scaled_width = (bg_width * min_height + bg_height - 1) // bg_height
        return min_height, scaled_width
    scaled_height = (bg_height * min_width + bg_width - 1) // bg_width
    return scaled_height, min_width


def fit_background(background, min_height, min_width):
    """Enlarge ``background`` (if needed) to at least the given size."""
    height, width = fitted_size(
        background.shape[0], background.shape[1], min_height, min_width)
    if (height, width) == tuple(background.shape[:2]):
        return background
    # cv2.resize is called directly with exact pixel dimensions: going
    # through a percentage and int() truncation could come out one pixel
    # short, and the overlay below needs the text to fit.
    return cv2.resize(background, (width, height),
                      interpolation=cv2.INTER_LINEAR)


def overlay_centered(background, overlay):
    """Alpha-blend a BGRA ``overlay`` onto the centre of a BGRA ``background``.

    Returns a new array; ``background`` itself is not modified, so the same
    background can be reused for every page. The background's own alpha
    channel is left as it is.

    Raises:
        ValueError: If the overlay does not fit inside the background.
    """
    # Merge two image
    # https://stackoverflow.com/a/14102014/7828101
    height, width = overlay.shape[:2]
    if background.shape[0] < height or background.shape[1] < width:
        raise ValueError('background is smaller than the overlay')
    top = (background.shape[0] - height) // 2
    left = (background.shape[1] - width) // 2
    merged = background.copy()
    region = merged[top:top + height, left:left + width, :3]
    # Opacity in [0, 1], kept 3-D so it broadcasts over the colour channels.
    alpha = overlay[:, :, 3:4] / 255.0
    blended = alpha * overlay[:, :, :3] + (1.0 - alpha) * region
    # Floats are truncated back to uint8.
    region[...] = blended.astype(np.uint8)
    return merged


def main():
    """Overlay the text of every page of ``text.pdf`` onto ``background.jpg``.

    For each page this writes ``res.png`` and ``<page_num>_res.pdf`` in the
    current directory.

    Raises:
        OSError: If the background image cannot be read.
    """
    # Read the background once; cv2.imread signals failure by returning
    # None rather than raising.
    background = cv2.imread(BACKGROUND_IMAGE)
    if background is None:
        raise OSError(
            'could not read background image: {}'.format(BACKGROUND_IMAGE))
    background = cv2.cvtColor(background, cv2.COLOR_BGR2BGRA)

    mat = fitz.Matrix(1, 1)
    # fitz to opencv image
    # https://study.marearts.com/2020/04/pdf-to-opencv-as-page-by-page-using.html
    with fitz.open(TEXT_PDF) as doc:
        for page_num, page in enumerate(doc.pages()):
            pix = page.get_pixmap(matrix=mat)
            text = white_to_transparent(crop_to_text(pix2np(pix)))
            canvas = fit_background(background, text.shape[0], text.shape[1])
            merged = overlay_centered(canvas, text)
            # NOTE: the same PNG name is used for every page, so for a
            # multi-page PDF only the last page's PNG survives.
            cv2.imwrite('res.png', merged)
            rgb = cv2.cvtColor(merged, cv2.COLOR_BGRA2RGB)
            Image.fromarray(rgb).save('{}_res.pdf'.format(page_num))


if __name__ == '__main__':
    main()