2

Here is my function to convert a PIL image into a base64 string:

# input: single PIL image
def image_to_base64(self, image):
    """Encode an image as a base64 text string, printing per-stage timings.

    The image is written as PNG into an in-memory buffer, the buffer contents
    are base64-encoded, and the encoded bytes are decoded to text. After each
    of the four stages the elapsed wall-clock time is printed to stdout.

    Args:
        self: Never referenced in the body.
        image: Object providing ``save(fp, format=...)`` (a PIL image).

    Returns:
        The base64 representation of the PNG data, as a string.
    """
    png_buffer = BytesIO()

    # Stage 1: serialise the image to PNG in memory.
    started = time.time()
    image.save(png_buffer, format='PNG')
    elapsed = time.time() - started
    print('--image.save:' + str(elapsed))

    # Stage 2: pull the raw bytes out of the buffer.
    started = time.time()
    png_bytes = png_buffer.getvalue()
    elapsed = time.time() - started
    print('--output_buffer.getvalue:' + str(elapsed))

    # Stage 3: base64-encode the bytes.
    started = time.time()
    b64_bytes = base64.b64encode(png_bytes)
    elapsed = time.time() - started
    print('--base64.b64encode:' + str(elapsed))

    # Stage 4: turn the encoded bytes into text.
    started = time.time()
    b64_text = b64_bytes.decode("utf-8")
    elapsed = time.time() - started
    print('--encoded_input_string.decode:' + str(elapsed))

    return b64_text

My output:

--image.save:1.05138802528

--output_buffer.getvalue:0.000611066818237

--base64.b64encode:0.01047706604

--encoded_input_string.decode:0.0172328948975

As we can see, the function is pathetically slow. How can we improve this?

[Edit]

Ok! Here is the full example

import time
import requests
import base64
from PIL import Image
from io import BytesIO


# input: single PIL image
def image_to_base64(image):
    """Return the PNG encoding of ``image`` as a base64 string.

    Each of the four stages (PNG save, buffer read-out, base64 encoding,
    text decoding) is timed and its duration is printed to stdout.

    Args:
        image: Object providing ``save(fp, format=...)`` (a PIL image).

    Returns:
        The base64 representation of the PNG data, as a string.
    """
    def report(label, began):
        # Message layout: the label immediately followed by elapsed seconds.
        print(label + str(time.time() - began))

    sink = BytesIO()

    began = time.time()
    image.save(sink, format='PNG')
    report('--image.save:', began)

    began = time.time()
    raw_png = sink.getvalue()
    report('--output_buffer.getvalue:', began)

    began = time.time()
    encoded = base64.b64encode(raw_png)
    report('--base64.b64encode:', began)

    began = time.time()
    text = encoded.decode("utf-8")
    report('--encoded_input_string.decode:', began)

    return text

# Download the reference image and run the timed conversion on it.
img_url = "https://www.cityscapes-dataset.com/wordpress/wp-content/uploads/2015/07/stuttgart03.png"
response = requests.get(img_url)
# Stop on an HTTP error status instead of handing an error page to PIL,
# which would otherwise fail later with an unrelated decoding error.
response.raise_for_status()
img = Image.open(BytesIO(response.content))
input_string = image_to_base64(img)

The bottleneck here is

image.save(output_buffer, format='PNG')

which transforms the PIL image into bytes. It would be nice if I could speed up this step.

Community
  • 1
  • 1
王智寬
  • 415
  • 1
  • 5
  • 17
  • There are faster third-party modules for base64 en- and decoding if you look for them. For performance testing, you really need to provide a reference test image. – martineau Nov 05 '19 at 07:01
  • I found `pyvips` to be 13x faster than PIL in a comparison here... https://stackoverflow.com/a/56718821/2836621 – Mark Setchell Nov 05 '19 at 08:04

2 Answers

2

As suggested in the comments, I tried pyvips as below:

#!/usr/bin/env python3
import requests
import base64
import numpy as np
from PIL import Image
from io import BytesIO
from cv2 import imencode
import pyvips

def vips_2PNG(image,compression=6):
    """Encode an image to PNG bytes using pyvips.

    The image is converted to a NumPy array, wrapped as a pyvips image
    without going through a file, and written to an in-memory PNG.

    Args:
        image: Anything ``np.array`` turns into a 2-D (height, width) or
            3-D (height, width, bands) array, e.g. a PIL image.
        compression: Value passed as the PNG ``compression`` save option.

    Returns:
        The buffer returned by ``write_to_buffer``.

    Raises:
        KeyError: If the array dtype has no entry in the format table below.
    """
    # Convert PIL Image to Numpy array
    na = np.array(image)
    if na.ndim == 2:
        # Single-channel images come back without a band axis; add one so the
        # three-way unpacking below works for them as well.
        na = na[:, :, np.newaxis]
    height, width, bands = na.shape

    # Convert Numpy array to Vips image
    dtype_to_format = {
       'uint8': 'uchar',
       'int8': 'char',
       'uint16': 'ushort',
       'int16': 'short',
       'uint32': 'uint',
       'int32': 'int',
       'float32': 'float',
       'float64': 'double',
       'complex64': 'complex',
       'complex128': 'dpcomplex',
    }
    # pyvips is given a flat buffer plus the geometry needed to interpret it.
    linear = na.reshape(width * height * bands)
    vi = pyvips.Image.new_from_memory(linear.data, width, height, bands,dtype_to_format[str(na.dtype)])

    # Save to memory buffer as PNG
    data = vi.write_to_buffer(f".png[compression={compression}]")
    return data

def vips_including_reading_from_disk(image):
    """Load an image file with pyvips and re-encode it as an in-memory PNG.

    Args:
        image: Path of the file to load (``str`` or ``os.PathLike``). Any
            other value is ignored and ``'stuttgart.png'`` is read instead,
            which is what this function always did before it honoured its
            argument.

    Returns:
        The buffer returned by ``write_to_buffer('.png')``.
    """
    import os

    # Previously the argument was silently discarded and the path was
    # hard-coded; use the caller's path when one is actually supplied.
    if isinstance(image, (str, os.PathLike)):
        path = os.fspath(image)
    else:
        path = 'stuttgart.png'

    # Load image from disk
    vips_image = pyvips.Image.new_from_file(path, access='sequential')
    # Save to memory buffer as PNG
    data = vips_image.write_to_buffer('.png')
    return data

def faster(image):
    """Encode an image to PNG with OpenCV's ``imencode``.

    Args:
        image: Image convertible with ``np.array``. Assumed to be a PIL image
            whose channels are in RGB or RGBA order, as in this script.

    Returns:
        The encoded buffer returned by ``imencode`` (not a ``bytes`` object).

    Raises:
        ValueError: If ``imencode`` reports that encoding failed.
    """
    image_arr = np.array(image)
    # OpenCV's encoders interpret colour data as BGR(A). Passing the RGB(A)
    # array unchanged produces a PNG with red and blue swapped, so reorder the
    # channels first. Arrays without a 3- or 4-channel axis are left alone.
    if image_arr.ndim == 3:
        channels = image_arr.shape[2]
        if channels == 3:
            image_arr = np.ascontiguousarray(image_arr[:, :, ::-1])
        elif channels == 4:
            image_arr = np.ascontiguousarray(image_arr[:, :, [2, 1, 0, 3]])
    ok, byte_data = imencode('.png', image_arr)
    if not ok:
        # The status flag used to be discarded, hiding failed encodes.
        raise ValueError('imencode failed to encode the image as PNG')
    return byte_data

def orig(image, faster=True):
    """Encode ``image`` to PNG through its own ``save`` method.

    Args:
        image: Object providing ``save(fp, format=...)`` (a PIL image).
        faster: Accepted but never read by this function.

    Returns:
        The PNG data as ``bytes``.
    """
    sink = BytesIO()
    image.save(sink, format='PNG')
    return sink.getvalue()

# Benchmark setup: the image is opened from a local file; the download URL
# below is kept only for reference.
# img_url = "https://www.cityscapes-dataset.com/wordpress/wp-content/uploads/2015/07/stuttgart03.png"
filename = 'stuttgart.png'
img = Image.open(filename)

# Each commented-out group below exercises one encoder: it prints the size of
# the result and then times the call. `%timeit` is IPython magic, so these
# lines are meant to be uncommented one group at a time in an IPython session.

# r = orig(img)
# print(len(r))
# %timeit r = orig(img)

# r = faster(img)
# print(len(r))
# %timeit r = faster(img)

# r = vips_including_reading_from_disk(filename)
# print(len(r))
# %timeit r = vips_including_reading_from_disk(filename)

# r = vips_2PNG(img,0)
# print(len(r))
# %timeit r = vips_2PNG(img,0)

I was looking at trading off the compression parameter between file size and speed. Here is what I got - I wouldn't compare absolute values, but rather look at the performance relative to each other on my machine:

               Filesize        Time
PIL            1.7MB           1.12s
OpenCV         2.0MB           173ms   <--- COMPARE
vips(comp=0)   6.2MB           66ms
vips(comp=1)   2.0MB           132ms   <--- COMPARE
vips(comp=2)   2.0MB           153ms

I have put arrows next to the ones I would compare.

Mark Setchell
  • 191,897
  • 31
  • 273
  • 432
1

I used `cv2.imencode`, which is 5x faster than before. Here's the code:

import time
import requests
import base64
import numpy as np
from PIL import Image
from io import BytesIO
from cv2 import imencode


# input: single PIL image
def image_to_base64(image, faster=True):
    """Encode an image to PNG and return it as a base64 string, with timings.

    The duration of the PNG encoding, the base64 encoding and the final text
    decoding is printed to stdout.

    Args:
        image: A PIL image. On the ``faster`` path it is converted with
            ``np.array`` and assumed to be in RGB or RGBA channel order.
        faster: When true, encode with OpenCV's ``imencode``; otherwise use
            the image's own ``save`` method.

    Returns:
        The base64 representation of the PNG data, as a string.

    Raises:
        ValueError: If ``imencode`` reports that encoding failed.
    """
    now_time = time.time()
    if faster:
        image_arr = np.array(image)
        # OpenCV's encoders interpret colour data as BGR(A). Passing the
        # RGB(A) array unchanged produces a PNG with red and blue swapped, so
        # reorder the channels first. Other array shapes are left alone.
        if image_arr.ndim == 3:
            channels = image_arr.shape[2]
            if channels == 3:
                image_arr = np.ascontiguousarray(image_arr[:, :, ::-1])
            elif channels == 4:
                image_arr = np.ascontiguousarray(image_arr[:, :, [2, 1, 0, 3]])
        ok, byte_data = imencode('.png', image_arr)
        if not ok:
            # The status flag used to be discarded, hiding failed encodes.
            raise ValueError('imencode failed to encode the image as PNG')
        print('--imencode: ' + str(time.time()-now_time))
    else:
        output_buffer = BytesIO()
        image.save(output_buffer, format='PNG')
        byte_data = output_buffer.getvalue()
        print('--image.save:' + str(time.time()-now_time))

    now_time = time.time()
    encoded_input_string = base64.b64encode(byte_data)
    print('--base64.b64encode: ' + str(time.time()-now_time))

    now_time = time.time()
    input_string = encoded_input_string.decode("utf-8")
    print('--encoded_input_string.decode: ' + str(time.time()-now_time))

    return input_string

# Download the reference image, then time the whole conversion end to end.
img_url = "https://www.cityscapes-dataset.com/wordpress/wp-content/uploads/2015/07/stuttgart03.png"
response = requests.get(img_url)
# Stop on an HTTP error status instead of handing an error page to PIL,
# which would otherwise fail later with an unrelated decoding error.
response.raise_for_status()
img = Image.open(BytesIO(response.content))
now_time = time.time()
input_string = image_to_base64(img, faster=True)
print('total: ' + str(time.time()-now_time))

I wonder if there is any solution which can run faster.

王智寬
  • 415
  • 1
  • 5
  • 17