As @Reti43 has mentioned in the comments, there already exist libraries that do this, but I suspect you just want to play around with a homemade implementation.
I too have been interested in how to implement convolutions manually in Python. Python loops are terribly slow, so if you care about speed you should stay away from pure Python loops and stick to vectorized methods instead.
The best I have so far is to use numpy.lib.stride_tricks.as_strided, which allows you to get very customized views of numpy arrays. I use as_strided to get a sliding window view of the image, then I use np.tensordot to do a "more general matrix multiplication" (docs) with the kernel. Furthermore, numpy 1.20 (iirc) has numpy.lib.stride_tricks.sliding_window_view, which is a slightly less general version of my code below (as of this date), as it does not support custom strides.
import numpy as np
from numpy.lib.stride_tricks import as_strided
def get_sliding_window(x: np.ndarray, k: np.ndarray, rowstride: int, colstride: int):
    imgChannels, imgRows, imgCols = x.shape
    _, kernelRows, kernelCols = k.shape
    u = np.array(x.itemsize)  # Scales the strides from element counts to bytes, which is what as_strided expects
    return as_strided(x,
        shape=((imgRows-kernelRows)//rowstride+1, (imgCols-kernelCols)//colstride+1, imgChannels, kernelRows, kernelCols),
        strides=u*(imgCols*rowstride, colstride, imgRows*imgCols, imgCols, 1)
    )
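# For a (C, N, M) image, a (C, P, Q) kernel and unit strides, the view returned above
# has shape (N-P+1, M-Q+1, C, P, Q): one (C, P, Q) patch of the image per output pixel.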
def conv2d(x: np.ndarray, k: np.ndarray, rowstride: int, colstride: int):
    """
    Performs 2d convolution on images with an arbitrary number of channels where you
    can specify the strides as well.

    x: np.ndarray, image array of shape (C x N x M), where C is number of channels
    k: np.ndarray, convolution kernel of shape (C x P x Q), where C is number of channels
    rowstride: int, "vertical" step size
    colstride: int, "horizontal" step size
    """
    sliding_window_view = get_sliding_window(x, k, rowstride, colstride)
    return np.tensordot(sliding_window_view, k, axes=3)
x = np.array([
    [[1,1,1,1],
     [1,1,1,1],
     [2,2,2,2],
     [2,2,2,2]],

    [[1,1,2,2],
     [1,1,2,2],
     [4,4,8,8],
     [4,4,8,8]]
])

k = np.array([
    [[1,1],
     [1,1]],

    [[1,1],
     [1,1]]
]) / 8
print(conv2d(x,k,1,1))
#[[1. 1.25 1.5 ]
# [2. 2.625 3.25 ]
# [3. 4. 5. ]]
print(conv2d(x,k,2,2))
#[[1. 1.5]
# [3. 5. ]]
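For comparison, here is roughly what the same thing could look like with the built-in sliding_window_view mentioned above (numpy ≥ 1.20). It has no stride parameter itself, but slicing the view afterwards gets you the same effect; the helper name conv2d_swv below is just for illustration:
from numpy.lib.stride_tricks import sliding_window_view

def conv2d_swv(x: np.ndarray, k: np.ndarray, rowstride: int, colstride: int):
    # Window only over the two spatial axes; the view has shape (C, outRows, outCols, P, Q)
    v = sliding_window_view(x, k.shape[1:], axis=(1, 2))
    # sliding_window_view has no stride argument, but slicing the view achieves the same thing
    v = v[:, ::rowstride, ::colstride]
    # Contract the channel and kernel axes against the kernel
    return np.tensordot(v, k, axes=((0, 3, 4), (0, 1, 2)))

print(conv2d_swv(x, k, 1, 1))  # should match conv2d(x, k, 1, 1)
print(conv2d_swv(x, k, 2, 2))  # should match conv2d(x, k, 2, 2)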
Bonus
I implemented a little ASCII visualization to sanity check that the sliding windows are correct:
import time
def conv2d_asciiviz(x: np.ndarray, k: np.ndarray, rowstride: int, colstride: int):
    x = x.copy().astype(object)
    sliding_window_view = get_sliding_window(x, k, rowstride, colstride)
    # Wraps entries in ANSI escape codes so the current window is printed in yellow
    highlighter = np.vectorize(lambda x: f"\x1b[33m{x}\x1b[0m")
    r = np.full(sliding_window_view.shape[:2], np.nan)
    with np.printoptions(nanstr="", formatter={"all": lambda x: str(x)}):
        for i, row in enumerate(sliding_window_view):
            for j, window in enumerate(row):
                temp = window.copy()
                r[i,j] = np.tensordot(window, k, axes=3)
                # Temporarily highlight the current window, redraw in place
                # (clear below the cursor, print, then move the cursor back up)
                window[...] = highlighter(window)
                print(f"\x1b[JChannels:\n{x}\n\nResult:\n{str(r)}\x1b[{x.shape[0]*x.shape[1]+len(r)+4}A")
                window[...] = temp
                time.sleep(0.69)
    # Move the cursor back down past the drawing before returning
    print(f"\x1b[{x.shape[0]*x.shape[1]+len(r)+4}B")
    return r
print("Output:\n",conv2d(x,k,1,1))
