I am performing DCT(in Raspberry Pi). I've broken the image into 8x8 blocks. Initially I performed DCT in nested for loop (without multithreading). I observed that it takes about 18 seconds for a 512x512 image. But, Here's the code with multi-threads
#!/usr/bin/env python
from __future__ import print_function,division
import time
start_time = time.time()
import cv2
import numpy as np
import sys
import pylab as plt
import threading
import Queue
from numpy import empty,arange,exp,real,imag,pi
from numpy.fft import rfft,irfft
from pprint import pprint
queue = Queue.Queue()
if len(sys.argv)>1:
im = cv2.imread(sys.argv[1])
else :
im = cv2.imread('baboon.jpg')
im = cv2.cvtColor(im, cv2.COLOR_BGR2GRAY)
h, w = im.shape[:2]
DF = np.zeros((h,w))
Nb=8
def dct2(y):
M = y.shape[0]
N = y.shape[1]
a = empty([M,N],float)
b = empty([M,N],float)
for i in range(M):
a[i,:] = dct(y[i,:])
for j in range(N):
b[:,j] = dct(a[:,j])
queue.put(b)
def dct(y):
N = len(y)
y2 = empty(2*N,float)
y2[:N] = y[:]
y2[N:] = y[::-1]
c = rfft(y2)
phi = exp(-1j*pi*arange(N)/(2*N))
return real(phi*c[:N])
def Main():
jobs = []
for row in range(0, h, Nb):
for col in range(0, w, Nb):
f = im[(row):(row+Nb), (col):(col+Nb)]
thread = threading.Thread(target=dct2(f))
jobs.append(thread)
df = queue.get()
DF[row:row+Nb, col:col+Nb] = df
for j in jobs:
j.start()
for j in jobs:
j.join()
if __name__ == "__main__":
Main()
cv2.imwrite('dct_img.jpg', DF)
print("--- %s seconds ---" % (time.time() - start_time))
plt.imshow(DF1, cmap = 'Greys')
plt.show()
cv2.waitKey(0)
cv2.destroyAllWindows()
After using multiple threads, this code take about 25 seconds to get executed. What's wrong? Have I implemented multi-threading wrongly? I want to reduce the time taken to perform DCT as much as possible (1-5 seconds). Any suggestions?
Any other concept or method (I've read post on multiprocessing) that'll significantly reduce my execution and processing time?