The code I'm using is posted below. I'm running Ubuntu 16.04 on a laptop with a quad-core i7 processor. "data" is a matrix with ~100,000 rows and 4 columns, and "eemd" is a computationally expensive function. On my machine, processing all four columns takes 5 minutes, regardless of whether I process the columns one after another in a plain loop or use Pool.map(), as shown below.
I have seen other examples on this site whose code I was able to run, and they do show Pool.map() cutting the run time by roughly a factor of the number of processes. That doesn't happen for me here, and I can't figure out why.
The result is the same whether I use Pool.map() or Pool.imap().
#!/usr/bin/python
import time
from pyeemd import eemd
import numpy as np
import linecache
# Load the whole input matrix once, at import time; the functions below read
# this module-level array rather than taking it as an argument.
data = np.loadtxt("test_data.txt")
# Column indices to process; each one is used as data[:, i] in a separate
# eemd call.
idx = range(4)
def eemd_sans_multi():
    """Run eemd on every column listed in idx, one after another, and time it.

    Reads the module-level ``data`` and ``idx``. The eemd results are
    discarded; only a label and the elapsed wall-clock time in seconds are
    printed.
    """
    t = time.time()
    for i in idx:
        eemd(data[:, i])
    print("Without multiprocessing...")
    # Call form instead of the ``print expr`` statement: a single
    # parenthesised expression prints identically on Python 2 and is the only
    # form that parses on Python 3.
    print(time.time() - t)
def eemd_wrapper(idx):
    """Run eemd on a single column of the module-level ``data`` matrix.

    ``idx`` selects the column (it is used as ``data[:, idx]``). Whatever eemd
    returns is handed straight back to the caller.
    """
    column = data[:, idx]
    return eemd(column)
def eemd_with_multi():
    """Run eemd_wrapper over idx through a 4-process pool and time it.

    Each result returned by the pool is printed, followed by a label and the
    elapsed wall-clock time in seconds. The timer starts after the pool has
    been created, so pool start-up is not part of the measurement, while the
    printing of the results is.
    """
    import multiprocessing as mp

    pool = mp.Pool(processes=4)
    try:
        t = time.time()
        for x in pool.map(eemd_wrapper, idx):
            print(x)
        print("With multiprocessing...")
        # Call form so the line parses on Python 3 as well as Python 2.
        print(time.time() - t)
    finally:
        # Always release the worker processes, even if a task raised;
        # try/finally is used instead of ``with`` because Pool is not a
        # context manager on Python 2.
        pool.close()
        pool.join()
if __name__ == "__main__":
    # Run the serial benchmark first, then the pooled one, so their timings
    # are printed in that order.
    for benchmark in (eemd_sans_multi, eemd_with_multi):
        benchmark()
New Code Based on Dunes' Reply
from concurrent.futures import ThreadPoolExecutor, ProcessPoolExecutor
import ctypes
from time import time
from pyeemd import eemd
import numpy as np
import re
import linecache
# Load the numeric table, skipping the first 8 lines of the file.
data = np.loadtxt("test_data.txt", skiprows=8)
# Line 8 is split on runs of tabs to get the column names (presumably it is
# the header row -- confirm against the file layout). linecache.getline
# returns the line WITH its trailing newline, so the line ending is stripped
# first; otherwise the last name would end in "\n" and could never match the
# endswith("Z") test below.
headers = re.split(r'\t+', linecache.getline("test_data.txt", 8).rstrip("\r\n"))
# Positions of the columns whose name ends with "Z".
idx = [i for i, x in enumerate(headers) if x.endswith("Z")]
# Keep only the first two matching columns.
idx = idx[0:2]
print(idx)
def eemd_wrapper(idx):
    """Run eemd on a single column of the module-level ``data`` matrix.

    ``idx`` selects the column (it is used as ``data[:, idx]``). Whatever eemd
    returns is handed straight back to the caller.
    """
    column = data[:, idx]
    return eemd(column)
def main():
    """Time eemd_wrapper over idx serially, with threads, and with processes.

    For each of the three strategies a label is printed, followed by the
    elapsed wall-clock time in seconds. The eemd results themselves are
    discarded.
    """
    print("serial")
    start = time()
    for i in idx:
        eemd_wrapper(i)
    end = time()
    print("took {} seconds\n".format(end-start))

    for executor_class in (ThreadPoolExecutor, ProcessPoolExecutor):
        print(executor_class.__name__)
        start = time()
        # we'll only be using two workers so as to make time comparisons simple
        with executor_class(max_workers=2) as executor:
            # Iterate the results instead of dropping the map iterator: an
            # exception raised inside a worker is only re-raised when its
            # result is retrieved, so without this a failed run would print a
            # timing as if it had succeeded.
            for _ in executor.map(eemd_wrapper, idx):
                pass
        # Leaving the ``with`` block waits for the workers to finish, so the
        # clock is stopped only after all the work is done.
        end = time()
        print("took {} seconds\n".format(end-start))
# Run the benchmark only when this file is executed as a script, not when it
# is imported.
if __name__ == '__main__':
    main()