I have a loop for finding several sum:
for t in reversed(range(len(inputs))):
dy = np.copy(ps[t])
dy[targets[t]] -= 1
dWhy += np.dot(dy, hs[t].T)
dby += dy
Input value is too big and i must to make it parallel. So I so I converted the loop to a separate function. I've tried to use ThreadPoolExecutor, but result time is slow compared to the sequential algorithm.
That's my minimal working example:
import numpy as np
import concurrent.futures
import time, random
from concurrent.futures import ThreadPoolExecutor
import threading
#parameters
dWhy = np.random.sample(300)
dby = np.random.sample(300)
def Func(ps, targets, hs, t):
global dWhy, dby
dy = np.copy(ps[t])
dWhy += np.dot(dy, hs[t].T)
dby += dy
return dWhy, dby
if __name__ == '__main__':
ps = np.random.sample(100000)
targets = np.random.sample(100000)
hs = np.random.sample(100000)
start = time.time()
for t in range(100000):
dy = np.copy(ps[t])
dWhy += np.dot(dy, hs[t].T)
dby += dy
finish = time.time()
print("One thread: ")
print(finish-start)
dWhy = np.random.sample(300)
dby = np.random.sample(300)
start = time.time()
with concurrent.futures.ThreadPoolExecutor() as executor:
args = ((ps, targets, hs, t) for t in range(100000))
for out1, out2 in executor.map(lambda p: Func(*p), args):
dWhy, dby = out1, out2
finish = time.time()
print("Multithreads time: ")
print(finish-start)
On my PC One thread-time ~ 3s, Multithreads time ~ 1 minute.