I am processing a large folder of individual images. Each image has multiple channels and needs to be split and saved as a new image per channel. I assumed this would be a good candidate for multithreading. However, after timing a conventional for loop and comparing it with my multithreading solution, I found no difference in performance. Perhaps my code is flawed, or perhaps this problem does not benefit from multithreading? Any insights would be appreciated. My code is below!
from nd2reader import ND2Reader
import threading
import os
import re
from PIL import Image
import time
import warnings
# Folder whose entries are listed and handed to executeSave / FileThread.
inputdir = r'C:\inputdir'
# Folder that receives the generated .tif files.
outputdir = r'C:\outputdir'
def extractDataFromString(input):
    """Pull the sequence, well and time fields out of a file name.

    The name is split on ',', '_' and '.'. It must yield exactly seven
    pieces; the 4th, 5th and 6th pieces are returned.

    Args:
        input: File name to parse. (The parameter name shadows the builtin
            but is kept so existing keyword callers keep working.)

    Returns:
        A tuple ``(seq, well, time)`` of strings.

    Raises:
        ValueError: If the name does not split into exactly seven pieces.
    """
    delimiters = ',', '_', '.'
    regexPattern = '|'.join(map(re.escape, delimiters))
    parts = re.split(regexPattern, input)
    if len(parts) != 7:
        # Same exception type as the failed tuple-unpacking it replaces, but
        # the message now names the offending file.
        raise ValueError(
            'cannot parse {!r}: expected 7 fields separated by '
            '",", "_" or ".", got {}'.format(input, len(parts))
        )
    # Index rather than unpack into a local called `time`, which would shadow
    # the imported `time` module inside this function.
    return parts[3], parts[4], parts[5]
def executeSave(file,inputdir,outputdir):
    """Write every frame of one ND2 file to its own TIFF in ``outputdir``.

    Output files are named ``<seq>_<well>_<time>_C<k>.tif``, where the first
    three fields come from ``extractDataFromString(file)`` and ``k`` is the
    1-based index of the frame in the reader.

    Args:
        file: Name of the ND2 file, relative to ``inputdir``.
        inputdir: Folder containing ``file``.
        outputdir: Folder the TIFF files are written to.

    Raises:
        ValueError: Propagated from ``extractDataFromString`` when the file
            name does not have the expected fields.
    """
    with ND2Reader(os.path.join(inputdir, file)) as images:
        # The name fields depend only on `file`, so parse them once instead of
        # once per frame. `timepoint` avoids shadowing the `time` module.
        seq, well, timepoint = extractDataFromString(file)
        # NOTE(review): this assumes indexing the reader walks the channels;
        # that depends on the reader's iteration axes - confirm for your data.
        for i in range(len(images)):
            image = Image.fromarray(images[i])
            # Derive the label from the index. The previous if/elif chain only
            # covered indices 0-3, so any further frame reused 'C4' and
            # silently overwrote that file.
            c = 'C{}'.format(i + 1)
            filename = '_'.join((seq, well, timepoint, c)) + '.tif'
            image.save(os.path.join(outputdir, filename), format="TIFF")
class FileThread(threading.Thread):
    """Thread that processes a single input file with ``executeSave``.

    Threads only run in parallel while the interpreter lock is released
    (blocking I/O, or native code that releases it). If decoding and encoding
    the images is CPU-bound Python work, expect little speed-up over a plain
    loop - presumably that is what the benchmark shows; confirm by profiling.

    Attributes set by ``__init__``: ``file``, ``inputdir``, ``outputdir``.
    Attributes set by ``run``: ``seq``, ``well``, ``time``.
    """

    def __init__(self, file, inputdir, outputdir):
        """Remember which file to process and where to read and write.

        Args:
            file: Name of the file inside ``inputdir``.
            inputdir: Folder containing ``file``.
            outputdir: Folder that receives the output images.
        """
        super().__init__()
        self.file = file
        self.inputdir = inputdir
        self.outputdir = outputdir

    def run(self):
        """Parse the file name, then split and save the file's frames."""
        # The parsed fields are kept on the instance for callers that inspect
        # the thread afterwards; executeSave parses the name again itself.
        self.seq, self.well, self.time = extractDataFromString(self.file)
        executeSave(self.file, self.inputdir, self.outputdir)
# Benchmark: process every file in inputdir once with one thread per file and
# once with a plain loop, then report both wall-clock durations.

# List the directory once so both runs process exactly the same files.
files = os.listdir(inputdir)

# threading
threadlist = [FileThread(file, inputdir=inputdir, outputdir=outputdir) for file in files]
# perf_counter() is a monotonic clock intended for measuring intervals;
# time.time() can jump if the system clock is adjusted.
t1 = time.perf_counter()
# Plain loops: start()/join() are called for their side effects only, so
# there is no reason to build throwaway lists of None.
for thread in threadlist:
    thread.start()
for thread in threadlist:
    thread.join()
t2 = time.perf_counter()

# conventional for loop
# NOTE(review): this run re-reads files the threaded run has just read, so OS
# file caching may favour it; swap the order or repeat the runs to check.
for file in files:
    executeSave(file,inputdir,outputdir)
t3 = time.perf_counter()

print('threading execution took {} seconds'.format(t2-t1))
print('non threading execution took {} seconds'.format(t3-t2))