I am trying to use my other cores in my python program. And the following is the basic structure/logic of my code:
import multiprocessing as mp
import pandas as pd
import gc
def multiprocess_RUN(param):
result = Analysis_Obj.run(param)
return result
class Analysis_Obj():
def __init__(self, filename):
self.DF = pd.read_csv(filename)
return
def run_Analysis(self, param):
# Multi-core option
pool = mp.Pool(processes=1)
run_result = pool.map(multiprocess_RUN, [self, param])
# Normal option
run_result = self.run(param)
return run_result
def run(self, param):
# Let's say I have written a function to count the frequency of 'param' in the target file
result = count(self.DF, param)
return result
if __name__ == "__main__":
files = ['file1.csv', 'file2.csv']
params = [1,2,3,4]
results = []
for i in range(0,len(files)):
analysis = Analysis_Obj(files[i])
for j in range(0,len(params)):
result = analysis.run_Analysis(params[j])
results.append(result)
del result
del analysis
gc.collect()
If I comment out the 'Multi-core option' and run the 'Normal option' everything runs fine. But even if I run the 'Multi-core option' with processes=1
I get a Memory Error
when my for loop starts on the 2nd file. I have deliberately set it up so that I create and delete an Analysis object in each for loop, so that the file that has been processed will be cleared from memory. Clearly this hasn't worked. Advice of how to get around this would be very much appreciated.
Cheers
EDIT:
Here is the error message I have in the terminal:
Exception in thread Thread-7:
Traceback (most recent call last):
File "/usr/lib/python2.7/threading.py", line 801, in __bootstrap_inner
self.run()
File "/usr/lib/python2.7/threading.py", line 754, in run
self.__target(*self.__args, **self.__kwargs)
File "/usr/lib/python2.7/multiprocessing/pool.py", line 326, in _handle_workers
pool._maintain_pool()
File "/usr/lib/python2.7/multiprocessing/pool.py", line 230, in _maintain_pool
self._repopulate_pool()
File "/usr/lib/python2.7/multiprocessing/pool.py", line 223, in _repopulate_pool
w.start()
File "/usr/lib/python2.7/multiprocessing/process.py", line 130, in start
self._popen = Popen(self)
File "/usr/lib/python2.7/multiprocessing/forking.py", line 121, in __init__
self.pid = os.fork()
OSError: [Errno 12] Cannot allocate memory