I have below code where I execute 4 commands using subprocess.Popen. I am processing log files using below code. When I process the files sequentially using below code, it works fine. Now I created thread, one for each file for parallelism and bound below function to each thread. But some of them gives me desired output and some throws error.
Code:
def process_log_file(file):
proc= subprocess.Popen(['python27', 'countmapper.py',"C:\\pythonPrograms\\04-03-2014\\17IL\\"+file],cwd="C:\pythonPrograms\\",stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
out, err = proc.communicate()
sortedop= subprocess.Popen(['sort'],cwd="C:\pythonPrograms\\",stdout=subprocess.PIPE,stdin=subprocess.PIPE, stderr=subprocess.STDOUT)
out, err = sortedop.communicate(out)
countReducer= subprocess.Popen(['python27', 'countreducer.py'],cwd="C:\pythonPrograms\\",stdout=subprocess.PIPE, stdin=subprocess.PIPE, stderr=subprocess.STDOUT)
out, err = countReducer.communicate(out)
countpostprocesser= subprocess.Popen(['python27', 'countpostprocesser.py'],cwd="C:\pythonPrograms\\",stdin=subprocess.PIPE,stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
out, err = countpostprocesser.communicate(out)
jsondata2=json.loads(out)
fd=open(file+".json","w")
json.dump(jsondata2,fd,sort_keys=True,indent=2)
fd.close()
return
Error Received:
Exception in thread Thread-42:
Traceback (most recent call last):
File "C:\Python27\lib\threading.py", line 810, in __bootstrap_inner
self.run()
File "C:\Python27\lib\threading.py", line 763, in run
self.__target(*self.__args, **self.__kwargs)
File "C:\pythonPrograms\counts_batch_threading.py", line 45, in process_log_file
jsondata2=json.loads(out)
File "C:\Python27\lib\json\__init__.py", line 338, in loads
return _default_decoder.decode(s)
File "C:\Python27\lib\json\decoder.py", line 365, in decode
obj, end = self.raw_decode(s, idx=_w(s, 0).end())
File "C:\Python27\lib\json\decoder.py", line 383, in raw_decode
raise ValueError("No JSON object could be decoded")
ValueError: No JSON object could be decoded
Code used for thread creation:
for file in glob.glob("SAMPLE*.log"):
thread1 = threading.Thread(target=process_log_file,args=(str(file),))
threads.append(thread1)
thread1.start()
# Wait for all threads to complete
for t in threads:
t.join()
Can someone help me on this?