I have been trying to make simple matrix addition program with MRJob library. I have created this simple program as with a separate mapper and reducer it works fine locally and on Hadoop cluster now i am trying to create this program on a single class (MRJob) library. but i could not find the error i am getting, mapper function is working fine but reducer is not working correctly , I could not find any solution on the internet. Any suggestions would be appreciated.
Here is the input file (input.txt):
a,0,0,10
a,0,1,20
a,0,2,30
a,1,0,40
a,1,1,50
a,1,2,60
a,2,0,70
a,2,1,80
a,2,2,90
b,0,0,1
b,0,1,2
b,0,2,3
b,1,0,4
b,1,1,5
b,1,2,6
b,2,0,7
b,2,1,8
b,2,2,9
Here is the code file:
from mrjob.job import MRJob
class MRWordFrequencyCount(MRJob):
def mapper(self, _, line):
row = line.strip().split(',')
key = row[0]
value = row[1:]
if key == 'a':
yield key, value
elif key == 'b':
yield key, value
def reducer(self, key, value):
A = dict()
B = dict()
if key == 'a':
A[int(value[0]), int(value[1])] = int(value[2])
elif key == 'b':
B[int(value[0]), int(value[1])] = int(value[2])
for row in range(3):
for col in range(3):
yield (row, col), A[(row, col)]+B[(row, col)]
if __name__ == '__main__':
MRWordFrequencyCount.run()
Error I am getting
No configs found; falling back on auto-configuration
No configs specified for inline runner
Creating temp directory /tmp/MRJMatrix.ahsan.20220228.212900.538764
Running step 1 of 1...
Error while reading from /tmp/MRJMatrix.ahsan.20220228.212900.538764/step/000/reducer/00000/input:
Traceback (most recent call last):
File "MRJMatrix.py", line 29, in <module>
MRWordFrequencyCount.run()
File "/home/ahsan/.local/lib/python3.8/site-packages/mrjob/job.py", line 616, in run
cls().execute()
File "/home/ahsan/.local/lib/python3.8/site-packages/mrjob/job.py", line 687, in execute
self.run_job()
File "/home/ahsan/.local/lib/python3.8/site-packages/mrjob/job.py", line 636, in run_job
runner.run()
File "/home/ahsan/.local/lib/python3.8/site-packages/mrjob/runner.py", line 503, in run
self._run()
File "/home/ahsan/.local/lib/python3.8/site-packages/mrjob/sim.py", line 161, in _run
self._run_step(step, step_num)
File "/home/ahsan/.local/lib/python3.8/site-packages/mrjob/sim.py", line 170, in _run_step
self._run_streaming_step(step, step_num)
File "/home/ahsan/.local/lib/python3.8/site-packages/mrjob/sim.py", line 187, in _run_streaming_step
self._run_reducers(step_num, num_reducer_tasks)
File "/home/ahsan/.local/lib/python3.8/site-packages/mrjob/sim.py", line 289, in _run_reducers
self._run_multiple(
File "/home/ahsan/.local/lib/python3.8/site-packages/mrjob/sim.py", line 130, in _run_multiple
func()
File "/home/ahsan/.local/lib/python3.8/site-packages/mrjob/sim.py", line 746, in _run_task
invoke_task(
File "/home/ahsan/.local/lib/python3.8/site-packages/mrjob/inline.py", line 133, in invoke_task
task.execute()
File "/home/ahsan/.local/lib/python3.8/site-packages/mrjob/job.py", line 681, in execute
self.run_reducer(self.options.step_num)
File "/home/ahsan/.local/lib/python3.8/site-packages/mrjob/job.py", line 795, in run_reducer
for k, v in self.reduce_pairs(read_lines(), step_num=step_num):
File "/home/ahsan/.local/lib/python3.8/site-packages/mrjob/job.py", line 866, in reduce_pairs
for k, v in self._combine_or_reduce_pairs(pairs, 'reducer', step_num):
File "/home/ahsan/.local/lib/python3.8/site-packages/mrjob/job.py", line 889, in _combine_or_reduce_pairs
for k, v in task(key, values) or ():
File "MRJMatrix.py", line 19, in reducer
A[int(value[0]), int(value[1])] = int(value[2])
TypeError: 'generator' object is not subscriptable