I asked this question, but I haven't solved my problem. However, an alternative solution came to mind. In the mapper, the number of words is counted, and it can be written to the intermediate output with a minimal key (so that this value sorts to the head) in the cleanup function, which runs at the end of the mapper. In the reducer, the total number of words is calculated by adding up the values at the head. The sample code and a part of its output are available below.
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;
import java.io.IOException;
import java.util.StringTokenizer;
/**
 * Created by tolga on 1/26/16.
 *
 * <p>Word-count MapReduce job that, besides the per-token counts, emits the
 * total number of tokens under the key {@code "!"}. Each map task writes its
 * own token tally under that key when it finishes, and the reducer adds the
 * tallies together exactly like it adds the per-token ones.
 */
public class WordCount {

    /** Counter each map task increments once per token it emits. */
    static enum TestCounters { TEST }

    /**
     * Splits every input line into whitespace-separated tokens and emits
     * {@code (token, 1)} for each one.
     */
    public static class Map extends Mapper<Object, Text, Text, LongWritable> {
        private final static LongWritable one = new LongWritable(1);
        private Text word = new Text();

        /**
         * Emits {@code (token, 1)} for every token of {@code value} and bumps
         * {@link TestCounters#TEST} once per token.
         *
         * @param key     input key supplied by the input format (not used)
         * @param value   one line of input text
         * @param context task context the pairs are written to
         */
        @Override
        public void map(Object key, Text value, Context context) throws IOException, InterruptedException {
            String line = value.toString();
            StringTokenizer tokenizer = new StringTokenizer(line);
            while (tokenizer.hasMoreTokens()) {
                word.set(tokenizer.nextToken());
                context.write(word, one);
                context.getCounter(TestCounters.TEST).increment(1);
            }
        }

        /**
         * Writes the value of {@link TestCounters#TEST} under the key
         * {@code "!"} once the task has processed all of its input.
         *
         * <p>NOTE(review): an input token that is literally {@code "!"} would be
         * merged into this total by the reducer - confirm the input cannot
         * contain one.
         */
        @Override
        protected void cleanup(Context context) throws IOException, InterruptedException {
            context.write(new Text("!"), new LongWritable(context.getCounter(TestCounters.TEST).getValue()));
        }
    }

    /** Sums all values received for a key and emits {@code (key, sum)}. */
    public static class Reduce extends Reducer<Text, LongWritable, Text, LongWritable> {

        /**
         * Adds up {@code values} and writes the total for {@code key}.
         *
         * @param key     the token, or {@code "!"} for the token-count records
         * @param values  the counts emitted for {@code key}
         * @param context task context the result is written to
         */
        @Override
        public void reduce(Text key, Iterable<LongWritable> values, Context context)
                throws IOException, InterruptedException {
            // Accumulate in a long: the values are long, and "int sum += long"
            // would silently narrow and wrap once the total passes Integer.MAX_VALUE.
            long sum = 0L;
            for (LongWritable val : values) {
                sum += val.get();
            }
            context.write(key, new LongWritable(sum));
        }
    }

    /**
     * Configures and runs the job.
     *
     * @param args {@code args[0]} is the input path, {@code args[1]} the output path
     * @throws Exception if the job cannot be set up or submitted
     */
    public static void main(String[] args) throws Exception {
        if (args.length < 2) {
            System.err.println("Usage: WordCount <input path> <output path>");
            System.exit(2);
        }

        Configuration conf = new Configuration();
        // Job.getInstance replaces the deprecated Job(Configuration, String) constructor.
        Job job = Job.getInstance(conf, "WordCount");
        job.setJarByClass(WordCount.class);

        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(LongWritable.class);

        job.setMapperClass(Map.class);
        job.setReducerClass(Reduce.class);

        job.setInputFormatClass(TextInputFormat.class);
        job.setOutputFormatClass(TextOutputFormat.class);

        FileInputFormat.addInputPath(job, new Path(args[0]));
        FileOutputFormat.setOutputPath(job, new Path(args[1]));

        // Propagate job failure to the caller through the process exit code.
        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}
Text File:
Turgut Özal University is a private university located in Ankara, Turkey. It was established in 2008 by the Turgut Özal Thought and Action Foundation and is named after former Turkish president Turgut Özal.
Intermediate Output
**! 33**
2008 1
Action 1
Ankara, 1
Foundation 1
It 1
Thought 1
Turgut 1
Turgut 1
Turgut 1
Output:
**! 33**
2008 1
Action 1
Ankara, 1
Foundation 1
It 1
Thought 1
Turgut 3