I'm trying to make it so that when a user enters a word the program will go through the txt file and count all the instances of that word.
I'm using MapReduce and i'm new at it.
I know there is a really simple way to do this and i've been trying to figure that out for a while.
In this code I'm trying to make it so that it would ask for the user input and the program would go through the file and find instances.
I've seen some codes on stack overflow and someone mentioned that setting the configuration to conf.set("userinput","Data") would help somehow.
Also there is some updated way to have the user input.
The if statement in my program is an example of when the user word is entered it only finds that word.
import java.util.StringTokenizer;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
public class WordCount {
public static class TokenizerMapper
extends Mapper<Object, Text, Text, IntWritable>{
private final static IntWritable one = new IntWritable(1);
private Text word = new Text();
//So I've seen that this is the correct way of setting it up.
// However I've heard that there mroe efficeint ways of setting it up as well.
/*
public void setup(Context context) {
Configuration config=context.getConfiguration();
String wordstring=config.get("mapper.word");
word.setAccessibleHelp(wordstring);
}
*/
public void map(Object key, Text value, Context context
) throws IOException, InterruptedException {
StringTokenizer itr = new StringTokenizer(value.toString());
while (itr.hasMoreTokens()) {
if(word=="userinput") {
word.set(itr.nextToken());
context.write(word, one);
}
}
}
}
public static class IntSumReducer
extends Reducer<Text,IntWritable,Text,IntWritable> {
private IntWritable result = new IntWritable();
public void reduce(Text key, Iterable<IntWritable> values,
Context context
) throws IOException, InterruptedException {
int sum = 0;
for (IntWritable val : values) {
sum += val.get();
}
result.set(sum);
context.write(key, result);
}
}
public static void main(String[] args) throws Exception {
Configuration conf = new Configuration();
Job job = Job.getInstance(conf, "word count");
job.setJarByClass(WordCount.class);
job.setMapperClass(TokenizerMapper.class);
job.setCombinerClass(IntSumReducer.class);
job.setReducerClass(IntSumReducer.class);
job.setOutputKeyClass(Text.class);
job.setOutputValueClass(IntWritable.class);
FileInputFormat.addInputPath(job, new Path(args[0]));
FileOutputFormat.setOutputPath(job, new Path(args[1]));
System.exit(job.waitForCompletion(true) ? 0 : 1);
}
}