0

I'm trying to make it so that when a user enters a word the program will go through the txt file and count all the instances of that word.
I'm using MapReduce and I'm new to it.
I know there is a really simple way to do this, and I've been trying to figure it out for a while.

In this code I'm trying to make it so that it would ask for the user input and the program would go through the file and find instances.

I've seen some code on Stack Overflow, and someone mentioned that setting a configuration value with conf.set("userinput","Data") would help somehow.
Also there is some updated way to have the user input.

The if statement in my program is an example of what I want: when the user's word is entered, the program should count only that word.

    import java.io.IOException;
    import java.util.StringTokenizer;

    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.fs.Path;
    import org.apache.hadoop.io.IntWritable;
    import org.apache.hadoop.io.Text;
    import org.apache.hadoop.mapreduce.Job;
    import org.apache.hadoop.mapreduce.Mapper;
    import org.apache.hadoop.mapreduce.Reducer;
    import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
    import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

    public class WordCount {

      public static class TokenizerMapper
           extends Mapper<Object, Text, Text, IntWritable>{

        private final static IntWritable one = new IntWritable(1);
        private Text word = new Text();


    //So I've seen that this is the correct way of setting it up. 
// However I've heard that there mroe efficeint ways of setting it up as well. 
/*
public void setup(Context context) {
     Configuration config=context.getConfiguration();
     String wordstring=config.get("mapper.word");
     word.setAccessibleHelp(wordstring);
 }
*/


        public void map(Object key, Text value, Context context
                        ) throws IOException, InterruptedException {
          StringTokenizer itr = new StringTokenizer(value.toString());

          while (itr.hasMoreTokens()) {
              if(word=="userinput") {
            word.set(itr.nextToken());
            context.write(word, one);
              }
          }
        }
      }

      public static class IntSumReducer
           extends Reducer<Text,IntWritable,Text,IntWritable> {
        private IntWritable result = new IntWritable();

        public void reduce(Text key, Iterable<IntWritable> values,
                           Context context
                           ) throws IOException, InterruptedException {
          int sum = 0;
          for (IntWritable val : values) {
            sum += val.get();
          }
          result.set(sum);
          context.write(key, result);
        }
      }

      public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();


        Job job = Job.getInstance(conf, "word count");
        job.setJarByClass(WordCount.class);
        job.setMapperClass(TokenizerMapper.class);
        job.setCombinerClass(IntSumReducer.class);
        job.setReducerClass(IntSumReducer.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(IntWritable.class);
        FileInputFormat.addInputPath(job, new Path(args[0]));
        FileOutputFormat.setOutputPath(job, new Path(args[1]));
        System.exit(job.waitForCompletion(true) ? 0 : 1);
      }
    }
Phantômaxx
  • 37,901
  • 21
  • 84
  • 115
black
  • 59
  • 2
  • 11

1 Answer

0

I'm not sure about the setup method, but you pass the input at the command line as an argument.

// Store the first command-line argument (the word to search for) in the
// configuration under the key "mapper.word", before the Job is created from conf.
conf.set("mapper.word",args[0]);
Job job =... 
// Notice you now need 3 arguments to run this: the word, the input path, and the output path.
FileInputFormat.addInputPath(job, new Path(args[1]));
FileOutputFormat.setOutputPath(job, new Path(args[2]));

In the mapper or reducer, you can get the string

 // Fetch the configuration from the task context and read back the value
 // stored under the key "mapper.word".
 Configuration config=context.getConfiguration();
 String wordstring=config.get("mapper.word");

You also need to get the string from the tokenizer before you can compare it, and you need to compare two strings, not a String to a Text object.

// Look up the word to search for under the key "mapper.word".
String wordstring=config.get("mapper.word");
while (itr.hasMoreTokens()) {
    // Take the token out of the tokenizer first so the loop always advances,
    // then compare it String-to-String with equals().
    String token = itr.nextToken();
    if(wordstring.equals(token)) {
        word.set(token);
        context.write(word, one);
    }
}
OneCricketeer
  • 179,855
  • 19
  • 132
  • 245