0

I'm writing a program for a map-side join. I'm getting an error regarding the path of the cache file present in my project.

Mapper Class:

    public class MapSideJoinMapper extends Mapper<LongWritable, Text, Text, Text>{
    private Map<String, String> departmentMap = new HashMap<String, String>();
    private BufferedReader br;
    private String strDeptName = new String();
    private Text txtMapOutputKey = new Text(" ");
    private Text txtMapOutputValue = new Text(" ");

    enum MyCounter {
        RECORD_COUNT,
        FILE_EXISTS,
        FILE_NOT_FOUND,
        OTHER_EXCEPTION
    }

    // This method gets the file from cache and reads it
    protected void setup(Context ctx) throws IOException {
        URI[] cachefiles = ctx.getCacheFiles();
        for(URI eachUri: cachefiles) {
            if(eachUri.toString().equals("department")) {
                ctx.getCounter(MyCounter.FILE_EXISTS).increment(1);
                loadDepartmentsHashMap(eachUri,ctx);
            }
        }
    }

    // This method loads the department file into the memory
    private void loadDepartmentsHashMap(URI eachUri,Context ctx) throws IOException {
        String readLine = new String();
        try {
            br = new BufferedReader(new FileReader(eachUri.toString()));    // Load the cache file and read line by line
            while((readLine = br.readLine()) != null) {
                String[] deptFieldArray = readLine.split("\t");
                departmentMap.put(deptFieldArray[0], deptFieldArray[1]);
            }
        } catch(FileNotFoundException e) {
            e.printStackTrace();
            ctx.getCounter(MyCounter.FILE_NOT_FOUND).increment(1);
        } catch(IOException e) {
            e.printStackTrace();
            ctx.getCounter(MyCounter.OTHER_EXCEPTION).increment(1);
        } finally {
            if(br != null) {
                br.close();
            }
        }
    }

    public void map(LongWritable key, Text value, Context ctx) throws IOException, InterruptedException {
        ctx.getCounter(MyCounter.RECORD_COUNT).increment(1);
        if(value.toString().length() > 0) {
            String[] empAttributes = value.toString().split("\t");
            try {
                strDeptName = departmentMap.get(empAttributes[3].toString()); // This method gets the department for the employees that are present in both the files
            } finally {
                strDeptName = ((strDeptName.equals(null) || strDeptName.equals(""))?"Not-Found":strDeptName);
            }
            txtMapOutputKey.set(empAttributes[0].toString());
            txtMapOutputValue.set(empAttributes[0].toString() + "\t" + 
                                  empAttributes[1].toString() + "\t" + 
                                  empAttributes[2].toString() + "\t" + 
                                  empAttributes[3].toString() + "\t" + 
                                  strDeptName );
        }
        ctx.write(txtMapOutputKey, txtMapOutputValue);
        strDeptName = " ";
    }
}

My Driver class:

/**
 * Driver for the map-only join job: registers the department file as a cache
 * file and runs {@code MapSideJoinMapper} over the employee input.
 *
 * <p>Usage: {@code MapJoinDriver <input dir> <output dir>}
 */
public class MapJoinDriver extends Configured implements Tool {

    /**
     * Location of the department file registered as a cache file.
     * NOTE(review): a scheme-less path like this is presumably resolved against the
     * job's default file system (HDFS on a cluster), not the local project
     * directory - the file must exist there before the job is submitted.
     */
    private static final String DEPARTMENT_CACHE_PATH = "/MapSideJoin/data/department";

    public static void main(String[] args) throws Exception {
        int exitcode = ToolRunner.run(new Configuration(), new MapJoinDriver(), args);
        System.exit(exitcode);
    }

    /**
     * Configures and runs the job.
     *
     * @param args {@code <input dir> <output dir>}
     * @return 0 on success, 1 if the job failed, -1 on wrong usage
     * @throws Exception if job setup or submission fails
     */
    @Override
    public int run(String[] args) throws Exception {
        if (args.length != 2) {
            // Usage errors go to stderr, terminated by a newline.
            System.err.println("Two parameters are required - <input dir> <output dir>");
            return -1;
        }

        // Use the Configuration handed over by ToolRunner so that generic
        // options parsed from the command line are not discarded.
        Job job = Job.getInstance(getConf());
        job.setJobName("Map side join with Distributed Cache");
        job.setJarByClass(MapJoinDriver.class);

        job.addCacheFile(new Path(DEPARTMENT_CACHE_PATH).toUri());

        FileInputFormat.setInputPaths(job, new Path(args[0]));
        FileOutputFormat.setOutputPath(job, new Path(args[1]));

        job.setMapperClass(MapSideJoinMapper.class);
        job.setNumReduceTasks(0); // map-only job: mapper output is the final output

        boolean success = job.waitForCompletion(true);
        return success ? 0 : 1;
    }
}

The error I'm getting: enter image description here

When I execute the jar file on Linux, I get the exception "File does not exists". I tried to run it by giving the path in different ways, but somehow I couldn't fix it.

Below is my project's hierarchy enter image description here

Can anyone tell me what mistake I'm making with the file path? My question is not about where to add the folder or file. I want to fix the path exception so that the program properly reads the file present in the folder.

Metadata
  • 2,127
  • 9
  • 56
  • 127
  • Possible duplicate of [How do I add a resources folder to my Java project in eclipse](http://stackoverflow.com/questions/27934796/how-do-i-add-a-resources-folder-to-my-java-project-in-eclipse) – Nico Van Belle Mar 07 '17 at 07:11
  • @NicoVanBelle I think my question is a bit different from the one you mentioned. I know where to add the folder and how to add a file to it. Where I'm stuck is at the point where the path of the file is specified in the driver class. Mine is a FileNotFoundException; the one you suggest is different. – Metadata Mar 07 '17 at 08:52

0 Answers0