1

I'm trying out Spring Data - Hadoop for executing the MR code on a remote cluster from my local machine's IDE

//Hadoop 1.1.2, Spring 3.2.4, Spring-Data-Hadoop 1.0.0

Tried with these versions :

Hadoop 1.2.1, Spring 4.0.1, Spring-Data-Hadoop 2.0.2

applicationContext.xml :

<?xml version="1.0" encoding="UTF-8"?>
<beans xmlns="http://www.springframework.org/schema/beans"
    xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xmlns:hdp="http://www.springframework.org/schema/hadoop"
    xmlns:context="http://www.springframework.org/schema/context"
    xsi:schemaLocation="http://www.springframework.org/schema/beans http://www.springframework.org/schema/beans/spring-beans.xsd 
    http://www.springframework.org/schema/hadoop http://www.springframework.org/schema/hadoop/spring-hadoop.xsd
    http://www.springframework.org/schema/context http://www.springframework.org/schema/context/spring-context-3.2.xsd">

    <context:property-placeholder location="resources/hadoop.properties" />

    <hdp:configuration file-system-uri="${hd.fs}" job-tracker-uri="${hd.jobtracker.uri}">

    </hdp:configuration>

    <hdp:job id="wc-job" mapper="com.hadoop.basics.WordCounter.WCMapper"
        reducer="com.hadoop.basics.WordCounter.WCReducer" input-path="${wordcount.input.path}"
        output-path="${wordcount.output.path}" user="bigdata">
    </hdp:job>

    <hdp:job-runner id="myjobs-runner" job-ref="wc-job"
        run-at-startup="true" />

    <hdp:resource-loader id="resourceLoader" uri="${hd.fs}"
        user="bigdata" />   
</beans>

WordCounter.java :

    package com.hadoop.basics;

import java.io.IOException;
import java.util.StringTokenizer;

import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.springframework.context.support.AbstractApplicationContext;
import org.springframework.context.support.ClassPathXmlApplicationContext;

public class WordCounter {

    private static IntWritable one = new IntWritable(1);

    public static class WCMapper extends Mapper<Text, Text, Text, IntWritable> {

        @Override
        protected void map(
                Text key,
                Text value,
                org.apache.hadoop.mapreduce.Mapper<Text, Text, Text, IntWritable>.Context context)
                throws IOException, InterruptedException {
            // TODO Auto-generated method stub
            StringTokenizer strTokenizer = new StringTokenizer(value.toString());
            Text token = new Text();

            while (strTokenizer.hasMoreTokens()) {
                token.set(strTokenizer.nextToken());
                context.write(token, one);
            }
        }
    }

    public static class WCReducer extends
            Reducer<Text, IntWritable, Text, IntWritable> {
        @Override
        protected void reduce(
                Text key,
                Iterable<IntWritable> values,
                org.apache.hadoop.mapreduce.Reducer<Text, IntWritable, Text, IntWritable>.Context context)
                throws IOException, InterruptedException {
            // TODO Auto-generated method stub

            int sum = 0;

            for (IntWritable value : values) {
                sum += value.get();
            }

            context.write(key, new IntWritable(sum));
        }
    }

    public static void main(String[] args) {
        AbstractApplicationContext context = new ClassPathXmlApplicationContext(
                "applicationContext.xml", WordCounter.class);
        System.out.println("Word Count Application Running");
        context.registerShutdownHook();
    }
}

The output is :

Aug 23, 2013 11:07:48 AM org.springframework.context.support.AbstractApplicationContext prepareRefresh
INFO: Refreshing org.springframework.context.support.ClassPathXmlApplicationContext@1815338: startup date [Fri Aug 23 11:07:48 IST 2013]; root of context hierarchy
Aug 23, 2013 11:07:48 AM org.springframework.beans.factory.xml.XmlBeanDefinitionReader loadBeanDefinitions
INFO: Loading XML bean definitions from class path resource [com/hadoop/basics/applicationContext.xml]
Aug 23, 2013 11:07:48 AM org.springframework.core.io.support.PropertiesLoaderSupport loadProperties
INFO: Loading properties file from class path resource [resources/hadoop.properties]
Aug 23, 2013 11:07:48 AM org.springframework.beans.factory.support.DefaultListableBeanFactory preInstantiateSingletons
INFO: Pre-instantiating singletons in org.springframework.beans.factory.support.DefaultListableBeanFactory@7c197e: defining beans [org.springframework.context.support.PropertySourcesPlaceholderConfigurer#0,hadoopConfiguration,wc-job,myjobs-runner,resourceLoader]; root of factory hierarchy
Aug 23, 2013 11:07:49 AM org.springframework.data.hadoop.mapreduce.JobExecutor$2 run
INFO: Starting job [wc-job]
Aug 23, 2013 11:07:49 AM org.apache.hadoop.mapred.JobClient copyAndConfigureFiles
WARNING: No job jar file set.  User classes may not be found. See JobConf(Class) or JobConf#setJar(String).
Aug 23, 2013 11:07:49 AM org.apache.hadoop.mapreduce.lib.input.FileInputFormat listStatus
INFO: Total input paths to process : 1
Aug 23, 2013 11:07:50 AM org.apache.hadoop.util.NativeCodeLoader <clinit>
WARNING: Unable to load native-hadoop library for your platform... using builtin-java classes where applicable
Aug 23, 2013 11:07:50 AM org.apache.hadoop.io.compress.snappy.LoadSnappy <clinit>
WARNING: Snappy native library not loaded
Aug 23, 2013 11:07:52 AM org.apache.hadoop.mapred.JobClient monitorAndPrintJob
INFO: Running job: job_201308231532_0002
Aug 23, 2013 11:07:53 AM org.apache.hadoop.mapred.JobClient monitorAndPrintJob
INFO:  map 0% reduce 0%
Aug 23, 2013 11:08:12 AM org.apache.hadoop.mapred.JobClient monitorAndPrintJob
INFO: Task Id : attempt_201308231532_0002_m_000000_0, Status : FAILED
java.lang.RuntimeException: java.lang.ClassNotFoundException: com.hadoop.basics.WordCounter$WCMapper
    at org.apache.hadoop.conf.Configuration.getClass(Configuration.java:849)
    at org.apache.hadoop.mapreduce.JobContext.getMapperClass(JobContext.java:199)
    at org.apache.hadoop.mapred.MapTask.runNewMapper(MapTask.java:719)
    at org.apache.hadoop.mapred.MapTask.run(MapTask.java:370)
    at org.apache.hadoop.mapred.Child$4.run(Child.java:255)
    at java.security.AccessController.doPrivileged(Native Method)
    at javax.security.auth.Subject.doAs(Subject.java:415)
    at org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1149)
    at org.apache.hadoop.mapred.Child.main(Child.java:249)
Caused by: java.lang.ClassNotFoundException: com.hadoop.basics.WordCounter$WCMapper
    at java.net.URLClassLoader$1.run(URLClassLoader.java:366)
    at java.net.URLClassLoader$1.run(URLClassLoader.java:355)
    at java.security.AccessController.doPrivileged(Native Method)
    at java.net.URLClassLoader.findClass(URLClassLoader.java:354)
    at java.lang.ClassLoader.loadClass(ClassLoader.java:423)
    at sun.misc.Launcher$AppClassLoader.loadClass(Launcher.java:308)
    at java.lang.ClassLoader.loadClass(ClassLoader.java:356)
    at java.lang.Class.forName0(Native Method)
    at java.lang.Class.forName(Class.java:264)
    at org.apache.hadoop.conf.Configuration.getClassByName(Configuration.java:802)
    at org.apache.hadoop.conf.Configuration.getClass(Configuration.java:847)
    ... 8 more

Aug 23, 2013 11:08:33 AM org.apache.hadoop.mapred.JobClient monitorAndPrintJob
INFO: Task Id : attempt_201308231532_0002_m_000000_1, Status : FAILED
java.lang.RuntimeException: java.lang.ClassNotFoundException: com.hadoop.basics.WordCounter$WCMapper
    at org.apache.hadoop.conf.Configuration.getClass(Configuration.java:849)
    at org.apache.hadoop.mapreduce.JobContext.getMapperClass(JobContext.java:199)
    at org.apache.hadoop.mapred.MapTask.runNewMapper(MapTask.java:719)
    at org.apache.hadoop.mapred.MapTask.run(MapTask.java:370)
    at org.apache.hadoop.mapred.Child$4.run(Child.java:255)
    at java.security.AccessController.doPrivileged(Native Method)
    at javax.security.auth.Subject.doAs(Subject.java:415)
    at org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1149)
    at org.apache.hadoop.mapred.Child.main(Child.java:249)
Caused by: java.lang.ClassNotFoundException: com.hadoop.basics.WordCounter$WCMapper
    at java.net.URLClassLoader$1.run(URLClassLoader.java:366)
    at java.net.URLClassLoader$1.run(URLClassLoader.java:355)
    at java.security.AccessController.doPrivileged(Native Method)
    at java.net.URLClassLoader.findClass(URLClassLoader.java:354)
    at java.lang.ClassLoader.loadClass(ClassLoader.java:423)
    at sun.misc.Launcher$AppClassLoader.loadClass(Launcher.java:308)
    at java.lang.ClassLoader.loadClass(ClassLoader.java:356)
    at java.lang.Class.forName0(Native Method)
    at java.lang.Class.forName(Class.java:264)
    at org.apache.hadoop.conf.Configuration.getClassByName(Configuration.java:802)
    at org.apache.hadoop.conf.Configuration.getClass(Configuration.java:847)
    ... 8 more

Aug 23, 2013 11:08:51 AM org.apache.hadoop.mapred.JobClient monitorAndPrintJob
INFO: Task Id : attempt_201308231532_0002_m_000000_2, Status : FAILED
java.lang.RuntimeException: java.lang.ClassNotFoundException: com.hadoop.basics.WordCounter$WCMapper
    at org.apache.hadoop.conf.Configuration.getClass(Configuration.java:849)
    at org.apache.hadoop.mapreduce.JobContext.getMapperClass(JobContext.java:199)
    at org.apache.hadoop.mapred.MapTask.runNewMapper(MapTask.java:719)
    at org.apache.hadoop.mapred.MapTask.run(MapTask.java:370)
    at org.apache.hadoop.mapred.Child$4.run(Child.java:255)
    at java.security.AccessController.doPrivileged(Native Method)
    at javax.security.auth.Subject.doAs(Subject.java:415)
    at org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1149)
    at org.apache.hadoop.mapred.Child.main(Child.java:249)
Caused by: java.lang.ClassNotFoundException: com.hadoop.basics.WordCounter$WCMapper
    at java.net.URLClassLoader$1.run(URLClassLoader.java:366)
    at java.net.URLClassLoader$1.run(URLClassLoader.java:355)
    at java.security.AccessController.doPrivileged(Native Method)
    at java.net.URLClassLoader.findClass(URLClassLoader.java:354)
    at java.lang.ClassLoader.loadClass(ClassLoader.java:423)
    at sun.misc.Launcher$AppClassLoader.loadClass(Launcher.java:308)
    at java.lang.ClassLoader.loadClass(ClassLoader.java:356)
    at java.lang.Class.forName0(Native Method)
    at java.lang.Class.forName(Class.java:264)
    at org.apache.hadoop.conf.Configuration.getClassByName(Configuration.java:802)
    at org.apache.hadoop.conf.Configuration.getClass(Configuration.java:847)
    ... 8 more

Aug 23, 2013 11:09:24 AM org.apache.hadoop.mapred.JobClient monitorAndPrintJob
INFO: Job complete: job_201308231532_0002
Aug 23, 2013 11:09:24 AM org.apache.hadoop.mapred.Counters log
INFO: Counters: 7
Aug 23, 2013 11:09:24 AM org.apache.hadoop.mapred.Counters log
INFO:   Job Counters 
Aug 23, 2013 11:09:24 AM org.apache.hadoop.mapred.Counters log
INFO:     SLOTS_MILLIS_MAPS=86688
Aug 23, 2013 11:09:24 AM org.apache.hadoop.mapred.Counters log
INFO:     Total time spent by all reduces waiting after reserving slots (ms)=0
Aug 23, 2013 11:09:24 AM org.apache.hadoop.mapred.Counters log
INFO:     Total time spent by all maps waiting after reserving slots (ms)=0
Aug 23, 2013 11:09:24 AM org.apache.hadoop.mapred.Counters log
INFO:     Launched map tasks=4
Aug 23, 2013 11:09:24 AM org.apache.hadoop.mapred.Counters log
INFO:     Data-local map tasks=4
Aug 23, 2013 11:09:24 AM org.apache.hadoop.mapred.Counters log
INFO:     SLOTS_MILLIS_REDUCES=0
Aug 23, 2013 11:09:24 AM org.apache.hadoop.mapred.Counters log
INFO:     Failed map tasks=1
Aug 23, 2013 11:09:24 AM org.springframework.data.hadoop.mapreduce.JobExecutor$2 run
INFO: Completed job [wc-job]
Aug 23, 2013 11:09:24 AM org.springframework.beans.factory.support.DefaultSingletonBeanRegistry destroySingletons
INFO: Destroying singletons in org.springframework.beans.factory.support.DefaultListableBeanFactory@7c197e: defining beans [org.springframework.context.support.PropertySourcesPlaceholderConfigurer#0,hadoopConfiguration,wc-job,myjobs-runner,resourceLoader]; root of factory hierarchy
Exception in thread "main" org.springframework.beans.factory.BeanCreationException: Error creating bean with name 'myjobs-runner': Invocation of init method failed; nested exception is java.lang.IllegalStateException: Job wc-job] failed to start; status=FAILED
    at org.springframework.beans.factory.support.AbstractAutowireCapableBeanFactory.initializeBean(AbstractAutowireCapableBeanFactory.java:1482)
    at org.springframework.beans.factory.support.AbstractAutowireCapableBeanFactory.doCreateBean(AbstractAutowireCapableBeanFactory.java:521)
    at org.springframework.beans.factory.support.AbstractAutowireCapableBeanFactory.createBean(AbstractAutowireCapableBeanFactory.java:458)
    at org.springframework.beans.factory.support.AbstractBeanFactory$1.getObject(AbstractBeanFactory.java:295)
    at org.springframework.beans.factory.support.DefaultSingletonBeanRegistry.getSingleton(DefaultSingletonBeanRegistry.java:223)
    at org.springframework.beans.factory.support.AbstractBeanFactory.doGetBean(AbstractBeanFactory.java:292)
    at org.springframework.beans.factory.support.AbstractBeanFactory.getBean(AbstractBeanFactory.java:194)
    at org.springframework.beans.factory.support.DefaultListableBeanFactory.preInstantiateSingletons(DefaultListableBeanFactory.java:628)
    at org.springframework.context.support.AbstractApplicationContext.finishBeanFactoryInitialization(AbstractApplicationContext.java:932)
    at org.springframework.context.support.AbstractApplicationContext.refresh(AbstractApplicationContext.java:479)
    at org.springframework.context.support.ClassPathXmlApplicationContext.<init>(ClassPathXmlApplicationContext.java:197)
    at org.springframework.context.support.ClassPathXmlApplicationContext.<init>(ClassPathXmlApplicationContext.java:172)
    at org.springframework.context.support.ClassPathXmlApplicationContext.<init>(ClassPathXmlApplicationContext.java:158)
    at com.hadoop.basics.WordCounter.main(WordCounter.java:58)
Caused by: java.lang.IllegalStateException: Job wc-job] failed to start; status=FAILED
    at org.springframework.data.hadoop.mapreduce.JobExecutor$2.run(JobExecutor.java:219)
    at org.springframework.core.task.SyncTaskExecutor.execute(SyncTaskExecutor.java:49)
    at org.springframework.data.hadoop.mapreduce.JobExecutor.startJobs(JobExecutor.java:168)
    at org.springframework.data.hadoop.mapreduce.JobExecutor.startJobs(JobExecutor.java:160)
    at org.springframework.data.hadoop.mapreduce.JobRunner.call(JobRunner.java:52)
    at org.springframework.data.hadoop.mapreduce.JobRunner.afterPropertiesSet(JobRunner.java:44)
    at org.springframework.beans.factory.support.AbstractAutowireCapableBeanFactory.invokeInitMethods(AbstractAutowireCapableBeanFactory.java:1541)
    at org.springframework.beans.factory.support.AbstractAutowireCapableBeanFactory.initializeBean(AbstractAutowireCapableBeanFactory.java:1479)
    ... 13 more

What config. have I missed? Is it really possible to submit a Hadoop job remotely using Spring Data without creation of a jar etc. ?

Kaliyug Antagonist
  • 3,512
  • 9
  • 51
  • 103

4 Answers4

0

You can try jar-by-class attribute to locate the jar and let hadoop upload the jar to the TaskTracker.

<hdp:job id="wc-job" mapper="com.hadoop.basics.WordCounter.WCMapper"
    reducer="com.hadoop.basics.WordCounter.WCReducer"
    input-path="${wordcount.input.path}"
    jar-by-class="com.hadoop.basics.WordCounter"
    output-path="${wordcount.output.path}" user="bigdata">
</hdp:job>

At last, WCMapper and WCReducer should be static, or they can not be created by hadoop.

zsxwing
  • 20,270
  • 4
  • 37
  • 59
  • Tried that but the same error. What does this warning wants me to configure(a Tool runner or something else?): WARNING: No job jar file set. User classes may not be found. See JobConf(Class) or JobConf#setJar(String). – Kaliyug Antagonist Aug 23 '13 at 07:23
  • Try jar="file:///a/b/c/foo.jar"? – zsxwing Aug 23 '13 at 07:24
  • Please correct if I'm wrong : Spring-data is used to connect to a remote cluster and to save the pains of creating a jar, copying it to the cluster and then executing 'hadoop jar...'. Then why create a jar anywhere and give its path in the config file? I should be simply able to do a 'Run as' and get my job running. I'm quite confused ... – Kaliyug Antagonist Aug 23 '13 at 07:34
  • I found the error. WCMapper and WCReducer should be static. e.g. `public static class WCReducer`. And `jar-by-class="com.hadoop.basics.WordCounter"`. I have updated my answer. – zsxwing Aug 23 '13 at 07:54
  • Had tried that earlier before posting but the error persisted/persists. Edited the code. – Kaliyug Antagonist Aug 23 '13 at 08:52
0

I had same problem with my very first hadoop job here, and the solution was with way of compiling the jar file in eclipse.

When you want to export java project as a jar file in eclipse, two options are available:

Extract required libraries into generated JAR
Package required libraries into generated JAR

First option solved my problem and probably will solve yours.

Community
  • 1
  • 1
Mehraban
  • 3,164
  • 4
  • 37
  • 60
  • I'm attempting to execute a job using Spring Data with the assumption that a jar need not be created and executed manually - a plain 'Run As' from an IDE must do it. Regarding your pointer, I have a counter-question http://stackoverflow.com/questions/21676531/is-jar-creation-necessary-to-execute-mr-on-a-remote-cluster – Kaliyug Antagonist Aug 07 '14 at 08:30
  • What run your code in an IDE, (in case of java) it builds the jar and executes it via jvm. What do you expect to be done with spring? Your jar to be spread all over cluster and run there? – Mehraban Aug 07 '14 at 08:37
  • Yeah run a main class that submits the job to a remote cluster and it further executes identical to the way we do 'hadoop jar ...' on the cluster itself. But post this error, I'm wondering if its possible, hence, put the question mentioned in my previous comment. A manual jar creation or even mention of 'jarname.jar' isn't mandatory as per the Spring Data doc. but now I'm unsure ! – Kaliyug Antagonist Aug 07 '14 at 10:09
0

I was getting the same issue then separating the mapper and reducer class works and the following changes were done in applicationContext.xml:

<?xml version="1.0" encoding="UTF-8"?>
<beans xmlns="http://www.springframework.org/schema/beans"
    xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xmlns:util="http://www.springframework.org/schema/util"
    xmlns:context="http://www.springframework.org/schema/context"
    xmlns:hdp="http://www.springframework.org/schema/hadoop" xmlns:batch="http://www.springframework.org/schema/batch"
    xsi:schemaLocation="
    http://www.springframework.org/schema/beans http://www.springframework.org/schema/beans/spring-beans.xsd
    http://www.springframework.org/schema/hadoop http://www.springframework.org/schema/hadoop/spring-hadoop.xsd
     http://www.springframework.org/schema/context  http://www.springframework.org/schema/context/spring-context.xsd
     http://www.springframework.org/schema/batch http://www.springframework.org/schema/batch/spring-batch.xsd
    http://www.springframework.org/schema/util http://www.springframework.org/schema/util/spring-util-4.2.xsd">

    <context:property-placeholder location="classpath:application.properties" />
    <hdp:configuration namenode-principal="hdfs://xx.yy.com" rm-manager-uri="xx.yy.com"
        security-method="kerb" user-keytab="location" rm-manager-principal="username"
        user-principal="username">
        fs.default.name=${fs.default.name}
        mapred.job.tracker=${mapred.job.tracker}
    </hdp:configuration>

    <hdp:job id="wordCountJobId" input-path="${input.path}"
        output-path="${output.path}" jar-by-class="com.xx.poc.Application"
        mapper="com.xx.poc.Map" reducer="com.xx.poc.Reduce" />

    <hdp:job-runner id="wordCountJobRunner" job-ref="wordCountJobId"
        run-at-startup="true" />
</beans>
ganzogo
  • 2,516
  • 24
  • 36
-1

Right Click on your Project, Build Path -> Configure Build Path. Select Libraries tab, Click Add External JARs and copy all the Required JAR files in your hadoop directory. I guess, this will solve your problem.

nvn22
  • 1
  • 3
  • Please go through the issue properly - I'm trying to submit a job using IDE and the exception is about my map/reduce classes not being loaded - the external jars have nothing to do here – Kaliyug Antagonist Aug 13 '14 at 04:14
  • Your output shows, you have ClasNotFoundException. It means you didn't import all the need jar files. This is the most common error at basic level. – nvn22 Aug 13 '14 at 13:42