
I'm trying to test a Hadoop .mapreduce (new-API) Avro job using MRUnit, but I'm getting the NullPointerException shown below. I've attached the relevant portion of the pom and the source code. Any assistance would be appreciated.

Thanks

The error I'm getting is:

java.lang.NullPointerException
at org.apache.hadoop.mrunit.internal.io.Serialization.copy(Serialization.java:73)
at org.apache.hadoop.mrunit.internal.io.Serialization.copy(Serialization.java:91)
at org.apache.hadoop.mrunit.internal.io.Serialization.copyWithConf(Serialization.java:104)
at org.apache.hadoop.mrunit.TestDriver.copy(TestDriver.java:608)
at org.apache.hadoop.mrunit.MapDriverBase.setInputKey(MapDriverBase.java:64)
at org.apache.hadoop.mrunit.MapDriverBase.setInput(MapDriverBase.java:104)
at org.apache.hadoop.mrunit.MapDriverBase.withInput(MapDriverBase.java:218)
at org.lab41.project.mapreduce.ParseMetadataAsTextIntoAvroTest.testMap(ParseMetadataAsTextIntoAvroTest.java:115)
.....

pom snippet:

<dependency>
    <groupId>org.apache.mrunit</groupId>
    <artifactId>mrunit</artifactId>
    <version>0.9.0-incubating</version>
    <classifier>hadoop2</classifier>
    <scope>test</scope>
</dependency>


<!-- from the <properties> section -->
<avro.version>1.7.4</avro.version>
<hadoop.version>2.0.0-mr1-cdh4.1.3</hadoop.version>

<dependency>
    <groupId>org.apache.avro</groupId>
    <artifactId>avro</artifactId>
    <version>${avro.version}</version>
</dependency>

<dependency>
    <groupId>org.apache.hadoop</groupId>
    <artifactId>hadoop-client</artifactId>
    <version>${hadoop.version}</version>
    <scope>provided</scope>
</dependency>
<dependency>
    <groupId>org.apache.hadoop</groupId>
    <artifactId>hadoop-core</artifactId>
    <version>${hadoop.version}</version>
    <scope>provided</scope>
</dependency>
<dependency>
    <groupId>org.apache.avro</groupId>
    <artifactId>avro-mapred</artifactId>
    <version>${avro.version}</version>
    <classifier>hadoop2</classifier>
</dependency>

Here is an excerpt of the test:

import static org.junit.Assert.*;

import java.io.IOException;
import java.nio.ByteBuffer;
import java.nio.CharBuffer;
import java.nio.charset.Charset;
import java.nio.charset.CharsetEncoder;
import java.text.ParseException;
import java.text.SimpleDateFormat;

import org.apache.avro.mapred.AvroKey;
import org.apache.avro.hadoop.io.AvroSerialization;
import org.apache.avro.mapred.AvroValue;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mrunit.mapreduce.MapDriver;
import org.apache.hadoop.mrunit.types.Pair;
import org.junit.After;
import org.junit.AfterClass;
import org.junit.Before;
import org.junit.BeforeClass;
import org.junit.Test;
import org.lab41.project.domain.DataRecord;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

public class ParseMetadataAsTextIntoAvroTest {

    Logger logger = LoggerFactory
            .getLogger(ParseMetadataAsTextIntoAvroTest.class);
    private MapDriver<LongWritable, Text, AvroKey<Long>, AvroValue<DataRecord>> mapDriver;

    @BeforeClass
    public static void setUpClass() {
    }

    @AfterClass
    public static void tearDownClass() {
    }

    @Before
    public void setUp() throws IOException {
        ParseMetadataAsTextIntoAvroMapper mapper = new ParseMetadataAsTextIntoAvroMapper();

        mapDriver = new MapDriver<LongWritable, Text, AvroKey<Long>, AvroValue<DataRecord>>();
        mapDriver.setMapper(mapper);
        mapDriver.getConfiguration().setStrings("io.serializations", new String[]{  
            AvroSerialization.class.getName()
        });
    }

    @Test
    public void testMap() throws ParseException, IOException {
        Text testInputText = new Text(test0);

        DataRecord record = new DataRecord();
       ….

        AvroKey<Long> expectedPivot = new AvroKey<Long>(1L);
        AvroValue<DataRecord> expectedRecord = new AvroValue<DataRecord>(record);

        mapDriver.withInput(new Pair<LongWritable, Text>(new LongWritable(1), testInputText));
        mapDriver.withOutput(new Pair<AvroKey<Long>, AvroValue<DataRecord>>(expectedPivot, expectedRecord));
        mapDriver.runTest();

    }
}
– Paul Mazzuca

4 Answers


In order to get this to work you have to add AvroSerialization to the default serializations. You also have to configure AvroSerialization.

@Before
public void setUp() throws IOException {
    ParseMetadataAsTextIntoAvroMapper mapper = new ParseMetadataAsTextIntoAvroMapper();
    mapDriver = new MapDriver<LongWritable, Text, AvroKey<Long>, AvroValue<NetworkRecord>>();
    mapDriver.setMapper(mapper);

    // Copy over the default io.serializations. If you don't do this then you will
    // not be able to deserialize the inputs to the mapper.
    String[] strings = mapDriver.getConfiguration().getStrings("io.serializations");
    String[] newStrings = new String[strings.length + 1];
    System.arraycopy(strings, 0, newStrings, 0, strings.length);
    newStrings[newStrings.length - 1] = AvroSerialization.class.getName();
    mapDriver.getConfiguration().setStrings("io.serializations", newStrings);

    // Now you have to configure AvroSerialization by specifying the key
    // writer schema and the value writer schema.
    mapDriver.getConfiguration().setStrings("avro.serialization.key.writer.schema", Schema.create(Schema.Type.LONG).toString(true));
    mapDriver.getConfiguration().setStrings("avro.serialization.value.writer.schema", NetworkRecord.SCHEMA$.toString(true));
}
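For reference, NetworkRecord here stands in for your own Avro-generated record class (the question's DataRecord), and the snippet assumes one import beyond those already shown in the question:

    import org.apache.avro.Schema;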
– Karthik Ramachandran
  • The NPE is not accompanied by an explanatory error message due to an MRUnit bug ([MRUNIT-193](https://issues.apache.org/jira/browse/MRUNIT-193)) for which a patch has already been posted. – oby1 Jan 18 '14 at 23:25
  • I *really* hate to ask this way, but this is a pretty specific question and the audience of people who can answer it is small. Since your answer here helped me get past the first half of the problem, I'm hoping you might be able to help me get past the next part too: http://stackoverflow.com/questions/22591342/avro-with-mrunit-gives-instantiationexception – seawolf Mar 23 '14 at 23:26

This also solves the problem, with the merit of shorter and clearer code:

    MapDriver driver = MapDriver.newMapDriver(/* your mapper */);

    Configuration conf = driver.getConfiguration();
    AvroSerialization.addToConfiguration(conf);
    AvroSerialization.setKeyWriterSchema(conf, /* your schema */);
    AvroSerialization.setKeyReaderSchema(conf, /* your schema */);
    Job job = new Job(conf);
    // job.set... your job settings
    // AvroJob.set... your Avro job settings

It may be a bug in MRUnit that it doesn't set io.serializations right; in a real job that would have been taken care of by job.setInputFormatClass(AvroKeyInputFormat.class), I think.
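For comparison, here is a minimal sketch (my own illustration, assuming the new-API org.apache.avro.mapreduce.AvroJob and AvroKeyInputFormat classes from avro-mapred) of how a real job declares its Avro input; AvroJob writes the schema into the job's Configuration, which is the step MRUnit never performs for you:

    // Sketch only: Avro input wiring in a real (non-MRUnit) job.
    Job job = new Job(new Configuration());
    AvroJob.setInputKeySchema(job, Schema.create(Schema.Type.LONG));
    job.setInputFormatClass(AvroKeyInputFormat.class);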

– WeiChing 林煒清

You have to add AvroSerialization to the default serializations and configure AvroSerialization.

@Before
public void setUp() throws IOException {
    ParseMetadataAsTextIntoAvroMapper mapper = new ParseMetadataAsTextIntoAvroMapper();
    mapDriver = new MapDriver<LongWritable, Text, AvroKey<Long>, AvroValue<NetworkRecord>>();
    mapDriver.setMapper(mapper);
    Configuration configuration = mapDriver.getConfiguration();

    // Add AvroSerialization to the configuration
    // (copy over the default serializations for deserializing the mapper inputs)
    String[] serializations = configuration.getStrings(CommonConfigurationKeysPublic.IO_SERIALIZATIONS_KEY);
    String[] newSerializations = Arrays.copyOf(serializations, serializations.length + 1);
    newSerializations[serializations.length] = AvroSerialization.class.getName();
    configuration.setStrings(CommonConfigurationKeysPublic.IO_SERIALIZATIONS_KEY, newSerializations);

    // Configure AvroSerialization by specifying the key writer and value writer schemas
    AvroSerialization.setKeyWriterSchema(configuration, Schema.create(Schema.Type.LONG));
    AvroSerialization.setValueWriterSchema(configuration, NetworkRecord.SCHEMA$);
}
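Note that AvroSerialization.addToConfiguration(configuration), used in the answer above, is a one-call alternative to the manual io.serializations array copy.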
– oby1
  • Karthik's answer is absolutely correct, I'm including some clean-up here for those who want to copy/paste since my edit to his answer was rejected. – oby1 Jan 19 '14 at 01:01

Answered here: https://issues.apache.org/jira/browse/MRUNIT-181 and, more specifically, here: https://cwiki.apache.org/confluence/display/MRUNIT/MRUnit+with+Avro
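In short, those pages describe the same fix as the answers above: register AvroSerialization in io.serializations and set the key and value writer schemas on the MRUnit driver's Configuration.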

– Brock Noland
    Note that [link-only answers](http://meta.stackoverflow.com/tags/link-only-answers/info) are discouraged, SO answers should be the end-point of a search for a solution (vs. yet another stopover of references, which tend to get stale over time). Please consider adding a stand-alone synopsis here, keeping the link as a reference. – kleopatra Dec 17 '13 at 16:29