2

Consider the following error:

2018-07-12 22:46:36,087 FATAL [main] org.apache.hadoop.mapred.YarnChild: Error running child : java.lang.NoSuchMethodError: com.amazonaws.util.StringUtils.trim(Ljava/lang/String;)Ljava/lang/String;
    at com.amazonaws.auth.profile.internal.AwsProfileNameLoader.getEnvProfileName(AwsProfileNameLoader.java:72)
    at com.amazonaws.auth.profile.internal.AwsProfileNameLoader.loadProfileName(AwsProfileNameLoader.java:54)
    at com.amazonaws.regions.AwsProfileRegionProvider.<init>(AwsProfileRegionProvider.java:40)
    at com.amazonaws.regions.DefaultAwsRegionProviderChain.<init>(DefaultAwsRegionProviderChain.java:23)
    at com.amazonaws.client.builder.AwsClientBuilder.<clinit>(AwsClientBuilder.java:57)
    at com.myorg.udb.DecodeMapper.setup(myMapper.java:71)
    at org.apache.hadoop.mapreduce.Mapper.run(Mapper.java:142)
    at org.apache.hadoop.mapred.MapTask.runNewMapper(MapTask.java:784)
    at org.apache.hadoop.mapred.MapTask.run(MapTask.java:341)
    at org.apache.hadoop.mapred.YarnChild$2.run(YarnChild.java:165)
    at java.security.AccessController.doPrivileged(Native Method)
    at javax.security.auth.Subject.doAs(Subject.java:415)
    at org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1635)
    at org.apache.hadoop.mapred.YarnChild.main(YarnChild.java:160)

and the following code:

package com.myorg.udb;

import com.amazonaws.auth.AWSCredentials;
import com.amazonaws.auth.AWSStaticCredentialsProvider;
import com.amazonaws.auth.BasicAWSCredentials;
import com.amazonaws.auth.profile.internal.AwsProfileNameLoader;
import com.amazonaws.regions.Regions;
import com.amazonaws.services.s3.model.GetObjectRequest;
import com.amazonaws.services.s3.model.S3Object;
import com.amazonaws.util.StringUtils;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;
import org.json.simple.JSONObject;
import org.json.simple.parser.JSONParser;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.io.*;
import java.util.*;

public class myMapper extends Mapper<Object, Text, Text, Text> {

    /**
     * Probes at task startup whether the AWS SDK's
     * {@code StringUtils.trim(String)} is resolvable on the runtime classpath.
     *
     * <p>Note: {@code NoSuchMethodError} extends {@code LinkageError} extends
     * {@code Error} — it is NOT an {@code Exception}, so the original
     * {@code catch (Exception e)} could never intercept it and the YARN child
     * died with the FATAL shown in the log. Catching {@code LinkageError}
     * explicitly lets the probe report the classpath conflict instead of
     * killing the task.
     *
     * @param context the Hadoop task context (unused by the probe)
     */
    @Override
    protected void setup(Context context) {
        try {
            System.out.println(StringUtils.trim("hi"));
        } catch (Exception | LinkageError e) {
            // NoSuchMethodError / NoClassDefFoundError land here now.
            System.out.println("catch" + e);
        }
    }

    /**
     * No-op map: emits nothing for any input record.
     *
     * @param key     input key (ignored)
     * @param value   input value (ignored)
     * @param context task context (unused)
     * @throws IOException          never thrown by this implementation
     * @throws InterruptedException never thrown by this implementation
     */
    @Override
    public void map(Object key, Text value, Context context
    ) throws IOException, InterruptedException {
    }
}

This line: System.out.println(StringUtils.trim("hi")); is causing java.lang.NoSuchMethodError: com.amazonaws.util.StringUtils.trim when I run it in Qubole, but it works fine on my local machine.

Here are my POM imports:

<dependency>
    <groupId>com.amazonaws</groupId>
    <artifactId>aws-java-sdk-s3</artifactId>
    <version>1.11.365</version>
</dependency>


<dependency>
    <groupId>com.amazonaws</groupId>
    <artifactId>aws-java-sdk-core</artifactId>
    <version>1.11.365</version>
</dependency>

with the Uber JAR plugin:

<plugin>
    <groupId>org.apache.maven.plugins</groupId>
    <artifactId>maven-shade-plugin</artifactId>
    <version>3.1.1</version>
    <configuration>
        <!-- put your configurations here -->
    </configuration>
    <executions>
        <execution>
            <phase>package</phase>
            <goals>
                <goal>shade</goal>
            </goals>
        </execution>
    </executions>
</plugin>

Why can't Hadoop find com.amazonaws.util.StringUtils.trim even when I imported it in the file, imported it in the POM, and exported all dependencies into a fat JAR?

What import do I need in order to use com.amazonaws.util.StringUtils.trim?

Here are my dependencies

[INFO] --- maven-dependency-plugin:2.8:tree (default-cli) @ udb-aggregate ---
[INFO] com.org.myproject:jar:0.2.12-SNAPSHOT
[INFO] +- com.amazonaws:aws-java-sdk-s3:jar:1.11.365:compile
[INFO] |  +- com.amazonaws:aws-java-sdk-kms:jar:1.11.365:compile
[INFO] |  +- com.amazonaws:aws-java-sdk-core:jar:1.11.365:compile
[INFO] |  |  +- software.amazon.ion:ion-java:jar:1.0.2:compile
[INFO] |  |  +- com.fasterxml.jackson.core:jackson-databind:jar:2.6.7.1:compile
[INFO] |  |  |  +- com.fasterxml.jackson.core:jackson-annotations:jar:2.6.0:compile
[INFO] |  |  |  \- com.fasterxml.jackson.core:jackson-core:jar:2.6.7:compile
[INFO] |  |  +- com.fasterxml.jackson.dataformat:jackson-dataformat-cbor:jar:2.6.7:compile
[INFO] |  |  \- joda-time:joda-time:jar:2.8.1:compile
[INFO] |  \- com.amazonaws:jmespath-java:jar:1.11.365:compile
[INFO] +- org.apache.httpcomponents:httpclient:jar:4.5.2:runtime
[INFO] |  +- org.apache.httpcomponents:httpcore:jar:4.4.4:compile
[INFO] |  +- commons-logging:commons-logging:jar:1.2:compile
[INFO] |  \- commons-codec:commons-codec:jar:1.9:compile
[INFO] +- com.googlecode.json-simple:json-simple:jar:1.1:compile
[INFO] +- org.apache.hadoop:hadoop-common:jar:2.8.4:compile
[INFO] |  +- org.apache.hadoop:hadoop-annotations:jar:2.8.4:compile
[INFO] |  |  \- jdk.tools:jdk.tools:jar:1.8:system
[INFO] |  +- com.google.guava:guava:jar:11.0.2:compile
[INFO] |  +- commons-cli:commons-cli:jar:1.2:compile
[INFO] |  +- org.apache.commons:commons-math3:jar:3.1.1:compile
[INFO] |  +- xmlenc:xmlenc:jar:0.52:compile
[INFO] |  +- commons-io:commons-io:jar:2.4:compile
[INFO] |  +- commons-net:commons-net:jar:3.1:compile
[INFO] |  +- commons-collections:commons-collections:jar:3.2.2:compile
[INFO] |  +- javax.servlet:servlet-api:jar:2.5:compile
[INFO] |  +- org.mortbay.jetty:jetty:jar:6.1.26:compile
[INFO] |  +- org.mortbay.jetty:jetty-util:jar:6.1.26:compile
[INFO] |  +- org.mortbay.jetty:jetty-sslengine:jar:6.1.26:compile
[INFO] |  +- javax.servlet.jsp:jsp-api:jar:2.1:runtime
[INFO] |  +- com.sun.jersey:jersey-core:jar:1.9:compile
[INFO] |  +- com.sun.jersey:jersey-json:jar:1.9:compile
[INFO] |  |  +- org.codehaus.jettison:jettison:jar:1.1:compile
[INFO] |  |  +- com.sun.xml.bind:jaxb-impl:jar:2.2.3-1:compile
[INFO] |  |  |  \- javax.xml.bind:jaxb-api:jar:2.2.2:compile
[INFO] |  |  |     +- javax.xml.stream:stax-api:jar:1.0-2:compile
[INFO] |  |  |     \- javax.activation:activation:jar:1.1:compile
[INFO] |  |  +- org.codehaus.jackson:jackson-jaxrs:jar:1.8.3:compile
[INFO] |  |  \- org.codehaus.jackson:jackson-xc:jar:1.8.3:compile
[INFO] |  +- com.sun.jersey:jersey-server:jar:1.9:compile
[INFO] |  |  \- asm:asm:jar:3.1:compile
[INFO] |  +- log4j:log4j:jar:1.2.17:compile
[INFO] |  +- net.java.dev.jets3t:jets3t:jar:0.9.0:compile
[INFO] |  |  \- com.jamesmurty.utils:java-xmlbuilder:jar:0.4:compile
[INFO] |  +- commons-lang:commons-lang:jar:2.6:compile
[INFO] |  +- commons-configuration:commons-configuration:jar:1.6:compile
[INFO] |  |  +- commons-digester:commons-digester:jar:1.8:compile
[INFO] |  |  |  \- commons-beanutils:commons-beanutils:jar:1.7.0:compile
[INFO] |  |  \- commons-beanutils:commons-beanutils-core:jar:1.8.0:compile
[INFO] |  +- org.slf4j:slf4j-api:jar:1.7.10:compile
[INFO] |  +- org.slf4j:slf4j-log4j12:jar:1.7.10:compile
[INFO] |  +- org.codehaus.jackson:jackson-core-asl:jar:1.9.13:compile
[INFO] |  +- org.codehaus.jackson:jackson-mapper-asl:jar:1.9.13:compile
[INFO] |  +- org.apache.avro:avro:jar:1.7.4:compile
[INFO] |  |  +- com.thoughtworks.paranamer:paranamer:jar:2.3:compile
[INFO] |  |  \- org.xerial.snappy:snappy-java:jar:1.0.4.1:compile
[INFO] |  +- com.google.protobuf:protobuf-java:jar:2.5.0:compile
[INFO] |  +- com.google.code.gson:gson:jar:2.2.4:compile
[INFO] |  +- org.apache.hadoop:hadoop-auth:jar:2.8.4:compile
[INFO] |  |  +- com.nimbusds:nimbus-jose-jwt:jar:4.41.1:compile
[INFO] |  |  |  +- com.github.stephenc.jcip:jcip-annotations:jar:1.0-1:compile
[INFO] |  |  |  \- net.minidev:json-smart:jar:2.3:compile (version selected from constraint [1.3.1,2.3])
[INFO] |  |  |     \- net.minidev:accessors-smart:jar:1.2:compile
[INFO] |  |  |        \- org.ow2.asm:asm:jar:5.0.4:compile
[INFO] |  |  +- org.apache.directory.server:apacheds-kerberos-codec:jar:2.0.0-M15:compile
[INFO] |  |  |  +- org.apache.directory.server:apacheds-i18n:jar:2.0.0-M15:compile
[INFO] |  |  |  +- org.apache.directory.api:api-asn1-api:jar:1.0.0-M20:compile
[INFO] |  |  |  \- org.apache.directory.api:api-util:jar:1.0.0-M20:compile
[INFO] |  |  \- org.apache.curator:curator-framework:jar:2.7.1:compile
[INFO] |  +- com.jcraft:jsch:jar:0.1.54:compile
[INFO] |  +- org.apache.curator:curator-client:jar:2.7.1:compile
[INFO] |  +- org.apache.curator:curator-recipes:jar:2.7.1:compile
[INFO] |  +- com.google.code.findbugs:jsr305:jar:3.0.0:compile
[INFO] |  +- org.apache.htrace:htrace-core4:jar:4.0.1-incubating:compile
[INFO] |  +- org.apache.zookeeper:zookeeper:jar:3.4.6:compile
[INFO] |  |  \- io.netty:netty:jar:3.7.0.Final:compile
[INFO] |  \- org.apache.commons:commons-compress:jar:1.4.1:compile
[INFO] |     \- org.tukaani:xz:jar:1.0:compile
[INFO] +- org.apache.hadoop:hadoop-client:jar:2.8.4:compile
[INFO] |  +- org.apache.hadoop:hadoop-hdfs-client:jar:2.8.4:compile
[INFO] |  |  \- com.squareup.okhttp:okhttp:jar:2.4.0:compile
[INFO] |  |     \- com.squareup.okio:okio:jar:1.4.0:compile
[INFO] |  +- org.apache.hadoop:hadoop-mapreduce-client-app:jar:2.8.4:compile
[INFO] |  |  +- org.apache.hadoop:hadoop-mapreduce-client-common:jar:2.8.4:compile
[INFO] |  |  |  +- org.apache.hadoop:hadoop-yarn-client:jar:2.8.4:compile
[INFO] |  |  |  \- org.apache.hadoop:hadoop-yarn-server-common:jar:2.8.4:compile
[INFO] |  |  \- org.apache.hadoop:hadoop-mapreduce-client-shuffle:jar:2.8.4:compile
[INFO] |  |     \- org.fusesource.leveldbjni:leveldbjni-all:jar:1.8:compile
[INFO] |  +- org.apache.hadoop:hadoop-yarn-api:jar:2.8.4:compile
[INFO] |  +- org.apache.hadoop:hadoop-mapreduce-client-core:jar:2.8.4:compile
[INFO] |  |  \- org.apache.hadoop:hadoop-yarn-common:jar:2.8.4:compile
[INFO] |  |     \- com.sun.jersey:jersey-client:jar:1.9:compile
[INFO] |  \- org.apache.hadoop:hadoop-mapreduce-client-jobclient:jar:2.8.4:compile
[INFO] \- junit:junit:jar:4.12:test
[INFO]    \- org.hamcrest:hamcrest-core:jar:1.3:test
Jal
  • 2,174
  • 1
  • 18
  • 37
  • 1
    In 99% of cases this is due to building against a library version that differs from the library installed in your deployment environment. Make sure you have the same version installed both places. – Jim Garrison Jul 12 '18 at 23:07
  • Does your project contain any dependencies on anything that depends on an earlier version of the same JAR? Your IDE should be able to help you work this out. If so, you may need to add an exclusion to your pom file. – Dawood ibn Kareem Jul 12 '18 at 23:12
  • Looks like `trim()` was added in version 1.8.2, so the Hadoop version you're running seems to have an older version which supersedes the version you deployed. – Andreas Jul 12 '18 at 23:17
  • @Andreas hadoop has dependency on aws sdk? – Jal Jul 12 '18 at 23:19
  • I am using ```2.6.0``` – Jal Jul 12 '18 at 23:19
  • @Jal Or something else installed in Hadoop does, and its version of the AWS SDK is used, not yours. We can only guess. We don't know your environment. – Andreas Jul 12 '18 at 23:21
  • Is there a way to debug dependency collision on the fat jar? – Jal Jul 12 '18 at 23:24
  • Actually i did ```mvn dependency:tree``` and don't see aws listed besided the one I imported – Jal Jul 12 '18 at 23:26
  • Well, this might help someone. We had a similar issue(java.lang.NoSuchMethodError: com.amazonaws.util.StringUtils.trim) but the libraries that were in conflict were not hadoop and aws-sdk but rather 'amazon-kinesis-client(1.0.0)' and 'aws-java-sdk-kinesis(1.11.490)'. Commenting out the former did the trick for us – JavaTec Feb 02 '19 at 14:59

2 Answers2

3

Most likely it's due to a local/remote difference in the class. Perhaps the JAR you are providing already appears in a different version earlier in the classpath and is loaded first. Look at this answer to see how to locate the JAR that contains the class file on the remote machine.

Class klass = StringUtils.class;
URL location = klass.getResource('/' + klass.getName().replace('.', '/') + ".class");

Hopefully location will have the JAR version number in it so you can confirm the remote version against local.

Since you are already shading you can work around this by repackaging the dependencies, relocating third party classes into your own com.myorg package e.g. com.amazonaws.util.StringUtils becomes com.myorg.com.amazonaws.util.StringUtils. See Using Package Relocation in the maven-shade-plugin. I'd not do it if there are many or large dependencies, but it's up to you.

Karol Dowbecki
  • 43,645
  • 9
  • 78
  • 111
  • 1
    Interesting, unfortunately, I can't do that because I don't really use `StringUtils.trim` rather it is used in the following ```AmazonS3 s3client = AmazonS3ClientBuilder.standard() .withCredentials(new AWSStaticCredentialsProvider(credentials)) .withRegion(Regions.US_EAST_1) .build();``` when I build the credentials it internally uses it – Jal Jul 12 '18 at 23:50
  • @Jal Maybe then print entire classpath to the logs? – Karol Dowbecki Jul 12 '18 at 23:51
  • Yeah, that is probably my next step, although I am confuse on how the remote jar can be different since I provide a fat jar to qubole. – Jal Jul 12 '18 at 23:51
  • @Jal JARs are loaded from classpath in order they appear. If earlier in the classpath there is a JAR that contains the same fully qualified classname the class in your JAR won't be loaded at all. It's classes that are loaded, not JARs. – Karol Dowbecki Jul 12 '18 at 23:53
  • hmm, but if I supply a fat jar, does qubole have some internal magic that is able to inject a jar into my fat jar and potentially override my `aws-sdk` version? – Jal Jul 12 '18 at 23:55
  • 1
    @Jal no idea, there always could be a custom internal class loader that gives preference to user or system code. – Karol Dowbecki Jul 12 '18 at 23:57
  • The problem ended up being that qubole supports hadoop map reduce up to 2.6 [src qubole doc](https://docs.qubole.com/en/latest/admin-guide/osversionsupport.html#os-version-support) and I think the 2.6 version they are using some how depend on `/usr/lib/qubole/packages/hadoop2-2.6.0/hadoop2/share/hadoop/tools/lib/aws-java-sdk-1.7.4.jar!/com/amazonaws/util/StringUtils.class` however the official 2.6 does not [src hadoop doc](https://hadoop.apache.org/docs/r2.6.0/hadoop-mapreduce-client/hadoop-mapreduce-client-core/dependency-analysis.html) – Jal Jul 13 '18 at 17:19
0

I got this error while running spark-submit. I realized that the code was built using Spark 3.0.0 with Hadoop 3.2 libraries, while my local Spark version was 2.4.7. I upgraded to Spark 3 with Hadoop 3.2 and copied aws-java-sdk-1.11.828.jar (the SDK used by my code) to the Spark jars directory, and it worked!

Preeti Joshi
  • 841
  • 1
  • 13
  • 20