0

I can run an Apache Storm program in local mode (using IntelliJ and Maven), but when I run it on the Storm cluster it fails immediately with the error `Exception in thread "main" java.lang.NoClassDefFoundError: org.apache.http.client.HttpClient`. I have verified that the Storm cluster works with a toy program that makes no HTTP calls. Here is my pom.xml file:

<?xml version="1.0" encoding="UTF-8"?> 
<!-- Maven build descriptor for the org.jmm:twitter2:1.0 project. -->
<!-- NOTE(review): no <build> section is present, so no packaging plugin (shade, assembly, ...)
     is configured here; presumably `mvn package` produces a jar holding only this project's
     own classes and none of its dependencies. Confirm before deploying to a cluster. -->
<project xmlns="http://maven.apache.org/POM/4.0.0" 
         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" 
         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd"> 
    <modelVersion>4.0.0</modelVersion> 
    <groupId>org.jmm</groupId> 
    <artifactId>twitter2</artifactId> 
    <version>1.0</version> 
    <dependencies> 
        <!-- Storm API used by the spout, bolts and topology classes. No <scope> is given,
             so Maven's default scope applies. NOTE(review): when the topology jar bundles its
             dependencies for a cluster, this is typically marked with provided scope. Confirm. -->
        <dependency> 
            <groupId>org.apache.storm</groupId> 
            <artifactId>storm-core</artifactId> 
            <version>2.1.1</version> 
        </dependency> 
        <!-- Jackson JSON data binding. -->
        <dependency> 
            <groupId>com.fasterxml.jackson.core</groupId> 
            <artifactId>jackson-databind</artifactId> 
            <version>2.6.3</version> 
        </dependency> 
        <!-- org.json JSON library. -->
        <dependency> 
            <groupId>org.json</groupId> 
            <artifactId>json</artifactId> 
            <version>20090211</version> 
        </dependency> 
        <!-- NOTE(review): the spout imports org.apache.http.* and twitter4j.Status, but no
             httpclient/httpcore or twitter4j dependency is declared in this file. Presumably
             they reach the IDE classpath some other way (e.g. transitively). Verify how they
             are expected to reach the cluster's runtime classpath. -->
    </dependencies> 
    <properties> 
        <!-- Compile with Java 8 source and target levels. -->
        <maven.compiler.source>8</maven.compiler.source> 
        <maven.compiler.target>8</maven.compiler.target> 
    </properties> 
</project> 

And the start of my spout code:

import org.apache.storm.spout.SpoutOutputCollector; 
import org.apache.storm.task.TopologyContext; 
import org.apache.storm.topology.OutputFieldsDeclarer; 
import org.apache.storm.topology.base.BaseRichSpout; 
import org.apache.storm.tuple.Fields; 
import org.apache.storm.tuple.Tuple; 
import org.apache.storm.tuple.Values; 
import org.apache.storm.utils.Utils; 
import java.util.ArrayList; 
import java.util.Arrays; 
import java.util.Map; 
import java.time.format.DateTimeFormatter; 
import java.time.LocalDateTime; import org.apache.http.HttpEntity; 
import org.apache.http.HttpResponse; 
import org.apache.http.NameValuePair; 
import org.apache.http.client.HttpClient; 
import org.apache.http.client.config.CookieSpecs; 
import org.apache.http.client.config.RequestConfig; 
import org.apache.http.client.methods.HttpGet; 
import org.apache.http.client.utils.URIBuilder; 
import org.apache.http.impl.client.HttpClients; 
import org.apache.http.message.BasicNameValuePair; 
import org.apache.http.util.EntityUtils; 
import twitter4j.Status; 

public class twitterSpout extends BaseRichSpout { 
    private SpoutOutputCollector collector; 
    String bearerToken = "";   //deleted here, but filled in for my program 
    HttpGet httpGet; 
    HttpResponse response; 
    HttpEntity entity; 
    URIBuilder uriBuilder; 

And the topology:

import org.apache.storm.Config; 
import org.apache.storm.StormSubmitter; 
import org.apache.storm.topology.TopologyBuilder; 
import org.apache.storm.tuple.Fields; 
import org.apache.storm.LocalCluster; 

/**
 * Entry point that wires one spout and two bolts into a Storm topology and
 * submits it through {@link StormSubmitter}.
 *
 * <p>Stream layout: {@code twitterSpout} feeds {@code lossy}, grouped on the
 * {@code bucket} field; {@code lossy} feeds {@code report}, grouped on the
 * {@code bcurrent} field.
 */
public class twitterTopology {
    /** Name under which the topology is submitted. */
    private static final String TOPOLOGY_NAME = "twitterTopology";
    /** Component id of the spout. */
    private static final String SPOUT_ID = "twitterSpout";
    /** Component id of the first bolt. */
    private static final String LOSSY_ID = "lossy";
    /** Component id of the second bolt. */
    private static final String REPORT_ID = "report";

    /**
     * Builds the topology, submits it, then keeps the submitting JVM alive for 70 seconds.
     *
     * @param args command-line arguments (not used)
     * @throws Exception if construction, submission, or the final sleep fails
     */
    public static void main(String[] args) throws Exception {
        // Components are created in this order, with the progress message
        // printed right after the spout has been constructed.
        twitterSpout tweetSource = new twitterSpout();
        System.out.println("\nafter initializing twitterSpout\n");
        LossyBolt lossyCounter = new LossyBolt();
        ReportBolt reporter = new ReportBolt();

        TopologyBuilder wiring = new TopologyBuilder();
        wiring.setSpout(SPOUT_ID, tweetSource);
        wiring.setBolt(LOSSY_ID, lossyCounter)
              .fieldsGrouping(SPOUT_ID, new Fields("bucket"));
        wiring.setBolt(REPORT_ID, reporter)
              .fieldsGrouping(LOSSY_ID, new Fields("bcurrent"));

        Config stormConf = new Config();
        stormConf.setDebug(true);
        stormConf.setNumWorkers(3);

        // Local-mode alternative: this works when it is enabled and the
        // StormSubmitter call below is commented out instead.
        //
        //   LocalCluster cluster = new LocalCluster();
        //   cluster.submitTopology("twitterTopology", stormConf, wiring.createTopology());
        //   Thread.sleep(70000);
        //   cluster.killTopology("twitterTopology");
        //   cluster.shutdown();

        StormSubmitter.submitTopology(TOPOLOGY_NAME, stormConf, wiring.createTopology());

        Thread.sleep(70_000);
    }
}

Any insight as to why it works in local mode but not in storm cluster mode would be appreciated!!

jmuth
  • 71
  • 4
  • Are all necessary dependencies given in the `/lib`-directory? – moosehead42 Jul 18 '22 at 17:47
  • In Intellij, I see Maven: org.apache.httpcomponents:httpclient:4.5.13 and Maven: org.apache.httpcomponents:httpcore:4.4.13 under External Libraries. This works when I run in local mode (the LocalCluster code above), just not when I run with my storm cluster. – jmuth Jul 19 '22 at 04:33
  • Are these jars in the lib-folder of your remote storm installation? – moosehead42 Jul 19 '22 at 07:57
  • When I open the bookmarks under External Libraries, I do see a .jar file. However, I don't see the actual .jar files in my IdeaProjects directory. Are you thinking I need to download the .jar files separately and link them in? – jmuth Jul 19 '22 at 17:26
  • Your suggestion made me think I could download httpcore-4.4.10.jar and httpclient-4.5.6.jar, so I did. Unfortunately, I have been trying for the last four hours to figure out how to pull these jar files in to the Maven build in Intellij so I can create a package for my storm cluster, but w/o any success... – jmuth Jul 19 '22 at 22:15
  • I give up for today. I was able to find the original httpclient and httpcore jar files and backed out the additions I had made. I can run Maven clean and package, a new jar file is created, but I still get the same error. – jmuth Jul 19 '22 at 23:03
  • So you are referring to the distributed mode, which needs to run on a remote server and not on your local machine, right? In that case, Storm needs access to all the packages you use. One way is to pack an uber-jar (e.g. with Maven); another way is to upload the additional packages to the lib-folder of your remote Storm installation. – moosehead42 Jul 21 '22 at 05:18
  • Thanks, moosehead42. When I select Package, Maven creates a jar file. How would I create the "uber-jar" you are suggesting? – jmuth Jul 24 '22 at 18:37
  • I preferred to solve it with the single jars, which you need to upload just once. An uber-jar is an alternative that, however, inherently uploads all the packed jars every time, though it may be easier to handle. Maybe read this as a starting point: https://stackoverflow.com/questions/11947037/what-is-an-uber-jar – moosehead42 Jul 25 '22 at 08:47
  • Really appreciate the guidance. I was able to use the details in the link to get it working. For anyone else who follows this thread, make sure to add `<scope>provided</scope>` back in at the end of the storm-core dependency so you don't get the duplicate yaml error. Thanks again, moosehead42. – jmuth Jul 25 '22 at 23:46

0 Answers0