2

I'm parsing Snowplow events using Spark, following the guide at https://github.com/snowplow/snowplow-scala-analytics-sdk.

My code looks like this:

import com.snowplowanalytics.snowplow.analytics.scalasdk.json.EventTransformer
import org.apache.spark.{ SparkConf, SparkContext }
import org.apache.spark.SparkContext._;
import java.util.Calendar
import org.apache.spark.graphx._
import org.apache.spark.rdd.RDD
import org.apache.spark.graphx._
import org.apache.spark.storage.StorageLevel
import scala.collection.mutable.ArrayBuffer
import java.text.SimpleDateFormat;
import java.io.{ InputStream, Serializable };
import java.net.URI;
import java.util.Properties;
import org.apache.spark.sql.SQLContext
import java.util.Date
import org.apache.spark.sql.SQLContext._
import org.apache.spark.sql.SparkSession
import scalaz._
import scalaz.Scalaz._

/** Spark driver that reads a text file and runs every line through the SDK's `EventTransformer`. */
object NcgGraphx {

    /**
     * Loads the file named by the first argument, prints its first line, transforms each
     * line with `EventTransformer.transform`, and prints the first transformed event.
     *
     * Exits with status 1 when no input path is supplied.
     *
     * @param args command-line arguments; `args(0)` is the input file path
     */
    def main(args: Array[String]): Unit = {
        if (args.isEmpty) {
            println("Please supply input file path")
            sys.exit(1)
        }
        val file = args(0)
        val conf = new SparkConf().setAppName("NcgGraphx").setMaster("local")
        println("created conf object")

        // One SparkSession is the single entry point: it owns the SparkContext, so no
        // separate SparkContext/SQLContext pair has to be constructed by hand.
        val spark = SparkSession.builder().config(conf).getOrCreate()
        val ctx = spark.sparkContext
        println("created spark context")
        // Only one set of implicits is imported; importing both sqlContext.implicits._ and
        // spark.implicits._ would make the encoders ambiguous once the DataFrame code is enabled.
        import spark.implicits._

        try {
            println("loading json file")
            val input = ctx.textFile(file)
            println("after loading file")
            input.take(1).foreach(println(_))
            // transform's result is narrowed with toOption, so lines that do not yield a
            // value are dropped rather than failing the job.
            val events = input.flatMap(line => EventTransformer.transform(line).toOption)
            println("after transforming events")
            events.take(1).foreach(println(_))
//            val dataframe = spark.read.json(events)
//            dataframe.show()
        } finally {
            // Release the local Spark resources even if one of the actions above throws.
            spark.stop()
        }
    }
}

My build.sbt looks like this:

// sbt build definition for the ncgaka-graphx Spark job.
name := "ncgaka-graphx"

version := "1.0"

// %% below appends the Scala binary version (2.11) to each artifact name.
scalaVersion := "2.11.8"

libraryDependencies ++= Seq(
  // Only spark-core is scoped "provided"; the other Spark modules use the default scope.
  // NOTE(review): confirm whether the remaining Spark modules should be "provided" as well.
  "org.apache.spark" %% "spark-core" % "2.0.0" % "provided",
  "org.apache.spark" %% "spark-sql" % "2.0.0",
  "org.apache.spark" %% "spark-hive" % "2.0.0",
  "org.apache.spark" %% "spark-graphx" % "2.0.0",
  "com.snowplowanalytics" %% "snowplow-scala-analytics-sdk" % "0.1.0",
  // NOTE(review): scalaz is pinned explicitly here; verify that this version is
  // binary-compatible with the scalaz version the Snowplow SDK was built against.
  "org.scalaz" %% "scalaz-core" % "7.2.5",
  "org.scalaz" %% "scalaz-concurrent" % "7.2.5"
)

// Extra repository; presumably where the Snowplow SDK artifact is published — confirm.
resolvers ++= Seq("Snowplow Analytics" at "http://maven.snplow.com/releases/")

When I try to run the JAR, I get the following run-time exception:

Exception in thread "main" java.lang.NoClassDefFoundError: scalaz/Validation
    at NcgGraphx$.main(NcgGraphx.scala:42)
    at NcgGraphx.main(NcgGraphx.scala)
    at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
    at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:57)
    at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
    at java.lang.reflect.Method.invoke(Method.java:606)
    at org.apache.spark.deploy.SparkSubmit$.org$apache$spark$deploy$SparkSubmit$$runMain(SparkSubmit.scala:729)
    at org.apache.spark.deploy.SparkSubmit$.doRunMain$1(SparkSubmit.scala:185)
    at org.apache.spark.deploy.SparkSubmit$.submit(SparkSubmit.scala:210)
    at org.apache.spark.deploy.SparkSubmit$.main(SparkSubmit.scala:124)
    at org.apache.spark.deploy.SparkSubmit.main(SparkSubmit.scala)
Caused by: java.lang.ClassNotFoundException: scalaz.Validation
    at java.net.URLClassLoader$1.run(URLClassLoader.java:366)
    at java.net.URLClassLoader$1.run(URLClassLoader.java:355)
    at java.security.AccessController.doPrivileged(Native Method)
    at java.net.URLClassLoader.findClass(URLClassLoader.java:354)
    at java.lang.ClassLoader.loadClass(ClassLoader.java:425)
    at java.lang.ClassLoader.loadClass(ClassLoader.java:358)

Any idea what I'm missing?

hlagvankar
  • 219
  • 1
  • 3
  • 12

1 Answer

0

The problem is that you're using scalaz 7.2.5, which is binary-incompatible with the version used in scala-analytics-sdk.

If you need a version that is both binary-compatible and available for Scala 2.11, you can use 7.0.9:

"org.scalaz" %% "scalaz-core" % "7.0.9"
chuwy
  • 6,310
  • 4
  • 20
  • 29