1

I'm trying to generate a fat-jar (with sbt assembly) and then send it to spark using spark-submit and have my application use my logback logging appenders and encoders.

I have tried what is stated in: Configuring Apache Spark Logging with Scala and logback and also in Separating application logs in Logback from Spark Logs in log4j

but with no success. Either building the fat jar fails due to deduplicate errors, or the application does not start because of an error during SparkContext creation.

My build.sbt

// Dependencies for the application.
//
// `++=` (not `+=`) is required here: `+=` appends a single ModuleID, whereas
// `++=` appends a whole Seq of them.
//
// Every Spark module excludes all `org.slf4j` artifacts from its transitive
// dependencies so that the only SLF4J artifacts on the classpath are the ones
// declared explicitly below.
//
// NOTE(review): no Logback binding (e.g. logback-classic) is declared in this
// snippet — confirm it is added elsewhere if Logback is the intended backend.
libraryDependencies ++= Seq(
  "org.apache.spark" %% "spark-core" % sparkVersion excludeAll( ExclusionRule(organization = "org.slf4j"))
  , "org.apache.spark" %% "spark-sql" % sparkVersion  excludeAll( ExclusionRule(organization = "org.slf4j"))
  , "org.apache.spark" %% "spark-mllib" % sparkVersion  excludeAll( ExclusionRule(organization = "org.slf4j"))
  , "org.apache.spark" %% "spark-streaming-kafka-0-10" % sparkVersion  excludeAll( ExclusionRule(organization = "org.slf4j"))
  // NOTE(review): spark-hive is pinned to "2.4.0" while the other Spark modules
  // use `sparkVersion` — confirm the two agree, since mixing Spark versions is
  // a common source of runtime errors.
  , "org.apache.spark" %% "spark-hive" % "2.4.0"  excludeAll( ExclusionRule(organization = "org.slf4j"))

  , "org.scala-lang.modules" %% "scala-xml" % "1.0.6"
  , "io.minio" % "minio" % "6.0.11" excludeAll(ExclusionRule(organization= "com.google.code.findbugs", name= "annotations"))
  , "com.typesafe.slick" %% "slick" % "3.3.2"
  , "com.typesafe.slick" %% "slick-hikaricp" % "3.3.2"
  , "com.microsoft.sqlserver" % "mssql-jdbc" % "7.4.1.jre8"
  // SLF4J bridge that stands in for the log4j 1.x API.
  , "org.slf4j" % "log4j-over-slf4j" % "1.7.25"
)


// Conflict-resolution rules used by sbt-assembly when the same path appears in
// more than one dependency jar.
//
// Cases are tried top to bottom and the first match wins, so specific paths
// must be listed BEFORE the broader prefixes that would otherwise shadow them.
//
// This setting is defined exactly once: a second `:=` with the same body would
// simply override this one and adds nothing.
assemblyMergeStrategy in assembly := {

  // Needed only when assembling a non-provided spark-streaming-kafka-0-10.
  // Must precede the generic `org/apache/**` rule below, which would otherwise
  // match first and make this case unreachable.
  case PathList("org", "apache", "spark", "unused", "UnusedStubClass.class") => MergeStrategy.first

  case PathList("com",   "esotericsoftware", xs @ _*) => MergeStrategy.last
  case PathList("com",   "squareup", xs @ _*) => MergeStrategy.last
  case PathList("com",   "sun", xs @ _*) => MergeStrategy.last
  case PathList("com",   "thoughtworks", xs @ _*) => MergeStrategy.last
  case PathList("commons-beanutils", xs @ _*) => MergeStrategy.last
  case PathList("commons-cli", xs @ _*) => MergeStrategy.last
  case PathList("commons-collections", xs @ _*) => MergeStrategy.last
  case PathList("commons-io", xs @ _*) => MergeStrategy.last
  case PathList("io",    "netty", xs @ _*) => MergeStrategy.last
  case PathList("javax", "activation", xs @ _*) => MergeStrategy.last
  case PathList("javax", "inject", xs @ _*) => MergeStrategy.last
  case PathList("javax", "xml", xs @ _*) => MergeStrategy.last
  case PathList("org",   "apache", xs @ _*) => MergeStrategy.last
  case PathList("org",   "codehaus", xs @ _*) => MergeStrategy.last
  case PathList("org",   "glassfish", xs @ _*) => MergeStrategy.last
  case PathList("org",   "fusesource", xs @ _*) => MergeStrategy.last
  case PathList("org",   "mortbay", xs @ _*) => MergeStrategy.last
  case PathList("org",   "tukaani", xs @ _*) => MergeStrategy.last
  case PathList("com",   "twitter", xs @ _*) => MergeStrategy.last
  case PathList("org",   "objenesis", xs @ _*) => MergeStrategy.last
  case PathList("org", "aopalliance", xs @ _*) => MergeStrategy.last
  case PathList("xerces", xs @ _*) => MergeStrategy.last
  case PathList("xmlenc", xs @ _*) => MergeStrategy.last
  case "about.html" => MergeStrategy.rename
  case "META-INF/ECLIPSEF.RSA" => MergeStrategy.last
  case "META-INF/mailcap" => MergeStrategy.last
  case "META-INF/mimetypes.default" => MergeStrategy.last
  case "plugin.properties" => MergeStrategy.last
  case "git.properties" => MergeStrategy.last
  case "plugin.xml" => MergeStrategy.last
  case "log4j.properties" => MergeStrategy.last
  case "parquet.thrift" => MergeStrategy.last
  case "codegen/config.fmpp" => MergeStrategy.last

  // Needed only when assembling etcd, which comes in with other libraries.
  case PathList("META-INF", "io.netty.versions.properties", xs @ _*) => MergeStrategy.last
  // Needed only when assembling mastria-etcd4s, which comes in with other libraries.
  case PathList("scala","collection","mutable", xs @ _*) => MergeStrategy.first
  case PathList("scala","util", xs @ _*) => MergeStrategy.first
  case PathList("library.properties", xs @ _*) => MergeStrategy.first
  case PathList("logback.xml",xs @ _ *) => MergeStrategy.last

  // Anything not listed above is delegated to the previously configured
  // strategy for this key.
  case x =>
    val oldStrategy = (assemblyMergeStrategy in assembly).value
    oldStrategy(x)
  //case PathList("MANIFEST.MF",xs @ _ *) => MergeStrategy.first
  // case _ => MergeStrategy.first
}

When I use the Typesafe LazyLogging trait, the process fails no matter which combination of arguments I try.

I would like to see a full build.sbt example to maybe find my mistake.

grcanosa
  • 36
  • 4

0 Answers0