I'm trying to generate a fat-jar (with sbt assembly) and then send it to spark using spark-submit and have my application use my logback logging appenders and encoders.
I have tried what is stated in: Configuring Apache Spark Logging with Scala and logback and also in Separating application logs in Logback from Spark Logs in log4j
but with no success. Either the fat jar fails to build due to deduplicate errors, or the application does not start due to an error during SparkContext creation.
My build.sbt
// Dependencies packaged into the assembly jar.
//
// `++=` is required here: `+=` appends a single ModuleID, whereas this is a Seq of them.
//
// Every Spark module excludes all artifacts from the `org.slf4j` organization, and
// `log4j-over-slf4j` is then added explicitly at the end of the list.
// NOTE(review): no logback artifact (e.g. logback-classic) is declared in this list;
// confirm that an SLF4J binding is provided elsewhere in the build.
libraryDependencies ++= Seq(
  "org.apache.spark" %% "spark-core" % sparkVersion excludeAll(ExclusionRule(organization = "org.slf4j")),
  "org.apache.spark" %% "spark-sql" % sparkVersion excludeAll(ExclusionRule(organization = "org.slf4j")),
  "org.apache.spark" %% "spark-mllib" % sparkVersion excludeAll(ExclusionRule(organization = "org.slf4j")),
  "org.apache.spark" %% "spark-streaming-kafka-0-10" % sparkVersion excludeAll(ExclusionRule(organization = "org.slf4j")),
  // NOTE(review): spark-hive is pinned to a literal version while the other Spark modules
  // use `sparkVersion` — confirm the two agree.
  "org.apache.spark" %% "spark-hive" % "2.4.0" excludeAll(ExclusionRule(organization = "org.slf4j")),
  "org.scala-lang.modules" %% "scala-xml" % "1.0.6",
  "io.minio" % "minio" % "6.0.11" excludeAll(ExclusionRule(organization = "com.google.code.findbugs", name = "annotations")),
  "com.typesafe.slick" %% "slick" % "3.3.2",
  "com.typesafe.slick" %% "slick-hikaricp" % "3.3.2",
  "com.microsoft.sqlserver" % "mssql-jdbc" % "7.4.1.jre8",
  "org.slf4j" % "log4j-over-slf4j" % "1.7.25"
)
// Conflict-resolution rules used by sbt-assembly when several jars contribute the same path.
//
// Cases are tried from top to bottom and the first match wins, so a rule for a specific
// file must be listed before any broader package-level pattern that would also match it.
// The setting is defined once only: a second `:=` would simply replace the first.
assemblyMergeStrategy in assembly := {
  // Needed only to sbt assembly non provided spark-streaming-kafka-0-10.
  // Listed ahead of the generic org/apache rule below, which would otherwise shadow it.
  case PathList("org", "apache", "spark", "unused", "UnusedStubClass.class") => MergeStrategy.first

  // Package-level rules: keep the last copy found on the classpath.
  case PathList("com", "esotericsoftware", xs @ _*) => MergeStrategy.last
  case PathList("com", "squareup", xs @ _*) => MergeStrategy.last
  case PathList("com", "sun", xs @ _*) => MergeStrategy.last
  case PathList("com", "thoughtworks", xs @ _*) => MergeStrategy.last
  case PathList("commons-beanutils", xs @ _*) => MergeStrategy.last
  case PathList("commons-cli", xs @ _*) => MergeStrategy.last
  case PathList("commons-collections", xs @ _*) => MergeStrategy.last
  case PathList("commons-io", xs @ _*) => MergeStrategy.last
  case PathList("io", "netty", xs @ _*) => MergeStrategy.last
  case PathList("javax", "activation", xs @ _*) => MergeStrategy.last
  case PathList("javax", "inject", xs @ _*) => MergeStrategy.last
  case PathList("javax", "xml", xs @ _*) => MergeStrategy.last
  case PathList("org", "apache", xs @ _*) => MergeStrategy.last
  case PathList("org", "codehaus", xs @ _*) => MergeStrategy.last
  case PathList("org", "glassfish", xs @ _*) => MergeStrategy.last
  case PathList("org", "fusesource", xs @ _*) => MergeStrategy.last
  case PathList("org", "mortbay", xs @ _*) => MergeStrategy.last
  case PathList("org", "tukaani", xs @ _*) => MergeStrategy.last
  case PathList("com", "twitter", xs @ _*) => MergeStrategy.last
  case PathList("org", "objenesis", xs @ _*) => MergeStrategy.last
  case PathList("org", "aopalliance", xs @ _*) => MergeStrategy.last
  case PathList("xerces", xs @ _*) => MergeStrategy.last
  case PathList("xmlenc", xs @ _*) => MergeStrategy.last

  // Individual resource files.
  case "about.html" => MergeStrategy.rename
  case "META-INF/ECLIPSEF.RSA" => MergeStrategy.last
  case "META-INF/mailcap" => MergeStrategy.last
  case "META-INF/mimetypes.default" => MergeStrategy.last
  case "plugin.properties" => MergeStrategy.last
  case "git.properties" => MergeStrategy.last
  case "plugin.xml" => MergeStrategy.last
  case "log4j.properties" => MergeStrategy.last
  case "parquet.thrift" => MergeStrategy.last
  case "codegen/config.fmpp" => MergeStrategy.last

  // Needed only to sbt assembly etcd coming with libraries
  case PathList("META-INF", "io.netty.versions.properties", xs @ _*) => MergeStrategy.last

  // Needed only to sbt assembly mastria-etcd4s coming with libraries
  case PathList("scala", "collection", "mutable", xs @ _*) => MergeStrategy.first
  case PathList("scala", "util", xs @ _*) => MergeStrategy.first
  case PathList("library.properties", xs @ _*) => MergeStrategy.first
  case PathList("logback.xml", xs @ _*) => MergeStrategy.last

  // Anything not matched above falls back to the previously configured strategy.
  case x =>
    val oldStrategy = (assemblyMergeStrategy in assembly).value
    oldStrategy(x)
  //case PathList("MANIFEST.MF",xs @ _ *) => MergeStrategy.first
  // case _ => MergeStrategy.first
}
When I use the Typesafe LazyLogging trait, the process fails no matter which combination of arguments I try.
I would like to see a full build.sbt example so that I can find my mistake.