2

I'm having a problem serializing my class so that it can be shipped to the workers in the map step. I'm using json4s on Spark 1.4.

I found the following related questions: "NotSerializableException with json4s on Spark" and "Spark non-serializable exception when parsing JSON with json4s".

I tried the suggestions from the answers, but I still can't get my code to run. Any help would be much appreciated.

import org.json4s._
import org.json4s.native.JsonMethods._
import org.json4s.native.Serialization

// Define the case class for URL items: a URL string plus a list of content strings.
// This is the target type of `json.extract[Url]` in get_url.
case class Url(url: String, content: List[String])

// Implicit json4s Formats, built with short type hints for the Url class.
// It is picked up implicitly by json4s calls in this scope (e.g. `extract` in get_url).
// NOTE(review): the serialization stack in the error message names a field called
// `formats` as the object that is not serializable — presumably this val; confirm.
implicit val formats = Serialization.formats(ShortTypeHints(List(classOf[Url])))
// Function that parses one line of JSON text and extracts it into a Url.
// `extract` needs an implicit Formats; it is resolved here, where the function is
// defined, so this function value refers to the `formats` val declared at the outer level.
val get_url = (line: String) => {
  // Parse the raw string into a json4s AST.
  val json = parse(line)
  // Convert the AST into the Url case class.
  json.extract[Url]
}    

// First attempt: apply get_url to every element of url_contents (defined elsewhere).
// NOTE(review): url_contents is presumably an RDD of JSON strings, since the error
// is raised from RDD.map — confirm.
val contents = url_contents.map(line => get_url(line))

There is no difference in the result when I define formats inside the map task:

// Second attempt: declare the implicit formats inside the function passed to map.
// NOTE(review): get_url is defined outside this closure, so its `extract` call was
// already bound to the implicit in scope at its own definition; this local `formats`
// does not appear to be used by get_url — verify.
val contents = url_contents.map(line =>
{    
    implicit val formats = Serialization.formats(ShortTypeHints(List(classOf[Url]))); 
    get_url(line)
})

This is the error message I'm getting:

org.apache.spark.SparkException: Task not serializable
    at org.apache.spark.util.ClosureCleaner$.ensureSerializable(ClosureCleaner.scala:315)
    at org.apache.spark.util.ClosureCleaner$.org$apache$spark$util$ClosureCleaner$$clean(ClosureCleaner.scala:305)
    at org.apache.spark.util.ClosureCleaner$.clean(ClosureCleaner.scala:132)
    at org.apache.spark.SparkContext.clean(SparkContext.scala:1891)
    at org.apache.spark.rdd.RDD$$anonfun$map$1.apply(RDD.scala:294)
    at org.apache.spark.rdd.RDD$$anonfun$map$1.apply(RDD.scala:293)
    at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:147)
    at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:108)
    at org.apache.spark.rdd.RDD.withScope(RDD.scala:286)
    at org.apache.spark.rdd.RDD.map(RDD.scala:293)
Caused by: java.io.NotSerializableException: org.json4s.DefaultFormats$
Serialization stack:
    - object not serializable (class: org.json4s.DefaultFormats$, value: org.json4s.DefaultFormats$@39f1b590)
    - field (class: line86766ced366e45e095dc4be2c0d50ff812.$read$$iwC$$iwC$$iwC$$iwC, name: formats, type: class org.json4s.DefaultFormats$)
    - object (class line86766ced366e45e095dc4be2c0d50ff812.$read$$iwC$$iwC$$iwC$$iwC, line86766ced366e45e095dc4be2c0d50ff812.$read$$iwC$$iwC$$iwC$$iwC@244790d)
    - field (class: line86766ced366e45e095dc4be2c0d50ff812.$read$$iwC$$iwC$$iwC, name: $iw, type: class line86766ced366e45e095dc4be2c0d50ff812.$read$$iwC$$iwC$$iwC$$iwC)
    - object (class line86766ced366e45e095dc4be2c0d50ff812.$read$$iwC$$iwC$$iwC, line86766ced366e45e095dc4be2c0d50ff812.$read$$iwC$$iwC$$iwC@3d138764)
    - field (class: line86766ced366e45e095dc4be2c0d50ff812.$read$$iwC$$iwC, name: $iw, type: class line86766ced366e45e095dc4be2c0d50ff812.$read$$iwC$$iwC$$iwC)
    - object (class line86766ced366e45e095dc4be2c0d50ff812.$read$$iwC$$iwC, line86766ced366e45e095dc4be2c0d50ff812.$read$$iwC$$iwC@3835c32)
    - field (class: line86766ced366e45e095dc4be2c0d50ff812.$read$$iwC, name: $iw, type: class line86766ced366e45e095dc4be2c0d50ff812.$read$$iwC$$iwC)
    - object (class line86766ced366e45e095dc4be2c0d50ff812.$read$$iwC, line86766ced366e45e095dc4be2c0d50ff812.$read$$iwC@87bad40)
    - field (class: line86766ced366e45e095dc4be2c0d50ff812.$read, name: $iw, type: class line86766ced366e45e095dc4be2c0d50ff812.$read$$iwC)
    - object (class line86766ced366e45e095dc4be2c0d50ff812.$read, line86766ced366e45e095dc4be2c0d50ff812.$read@24a1c141)
    - field (class: line86766ced366e45e095dc4be2c0d50ff825.$read$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC, name: $VAL77, type: class line86766ced366e45e095dc4be2c0d50ff812.$read)
    - object (class line86766ced366e45e095dc4be2c0d50ff825.$read$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC, line86766ced366e45e095dc4be2c0d50ff825.$read$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC@4b3c9766)
    - field (class: line86766ced366e45e095dc4be2c0d50ff825.$read$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC, name: $iw, type: class line86766ced366e45e095dc4be2c0d50ff825.$read$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC)
    - object (class line86766ced366e45e095dc4be2c0d50ff825.$read$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC, line86766ced366e45e095dc4be2c0d50ff825.$read$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC@7b9f33a3)
    - field (class: line86766ced366e45e095dc4be2c0d50ff825.$read$$iwC$$iwC$$iwC$$iwC$$iwC, name: $iw, type: class line86766ced366e45e095dc4be2c0d50ff825.$read$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC)
    - object (class line86766ced366e45e095dc4be2c0d50ff825.$read$$iwC$$iwC$$iwC$$iwC$$iwC, line86766ced366e45e095dc4be2c0d50ff825.$read$$iwC$$iwC$$iwC$$iwC$$iwC@220e530b)
    - field (class: line86766ced366e45e095dc4be2c0d50ff825.$read$$iwC$$iwC$$iwC$$iwC, name: $iw, type: class line86766ced366e45e095dc4be2c0d50ff825.$read$$iwC$$iwC$$iwC$$iwC$$iwC)
    - object (class line86766ced366e45e095dc4be2c0d50ff825.$read$$iwC$$iwC$$iwC$$iwC, line86766ced366e45e095dc4be2c0d50ff825.$read$$iwC$$iwC$$iwC$$iwC@3988fd25)
    - field (class: line86766ced366e45e095dc4be2c0d50ff825.$read$$iwC$$iwC$$iwC, name: $iw, type: class line86766ced366e45e095dc4be2c0d50ff825.$read$$iwC$$iwC$$iwC$$iwC)
    - object (class line86766ced366e45e095dc4be2c0d50ff825.$read$$iwC$$iwC$$iwC, line86766ced366e45e095dc4be2c0d50ff825.$read$$iwC$$iwC$$iwC@1ab99b4)
    - field (class: line86766ced366e45e095dc4be2c0d50ff825.$read$$iwC$$iwC, name: $iw, type: class line86766ced366e45e095dc4be2c0d50ff825.$read$$iwC$$iwC$$iwC)
    - object (class line86766ced366e45e095dc4be2c0d50ff825.$read$$iwC$$iwC, line86766ced366e45e095dc4be2c0d50ff825.$read$$iwC$$iwC@3895db40)
    - field (class: line86766ced366e45e095dc4be2c0d50ff825.$read$$iwC, name: $iw, type: class line86766ced366e45e095dc4be2c0d50ff825.$read$$iwC$$iwC)
    - object (class line86766ced366e45e095dc4be2c0d50ff825.$read$$iwC, line86766ced366e45e095dc4be2c0d50ff825.$read$$iwC@403bb82c)
    - field (class: line86766ced366e45e095dc4be2c0d50ff825.$read, name: $iw, type: class line86766ced366e45e095dc4be2c0d50ff825.$read$$iwC)
    - object (class line86766ced366e45e095dc4be2c0d50ff825.$read, line86766ced366e45e095dc4be2c0d50ff825.$read@4c3ec106)
    - field (class: line86766ced366e45e095dc4be2c0d50ff830.$read$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC, name: $VAL123, type: class line86766ced366e45e095dc4be2c0d50ff825.$read)
    - object (class line86766ced366e45e095dc4be2c0d50ff830.$read$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC, line86766ced366e45e095dc4be2c0d50ff830.$read$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC@22f99011)
    - field (class: line86766ced366e45e095dc4be2c0d50ff830.$read$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC, name: $outer, type: class line86766ced366e45e095dc4be2c0d50ff830.$read$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC)
    - object (class line86766ced366e45e095dc4be2c0d50ff830.$read$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC, line86766ced366e45e095dc4be2c0d50ff830.$read$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC@1145ee7)
    - field (class: line86766ced366e45e095dc4be2c0d50ff830.$read$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$anonfun$1, name: $outer, type: class line86766ced366e45e095dc4be2c0d50ff830.$read$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC)
    - object (class line86766ced366e45e095dc4be2c0d50ff830.$read$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$anonfun$1, <function1>)
    at org.apache.spark.serializer.SerializationDebugger$.improveException(SerializationDebugger.scala:40)
    at org.apache.spark.serializer.JavaSerializationStream.writeObject(JavaSerializer.scala:47)
    at org.apache.spark.serializer.JavaSerializerInstance.serialize(JavaSerializer.scala:81)
    at org.apache.spark.util.ClosureCleaner$.ensureSerializable(ClosureCleaner.scala:312)
    at org.apache.spark.util.ClosureCleaner$.org$apache$spark$util$ClosureCleaner$$clean(ClosureCleaner.scala:305)
    at org.apache.spark.util.ClosureCleaner$.clean(ClosureCleaner.scala:132)
    at org.apache.spark.SparkContext.clean(SparkContext.scala:1891)
    at org.apache.spark.rdd.RDD$$anonfun$map$1.apply(RDD.scala:294)
    at org.apache.spark.rdd.RDD$$anonfun$map$1.apply(RDD.scala:293)
    at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:147)
    at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:108)
    at org.apache.spark.rdd.RDD.withScope(RDD.scala:286)
    at org.apache.spark.rdd.RDD.map(RDD.scala:293)

Any ideas or suggestions on how I can serialize my Url class so that it can be distributed to the workers?

Thank you and best regards, Calvin

Community
  • 1
  • 1
cdudek
  • 123
  • 2
  • 10

0 Answers0