0

How can I return sub class objects in a Dataset of parent class?. Below code compiles but last line fails at runtime with " scala.ScalaReflectionException: is not a term". Any help is highly appreciated.

Case classes Apple and Orange extends Fruit trait. I am trying to return objects of Apple and Orange in a Fruit ref.

import org.apache.spark.sql._

object Test {

  case class Item(name: String, itemType: Int, count: Long)

  trait Fruit extends Product{
    def name: String
    def count: Long
  }

  case class Apple(name: String, count: Long) extends Fruit

  case class Orange(name: String, count: Long) extends Fruit

  def main(args: Array[String]): Unit = {

    val spark = SparkSession.builder
      .master("local[2]")
      .getOrCreate()

    import spark.implicits._

    val ds = Seq(("apple", 1, 1), ("orange", 2, 1))
      .toDF("name", "itemType", "count").as[Item]

    ds.map(createFruits).show
  }

  def createFruits(item: Item): Fruit ={

    item.itemType match{
      case 1 => Apple(item.name, item.count)
      case 2 => Orange(item.name, item.count)
    }
  }
}
Bijith Kumar
  • 181
  • 7
  • Thanks @ user8371915. Does that mean the only solution is to have separate datasets for Apple and Orange? – Bijith Kumar May 24 '18 at 15:49
  • 1
    Or use Kryo / Java serialization (https://stackoverflow.com/q/36648128/8371915, but you probably don't want that) or UDT (https://stackoverflow.com/q/32440461/8371915, but it is private and doesn't integrate well with Dataset API). – Alper t. Turker May 24 '18 at 16:53

0 Answers0