I am trying to apply different logic depending on the element type of a Spark Dataset. Depending on the case class that is passed to doWork (Customer or Worker), I have to apply a different type of aggregation.
How can I do that?
import org.apache.spark.sql.{Dataset, SparkSession}

object SparkSql extends App {

  val spark = SparkSession
    .builder()
    .appName("Simple app")
    .config("spark.master", "local")
    .getOrCreate()

  // the session has to exist before its implicits (e.g. toDS) can be imported
  import spark.implicits._

  sealed trait Person {
    def name: String
  }

  final case class Customer(override val name: String, email: String) extends Person

  final case class Worker(override val name: String, id: Int, skills: Array[String]) extends Person

  val workers: Dataset[Worker] = Seq(
    Worker("Bob", id = 1, skills = Array("communication", "teamwork")),
    Worker("Sam", id = 1, skills = Array("self-motivation"))
  ).toDS

  def doWork(persons: Dataset[Person]): Unit = {
    persons match {
      case ... // Dataset[Customer] ... do something
      case ... // Dataset[Worker]  ... do something else
    }
  }
}
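
For context, the direction I have been considering is to dispatch on the element type rather than on the Dataset value itself, for example through a ClassTag type parameter. The sketch below is only an illustration of that idea, not something I have verified: doWorkTagged, its ClassTag bound, and the groupBy/count aggregations are placeholders I made up, and it would have to sit inside SparkSql after import spark.implicits._ so the encoders needed by as[Customer]/as[Worker] are in scope. Would something along these lines be the right approach, or is there a more idiomatic way?

  import scala.reflect.ClassTag

  // Sketch only: recover the element type from a ClassTag bound instead of
  // pattern matching on the Dataset value, which type erasure would defeat.
  def doWorkTagged[T <: Person](persons: Dataset[T])(implicit ct: ClassTag[T]): Unit =
    ct.runtimeClass match {
      case c if c == classOf[Customer] =>
        persons.as[Customer].groupBy("email").count().show() // placeholder aggregation
      case c if c == classOf[Worker] =>
        persons.as[Worker].groupBy("id").count().show()      // placeholder aggregation
      case other =>
        throw new IllegalArgumentException(s"Unsupported Person type: $other")
    }

  doWorkTagged(workers) // T is inferred as Worker, so the Worker branch would run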