9

I am new to Spark and Spark datasets. I was trying to declare an empty dataset using emptyDataset but it was asking for org.apache.spark.sql.Encoder. The data type I am using for the dataset is an object of case class Tp(s1: String, s2: String, s3: String).

Hassan Ali
  • 205
  • 1
  • 4
  • 12

3 Answers

4

All you need is to import implicit encoders from SparkSession instance before you create empty Dataset: import spark.implicits._ See full example here

Vitalii Kotliarenko
  • 2,947
  • 18
  • 26
3

EmptyDataFrame

package com.examples.sparksql

import org.apache.spark.SparkConf
import org.apache.spark.SparkContext

/**
 * Demonstrates creating an empty DataFrame with an explicit schema using the
 * legacy SQLContext API (Spark 1.x style), running a few operations on it,
 * and querying it through a temporary table.
 */
object EmptyDataFrame {

  // NOTE: ": Unit =" replaces the deprecated procedure syntax "def main(...){".
  def main(args: Array[String]): Unit = {

    // Create Spark configuration: app name + local master for standalone runs.
    val sparkConf = new SparkConf().setAppName("Empty-Data-Frame").setMaster("local")

    // Create Spark Context - sc
    val sc = new SparkContext(sparkConf)

    // Create Sql Context (legacy entry point; SparkSession supersedes it in Spark 2.x)
    val sqlContext = new org.apache.spark.sql.SQLContext(sc)

    // Import Sql Implicit conversions (needed for toDF/toDS and Encoders)
    import sqlContext.implicits._
    import org.apache.spark.sql.Row
    import org.apache.spark.sql.types.{StructType, StructField, StringType}

    try {
      // Build the schema programmatically: three nullable string columns.
      val schemaString = "name,id,dept"
      val schema = StructType(
        schemaString.split(",").map(fieldName => StructField(fieldName, StringType, nullable = true))
      )

      // Create an empty DataFrame from an empty RDD[Row] plus the explicit schema.
      val emptyDf = sqlContext.createDataFrame(sc.emptyRDD[Row], schema)

      // Some operations on the empty DataFrame: prints the header only, count is 0.
      emptyDf.show()
      println(emptyDf.count())

      // You can register a table on an empty DataFrame; it's an empty table though.
      // (registerTempTable is deprecated in Spark 2.x in favor of createOrReplaceTempView.)
      emptyDf.registerTempTable("empty_table")

      // Query the empty table: returns zero rows.
      val res = sqlContext.sql("select * from empty_table")
      res.show
    } finally {
      // Release cluster resources even if an operation above throws.
      sc.stop()
    }

  }

}
  • 4
  • Actually, I need to declare an empty dataset. – Hassan Ali Sep 19 '17 at 09:27
  • @HassanAli I tried something like var accumList: Dataset[Seq[( String,String, List[(Long,Double)], List[(Long,Double)], List[(Long,Double)] )]]= new Dataset() .. but it did not work. – user2458922 Jun 11 '19 at 13:25
1

Alternatively you can convert an empty list into a Dataset:

import sparkSession.implicits._

case class Employee(name: String, id: Int)

val ds: Dataset[Employee] = List.empty[Employee].toDS()
tjheslin1
  • 1,378
  • 6
  • 19
  • 36