1

I want to be able to filter on a date just like you would in normal SQL. Is that possible? I'm running into an issue on how to convert the string from the text file into a date.

import org.apache.spark._
import org.apache.spark.SparkContext._
import org.apache.spark.sql._
import org.apache.log4j._
import java.text._
//import java.util.Date
import java.sql.Date


/**
 * Loads the bike-share station and status CSV files into typed Spark Datasets
 * and prints the stations' installation dates.
 *
 * Relies on the file-level imports, in particular `java.sql.Date` (not
 * `java.util.Date`) and `java.text._`.
 */
object BayAreaBikeAnalysis {

  /** One parsed line of `Data/station.csv`; `installationDate` is a `java.sql.Date`. */
  case class Station(ID:Int, name:String, lat:Double, longitude:Double, dockCount:Int, city:String, installationDate:Date)

  /** One parsed line of `Data/status.csv`; `time` is kept as the raw text from the file. */
  case class Status(station_id:Int, bikesAvailable:Int, docksAvailable:Int, time:String)

  /**
   * Formatter for the installation-date column (`yyyy-MM-dd`).
   *
   * Declared as a `def` so every access gets its own instance: `SimpleDateFormat`
   * is not synchronized, and `extractStations` is invoked concurrently from Spark
   * task threads when running with `local[*]`.
   */
  def dateFormat: SimpleDateFormat = new SimpleDateFormat("yyyy-MM-dd")

  /**
   * Parses one comma-separated station line into a [[Station]].
   *
   * @param line raw CSV line with at least seven fields
   *             (id, name, lat, long, dock count, city, installation date)
   * @return the parsed station
   * @throws NumberFormatException          if a numeric field is malformed
   * @throws java.text.ParseException       if the date field is not `yyyy-MM-dd`
   * @throws ArrayIndexOutOfBoundsException if the line has fewer than seven fields
   */
  def extractStations(line: String): Station = {
    // limit -1 keeps trailing empty fields so positional indexes stay stable
    val fields = line.split(",", -1)
    // SimpleDateFormat.parse returns java.util.Date; the case class needs
    // java.sql.Date, so convert through the epoch-millisecond value.
    val installed = new Date(dateFormat.parse(fields(6)).getTime)
    Station(
      fields(0).toInt,
      fields(1),
      fields(2).toDouble,
      fields(3).toDouble,
      fields(4).toInt,
      fields(5),
      installed)
  }

  /**
   * Parses one comma-separated status line into a [[Status]].
   *
   * @param line raw CSV line with at least four fields
   *             (station id, bikes available, docks available, time)
   * @return the parsed status
   * @throws NumberFormatException          if a numeric field is malformed
   * @throws ArrayIndexOutOfBoundsException if the line has fewer than four fields
   */
  def extractStatus(line: String): Status = {
    val fields = line.split(",", -1)
    Status(fields(0).toInt, fields(1).toInt, fields(2).toInt, fields(3))
  }

  /** Entry point: builds a local Spark session, loads both files and shows the installation dates. */
  def main(args: Array[String]): Unit = {

    // Set the log level to only print errors
    //Logger.getLogger("org").setLevel(Level.ERROR)

    // Use new SparkSession interface in Spark 2.0
    val spark = SparkSession
      .builder
      .appName("BayAreaBikeAnalysis")
      .master("local[*]")
      .config("spark.sql.warehouse.dir", "file:///C:/temp")
      .getOrCreate()

    // Stop the session even when loading or parsing fails.
    try {
      //Load files into data sets
      import spark.implicits._
      val stationLines = spark.sparkContext.textFile("Data/station.csv")
      val stations = stationLines.map(extractStations).toDS().cache()

      val statusLines = spark.sparkContext.textFile("Data/status.csv")
      val statuses = statusLines.map(extractStatus).toDS().cache()

      //people.select("name").show()
      stations.select("installationDate").show()
    } finally {
      spark.stop()
    }
  }

}

Obviously fields(6).toDate() doesn't compile but I'm not sure what to use.

Bob Wakefield
  • 3,739
  • 4
  • 20
  • 30

3 Answers

1

I think this post is what you are looking for.

Also here you'll find a good tutorial for string parse to date.

Hope this helps!

1

The following are ways you can convert a string to a date in Scala.

(1) Using java.util.Date:

// Formatter for ISO-style day strings; parse returns a java.util.Date.
val dayFormat: SimpleDateFormat = new SimpleDateFormat("yyyy-MM-dd")
dayFormat.parse("2017-09-28")

(2) Using Joda-Time's DateTime:

// DateTime.parse(String) uses the ISO-8601 parser, so the text must be year-first
// (yyyy-MM-dd). For another layout, pass an explicit formatter, e.g.
// DateTime.parse("09-28-2017", DateTimeFormat.forPattern("MM-dd-yyyy")).
DateTime.parse("2017-09-28")
Amitabh Ranjan
  • 1,500
  • 3
  • 23
  • 39
1

Here is a helper function that takes a string representing a date and converts it into a Timestamp:

import java.sql.Timestamp
import java.util.TimeZone
import java.text.{DateFormat, SimpleDateFormat}

/**
 * Converts a `yyyy-MM-dd'T'HH:mm:ss` string, interpreted as UTC, into a
 * `java.sql.Timestamp`.
 *
 * @param timeStr the text to parse, e.g. `"2017-09-28T10:15:30"`
 * @return the parsed instant, or the Unix epoch (`new Timestamp(0L)`) when
 *         `timeStr` cannot be parsed
 */
def getTimeStamp(timeStr: String): Timestamp = {
  import scala.util.Try

  // A fresh formatter per call: SimpleDateFormat instances are not thread-safe.
  val dateFormat: DateFormat = new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss")
  dateFormat.setTimeZone(TimeZone.getTimeZone("UTC"))

  // The fallback is built from epoch milliseconds. Timestamp.valueOf only accepts
  // "yyyy-[m]m-[d]d hh:mm:ss[.f...]", so the previous fallback literal
  // ("19700101'T'000000") threw IllegalArgumentException instead of returning the epoch.
  Try(new Timestamp(dateFormat.parse(timeStr).getTime))
    .getOrElse(new Timestamp(0L))
}

Obviously, you will need to change the pattern "yyyy-MM-dd'T'HH:mm:ss" to whatever format your date strings actually use.

Hope this helps.

geo
  • 516
  • 5
  • 12