I'm trying to create a dataset with some geo data using spark and esri. If Foo
only have Point
field, it'll work but if I add some other fields beyond a Point
, I get ArrayIndexOutOfBoundsException.
import com.esri.core.geometry.Point
import org.apache.spark.sql.{Encoder, Encoders, SQLContext}
import org.apache.spark.{SparkConf, SparkContext}
object Main {
case class Foo(position: Point, name: String)
object MyEncoders {
implicit def PointEncoder: Encoder[Point] = Encoders.kryo[Point]
implicit def FooEncoder: Encoder[Foo] = Encoders.kryo[Foo]
}
def main(args: Array[String]): Unit = {
val sc = new SparkContext(new SparkConf().setAppName("app").setMaster("local"))
val sqlContext = new SQLContext(sc)
import MyEncoders.{FooEncoder, PointEncoder}
import sqlContext.implicits._
Seq(new Foo(new Point(0, 0), "bar")).toDS.show
}
}
Exception in thread "main" java.lang.ArrayIndexOutOfBoundsException: 1 at org.apache.spark.sql.execution.Queryable$$anonfun$formatString$1$$anonfun$apply$2.apply(Queryable.scala:71) at org.apache.spark.sql.execution.Queryable$$anonfun$formatString$1$$anonfun$apply$2.apply(Queryable.scala:70) at scala.collection.TraversableLike$WithFilter$$anonfun$foreach$1.apply(TraversableLike.scala:772) at scala.collection.mutable.ResizableArray$class.foreach(ResizableArray.scala:59) at scala.collection.mutable.ArrayBuffer.foreach(ArrayBuffer.scala:47) at scala.collection.TraversableLike$WithFilter.foreach(TraversableLike.scala:771) at org.apache.spark.sql.execution.Queryable$$anonfun$formatString$1.apply(Queryable.scala:70) at org.apache.spark.sql.execution.Queryable$$anonfun$formatString$1.apply(Queryable.scala:69) at scala.collection.mutable.ArraySeq.foreach(ArraySeq.scala:73) at org.apache.spark.sql.execution.Queryable$class.formatString(Queryable.scala:69) at org.apache.spark.sql.Dataset.formatString(Dataset.scala:65) at org.apache.spark.sql.Dataset.showString(Dataset.scala:263) at org.apache.spark.sql.Dataset.show(Dataset.scala:230) at org.apache.spark.sql.Dataset.show(Dataset.scala:193) at org.apache.spark.sql.Dataset.show(Dataset.scala:201) at Main$.main(Main.scala:24) at Main.main(Main.scala)