2

I'm trying to run a spark job (written in Java with Spring boot) in dataproc and I get the following error :

...
22/07/31 14:44:07 WARN org.apache.spark.scheduler.TaskSetManager: Lost task 0.0 in stage 0.0 (TID 0) (cluster123-m.europe-west2-b.c.crypto-trading-automate-357313.internal executor 1): java.lang.ClassCastException: cannot assign instance of java.lang.invoke.SerializedLambda to field org.apache.spark.rdd.MapPartitionsRDD.f of type scala.Function3 in instance of org.apache.spark.rdd.MapPartitionsRDD
    at java.io.ObjectStreamClass$FieldReflector.setObjFieldValues(ObjectStreamClass.java:2301)
    at java.io.ObjectStreamClass.setObjFieldValues(ObjectStreamClass.java:1431)
    at java.io.ObjectInputStream.defaultReadFields(ObjectInputStream.java:2437)
    at java.io.ObjectInputStream.readSerialData(ObjectInputStream.java:2355)
    at java.io.ObjectInputStream.readOrdinaryObject(ObjectInputStream.java:2213)
    at java.io.ObjectInputStream.readObject0(ObjectInputStream.java:1669)
    at java.io.ObjectInputStream.defaultReadFields(ObjectInputStream.java:2431)
    at java.io.ObjectInputStream.readSerialData(ObjectInputStream.java:2355)
    at java.io.ObjectInputStream.readOrdinaryObject(ObjectInputStream.java:2213)
    at java.io.ObjectInputStream.readObject0(ObjectInputStream.java:1669)
    at java.io.ObjectInputStream.readObject(ObjectInputStream.java:503)
    at java.io.ObjectInputStream.readObject(ObjectInputStream.java:461)
    at scala.collection.immutable.List$SerializationProxy.readObject(List.scala:527)
    at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
    at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
    at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
    at java.lang.reflect.Method.invoke(Method.java:498)
    at java.io.ObjectStreamClass.invokeReadObject(ObjectStreamClass.java:1184)
    at java.io.ObjectInputStream.readSerialData(ObjectInputStream.java:2322)
    at java.io.ObjectInputStream.readOrdinaryObject(ObjectInputStream.java:2213)
    at java.io.ObjectInputStream.readObject0(ObjectInputStream.java:1669)
    at java.io.ObjectInputStream.defaultReadFields(ObjectInputStream.java:2431)
    at java.io.ObjectInputStream.readSerialData(ObjectInputStream.java:2355)
    at java.io.ObjectInputStream.readOrdinaryObject(ObjectInputStream.java:2213)
    at java.io.ObjectInputStream.readObject0(ObjectInputStream.java:1669)
    at java.io.ObjectInputStream.defaultReadFields(ObjectInputStream.java:2431)
    at java.io.ObjectInputStream.readSerialData(ObjectInputStream.java:2355)
    at java.io.ObjectInputStream.readOrdinaryObject(ObjectInputStream.java:2213)
    at java.io.ObjectInputStream.readObject0(ObjectInputStream.java:1669)
    at java.io.ObjectInputStream.readObject(ObjectInputStream.java:503)
    at java.io.ObjectInputStream.readObject(ObjectInputStream.java:461)
    at scala.collection.immutable.List$SerializationProxy.readObject(List.scala:527)
    at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
    at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
    at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
    at java.lang.reflect.Method.invoke(Method.java:498)
    at java.io.ObjectStreamClass.invokeReadObject(ObjectStreamClass.java:1184)
    at java.io.ObjectInputStream.readSerialData(ObjectInputStream.java:2322)
    at java.io.ObjectInputStream.readOrdinaryObject(ObjectInputStream.java:2213)
    at java.io.ObjectInputStream.readObject0(ObjectInputStream.java:1669)
    at java.io.ObjectInputStream.defaultReadFields(ObjectInputStream.java:2431)
    at java.io.ObjectInputStream.readSerialData(ObjectInputStream.java:2355)
    at java.io.ObjectInputStream.readOrdinaryObject(ObjectInputStream.java:2213)
    at java.io.ObjectInputStream.readObject0(ObjectInputStream.java:1669)
    at java.io.ObjectInputStream.defaultReadFields(ObjectInputStream.java:2431)
    at java.io.ObjectInputStream.readSerialData(ObjectInputStream.java:2355)
    at java.io.ObjectInputStream.readOrdinaryObject(ObjectInputStream.java:2213)
    at java.io.ObjectInputStream.readObject0(ObjectInputStream.java:1669)
    at java.io.ObjectInputStream.readObject(ObjectInputStream.java:503)
    at java.io.ObjectInputStream.readObject(ObjectInputStream.java:461)
    at scala.collection.immutable.List$SerializationProxy.readObject(List.scala:527)
    at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
    at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
    at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
    at java.lang.reflect.Method.invoke(Method.java:498)
    at java.io.ObjectStreamClass.invokeReadObject(ObjectStreamClass.java:1184)
    at java.io.ObjectInputStream.readSerialData(ObjectInputStream.java:2322)
    at java.io.ObjectInputStream.readOrdinaryObject(ObjectInputStream.java:2213)
    at java.io.ObjectInputStream.readObject0(ObjectInputStream.java:1669)
    at java.io.ObjectInputStream.defaultReadFields(ObjectInputStream.java:2431)
    at java.io.ObjectInputStream.readSerialData(ObjectInputStream.java:2355)
    at java.io.ObjectInputStream.readOrdinaryObject(ObjectInputStream.java:2213)
    at java.io.ObjectInputStream.readObject0(ObjectInputStream.java:1669)
    at java.io.ObjectInputStream.defaultReadFields(ObjectInputStream.java:2431)
    at java.io.ObjectInputStream.readSerialData(ObjectInputStream.java:2355)
    at java.io.ObjectInputStream.readOrdinaryObject(ObjectInputStream.java:2213)
    at java.io.ObjectInputStream.readObject0(ObjectInputStream.java:1669)
    at java.io.ObjectInputStream.readObject(ObjectInputStream.java:503)
    at java.io.ObjectInputStream.readObject(ObjectInputStream.java:461)
    at org.apache.spark.serializer.JavaDeserializationStream.readObject(JavaSerializer.scala:76)
    at org.apache.spark.serializer.JavaSerializerInstance.deserialize(JavaSerializer.scala:115)
    at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:83)
    at org.apache.spark.scheduler.Task.run(Task.scala:131)
    at org.apache.spark.executor.Executor$TaskRunner.$anonfun$run$3(Executor.scala:498)
    at org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1439)
    at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:501)
    at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)
    at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
    at java.lang.Thread.run(Thread.java:750)

22/07/31 14:44:08 ERROR org.apache.spark.scheduler.TaskSetManager: Task 0 in stage 0.0 failed 4 times; aborting job
Exception in thread "main" org.apache.spark.SparkException: Job aborted due to stage failure: Task 0 in stage 0.0 failed 4 times, most recent failure: Lost task 0.3 in stage 0.0 (TID 3) (cluster123-m.europe-west2-b.c.crypto-trading-automate-357313.internal executor 2): java.lang.ClassCastException: cannot assign instance of java.lang.invoke.SerializedLambda to field org.apache.spark.rdd.MapPartitionsRDD.f of type scala.Function3 in instance of org.apache.spark.rdd.MapPartitionsRDD
    at java.io.ObjectStreamClass$FieldReflector.setObjFieldValues(ObjectStreamClass.java:2301)
    at java.io.ObjectStreamClass.setObjFieldValues(ObjectStreamClass.java:1431)
    at java.io.ObjectInputStream.defaultReadFields(ObjectInputStream.java:2437)
    at java.io.ObjectInputStream.readSerialData(ObjectInputStream.java:2355)
    at java.io.ObjectInputStream.readOrdinaryObject(ObjectInputStream.java:2213)
    at java.io.ObjectInputStream.readObject0(ObjectInputStream.java:1669)
    at java.io.ObjectInputStream.defaultReadFields(ObjectInputStream.java:2431)
    at java.io.ObjectInputStream.readSerialData(ObjectInputStream.java:2355)
    at java.io.ObjectInputStream.readOrdinaryObject(ObjectInputStream.java:2213)
    at java.io.ObjectInputStream.readObject0(ObjectInputStream.java:1669)
    at java.io.ObjectInputStream.readObject(ObjectInputStream.java:503)
    at java.io.ObjectInputStream.readObject(ObjectInputStream.java:461)
    at scala.collection.immutable.List$SerializationProxy.readObject(List.scala:527)
    at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
    at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
    at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
    at java.lang.reflect.Method.invoke(Method.java:498)
    at java.io.ObjectStreamClass.invokeReadObject(ObjectStreamClass.java:1184)
    at java.io.ObjectInputStream.readSerialData(ObjectInputStream.java:2322)
    at java.io.ObjectInputStream.readOrdinaryObject(ObjectInputStream.java:2213)
    at java.io.ObjectInputStream.readObject0(ObjectInputStream.java:1669)
    at java.io.ObjectInputStream.defaultReadFields(ObjectInputStream.java:2431)
    at java.io.ObjectInputStream.readSerialData(ObjectInputStream.java:2355)
    at java.io.ObjectInputStream.readOrdinaryObject(ObjectInputStream.java:2213)
    at java.io.ObjectInputStream.readObject0(ObjectInputStream.java:1669)
    at java.io.ObjectInputStream.defaultReadFields(ObjectInputStream.java:2431)
    at java.io.ObjectInputStream.readSerialData(ObjectInputStream.java:2355)
    at java.io.ObjectInputStream.readOrdinaryObject(ObjectInputStream.java:2213)
    at java.io.ObjectInputStream.readObject0(ObjectInputStream.java:1669)
    at java.io.ObjectInputStream.readObject(ObjectInputStream.java:503)
    at java.io.ObjectInputStream.readObject(ObjectInputStream.java:461)
    at scala.collection.immutable.List$SerializationProxy.readObject(List.scala:527)
    at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
    at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
    at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
    at java.lang.reflect.Method.invoke(Method.java:498)
    at java.io.ObjectStreamClass.invokeReadObject(ObjectStreamClass.java:1184)
    at java.io.ObjectInputStream.readSerialData(ObjectInputStream.java:2322)
    at java.io.ObjectInputStream.readOrdinaryObject(ObjectInputStream.java:2213)
    at java.io.ObjectInputStream.readObject0(ObjectInputStream.java:1669)
    at java.io.ObjectInputStream.defaultReadFields(ObjectInputStream.java:2431)
    at java.io.ObjectInputStream.readSerialData(ObjectInputStream.java:2355)
    at java.io.ObjectInputStream.readOrdinaryObject(ObjectInputStream.java:2213)
    at java.io.ObjectInputStream.readObject0(ObjectInputStream.java:1669)
    at java.io.ObjectInputStream.defaultReadFields(ObjectInputStream.java:2431)
    at java.io.ObjectInputStream.readSerialData(ObjectInputStream.java:2355)
    at java.io.ObjectInputStream.readOrdinaryObject(ObjectInputStream.java:2213)
    at java.io.ObjectInputStream.readObject0(ObjectInputStream.java:1669)
    at java.io.ObjectInputStream.readObject(ObjectInputStream.java:503)
    at java.io.ObjectInputStream.readObject(ObjectInputStream.java:461)
    at scala.collection.immutable.List$SerializationProxy.readObject(List.scala:527)
    at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
    at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
    at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
    at java.lang.reflect.Method.invoke(Method.java:498)
    at java.io.ObjectStreamClass.invokeReadObject(ObjectStreamClass.java:1184)
    at java.io.ObjectInputStream.readSerialData(ObjectInputStream.java:2322)
    at java.io.ObjectInputStream.readOrdinaryObject(ObjectInputStream.java:2213)
    at java.io.ObjectInputStream.readObject0(ObjectInputStream.java:1669)
    at java.io.ObjectInputStream.defaultReadFields(ObjectInputStream.java:2431)
    at java.io.ObjectInputStream.readSerialData(ObjectInputStream.java:2355)
    at java.io.ObjectInputStream.readOrdinaryObject(ObjectInputStream.java:2213)
    at java.io.ObjectInputStream.readObject0(ObjectInputStream.java:1669)
    at java.io.ObjectInputStream.defaultReadFields(ObjectInputStream.java:2431)
    at java.io.ObjectInputStream.readSerialData(ObjectInputStream.java:2355)
    at java.io.ObjectInputStream.readOrdinaryObject(ObjectInputStream.java:2213)
    at java.io.ObjectInputStream.readObject0(ObjectInputStream.java:1669)
    at java.io.ObjectInputStream.readObject(ObjectInputStream.java:503)
    at java.io.ObjectInputStream.readObject(ObjectInputStream.java:461)
    at org.apache.spark.serializer.JavaDeserializationStream.readObject(JavaSerializer.scala:76)
    at org.apache.spark.serializer.JavaSerializerInstance.deserialize(JavaSerializer.scala:115)
    at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:83)
    at org.apache.spark.scheduler.Task.run(Task.scala:131)
    at org.apache.spark.executor.Executor$TaskRunner.$anonfun$run$3(Executor.scala:498)
    at org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1439)
    at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:501)
    at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)
    at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
    at java.lang.Thread.run(Thread.java:750)

Driver stacktrace:
    at org.apache.spark.scheduler.DAGScheduler.failJobAndIndependentStages(DAGScheduler.scala:2304)
    at org.apache.spark.scheduler.DAGScheduler.$anonfun$abortStage$2(DAGScheduler.scala:2253)
    at org.apache.spark.scheduler.DAGScheduler.$anonfun$abortStage$2$adapted(DAGScheduler.scala:2252)
    at scala.collection.mutable.ResizableArray.foreach(ResizableArray.scala:62)
    at scala.collection.mutable.ResizableArray.foreach$(ResizableArray.scala:55)
    at scala.collection.mutable.ArrayBuffer.foreach(ArrayBuffer.scala:49)
    at org.apache.spark.scheduler.DAGScheduler.abortStage(DAGScheduler.scala:2252)
    at org.apache.spark.scheduler.DAGScheduler.$anonfun$handleTaskSetFailed$1(DAGScheduler.scala:1124)
    at org.apache.spark.scheduler.DAGScheduler.$anonfun$handleTaskSetFailed$1$adapted(DAGScheduler.scala:1124)
    at scala.Option.foreach(Option.scala:407)
    at org.apache.spark.scheduler.DAGScheduler.handleTaskSetFailed(DAGScheduler.scala:1124)
    at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.doOnReceive(DAGScheduler.scala:2491)
    at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:2433)
    at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:2422)
    at org.apache.spark.util.EventLoop$$anon$1.run(EventLoop.scala:49)
    at org.apache.spark.scheduler.DAGScheduler.runJob(DAGScheduler.scala:902)
    at org.apache.spark.SparkContext.runJob(SparkContext.scala:2204)
    at org.apache.spark.SparkContext.runJob(SparkContext.scala:2225)
    at org.apache.spark.SparkContext.runJob(SparkContext.scala:2244)
    at org.apache.spark.sql.execution.SparkPlan.executeTake(SparkPlan.scala:472)
    at org.apache.spark.sql.execution.SparkPlan.executeTake(SparkPlan.scala:425)
    at org.apache.spark.sql.execution.CollectLimitExec.executeCollect(limit.scala:47)
    at org.apache.spark.sql.Dataset.collectFromPlan(Dataset.scala:3709)
    at org.apache.spark.sql.Dataset.$anonfun$head$1(Dataset.scala:2735)
    at org.apache.spark.sql.Dataset.$anonfun$withAction$1(Dataset.scala:3700)
    at org.apache.spark.sql.execution.SQLExecution$.$anonfun$withNewExecutionId$5(SQLExecution.scala:103)
    at org.apache.spark.sql.execution.SQLExecution$.withSQLConfPropagated(SQLExecution.scala:163)
    at org.apache.spark.sql.execution.SQLExecution$.$anonfun$withNewExecutionId$1(SQLExecution.scala:90)
    at org.apache.spark.sql.SparkSession.withActive(SparkSession.scala:775)
    at org.apache.spark.sql.execution.SQLExecution$.withNewExecutionId(SQLExecution.scala:64)
    at org.apache.spark.sql.Dataset.withAction(Dataset.scala:3698)
    at org.apache.spark.sql.Dataset.head(Dataset.scala:2735)
    at org.apache.spark.sql.Dataset.take(Dataset.scala:2942)
    at org.apache.spark.sql.Dataset.getRows(Dataset.scala:302)
    at org.apache.spark.sql.Dataset.showString(Dataset.scala:339)
    at org.apache.spark.sql.Dataset.show(Dataset.scala:826)
    at org.apache.spark.sql.Dataset.show(Dataset.scala:785)
    at org.apache.spark.sql.Dataset.show(Dataset.scala:794)
    at com.example.demo.MyJob.start(MyJob.java:30)
    at com.example.demo.DemoApplication.main(DemoApplication.java:17)
    at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
    at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
    at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
    at java.lang.reflect.Method.invoke(Method.java:498)
    at org.springframework.boot.loader.MainMethodRunner.run(MainMethodRunner.java:49)
    at org.springframework.boot.loader.Launcher.launch(Launcher.java:108)
    at org.springframework.boot.loader.Launcher.launch(Launcher.java:58)
    at org.springframework.boot.loader.JarLauncher.main(JarLauncher.java:65)
    at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
    at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
    at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
    at java.lang.reflect.Method.invoke(Method.java:498)
    at org.apache.spark.deploy.JavaMainApplication.start(SparkApplication.scala:52)
    at org.apache.spark.deploy.SparkSubmit.org$apache$spark$deploy$SparkSubmit$$runMain(SparkSubmit.scala:951)
    at org.apache.spark.deploy.SparkSubmit.doRunMain$1(SparkSubmit.scala:180)
    at org.apache.spark.deploy.SparkSubmit.submit(SparkSubmit.scala:203)
    at org.apache.spark.deploy.SparkSubmit.doSubmit(SparkSubmit.scala:90)
    at org.apache.spark.deploy.SparkSubmit$$anon$2.doSubmit(SparkSubmit.scala:1039)
    at org.apache.spark.deploy.SparkSubmit$.main(SparkSubmit.scala:1048)
    at org.apache.spark.deploy.SparkSubmit.main(SparkSubmit.scala)
Caused by: java.lang.ClassCastException: cannot assign instance of java.lang.invoke.SerializedLambda to field org.apache.spark.rdd.MapPartitionsRDD.f of type scala.Function3 in instance of org.apache.spark.rdd.MapPartitionsRDD
    at java.io.ObjectStreamClass$FieldReflector.setObjFieldValues(ObjectStreamClass.java:2301)
    at java.io.ObjectStreamClass.setObjFieldValues(ObjectStreamClass.java:1431)
    at java.io.ObjectInputStream.defaultReadFields(ObjectInputStream.java:2437)
    at java.io.ObjectInputStream.readSerialData(ObjectInputStream.java:2355)
    at java.io.ObjectInputStream.readOrdinaryObject(ObjectInputStream.java:2213)
    at java.io.ObjectInputStream.readObject0(ObjectInputStream.java:1669)
    at java.io.ObjectInputStream.defaultReadFields(ObjectInputStream.java:2431)
    at java.io.ObjectInputStream.readSerialData(ObjectInputStream.java:2355)
    at java.io.ObjectInputStream.readOrdinaryObject(ObjectInputStream.java:2213)
    at java.io.ObjectInputStream.readObject0(ObjectInputStream.java:1669)
    at java.io.ObjectInputStream.readObject(ObjectInputStream.java:503)
    at java.io.ObjectInputStream.readObject(ObjectInputStream.java:461)
    at scala.collection.immutable.List$SerializationProxy.readObject(List.scala:527)
    at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
    at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
    at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
    at java.lang.reflect.Method.invoke(Method.java:498)
    at java.io.ObjectStreamClass.invokeReadObject(ObjectStreamClass.java:1184)
    at java.io.ObjectInputStream.readSerialData(ObjectInputStream.java:2322)
    at java.io.ObjectInputStream.readOrdinaryObject(ObjectInputStream.java:2213)
    at java.io.ObjectInputStream.readObject0(ObjectInputStream.java:1669)
    at java.io.ObjectInputStream.defaultReadFields(ObjectInputStream.java:2431)
    at java.io.ObjectInputStream.readSerialData(ObjectInputStream.java:2355)
    at java.io.ObjectInputStream.readOrdinaryObject(ObjectInputStream.java:2213)
    at java.io.ObjectInputStream.readObject0(ObjectInputStream.java:1669)
    at java.io.ObjectInputStream.defaultReadFields(ObjectInputStream.java:2431)
    at java.io.ObjectInputStream.readSerialData(ObjectInputStream.java:2355)
    at java.io.ObjectInputStream.readOrdinaryObject(ObjectInputStream.java:2213)
    at java.io.ObjectInputStream.readObject0(ObjectInputStream.java:1669)
    at java.io.ObjectInputStream.readObject(ObjectInputStream.java:503)
    at java.io.ObjectInputStream.readObject(ObjectInputStream.java:461)
    at scala.collection.immutable.List$SerializationProxy.readObject(List.scala:527)
    at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
    at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
    at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
    at java.lang.reflect.Method.invoke(Method.java:498)
    at java.io.ObjectStreamClass.invokeReadObject(ObjectStreamClass.java:1184)
    at java.io.ObjectInputStream.readSerialData(ObjectInputStream.java:2322)
    at java.io.ObjectInputStream.readOrdinaryObject(ObjectInputStream.java:2213)
    at java.io.ObjectInputStream.readObject0(ObjectInputStream.java:1669)
    at java.io.ObjectInputStream.defaultReadFields(ObjectInputStream.java:2431)
    at java.io.ObjectInputStream.readSerialData(ObjectInputStream.java:2355)
    at java.io.ObjectInputStream.readOrdinaryObject(ObjectInputStream.java:2213)
    at java.io.ObjectInputStream.readObject0(ObjectInputStream.java:1669)
    at java.io.ObjectInputStream.defaultReadFields(ObjectInputStream.java:2431)
    at java.io.ObjectInputStream.readSerialData(ObjectInputStream.java:2355)
    at java.io.ObjectInputStream.readOrdinaryObject(ObjectInputStream.java:2213)
    at java.io.ObjectInputStream.readObject0(ObjectInputStream.java:1669)
    at java.io.ObjectInputStream.readObject(ObjectInputStream.java:503)
    at java.io.ObjectInputStream.readObject(ObjectInputStream.java:461)
    at scala.collection.immutable.List$SerializationProxy.readObject(List.scala:527)
    at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
    at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
    at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
    at java.lang.reflect.Method.invoke(Method.java:498)
    at java.io.ObjectStreamClass.invokeReadObject(ObjectStreamClass.java:1184)
    at java.io.ObjectInputStream.readSerialData(ObjectInputStream.java:2322)
    at java.io.ObjectInputStream.readOrdinaryObject(ObjectInputStream.java:2213)
    at java.io.ObjectInputStream.readObject0(ObjectInputStream.java:1669)
    at java.io.ObjectInputStream.defaultReadFields(ObjectInputStream.java:2431)
    at java.io.ObjectInputStream.readSerialData(ObjectInputStream.java:2355)
    at java.io.ObjectInputStream.readOrdinaryObject(ObjectInputStream.java:2213)
    at java.io.ObjectInputStream.readObject0(ObjectInputStream.java:1669)
    at java.io.ObjectInputStream.defaultReadFields(ObjectInputStream.java:2431)
    at java.io.ObjectInputStream.readSerialData(ObjectInputStream.java:2355)
    at java.io.ObjectInputStream.readOrdinaryObject(ObjectInputStream.java:2213)
    at java.io.ObjectInputStream.readObject0(ObjectInputStream.java:1669)
    at java.io.ObjectInputStream.readObject(ObjectInputStream.java:503)
    at java.io.ObjectInputStream.readObject(ObjectInputStream.java:461)
    at org.apache.spark.serializer.JavaDeserializationStream.readObject(JavaSerializer.scala:76)
    at org.apache.spark.serializer.JavaSerializerInstance.deserialize(JavaSerializer.scala:115)
    at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:83)
    at org.apache.spark.scheduler.Task.run(Task.scala:131)
    at org.apache.spark.executor.Executor$TaskRunner.$anonfun$run$3(Executor.scala:498)
    at org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1439)
    at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:501)
    at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)
    at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
    at java.lang.Thread.run(Thread.java:750)


Here is my code :

The beans :

package com.example.demo.model;

import lombok.AllArgsConstructor;
import lombok.Getter;
import lombok.Setter;
import lombok.ToString;

@Getter
@Setter
@ToString
@AllArgsConstructor
public class Address {

    private String city;
    private String country;
}


package com.example.demo.model;

import lombok.AllArgsConstructor;
import lombok.Getter;
import lombok.Setter;
import lombok.ToString;

@Getter
@Setter
@ToString
@AllArgsConstructor
public class Person {

    private String name;
    private int age;

}


package com.example.demo.model;

import lombok.*;

@Getter
@Setter
@ToString
@Builder
@AllArgsConstructor
public class PersonWithAddress {

    private String name;
    private int age;
    private String city;
    private String country;
}



package com.example.demo;

import com.example.demo.model.Address;
import com.example.demo.model.Person;
import com.example.demo.model.PersonWithAddress;
import lombok.AllArgsConstructor;
import lombok.extern.slf4j.Slf4j;
import org.apache.spark.sql.*;
import org.springframework.stereotype.Service;
import java.io.Serializable;
import java.util.Arrays;
import java.util.Iterator;
import java.util.stream.Stream;


@Service
@AllArgsConstructor
@Slf4j
public class MyJob implements Serializable {

    public void start() {

        SparkSession spark = SparkSession.builder().appName("demo").getOrCreate();

        Person john = new Person("John", 30);
        Person david = new Person("David", 35);

        Dataset<Person> personDs = spark.createDataset(Arrays.asList(john, david), Encoders.bean(Person.class));
        Dataset<PersonWithAddress> personWithAddressDs = personDs.flatMap(this::addAddresses, Encoders.bean(PersonWithAddress.class));
        personWithAddressDs.show();

    }

    private Iterator<PersonWithAddress> addAddresses(Person person) {

        Address paris = new Address("Paris", "France");
        Address madrid = new Address("Madrid", "Spain");

        return Stream.of(paris, madrid).map(address -> createPersonWithAddress(person, address)).iterator();

    }

    private PersonWithAddress createPersonWithAddress(Person person, Address address) {

        return PersonWithAddress.builder()
                .name(person.getName())
                .age(person.getAge())
                .city(address.getCity())
                .country(address.getCountry())
                .build();
    }
}


package com.example.demo;

import lombok.extern.slf4j.Slf4j;
import org.springframework.boot.SpringApplication;
import org.springframework.boot.autoconfigure.SpringBootApplication;
import org.springframework.boot.autoconfigure.gson.GsonAutoConfiguration;
import org.springframework.context.ApplicationContext;

@SpringBootApplication(exclude = {GsonAutoConfiguration.class})
@Slf4j
public class DemoApplication {

    public static void main(String[] args) {

        ApplicationContext applicationContext = SpringApplication.run(DemoApplication.class, args);
        MyJob myJob = applicationContext.getBean(MyJob.class);
        myJob.start();
    }

}


and the pom

        ...
        <dependencies>
            <dependency>
                <groupId>org.springframework.boot</groupId>
                <artifactId>spring-boot-starter</artifactId>
                <exclusions>
                    <exclusion>
                        <groupId>org.springframework.boot</groupId>
                        <artifactId>spring-boot-starter-logging</artifactId>
                    </exclusion>
                </exclusions>
            </dependency>

            <dependency>
                <groupId>org.springframework.boot</groupId>
                <artifactId>spring-boot-starter-web</artifactId>
            </dependency>
            <dependency>
                <groupId>org.springframework.boot</groupId>
                <artifactId>spring-boot-devtools</artifactId>
                <scope>runtime</scope>
                <optional>true</optional>
            </dependency>
            <dependency>
                <groupId>org.springframework.boot</groupId>
                <artifactId>spring-boot-configuration-processor</artifactId>
                <optional>true</optional>
            </dependency>
            <dependency>
                <groupId>org.projectlombok</groupId>
                <artifactId>lombok</artifactId>
                <optional>true</optional>
            </dependency>
            <dependency>
                <groupId>org.springframework.boot</groupId>
                <artifactId>spring-boot-starter-test</artifactId>
                <scope>test</scope>
            </dependency>
            <dependency>
                <groupId>org.apache.spark</groupId>
                <artifactId>spark-sql_2.12</artifactId>
                <scope>provided</scope>
                <version>3.1.2</version>
            </dependency>
            <dependency>
                <groupId>org.codehaus.janino</groupId>
                <artifactId>janino</artifactId>
                <version>3.0.8</version>
            </dependency>
            <dependency>
                <groupId>org.codehaus.janino</groupId>
                <artifactId>commons-compiler</artifactId>
                <version>3.0.8</version>
            </dependency>
            <dependency>
                <groupId>org.projectlombok</groupId>
                <artifactId>lombok</artifactId>
                <version>1.18.24</version>
                <scope>compile</scope>
            </dependency>
        </dependencies>

        <build>
            <plugins>
                <plugin>
                    <groupId>org.springframework.boot</groupId>
                    <artifactId>spring-boot-maven-plugin</artifactId>
                    <configuration>
                        <excludes>
                            <exclude>
                                <groupId>org.projectlombok</groupId>
                                <artifactId>lombok</artifactId>
                            </exclude>
                        </excludes>
                    </configuration>
                </plugin>
            </plugins>
        </build>

    </project>


I tried 2 solutions based on previous feeds on stackoverflow :

java.lang.ClassCastException using lambda expressions in spark job on remote server

  • replacing lambdas with innerclass
  • use the --jars to provide the generated jar to executors (but it seems that this is what dataproc do by default)

In local it works fine, only when I deploy in dataproc that I get the error.

Thank you for your help :)

0 Answers0