I'm trying to access a DataFrame, df_rejet, that is declared outside of a transformation:
import java.time.LocalDateTime
import java.time.format.DateTimeFormatter

case class Rejet_Format(NOM_TABLE: String, TYPE_TRAITEMENT: String, DT_REJET: String, CD_REJET: String, CAUSE_REJET: String, ID_ENTITE: String)

var df_rejet = spark.emptyDataset[Rejet_Format].toDF()

val df_eximo_r_01 = dfDwgOfEximo.filter("Rejet_r01 = '1'").select("ID_DOSSIER", "ID_ENTITE1", "DT_DEBUT")

df_eximo_r_01.foreach(x =>
  for (i <- 0 to 2) {
    if (x(i) == "NULL")
      df_rejet = df_rejet.union(Seq(("DWG_OF_EXIMO", "INSERT", DateTimeFormatter.ofPattern("dd/MM/yyyy HH:mm:ss").format(LocalDateTime.now), "R01", "ID_DOSSIER : " + x(0).toString() + ", " + colonnes_r1(i).toString() + " : " + x(i), x(1).toString())).toDF("NOM_TABLE", "TYPE_TRAITEMENT", "DT_REJET", "CD_REJET", "CAUSE_REJET", "ID_ENTITE"))
  }
)
When I execute the code above, I get the following error:
diagnostics: User class threw exception: org.apache.spark.SparkException: Job aborted due to stage failure: Task 1 in stage 18.0 failed 4 times, most recent failure: Lost task 1.3 in stage 18.0 (TID 851, lxpbda27.ra1.intra.groupama.fr, executor 10): java.lang.NullPointerException
at org.apache.spark.sql.SQLImplicits.localSeqToDatasetHolder(SQLImplicits.scala:213)
at com.groupama.dwhgroup.smart.dwg_of_eximo.Operation_mod$$anonfun$gestionDelta$1$$anonfun$apply$1.apply$mcVI$sp(Operation_mod.scala:330)
at scala.collection.immutable.Range.foreach$mVc$sp(Range.scala:160)
at com.groupama.dwhgroup.smart.dwg_of_eximo.Operation_mod$$anonfun$gestionDelta$1.apply(Operation_mod.scala:328)
at com.groupama.dwhgroup.smart.dwg_of_eximo.Operation_mod$$anonfun$gestionDelta$1.apply(Operation_mod.scala:327)
at scala.collection.Iterator$class.foreach(Iterator.scala:893)
at scala.collection.AbstractIterator.foreach(Iterator.scala:1336)
at org.apache.spark.rdd.RDD$$anonfun$foreach$1$$anonfun$apply$28.apply(RDD.scala:918)
at org.apache.spark.rdd.RDD$$anonfun$foreach$1$$anonfun$apply$28.apply(RDD.scala:918)
at org.apache.spark.SparkContext$$anonfun$runJob$5.apply(SparkContext.scala:2062)
at org.apache.spark.SparkContext$$anonfun$runJob$5.apply(SparkContext.scala:2062)
at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:87)
at org.apache.spark.scheduler.Task.run(Task.scala:108)
at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:335)
at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)
at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
at java.lang.Thread.run(Thread.java:748)
How can I resolve this problem?
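For what it's worth, I suspect the same logic could be expressed as plain transformations built on the driver, instead of mutating df_rejet inside foreach. A rough sketch of what I mean (it assumes colonnes_r1 is a Seq[String] holding the three selected column names, as in my code above):

import java.time.LocalDateTime
import java.time.format.DateTimeFormatter
import org.apache.spark.sql.functions.{col, concat, lit}

// Timestamp computed once, on the driver
val dtRejet = DateTimeFormatter.ofPattern("dd/MM/yyyy HH:mm:ss").format(LocalDateTime.now)

// Build one rejection DataFrame per checked column, then union them all
val df_rejet = colonnes_r1.map { c =>
  df_eximo_r_01
    .filter(col(c) === "NULL")
    .select(
      lit("DWG_OF_EXIMO").as("NOM_TABLE"),
      lit("INSERT").as("TYPE_TRAITEMENT"),
      lit(dtRejet).as("DT_REJET"),
      lit("R01").as("CD_REJET"),
      concat(lit("ID_DOSSIER : "), col("ID_DOSSIER"), lit(s", $c : "), col(c)).as("CAUSE_REJET"),
      col("ID_ENTITE1").as("ID_ENTITE")
    )
}.reduce(_ union _)

I'm not sure whether this is the right direction, though, since I don't yet understand why the foreach version throws the NullPointerException in the first place.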