
I'm trying to access the DataFrame df_rejet, which is declared outside the transformation:

import java.time.LocalDateTime
import java.time.format.DateTimeFormatter

case class Rejet_Format(NOM_TABLE: String, TYPE_TRAITEMENT: String, DT_REJET: String, CD_REJET: String, CAUSE_REJET: String, ID_ENTITE: String)

var df_rejet = spark.emptyDataset[Rejet_Format].toDF()

val df_eximo_r_01 = dfDwgOfEximo.filter("Rejet_r01 = '1'").select("ID_DOSSIER", "ID_ENTITE1", "DT_DEBUT")

df_eximo_r_01.foreach { x =>
  for (i <- 0 to 2) {
    if (x(i) == "NULL")
      df_rejet = df_rejet.union(Seq((
        "DWG_OF_EXIMO",
        "INSERT",
        DateTimeFormatter.ofPattern("dd/MM/yyyy HH:mm:ss").format(LocalDateTime.now),
        "R01",
        "ID_DOSSIER : " + x(0).toString() + ", " + colonnes_r1(i).toString() + " : " + x(i),
        x(1).toString()
      )).toDF("NOM_TABLE", "TYPE_TRAITEMENT", "DT_REJET", "CD_REJET", "CAUSE_REJET", "ID_ENTITE"))
  }
}

When I execute the above code, I get the error below:

diagnostics: User class threw exception: org.apache.spark.SparkException: Job aborted due to stage failure: Task 1 in stage 18.0 failed 4 times, most recent failure: Lost task 1.3 in stage 18.0 (TID 851, lxpbda27.ra1.intra.groupama.fr, executor 10): java.lang.NullPointerException
        at org.apache.spark.sql.SQLImplicits.localSeqToDatasetHolder(SQLImplicits.scala:213)
        at com.groupama.dwhgroup.smart.dwg_of_eximo.Operation_mod$$anonfun$gestionDelta$1$$anonfun$apply$1.apply$mcVI$sp(Operation_mod.scala:330)
        at scala.collection.immutable.Range.foreach$mVc$sp(Range.scala:160)
        at com.groupama.dwhgroup.smart.dwg_of_eximo.Operation_mod$$anonfun$gestionDelta$1.apply(Operation_mod.scala:328)
        at com.groupama.dwhgroup.smart.dwg_of_eximo.Operation_mod$$anonfun$gestionDelta$1.apply(Operation_mod.scala:327)
        at scala.collection.Iterator$class.foreach(Iterator.scala:893)
        at scala.collection.AbstractIterator.foreach(Iterator.scala:1336)
        at org.apache.spark.rdd.RDD$$anonfun$foreach$1$$anonfun$apply$28.apply(RDD.scala:918)
        at org.apache.spark.rdd.RDD$$anonfun$foreach$1$$anonfun$apply$28.apply(RDD.scala:918)
        at org.apache.spark.SparkContext$$anonfun$runJob$5.apply(SparkContext.scala:2062)
        at org.apache.spark.SparkContext$$anonfun$runJob$5.apply(SparkContext.scala:2062)
        at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:87)
        at org.apache.spark.scheduler.Task.run(Task.scala:108)
        at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:335)
        at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)
        at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
        at java.lang.Thread.run(Thread.java:748)

How can I resolve this problem?

  • Possible duplicate of [NullPointerException in Scala Spark, appears to be caused be collection type?](https://stackoverflow.com/questions/23793117/nullpointerexception-in-scala-spark-appears-to-be-caused-be-collection-type). Nesting of RDDs/DataFrames is not supported by Spark. – Shaido Aug 27 '19 at 09:46
  • Can you add a description of the behaviour you want to get with the foreach? – firsni Aug 27 '19 at 10:51
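
As Shaido's comment notes, DataFrame operations cannot run inside a foreach closure: the closure executes on the executors, where the SparkSession and its implicits are null, which is why the stack trace fails in localSeqToDatasetHolder. Below is a minimal driver-side sketch of the same rejection logic; it assumes colonnes_r1 holds the three checked column names, since that value is not defined in the question.

import java.time.LocalDateTime
import java.time.format.DateTimeFormatter
import org.apache.spark.sql.functions.{col, concat, lit}

// Assumption: taken from the question's select; colonnes_r1 is not shown there.
val colonnes_r1 = Seq("ID_DOSSIER", "ID_ENTITE1", "DT_DEBUT")

val dtRejet = DateTimeFormatter.ofPattern("dd/MM/yyyy HH:mm:ss").format(LocalDateTime.now)

// One rejection DataFrame per checked column: keep the rows whose value is
// the literal string "NULL" and map them to the Rejet_Format columns.
val rejets = colonnes_r1.map { c =>
  df_eximo_r_01
    .filter(col(c) === "NULL")
    .select(
      lit("DWG_OF_EXIMO").as("NOM_TABLE"),
      lit("INSERT").as("TYPE_TRAITEMENT"),
      lit(dtRejet).as("DT_REJET"),
      lit("R01").as("CD_REJET"),
      concat(lit("ID_DOSSIER : "), col("ID_DOSSIER"), lit(s", $c : "), col(c)).as("CAUSE_REJET"),
      col("ID_ENTITE1").as("ID_ENTITE")
    )
}

// All transformations stay on the driver, so no SparkSession is touched
// inside an executor.
df_rejet = rejets.foldLeft(df_rejet)(_ union _)

Everything here stays a lazy DataFrame transformation built on the driver, so the union replaces the mutation of a driver variable from executor code. If the intent is to catch real nulls rather than the literal string "NULL", replace the filter with col(c).isNull.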
