org.apache.spark.SparkException: Job aborted due to stage failure: Task 0 in stage 2.0 failed 4 times, most recent failure: Lost task 0.3 in stage 2.0 (TID 5, svr17933hw2288.hadoop.sh.ctripcorp.com, executor 1): org.apache.spark.SparkException: Failed to execute user defined function($anonfun$createTransformFunc$1: (string) => array)
The code is as follows:
val tokenizer = new Tokenizer().setInputCol("sendcontent").setOutputCol("words")
var wordsData = tokenizer.transform(sourDF)
val hashingTF = new HashingTF()
.setInputCol("words").setOutputCol("rawFeatures").setNumFeatures(20)
val featurizedData = hashingTF.transform(wordsData)
val idf = new IDF().setInputCol("rawFeatures").setOutputCol("features")
val idfModel = idf.fit(featurizedData)
val rescaledData = idfModel.transform(featurizedData)
rescaledData.select("features", "msgid").take(3).foreach(println)