I am using the code from the link below to flatten a nested DataFrame: Flatten a DataFrame in Scala with different DataTypes inside .... I am getting the error below:
Exception in thread "main" org.apache.spark.sql.AnalysisException: Reference 'alternateIdentificationQualifierCode' is ambiguous, could be: alternateIdentificationQualifierCode#2, alternateIdentificationQualifierCode#11.;
    at org.apache.spark.sql.catalyst.plans.logical.LogicalPlan.resolve(LogicalPlan.scala:287)
    at org.apache.spark.sql.catalyst.plans.logical.LogicalPlan.resolveChildren(LogicalPlan.scala:171)
    at org.apache.spark.sql.catalyst.analysis.Analyzer$ResolveReferences$$anonfun$apply$10$$anonfun$applyOrElse$4$$anonfun$26.apply(Analyzer.scala:470)
    at org.apache.spark.sql.catalyst.analysis.Analyzer$ResolveReferences$$anonfun$apply$10$$anonfun$applyOrElse$4$$anonfun$26.apply(Analyzer.scala:470)
    at org.apache.spark.sql.catalyst.analysis.package$.withPosition(package.scala:48)
    at org.apache.spark.sql.catalyst.analysis.Analyzer$ResolveReferences$$anonfun$apply$10$$anonfun$applyOrElse$4.applyOrElse(Analyzer.scala:470)
    at org.apache.spark.sql.catalyst.analysis.Analyzer$ResolveReferences$$anonfun$apply$10$$anonfun$applyOrElse$4.applyOrElse(Analyzer.scala:466)
    at org.apache.spark.sql.catalyst.trees.TreeNode$$anonfun$transformUp$1.apply(TreeNode.scala:335)
    at org.apache.spark.sql.catalyst.trees.TreeNode$$anonfun$transformUp$1.apply(TreeNode.scala:335)
    at org.apache.spark.sql.catalyst.trees.CurrentOrigin$.withOrigin(TreeNode.scala:69)
    at org.apache.spark.sql.catalyst.trees.TreeNode.transformUp(TreeNode.scala:334)
    at org.apache.spark.sql.catalyst.trees.TreeNode$$anonfun$5.apply(TreeNode.scala:332)
    at org.apache.spark.sql.catalyst.trees.TreeNode$$anonfun$5.apply(TreeNode.scala:332)
    at org.apache.spark.sql.catalyst.trees.TreeNode$$anonfun$4.apply(TreeNode.scala:281)
    at scala.collection.Iterator$$anon$11.next(Iterator.scala:328)
Is there any way to rename columns on the fly programmatically in Spark DataFrames in Scala? (A sketch of what I have in mind is after the code.) Thanks in advance.
Code:
import java.io.FileInputStream
import java.util.Properties

import org.apache.spark.{SparkConf, SparkContext}
import org.apache.spark.sql.hive.HiveContext

object flatten {
  def main(args: Array[String]) {
    if (args.length < 1) {
      System.err.println("Usage: XMLParser.jar <config.properties>")
      println("Please provide the Configuration File for the XML Parser Job")
      System.exit(1)
    }
    val sc = new SparkContext(new SparkConf().setAppName("Spark XML Process"))
    val sqlContext = new HiveContext(sc)
    val prop = new Properties()
    prop.load(new FileInputStream(args(0)))
    // rowTag and the input path come from the properties file passed as args(0)
    val dfSchema = sqlContext.read.format("com.databricks.spark.xml")
      .option("rowTag", prop.getProperty("xmltag"))
      .load(prop.getProperty("input"))
    val flattened_DataFrame = flattenDf(dfSchema)
    // flattened_DataFrame.printSchema()
  }
}
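For reference, flattenDf is adapted from the linked answer. Below is a minimal sketch of the renaming I have in mind, not my exact code: my assumption is that the ambiguity comes from two parent structs each containing an alternateIdentificationQualifierCode field, so prefixing every child column with its parent name while flattening should keep them distinct (e.g. alternateIdentification_alternateIdentificationQualifierCode):

import org.apache.spark.sql.DataFrame
import org.apache.spark.sql.functions.col
import org.apache.spark.sql.types.StructType

// Hypothetical flatten variant: each struct field is pulled up one level per
// pass and renamed to parentName_childName, so fields that share a name under
// different parents no longer collide.
def flattenDf(df: DataFrame): DataFrame = {
  val cols = df.schema.fields.flatMap { f =>
    f.dataType match {
      case st: StructType =>
        // rename on the fly: qualify each child column with its parent's name
        st.fields.toSeq.map(c => col(s"${f.name}.${c.name}").as(s"${f.name}_${c.name}"))
      case _ =>
        Seq(col(f.name))
    }
  }
  val flat = df.select(cols: _*)
  // recurse while struct columns remain; deeper nesting flattens one level per call
  if (flat.schema.fields.exists(_.dataType.isInstanceOf[StructType])) flattenDf(flat)
  else flat
}

This sketch only handles structs; array columns produced by spark-xml would still need an explode pass first, as in the linked answer.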