
I am using the code from the question Flatten a DataFrame in Scala with different DataTypes inside .... to flatten a nested DataFrame, and I am getting the error below:

Exception in thread "main" org.apache.spark.sql.AnalysisException: Reference 'alternateIdentificationQualifierCode' is ambiguous, could be: alternateIdentificationQualifierCode#2, alternateIdentificationQualifierCode#11.;
    at org.apache.spark.sql.catalyst.plans.logical.LogicalPlan.resolve(LogicalPlan.scala:287)
    at org.apache.spark.sql.catalyst.plans.logical.LogicalPlan.resolveChildren(LogicalPlan.scala:171)
    at org.apache.spark.sql.catalyst.analysis.Analyzer$ResolveReferences$$anonfun$apply$10$$anonfun$applyOrElse$4$$anonfun$26.apply(Analyzer.scala:470)
    at org.apache.spark.sql.catalyst.analysis.Analyzer$ResolveReferences$$anonfun$apply$10$$anonfun$applyOrElse$4$$anonfun$26.apply(Analyzer.scala:470)
    at org.apache.spark.sql.catalyst.analysis.package$.withPosition(package.scala:48)
    at org.apache.spark.sql.catalyst.analysis.Analyzer$ResolveReferences$$anonfun$apply$10$$anonfun$applyOrElse$4.applyOrElse(Analyzer.scala:470)
    at org.apache.spark.sql.catalyst.analysis.Analyzer$ResolveReferences$$anonfun$apply$10$$anonfun$applyOrElse$4.applyOrElse(Analyzer.scala:466)
    at org.apache.spark.sql.catalyst.trees.TreeNode$$anonfun$transformUp$1.apply(TreeNode.scala:335)
    at org.apache.spark.sql.catalyst.trees.TreeNode$$anonfun$transformUp$1.apply(TreeNode.scala:335)
    at org.apache.spark.sql.catalyst.trees.CurrentOrigin$.withOrigin(TreeNode.scala:69)
    at org.apache.spark.sql.catalyst.trees.TreeNode.transformUp(TreeNode.scala:334)
    at org.apache.spark.sql.catalyst.trees.TreeNode$$anonfun$5.apply(TreeNode.scala:332)
    at org.apache.spark.sql.catalyst.trees.TreeNode$$anonfun$5.apply(TreeNode.scala:332)
    at org.apache.spark.sql.catalyst.trees.TreeNode$$anonfun$4.apply(TreeNode.scala:281)
    at scala.collection.Iterator$$anon$11.next(Iterator.scala:328)

Is there any way to rename columns on the fly programmatically in Spark DataFrames in Scala? Thanks in advance.

Code:

import java.io.FileInputStream
import java.util.Properties

import org.apache.spark.{SparkConf, SparkContext}
import org.apache.spark.sql.hive.HiveContext

object flatten {

  def main(args: Array[String]) {

    if (args.length < 1) {
      System.err.println("Usage: XMLParser.jar <config.properties>")
      println("Please provide the Configuration File for the XML Parser Job")
      System.exit(1)
    }

    val sc = new SparkContext(new SparkConf().setAppName("Spark XML Process"))
    val sqlContext = new HiveContext(sc)

    // Job settings (XML row tag and input path) come from the properties file passed as args(0)
    val prop = new Properties()
    prop.load(new FileInputStream(args(0)))

    // Read the XML input with spark-xml, then flatten the nested schema
    // (flattenDf is the flattening function taken from the linked question)
    val dfSchema = sqlContext.read.format("com.databricks.spark.xml")
      .option("rowTag", prop.getProperty("xmltag"))
      .load(prop.getProperty("input"))
    val flattened_DataFrame = flattenDf(dfSchema)

    // flattened_DataFrame.printSchema()

  }

1 Answer


Use

val renamed_df = df.toDF(Seq("col1", "col2", "col3"): _*)

to rename all the columns in one go. toDF takes the new names as varargs, so a Seq has to be expanded with : _*; pass one name per column, in the same order as df.columns.
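Applied to the flattened DataFrame from the question, a minimal sketch (the index-suffix scheme and the names uniqueNames / renamed_df are just illustrative) that builds the new names programmatically so duplicates such as the two alternateIdentificationQualifierCode columns no longer collide:

// Derive unique column names by suffixing each existing name with its position
val uniqueNames = flattened_DataFrame.columns.zipWithIndex.map {
  case (name, idx) => s"${name}_$idx"
}

// toDF expects varargs, so expand the array with : _*
val renamed_df = flattened_DataFrame.toDF(uniqueNames: _*)
renamed_df.printSchema()

Note that if the AnalysisException is thrown inside flattenDf itself, the renaming has to be applied before the select that collides rather than after the flattening.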