I'm new to spark, I have below code to convert the given column to lowercase and update the given data frame. I found this logic on the net which is not working for me.
Data: test.csv
AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA,rock
AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA,rock
AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA,rock
I want to convert the first column hashID values to lowercase "aaaaaaaaaaaaaaaaa" for this I have this below code
import com.holdenkarau.spark.testing.{RDDComparisons, SharedSparkContext}
import org.apache.spark.sql.SparkSession
import org.apache.spark.sql.functions.{col, lower}
import org.apache.spark.sql.types.{StringType, StructField, StructType}
import org.scalatest.{BeforeAndAfter, FunSuite}
class Test extends FunSuite with SharedSparkContext with RDDComparisons with BeforeAndAfter
with Serializable {
test(" test lowerCase") {
val testSchema = StructType(
Array(
StructField("hashID", StringType, false),
StructField("name", StringType, false)
))
val builder = SparkSession.builder()
builder.master("local[*]")
// Build spark session
val spark = builder
.config("spark.driver.maxResultSize", "0")
.appName("testData")
.config("spark.driver.extraJavaOptions", "-Xss10M")
.getOrCreate()
var DF = spark.read.format("csv").option("header", "false").schema(testSchema).load("~/test.csv")
println("before")
val colName="hashID"
DF.select(colName).take(2).foreach(println)
DF.withColumn(colName, lower(col(colName)))
println("after")
DF.select(colName).take(2).foreach(println)
}
}