I have to convert json file to csv file using spark dataframe in databricks. I have tried the below code to convert the json to csv but i'm getting the CSV data source does not support array data type in spark dataframe . I'm unable to convert to csv file .can someone help me on this issue how to remove _corrupt_string?
import json
data=r'/dbfs/FileStore/tables/ABC.json'
print ("This is json data ", data)
def js_r(data):
with open(data, encoding='utf-8') as f_in:
return(json.load(f_in))
if __name__ == "__main__":
dic_data_first = js_r(data)
print("This is my dictionary", dic_data_first)
keys= dic_data_first.keys()
print ("The original dict keys",keys)
dic_data_second={'my_items':dic_data_first['Data']for key in keys}
with open('/dbfs/FileStore/tables/ABC_1.json', 'w') as f:
json.dump(dic_data_first, f)
df = sqlContext.read.json('dbfs:/FileStore/tables/ABC_1.json') # reading a json and writing a parquet
print(df)
df.write.mode("overwrite").format("com.databricks.spark.csv").option("header","true").csv("/dbfs/FileStore/tables/ABC_1.csv")
JSON data as follows:
{"Table":"test1",
"Data":[
{"aa":"1",
"bb":"2"},
{"aa" :"ss",
"bb":"dc"}
}]
}