I'm trying to write a Spark DataFrame to a Parquet file, but the write fails. I also tried writing it as CSV, and that does not work either.
`df` is my DataFrame:
CUST_ID
---------------
00000082MM778Q49X
00000372QM8890MX7
00000424M09X729MQ
0000062Q028M05MX
My DataFrame looks like the sample above.
# Destination path of the Parquet output, located under tempDir.
df_parquet = tempDir + "/df.parquet"

# Distinct customer IDs joined across both tables for the given year.
customerQuery = (
    "SELECT DISTINCT(m.customer_ID) FROM ada_customer m INNER JOIN customer_nol mr ON m.customer_ID = mr.customer_ID "
    f"WHERE mr.MODEL <> 'X' and m.STATUS = 'Process' AND m.YEAR = {year} AND mr.YEAR = {year}"
)

# Configure the JDBC reader first, then load the query result into a DataFrame.
jdbc_reader = sqlContext.read.format("jdbc").options(
    url="jdbc:mysql://localhost:3306/dbkl",
    driver="com.mysql.jdbc.Driver",
    query=customerQuery,
    user="root",
    password="root",
)
customer_df = jdbc_reader.load()

# The read above succeeds; only the write step below fails.
# NOTE(review): the UnsatisfiedLinkError on NativeIO$Windows.access0 in the
# traceback suggests the Hadoop native Windows binaries (winutils.exe /
# hadoop.dll) are missing or do not match the Hadoop version -- confirm the
# HADOOP_HOME setup on this machine.
overwrite_writer = customer_df.write.mode("overwrite")
overwrite_writer.parquet(df_parquet)
I'm getting the following error and don't know exactly what's wrong. Can someone help with this?
Traceback (most recent call last):
File "F:/SparkBook/HG.py", line 135, in <module>
customer_xdf.write.mode("overwrite").parquet(customer_parquet)
File "C:\spark3\python\lib\pyspark.zip\pyspark\sql\readwriter.py", line 1372, in csv
File "C:\spark3\python\lib\py4j-0.10.9-src.zip\py4j\java_gateway.py", line 1305, in __call__
File "C:\spark3\python\lib\pyspark.zip\pyspark\sql\utils.py", line 111, in deco
File "C:\spark3\python\lib\py4j-0.10.9-src.zip\py4j\protocol.py", line 328, in get_return_value
py4j.protocol.Py4JJavaError: An error occurred while calling o81.csv.
: org.apache.spark.SparkException: Job aborted.
at org.apache.spark.sql.execution.datasources.FileFormatWriter$.write(FileFormatWriter.scala:231)
at java.base/java.lang.Thread.run(Thread.java:834)
Caused by: java.lang.UnsatisfiedLinkError: org.apache.hadoop.io.nativeio.NativeIO$Windows.access0(Ljava/lang/String;I)Z
at org.apache.spark.sql.execution.datasources.FileFormatWriter$.write(FileFormatWriter.scala:220)
... 33 more
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
File "F:/SparkBook/HG.py", line 148, in <module>
logger.error(e)
File "F:\SparkBook\lib\logger.py", line 16, in error
self.logger.error(message)
File "C:\spark3\python\lib\py4j-0.10.9-src.zip\py4j\java_gateway.py", line 1296, in __call__
File "C:\spark3\python\lib\py4j-0.10.9-src.zip\py4j\java_gateway.py", line 1266, in _build_args
File "C:\spark3\python\lib\py4j-0.10.9-src.zip\py4j\java_gateway.py", line 1266, in <listcomp>
File "C:\spark3\python\lib\py4j-0.10.9-src.zip\py4j\protocol.py", line 298, in get_command_part
AttributeError: 'Py4JJavaError' object has no attribute '_get_object_id'
Process finished with exit code 1