I'm trying to export data from EMR master node to S3 bucket, its failing. While executing below line of code from my pyspark code:
DF1
.coalesce(1)
.write
.format("csv")
.option("header","true")
.save("s3://fittech-bucket/emr/outputs/test_data")
below error comes:
An error occurred while calling o78.save.
: java.lang.RuntimeException: java.lang.ClassNotFoundException: Class com.amazon.ws.emr.hadoop.fs.EmrFileSystem not found
at org.apache.hadoop.conf.Configuration.getClass(Configuration.java:2369)
at org.apache.hadoop.fs.FileSystem.getFileSystemClass(FileSystem.java:2840)
at org.apache.hadoop.fs.FileSystem.createFileSystem(FileSystem.java:2857)
at org.apache.hadoop.fs.FileSystem.access$200(FileSystem.java:99)
at org.apache.hadoop.fs.FileSystem$Cache.getInternal(FileSystem.java:2896)
at org.apache.hadoop.fs.FileSystem$Cache.get(FileSystem.java:2878)
at org.apache.hadoop.fs.FileSystem.get(FileSystem.java:392)
at org.apache.hadoop.fs.Path.getFileSystem(Path.java:356)
at org.apache.spark.sql.execution.datasources.DataSource.planForWritingFileFormat(DataSource.scala:452)
at org.apache.spark.sql.execution.datasources.DataSource.planForWriting(DataSource.scala:548)
at org.apache.spark.sql.DataFrameWriter.saveToV1Source(DataFrameWriter.scala:278)
at org.apache.spark.sql.DataFrameWriter.save(DataFrameWriter.scala:267)
at org.apache.spark.sql.DataFrameWriter.save(DataFrameWriter.scala:225)
at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
at java.lang.reflect.Method.invoke(Method.java:498)
at py4j.reflection.MethodInvoker.invoke(MethodInvoker.java:244)
at py4j.reflection.ReflectionEngine.invoke(ReflectionEngine.java:357)
at py4j.Gateway.invoke(Gateway.java:282)
at py4j.commands.AbstractCommand.invokeMethod(AbstractCommand.java:132)
at py4j.commands.CallCommand.execute(CallCommand.java:79)
at py4j.GatewayConnection.run(GatewayConnection.java:238)
at java.lang.Thread.run(Thread.java:748)
Caused by: java.lang.ClassNotFoundException: Class com.amazon.ws.emr.hadoop.fs.EmrFileSystem not found
at org.apache.hadoop.conf.Configuration.getClassByName(Configuration.java:2273)
at org.apache.hadoop.conf.Configuration.getClass(Configuration.java:2367)