0

I am trying to run a PySpark test case on my local machine in PyCharm. I am defining the Java and winutils paths in code only:

import os
os.environ["JAVA_HOME"] = "C:\\Program Files\\Java\\jdk-11.0.17"
os.environ["HADOOP_HOME"] = "C:\\Users\\harshkum\\hadoop"

and I am trying to create a table like this:

spark.sql("CREATE TABLE test_db.sampleTable (number Int, word String)")

But I am getting the error below:

answer = 'xro91'
gateway_client = <py4j.clientserver.JavaClient object at 0x000002327736EE20>
target_id = 'o35', name = 'sql'

    def get_return_value(answer, gateway_client, target_id=None, name=None):
        """Converts an answer received from the Java gateway into a Python object.
    
        For example, string representation of integers are converted to Python
        integer, string representation of objects are converted to JavaObject
        instances, etc.
    
        :param answer: the string returned by the Java gateway
        :param gateway_client: the gateway client used to communicate with the Java
            Gateway. Only necessary if the answer is a reference (e.g., object,
            list, map)
        :param target_id: the name of the object from which the answer comes from
            (e.g., *object1* in `object1.hello()`). Optional.
        :param name: the name of the member from which the answer comes from
            (e.g., *hello* in `object1.hello()`). Optional.
        """
        if is_error(answer)[0]:
            if len(answer) > 1:
                type = answer[1]
                value = OUTPUT_CONVERTER[type](answer[2:], gateway_client)
                if answer[1] == REFERENCE_TYPE:
>                   raise Py4JJavaError(
                        "An error occurred while calling {0}{1}{2}.\n".
                        format(target_id, ".", name), value)
E                   py4j.protocol.Py4JJavaError: An error occurred while calling o35.sql.
E                   : java.lang.UnsatisfiedLinkError: org.apache.hadoop.io.nativeio.NativeIO$Windows.access0(Ljava/lang/String;I)Z
E                       at org.apache.hadoop.io.nativeio.NativeIO$Windows.access0(Native Method)
E                       at org.apache.hadoop.io.nativeio.NativeIO$Windows.access(NativeIO.java:793)
E                       at org.apache.hadoop.fs.FileUtil.canRead(FileUtil.java:1218)
E                       at org.apache.hadoop.fs.FileUtil.list(FileUtil.java:1423)
E                       at org.apache.hadoop.fs.RawLocalFileSystem.listStatus(RawLocalFileSystem.java:601)
E                       at org.apache.hadoop.fs.FileSystem.listStatus(FileSystem.java:1972)
E                       at org.apache.hadoop.fs.FileSystem.listStatus(FileSystem.java:2014)
E                       at org.apache.hadoop.fs.ChecksumFileSystem.listStatus(ChecksumFileSystem.java:761)
E                       at org.apache.spark.sql.catalyst.catalog.SessionCatalog.validateTableLocation(SessionCatalog.scala:384)
E                       at org.apache.spark.sql.catalyst.catalog.SessionCatalog.createTable(SessionCatalog.scala:371)
E                       at org.apache.spark.sql.execution.command.CreateTableCommand.run(tables.scala:169)
E                       at org.apache.spark.sql.execution.command.ExecutedCommandExec.sideEffectResult$lzycompute(commands.scala:75)
E                       at org.apache.spark.sql.execution.command.ExecutedCommandExec.sideEffectResult(commands.scala:73)
E                       at org.apache.spark.sql.execution.command.ExecutedCommandExec.executeCollect(commands.scala:84)
E                       at org.apache.spark.sql.execution.QueryExecution$$anonfun$eagerlyExecuteCommands$1.$anonfun$applyOrElse$1(QueryExecution.scala:98)
E                       at org.apache.spark.sql.execution.SQLExecution$.$anonfun$withNewExecutionId$6(SQLExecution.scala:109)
E                       at org.apache.spark.sql.execution.SQLExecution$.withSQLConfPropagated(SQLExecution.scala:169)
E                       at org.apache.spark.sql.execution.SQLExecution$.$anonfun$withNewExecutionId$1(SQLExecution.scala:95)
E                       at org.apache.spark.sql.SparkSession.withActive(SparkSession.scala:779)
E                       at org.apache.spark.sql.execution.SQLExecution$.withNewExecutionId(SQLExecution.scala:64)
E                       at org.apache.spark.sql.execution.QueryExecution$$anonfun$eagerlyExecuteCommands$1.applyOrElse(QueryExecution.scala:98)
E                       at org.apache.spark.sql.execution.QueryExecution$$anonfun$eagerlyExecuteCommands$1.applyOrElse(QueryExecution.scala:94)
E                       at org.apache.spark.sql.catalyst.trees.TreeNode.$anonfun$transformDownWithPruning$1(TreeNode.scala:584)
E                       at org.apache.spark.sql.catalyst.trees.CurrentOrigin$.withOrigin(TreeNode.scala:176)
E                       at org.apache.spark.sql.catalyst.trees.TreeNode.transformDownWithPruning(TreeNode.scala:584)
E                       at org.apache.spark.sql.catalyst.plans.logical.LogicalPlan.org$apache$spark$sql$catalyst$plans$logical$AnalysisHelper$$super$transformDownWithPruning(LogicalPlan.scala:30)
E                       at org.apache.spark.sql.catalyst.plans.logical.AnalysisHelper.transformDownWithPruning(AnalysisHelper.scala:267)
E                       at org.apache.spark.sql.catalyst.plans.logical.AnalysisHelper.transformDownWithPruning$(AnalysisHelper.scala:263)
E                       at org.apache.spark.sql.catalyst.plans.logical.LogicalPlan.transformDownWithPruning(LogicalPlan.scala:30)
E                       at org.apache.spark.sql.catalyst.plans.logical.LogicalPlan.transformDownWithPruning(LogicalPlan.scala:30)
E                       at org.apache.spark.sql.catalyst.trees.TreeNode.transformDown(TreeNode.scala:560)
E                       at org.apache.spark.sql.execution.QueryExecution.eagerlyExecuteCommands(QueryExecution.scala:94)
E                       at org.apache.spark.sql.execution.QueryExecution.commandExecuted$lzycompute(QueryExecution.scala:81)
E                       at org.apache.spark.sql.execution.QueryExecution.commandExecuted(QueryExecution.scala:79)
E                       at org.apache.spark.sql.Dataset.<init>(Dataset.scala:220)
E                       at org.apache.spark.sql.Dataset$.$anonfun$ofRows$2(Dataset.scala:100)
E                       at org.apache.spark.sql.SparkSession.withActive(SparkSession.scala:779)
E                       at org.apache.spark.sql.Dataset$.ofRows(Dataset.scala:97)
E                       at org.apache.spark.sql.SparkSession.$anonfun$sql$1(SparkSession.scala:622)
E                       at org.apache.spark.sql.SparkSession.withActive(SparkSession.scala:779)
E                       at org.apache.spark.sql.SparkSession.sql(SparkSession.scala:617)
E                       at java.base/jdk.internal.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
E                       at java.base/jdk.internal.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
E                       at java.base/jdk.internal.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
E                       at java.base/java.lang.reflect.Method.invoke(Method.java:566)
E                       at py4j.reflection.MethodInvoker.invoke(MethodInvoker.java:244)
E                       at py4j.reflection.ReflectionEngine.invoke(ReflectionEngine.java:357)
E                       at py4j.Gateway.invoke(Gateway.java:282)
E                       at py4j.commands.AbstractCommand.invokeMethod(AbstractCommand.java:132)
E                       at py4j.commands.CallCommand.execute(CallCommand.java:79)
E                       at py4j.ClientServerConnection.waitForCommands(ClientServerConnection.java:182)
E                       at py4j.ClientServerConnection.run(ClientServerConnection.java:106)
E                       at java.base/java.lang.Thread.run(Thread.java:834)

..\..\..\..\AppData\Local\Programs\Python\Python39\lib\site-packages\py4j\protocol.py:326: Py4JJavaError

I have all the required packages installed in PyCharm, like pyspark, pytest, and ipython. I am new to Python/PySpark and am writing this test case for the project, but I have been stuck here for a long time. Any help is appreciated.

Kumar Harsh
  • 423
  • 5
  • 26
  • 1
    Does this answer your question? [Exception in thread "main" java.lang.UnsatisfiedLinkError: org.apache.hadoop.io.nativeio.NativeIO$Windows.access0(Ljava/lang/String;I)Z](https://stackoverflow.com/questions/41851066/exception-in-thread-main-java-lang-unsatisfiedlinkerror-org-apache-hadoop-io) – werner Mar 28 '23 at 16:55
  • I don't want to install Hadoop on my system; winutils is supposed to handle that. That is a different scenario, where the user installed Hadoop on their system. – Kumar Harsh Mar 28 '23 at 17:01
  • No. Winutils is only a shim, not a full Hadoop replacement – OneCricketeer Mar 29 '23 at 14:22

0 Answers