I know this question may look like a duplicate, since several similar problems have already been posted, but none of them solves my problem. I've tried everything mentioned in "Pyspark error - Unsupported class file major version 55".
My PATH variable is as follows:- /usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin:/usr/games:/usr/local/games:/snap/bin:/usr/local/spark-2.4.0-bin-hadoop2.7/sbin:/usr/lib/jvm/java-1.11.0-openjdk-amd64
My spark code is as follows:
from pyspark.sql import SparkSession
import os,shutil
# Interactive report over a CSV export: start a local Spark session, load the
# file into a DataFrame, then ask the user which report to run.
spark = SparkSession.builder.master("local[*]").getOrCreate()

# NOTE(review): the reported "Unsupported class file major version 55" is
# raised inside this call while Spark infers the CSV schema. Major version 55
# corresponds to Java 11 class files, so this is presumably a JVM/Spark
# version mismatch rather than a fault in this script -- confirm which JDK
# Spark is launched with.
rdd = spark.read.csv(
    "file:///home/karan/Ubuntu Files/ExportExcel2.csv",
    header=True,
    sep=",",
    multiLine=True,  # real boolean, consistent with header=True
)

try:
    ch = int(input("1. Patient Count\n2. Medicine Recommendation\nChoice:-"))
    if ch == 1:
        # Kept separate from the menu choice so the two values are not confused.
        field_count = int(input("Enter number of fields you want to analyze:- "))
        print("Enter field names:- ")
        fields = [input() for _ in range(field_count)]

        # Count rows per distinct combination of the chosen columns, most
        # frequent first, with the grouping columns ahead of the count.
        rs = (
            rdd.groupBy(*fields)
            .count()
            .sort("count", ascending=False)
            .select(*fields, "count")
        )
        rs.show()
    else:
        # Every choice other than 1 (including the advertised option 2) lands here.
        print("Now you messed up")
except Exception as e:
    # Top-level boundary of the script: report the failure (e.g. non-numeric
    # input or an unknown column name) instead of dumping a traceback.
    print(e)
Can someone tell me where the problem is? This is what my CSV file looks like:
"ID","Doctor","Disease","Symptoms Recorded","Severeness","Prescribed medicine"
"1","Phillip Mcneill","Cold","sore throat;runny nose;coughing","7","Tylenol"
"2","Ryan Griffiths","Anxiety Disorder","Panic;fear;Sleep problems;Shortness of breath","1","diazepam"
"3","Logan Needham","Anxiety Disorder","Panic;fear;Cold;sweaty;numb or tingling hands or feet","1","oxazepam"
"4","Britney Sinclair","Diabetes","Hunger and fatigue;Dry mouth and itchy skin","2","acarbose"
"5","Aiden Clayton","Anxiety Disorder","Panic;fear;Sleep problems;Shortness of breath","6","lorazepam"
"6","Lynn Oatway","Cold","sore throat;runny nose;coughing;sneezing;watery eyes","2","Tylenol"
"7","Jaylene Campbell","Diabetes","Hunger and fatigue;Dry mouth and itchy skin; Blurred vision;Yeast infections","7","acarbose"
"8","Remy Mcgee","Anxiety Disorder","Panic;fear;Sleep problems;Shortness of breath;Cold;sweaty;numb or tingling hands or feet","3","alprazolam"
"9","Paula Ulyatt","ADD","disorganized;Lack focus;Inattention","9","Concerta"
"10","Danny Roscoe","Diabetes","Hunger and fatigue;Dry mouth and itchy skin; Blurred vision;Yeast infections","4","glimepiride"
And this is the error that pops up every time I try to run my code:
2019-03-04 07:58:54 WARN Utils:66 - Your hostname, ubuntu resolves to a loopback address: 127.0.1.1; using 192.168.43.120 instead (on interface ens33)
2019-03-04 07:58:54 WARN Utils:66 - Set SPARK_LOCAL_IP if you need to bind to another address
WARNING: An illegal reflective access operation has occurred
WARNING: Illegal reflective access by org.apache.hadoop.security.authentication.util.KerberosUtil (file:/home/karan/.local/lib/python3.6/site-packages/pyspark/jars/hadoop-auth-2.7.3.jar) to method sun.security.krb5.Config.getInstance()
WARNING: Please consider reporting this to the maintainers of org.apache.hadoop.security.authentication.util.KerberosUtil
WARNING: Use --illegal-access=warn to enable warnings of further illegal reflective access operations
WARNING: All illegal access operations will be denied in a future release
2019-03-04 07:58:55 WARN NativeCodeLoader:62 - Unable to load native-hadoop library for your platform... using builtin-java classes where applicable
Setting default log level to "WARN".
To adjust logging level use sc.setLogLevel(newLevel). For SparkR, use setLogLevel(newLevel).
Traceback (most recent call last):
File "/home/karan/.local/lib/python3.6/site-packages/pyspark/sql/utils.py", line 63, in deco
return f(*a, **kw)
File "/home/karan/.local/lib/python3.6/site-packages/py4j/protocol.py", line 328, in get_return_value
format(target_id, ".", name), value)
py4j.protocol.Py4JJavaError: An error occurred while calling o30.csv.
: java.lang.IllegalArgumentException: Unsupported class file major version 55
at org.apache.xbean.asm6.ClassReader.<init>(ClassReader.java:166)
at org.apache.xbean.asm6.ClassReader.<init>(ClassReader.java:148)
at org.apache.xbean.asm6.ClassReader.<init>(ClassReader.java:136)
at org.apache.xbean.asm6.ClassReader.<init>(ClassReader.java:237)
at org.apache.spark.util.ClosureCleaner$.getClassReader(ClosureCleaner.scala:49)
at org.apache.spark.util.FieldAccessFinder$$anon$3$$anonfun$visitMethodInsn$2.apply(ClosureCleaner.scala:517)
at org.apache.spark.util.FieldAccessFinder$$anon$3$$anonfun$visitMethodInsn$2.apply(ClosureCleaner.scala:500)
at scala.collection.TraversableLike$WithFilter$$anonfun$foreach$1.apply(TraversableLike.scala:733)
at scala.collection.mutable.HashMap$$anon$1$$anonfun$foreach$2.apply(HashMap.scala:134)
at scala.collection.mutable.HashMap$$anon$1$$anonfun$foreach$2.apply(HashMap.scala:134)
at scala.collection.mutable.HashTable$class.foreachEntry(HashTable.scala:236)
at scala.collection.mutable.HashMap.foreachEntry(HashMap.scala:40)
at scala.collection.mutable.HashMap$$anon$1.foreach(HashMap.scala:134)
at scala.collection.TraversableLike$WithFilter.foreach(TraversableLike.scala:732)
at org.apache.spark.util.FieldAccessFinder$$anon$3.visitMethodInsn(ClosureCleaner.scala:500)
at org.apache.xbean.asm6.ClassReader.readCode(ClassReader.java:2175)
at org.apache.xbean.asm6.ClassReader.readMethod(ClassReader.java:1238)
at org.apache.xbean.asm6.ClassReader.accept(ClassReader.java:631)
at org.apache.xbean.asm6.ClassReader.accept(ClassReader.java:355)
at org.apache.spark.util.ClosureCleaner$$anonfun$org$apache$spark$util$ClosureCleaner$$clean$14.apply(ClosureCleaner.scala:307)
at org.apache.spark.util.ClosureCleaner$$anonfun$org$apache$spark$util$ClosureCleaner$$clean$14.apply(ClosureCleaner.scala:306)
at scala.collection.immutable.List.foreach(List.scala:392)
at org.apache.spark.util.ClosureCleaner$.org$apache$spark$util$ClosureCleaner$$clean(ClosureCleaner.scala:306)
at org.apache.spark.util.ClosureCleaner$.clean(ClosureCleaner.scala:162)
at org.apache.spark.SparkContext.clean(SparkContext.scala:2326)
at org.apache.spark.SparkContext.runJob(SparkContext.scala:2100)
at org.apache.spark.rdd.RDD$$anonfun$take$1.apply(RDD.scala:1364)
at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:151)
at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:112)
at org.apache.spark.rdd.RDD.withScope(RDD.scala:363)
at org.apache.spark.rdd.RDD.take(RDD.scala:1337)
at org.apache.spark.sql.execution.datasources.csv.MultiLineCSVDataSource$.infer(CSVDataSource.scala:328)
at org.apache.spark.sql.execution.datasources.csv.CSVDataSource.inferSchema(CSVDataSource.scala:68)
at org.apache.spark.sql.execution.datasources.csv.CSVFileFormat.inferSchema(CSVFileFormat.scala:63)
at org.apache.spark.sql.execution.datasources.DataSource$$anonfun$6.apply(DataSource.scala:180)
at org.apache.spark.sql.execution.datasources.DataSource$$anonfun$6.apply(DataSource.scala:180)
at scala.Option.orElse(Option.scala:289)
at org.apache.spark.sql.execution.datasources.DataSource.getOrInferFileFormatSchema(DataSource.scala:179)
at org.apache.spark.sql.execution.datasources.DataSource.resolveRelation(DataSource.scala:373)
at org.apache.spark.sql.DataFrameReader.loadV1Source(DataFrameReader.scala:223)
at org.apache.spark.sql.DataFrameReader.load(DataFrameReader.scala:211)
at org.apache.spark.sql.DataFrameReader.csv(DataFrameReader.scala:617)
at java.base/jdk.internal.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
at java.base/jdk.internal.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
at java.base/jdk.internal.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
at java.base/java.lang.reflect.Method.invoke(Method.java:566)
at py4j.reflection.MethodInvoker.invoke(MethodInvoker.java:244)
at py4j.reflection.ReflectionEngine.invoke(ReflectionEngine.java:357)
at py4j.Gateway.invoke(Gateway.java:282)
at py4j.commands.AbstractCommand.invokeMethod(AbstractCommand.java:132)
at py4j.commands.CallCommand.execute(CallCommand.java:79)
at py4j.GatewayConnection.run(GatewayConnection.java:238)
at java.base/java.lang.Thread.run(Thread.java:834)
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
File "HAS.py", line 11, in <module>
rdd = spark.read.csv("file:///home/karan/Ubuntu Files/ExportExcel2.csv",header=True,sep=",",multiLine="True")
File "/home/karan/.local/lib/python3.6/site-packages/pyspark/sql/readwriter.py", line 472, in csv
return self._df(self._jreader.csv(self._spark._sc._jvm.PythonUtils.toSeq(path)))
File "/home/karan/.local/lib/python3.6/site-packages/py4j/java_gateway.py", line 1257, in __call__
answer, self.gateway_client, self.target_id, self.name)
File "/home/karan/.local/lib/python3.6/site-packages/pyspark/sql/utils.py", line 79, in deco
raise IllegalArgumentException(s.split(': ', 1)[1], stackTrace)
pyspark.sql.utils.IllegalArgumentException: 'Unsupported class file major version 55'