0

I am trying to use Spark in R Studio using the sparklyr library on MacOS. I have installed it using the following commands

# Install the sparklyr package
install.packages("sparklyr")

# Now load the library
library(sparklyr)

# Install Spark to your local machine
spark_install(version = "2.1.0")

install.packages("devtools")

# Install latest version of sparklyr
devtools::install_github("rstudio/sparklyr")

# Connect to Spark
options(sparklyr.java9 = TRUE)

sc = spark_connect(master = "local")

iris_tbl <- copy_to(sc, iris) # Throws hive error !!!

Here is the error that I am facing -->

iris_tbl <- copy_to(sc, iris) Error: java.lang.IllegalArgumentException: Error while instantiating 'org.apache.spark.sql.hive.HiveSessionState': at org.apache.spark.sql.SparkSession$.org$apache$spark$sql$SparkSession$$reflect(SparkSession.scala:981) at org.apache.spark.sql.SparkSession.sessionState$lzycompute(SparkSession.scala:110) at org.apache.spark.sql.SparkSession.sessionState(SparkSession.scala:109) at org.apache.spark.sql.SparkSession$Builder$$anonfun$getOrCreate$5.apply(SparkSession.scala:878) at org.apache.spark.sql.SparkSession$Builder$$anonfun$getOrCreate$5.apply(SparkSession.scala:878) at scala.collection.mutable.HashMap$$anonfun$foreach$1.apply(HashMap.scala:99) at scala.collection.mutable.HashMap$$anonfun$foreach$1.apply(HashMap.scala:99) at scala.collection.mutable.HashTable$class.foreachEntry(HashTable.scala:230) at scala.collection.mutable.HashMap.foreachEntry(HashMap.scala:40) at scala.collection.mutable.HashMap.foreach(HashMap.scala:99) at org.apache.spark.sql.SparkSession$Builder.getOrCreate(SparkSession.scala:878) at java.base/jdk.internal.reflect.NativeMethodAccessorImpl.invoke0(Native Method) at java.base/jdk.internal.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62) at java.base/jdk.internal.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43) at java.base/java.lang.reflect.Method.invoke(Method.java:564) at sparklyr.Invoke$.invoke(invoke.scala:102) at sparklyr.StreamHandler$.handleMethodCall(stream.scala:97) at sparklyr.StreamHandler$.read(stream.scala:62) at sparklyr.BackendHandler.channelRead0(handler.scala:52) at sparklyr.BackendHandler.channelRead0(handler.scala:14) at io.netty.channel.SimpleChannelInboundHandler.channelRead(SimpleChannelInboundHandler.java:105) at io.netty.channel.AbstractChannelHandlerContext.invokeChannelRead(AbstractChannelHandlerContext.java:367) at io.netty.channel.AbstractChannelHandlerContext.invokeChannelRead(AbstractChannelHandlerContext.java:353) at io.netty.channel.AbstractChannelHandlerContext.fireChannelRead(AbstractChannelHandlerContext.java:346) at io.netty.handler.codec.MessageToMessageDecoder.channelRead(MessageToMessageDecoder.java:102) at io.netty.channel.AbstractChannelHandlerContext.invokeChannelRead(AbstractChannelHandlerContext.java:367) at io.netty.channel.AbstractChannelHandlerContext.invokeChannelRead(AbstractChannelHandlerContext.java:353) at io.netty.channel.AbstractChannelHandlerContext.fireChannelRead(AbstractChannelHandlerContext.java:346) at io.netty.handler.codec.ByteToMessageDecoder.fireChannelRead(ByteToMessageDecoder.java:293) at io.netty.handler.codec.ByteToMessageDecoder.channelRead(ByteToMessageDecoder.java:267) at io.netty.channel.AbstractChannelHandlerContext.invokeChannelRead(AbstractChannelHandlerContext.java:367) at io.netty.channel.AbstractChannelHandlerContext.invokeChannelRead(AbstractChannelHandlerContext.java:353) at io.netty.channel.AbstractChannelHandlerContext.fireChannelRead(AbstractChannelHandlerContext.java:346) at io.netty.channel.DefaultChannelPipeline$HeadContext.channelRead(DefaultChannelPipeline.java:1294) at io.netty.channel.AbstractChannelHandlerContext.invokeChannelRead(AbstractChannelHandlerContext.java:367) at io.netty.channel.AbstractChannelHandlerContext.invokeChannelRead(AbstractChannelHandlerContext.java:353) at io.netty.channel.DefaultChannelPipeline.fireChannelRead(DefaultChannelPipeline.java:911) at io.netty.channel.nio.AbstractNioByteChannel$NioByteUnsafe.read(AbstractNioByteChannel.java:131) at io.netty.channel.nio.NioEventLoop.processSelectedKey(NioEventLoop.java:652) at io.netty.channel.nio.NioEventLoop.processSelectedKeysOptimized(NioEventLoop.java:575) at io.netty.channel.nio.NioEventLoop.processSelectedKeys(NioEventLoop.java:489) at io.netty.channel.nio.NioEventLoop.run(NioEventLoop.java:451) at io.netty.util.concurrent.SingleThreadEventExecutor$2.run(SingleThreadEventExecutor.java:140) at io.netty.util.concurrent.DefaultThreadFactory$DefaultRunnableDecorator.run(DefaultThreadFactory.java:144) at java.base/java.lang.Thread.run(Thread.java:844) Caused by: java.lang.reflect.InvocationTargetException at java.base/jdk.internal.reflect.NativeConstructorAccessorImpl.newInstance0(Native Method) at java.base/jdk.internal.reflect.NativeConstructorAccessorImpl.newInstance(NativeConstructorAccessorImpl.java:62) at java.base/jdk.internal.reflect.DelegatingConstructorAccessorImpl.newInstance(DelegatingConstructorAccessorImpl.java:45) at java.base/java.lang.reflect.Constructor.newInstance(Constructor.java:488) at org.apache.spark.sql.SparkSession$.org$apache$spark$sql$SparkSession$$reflect(SparkSession.scala:978) ... 44 more Caused by: java.lang.IllegalArgumentException: Error while instantiating 'org.apache.spark.sql.hive.HiveExternalCatalog': at org.apache.spark.sql.internal.SharedState$.org$apache$spark$sql$internal$SharedState$$reflect(SharedState.scala:169) at org.apache.spark.sql.internal.SharedState.(SharedState.scala:86) at org.apache.spark.sql.SparkSession$$anonfun$sharedState$1.apply(SparkSession.scala:101) at org.apache.spark.sql.SparkSession$$anonfun$sharedState$1.apply(SparkSession.scala:101) at scala.Option.getOrElse(Option.scala:121) at org.apache.spark.sql.SparkSession.sharedState$lzycompute(SparkSession.scala:101) at org.apache.spark.sql.SparkSession.sharedState(SparkSession.scala:100) at org.apache.spark.sql.internal.SessionState.(SessionState.scala:157) at org.apache.spark.sql.hive.HiveSessionState.(HiveSessionState.scala:32) ... 49 more Caused by: java.lang.reflect.InvocationTargetException at java.base/jdk.internal.reflect.NativeConstructorAccessorImpl.newInstance0(Native Method) at java.base/jdk.internal.reflect.NativeConstructorAccessorImpl.newInstance(NativeConstructorAccessorImpl.java:62) at java.base/jdk.internal.reflect.DelegatingConstructorAccessorImpl.newInstance(DelegatingConstructorAccessorImpl.java:45) at java.base/java.lang.reflect.Constructor.newInstance(Constructor.java:488) at org.apache.spark.sql.internal.SharedState$.org$apache$spark$sql$internal$SharedState$$reflect(SharedState.scala:166) ... 57 more Caused by: java.lang.ClassNotFoundException: java.lang.NoClassDefFoundError: org/apache/hadoop/hive/conf/HiveConf when creating Hive client using classpath: file:/Library/Frameworks/R.framework/Versions/3.4/Resources/library/sparklyr/java/sparklyr-2.1-2.11.jar Please make sure that jars for your version of hive and hadoop are included in the paths passed to spark.sql.hive.metastore.jars. at org.apache.spark.sql.hive.client.IsolatedClientLoader.createClient(IsolatedClientLoader.scala:270) at org.apache.spark.sql.hive.HiveUtils$.newClientForMetadata(HiveUtils.scala:366) at org.apache.spark.sql.hive.HiveUtils$.newClientForMetadata(HiveUtils.scala:270) at org.apache.spark.sql.hive.HiveExternalCatalog.(HiveExternalCatalog.scala:65) ... 62 more Caused by: java.lang.reflect.InvocationTargetException at java.base/jdk.internal.reflect.NativeConstructorAccessorImpl.newInstance0(Native Method) at java.base/jdk.internal.reflect.NativeConstructorAccessorImpl.newInstance(NativeConstructorAccessorImpl.java:62) at java.base/jdk.internal.reflect.DelegatingConstructorAccessorImpl.newInstance(DelegatingConstructorAccessorImpl.java:45) at java.base/java.lang.reflect.Constructor.newInstance(Constructor.java:488) at org.apache.spark.sql.hive.client.IsolatedClientLoader.createClient(IsolatedClientLoader.scala:264) ... 65 more Caused by: java.lang.NoClassDefFoundError: org/apache/hadoop/hive/conf/HiveConf at org.apache.spark.sql.hive.client.HiveClientImpl.(HiveClientImpl.scala:97) ... 70 more Caused by: java.lang.ClassNotFoundException: org.apache.hadoop.hive.conf.HiveConf at java.base/java.net.URLClassLoader.findClass(URLClassLoader.java:466) at java.base/java.lang.ClassLoader.loadClass(ClassLoader.java:563) at org.apache.spark.sql.hive.client.IsolatedClientLoader$$anon$1.doLoadClass(IsolatedC

Grace Mahoney
  • 485
  • 1
  • 7
  • 14
Regressor
  • 1,843
  • 4
  • 27
  • 67

1 Answers1

0

New Mac OSX releases have a documented problem of messing up the Java path in R/RStudio (see here). I have a feeling (though I'm not 100% certain) that this is what you're encountering here.

If you check out the question I've linked above, hopefully you can find a solution that works to reset your path; the one I found that best worked for me on High Sierra was:

dyn.load('/Library/Java/JavaVirtualMachines/jdk1.8.0_66.jdk/Contents/Home/jre/lib/server/libjvm.dylib')
Grace Mahoney
  • 485
  • 1
  • 7
  • 14