I'm trying to read a table from a PostgreSQL database.
Previously i was dealing with several errors, just as mentionad here. My solution was to download the PostgreSQL JDBC Driver and add it manually in the "jars" folder, that is: Inside "spark-3.2.0-bin-hadoop2.7\jars" and inside "anaconda3\Lib\site-packages\pyspark\jars". After that i was able to connect to the database using:
df = spark.read \
.format("jdbc") \
.option("url", "jdbc:postgresql://_my_host:5432/my_db_name") \
.option("dbtable", "my_table_name") \
.option("user", "_my_name") \
.option("password", "my_password") \
.option("driver", "org.postgresql.Driver") \
.load()
and i can execute some commands, like: df.printSchema()
. So far so good.
But, when i try to read the data with df.head()
or df.show(5)
, the following error throws:
ERROR:root:Exception while sending command.
Traceback (most recent call last):
File "C:\Users\danid\anaconda3\lib\site-packages\IPython\core\interactiveshell.py", line 3418, in run_code
exec(code_obj, self.user_global_ns, self.user_ns)
File "<ipython-input-6-eb589bae8d4b>", line 1, in <module>
df.show(5)
File "C:\Users\danid\anaconda3\lib\site-packages\pyspark\sql\dataframe.py", line 494, in show
print(self._jdf.showString(n, 20, vertical))
File "C:\Users\danid\anaconda3\lib\site-packages\py4j\java_gateway.py", line 1309, in __call__
return_value = get_return_value(
File "C:\Users\danid\anaconda3\lib\site-packages\pyspark\sql\utils.py", line 111, in deco
return f(*a, **kw)
File "C:\Users\danid\anaconda3\lib\site-packages\py4j\protocol.py", line 326, in get_return_value
raise Py4JJavaError(
py4j.protocol.Py4JJavaError: <unprintable Py4JJavaError object>
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
File "C:\Users\danid\anaconda3\lib\site-packages\IPython\core\interactiveshell.py", line 2045, in showtraceback
stb = value._render_traceback_()
AttributeError: 'Py4JJavaError' object has no attribute '_render_traceback_'
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
File "C:\Users\danid\anaconda3\lib\site-packages\py4j\clientserver.py", line 475, in send_command
answer = smart_decode(self.stream.readline()[:-1])
File "C:\Users\danid\anaconda3\lib\socket.py", line 669, in readinto
return self._sock.recv_into(b)
ConnectionResetError: [WinError 10054] Foi forçado o cancelamento de uma conexão existente pelo host remoto
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
File "C:\Users\danid\anaconda3\lib\site-packages\py4j\java_gateway.py", line 1038, in send_command
response = connection.send_command(command)
File "C:\Users\danid\anaconda3\lib\site-packages\py4j\clientserver.py", line 503, in send_command
raise Py4JNetworkError(
py4j.protocol.Py4JNetworkError: Error while sending or receiving
---------------------------------------------------------------------------
Py4JJavaError Traceback (most recent call last)
[... skipping hidden 1 frame]
<ipython-input-6-eb589bae8d4b> in <module>
----> 1 df.show(5)
~\anaconda3\lib\site-packages\pyspark\sql\dataframe.py in show(self, n, truncate, vertical)
493 if isinstance(truncate, bool) and truncate:
--> 494 print(self._jdf.showString(n, 20, vertical))
495 else:
~\anaconda3\lib\site-packages\py4j\java_gateway.py in __call__(self, *args)
1308 answer = self.gateway_client.send_command(command)
-> 1309 return_value = get_return_value(
1310 answer, self.gateway_client, self.target_id, self.name)
~\anaconda3\lib\site-packages\pyspark\sql\utils.py in deco(*a, **kw)
110 try:
--> 111 return f(*a, **kw)
112 except py4j.protocol.Py4JJavaError as e:
~\anaconda3\lib\site-packages\py4j\protocol.py in get_return_value(answer, gateway_client, target_id, name)
325 if answer[1] == REFERENCE_TYPE:
--> 326 raise Py4JJavaError(
327 "An error occurred while calling {0}{1}{2}.\n".
<class 'str'>: (<class 'ConnectionRefusedError'>, ConnectionRefusedError(10061, 'No connection could be made because the target machine actively refused it ', None, 10061, None))
During handling of the above exception, another exception occurred:
ConnectionRefusedError Traceback (most recent call last)
[... skipping hidden 1 frame]
~\anaconda3\lib\site-packages\IPython\core\interactiveshell.py in showtraceback(self, exc_tuple, filename, tb_offset, exception_only, running_compiled_code)
2048 value, tb, tb_offset=tb_offset)
2049
-> 2050 self._showtraceback(etype, value, stb)
2051 if self.call_pdb:
2052 # drop into debugger
~\anaconda3\lib\site-packages\ipykernel\zmqshell.py in _showtraceback(self, etype, evalue, stb)
544 u'traceback' : stb,
545 u'ename' : unicode_type(etype.__name__),
--> 546 u'evalue' : py3compat.safe_unicode(evalue),
547 }
548
~\anaconda3\lib\site-packages\ipython_genutils\py3compat.py in safe_unicode(e)
63 """
64 try:
---> 65 return unicode_type(e)
66 except UnicodeError:
67 pass
~\anaconda3\lib\site-packages\py4j\protocol.py in __str__(self)
469 def __str__(self):
470 gateway_client = self.java_exception._gateway_client
--> 471 answer = gateway_client.send_command(self.exception_cmd)
472 return_value = get_return_value(answer, gateway_client, None, None)
473 # Note: technically this should return a bytestring 'str' rather than
~\anaconda3\lib\site-packages\py4j\java_gateway.py in send_command(self, command, retry, binary)
1034 if `binary` is `True`.
1035 """
-> 1036 connection = self._get_connection()
1037 try:
1038 response = connection.send_command(command)
~\anaconda3\lib\site-packages\py4j\clientserver.py in _get_connection(self)
279
280 if connection is None or connection.socket is None:
--> 281 connection = self._create_new_connection()
282 return connection
283
~\anaconda3\lib\site-packages\py4j\clientserver.py in _create_new_connection(self)
286 self.java_parameters, self.python_parameters,
287 self.gateway_property, self)
--> 288 connection.connect_to_java_server()
289 self.set_thread_connection(connection)
290 return connection
~\anaconda3\lib\site-packages\py4j\clientserver.py in connect_to_java_server(self)
400 self.socket = self.ssl_context.wrap_socket(
401 self.socket, server_hostname=self.java_address)
--> 402 self.socket.connect((self.java_address, self.java_port))
403 self.stream = self.socket.makefile("rb")
404 self.is_connected = True
ConnectionRefusedError: [WinError 10061] No connection could be made because the target machine actively refused it
I don't know which configuration i shoul make to fix it.