0

I'm trying to read a table from a PostgreSQL database.

Previously i was dealing with several errors, just as mentionad here. My solution was to download the PostgreSQL JDBC Driver and add it manually in the "jars" folder, that is: Inside "spark-3.2.0-bin-hadoop2.7\jars" and inside "anaconda3\Lib\site-packages\pyspark\jars". After that i was able to connect to the database using:

df = spark.read \
    .format("jdbc") \
    .option("url", "jdbc:postgresql://_my_host:5432/my_db_name") \
    .option("dbtable", "my_table_name") \
    .option("user", "_my_name") \
    .option("password", "my_password") \
    .option("driver", "org.postgresql.Driver") \
    .load()

and i can execute some commands, like: df.printSchema(). So far so good.

But, when i try to read the data with df.head() or df.show(5), the following error throws:

ERROR:root:Exception while sending command.
Traceback (most recent call last):
  File "C:\Users\danid\anaconda3\lib\site-packages\IPython\core\interactiveshell.py", line 3418, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)
  File "<ipython-input-6-eb589bae8d4b>", line 1, in <module>
    df.show(5)
  File "C:\Users\danid\anaconda3\lib\site-packages\pyspark\sql\dataframe.py", line 494, in show
    print(self._jdf.showString(n, 20, vertical))
  File "C:\Users\danid\anaconda3\lib\site-packages\py4j\java_gateway.py", line 1309, in __call__
    return_value = get_return_value(
  File "C:\Users\danid\anaconda3\lib\site-packages\pyspark\sql\utils.py", line 111, in deco
    return f(*a, **kw)
  File "C:\Users\danid\anaconda3\lib\site-packages\py4j\protocol.py", line 326, in get_return_value
    raise Py4JJavaError(
py4j.protocol.Py4JJavaError: <unprintable Py4JJavaError object>

During handling of the above exception, another exception occurred:

Traceback (most recent call last):
  File "C:\Users\danid\anaconda3\lib\site-packages\IPython\core\interactiveshell.py", line 2045, in showtraceback
    stb = value._render_traceback_()
AttributeError: 'Py4JJavaError' object has no attribute '_render_traceback_'

During handling of the above exception, another exception occurred:

Traceback (most recent call last):
  File "C:\Users\danid\anaconda3\lib\site-packages\py4j\clientserver.py", line 475, in send_command
    answer = smart_decode(self.stream.readline()[:-1])
  File "C:\Users\danid\anaconda3\lib\socket.py", line 669, in readinto
    return self._sock.recv_into(b)
ConnectionResetError: [WinError 10054] Foi forçado o cancelamento de uma conexão existente pelo host remoto

During handling of the above exception, another exception occurred:

Traceback (most recent call last):
  File "C:\Users\danid\anaconda3\lib\site-packages\py4j\java_gateway.py", line 1038, in send_command
    response = connection.send_command(command)
  File "C:\Users\danid\anaconda3\lib\site-packages\py4j\clientserver.py", line 503, in send_command
    raise Py4JNetworkError(
py4j.protocol.Py4JNetworkError: Error while sending or receiving
---------------------------------------------------------------------------
Py4JJavaError                             Traceback (most recent call last)
    [... skipping hidden 1 frame]

<ipython-input-6-eb589bae8d4b> in <module>
----> 1 df.show(5)

~\anaconda3\lib\site-packages\pyspark\sql\dataframe.py in show(self, n, truncate, vertical)
    493         if isinstance(truncate, bool) and truncate:
--> 494             print(self._jdf.showString(n, 20, vertical))
    495         else:

~\anaconda3\lib\site-packages\py4j\java_gateway.py in __call__(self, *args)
   1308         answer = self.gateway_client.send_command(command)
-> 1309         return_value = get_return_value(
   1310             answer, self.gateway_client, self.target_id, self.name)

~\anaconda3\lib\site-packages\pyspark\sql\utils.py in deco(*a, **kw)
    110         try:
--> 111             return f(*a, **kw)
    112         except py4j.protocol.Py4JJavaError as e:

~\anaconda3\lib\site-packages\py4j\protocol.py in get_return_value(answer, gateway_client, target_id, name)
    325             if answer[1] == REFERENCE_TYPE:
--> 326                 raise Py4JJavaError(
    327                     "An error occurred while calling {0}{1}{2}.\n".

<class 'str'>: (<class 'ConnectionRefusedError'>, ConnectionRefusedError(10061, 'No connection could be made because the target machine actively refused it ', None, 10061, None))

During handling of the above exception, another exception occurred:

ConnectionRefusedError                    Traceback (most recent call last)
    [... skipping hidden 1 frame]

~\anaconda3\lib\site-packages\IPython\core\interactiveshell.py in showtraceback(self, exc_tuple, filename, tb_offset, exception_only, running_compiled_code)
   2048                                             value, tb, tb_offset=tb_offset)
   2049 
-> 2050                     self._showtraceback(etype, value, stb)
   2051                     if self.call_pdb:
   2052                         # drop into debugger

~\anaconda3\lib\site-packages\ipykernel\zmqshell.py in _showtraceback(self, etype, evalue, stb)
    544             u'traceback' : stb,
    545             u'ename' : unicode_type(etype.__name__),
--> 546             u'evalue' : py3compat.safe_unicode(evalue),
    547         }
    548 

~\anaconda3\lib\site-packages\ipython_genutils\py3compat.py in safe_unicode(e)
     63     """
     64     try:
---> 65         return unicode_type(e)
     66     except UnicodeError:
     67         pass

~\anaconda3\lib\site-packages\py4j\protocol.py in __str__(self)
    469     def __str__(self):
    470         gateway_client = self.java_exception._gateway_client
--> 471         answer = gateway_client.send_command(self.exception_cmd)
    472         return_value = get_return_value(answer, gateway_client, None, None)
    473         # Note: technically this should return a bytestring 'str' rather than

~\anaconda3\lib\site-packages\py4j\java_gateway.py in send_command(self, command, retry, binary)
   1034          if `binary` is `True`.
   1035         """
-> 1036         connection = self._get_connection()
   1037         try:
   1038             response = connection.send_command(command)

~\anaconda3\lib\site-packages\py4j\clientserver.py in _get_connection(self)
    279 
    280         if connection is None or connection.socket is None:
--> 281             connection = self._create_new_connection()
    282         return connection
    283 

~\anaconda3\lib\site-packages\py4j\clientserver.py in _create_new_connection(self)
    286             self.java_parameters, self.python_parameters,
    287             self.gateway_property, self)
--> 288         connection.connect_to_java_server()
    289         self.set_thread_connection(connection)
    290         return connection

~\anaconda3\lib\site-packages\py4j\clientserver.py in connect_to_java_server(self)
    400                 self.socket = self.ssl_context.wrap_socket(
    401                     self.socket, server_hostname=self.java_address)
--> 402             self.socket.connect((self.java_address, self.java_port))
    403             self.stream = self.socket.makefile("rb")
    404             self.is_connected = True

ConnectionRefusedError: [WinError 10061] No connection could be made because the target machine actively refused it 

I don't know which configuration i shoul make to fix it.

Daniel Dantas
  • 145
  • 3
  • 15

1 Answers1

0

After a lot of research i ended up with this new knowledge: Spark limitate your memory usage and it throws an error. So you have to configure something to obtain more memory, or you can use "cursor", which is an alternative for reading data in batches.

Daniel Dantas
  • 145
  • 3
  • 15