0

I am trying to read a file from an HDFS location at work using the following code:

import hdfs3
from hdfs3 import HDFileSystem
hdfs=HDFileSystem(host='host',port='port')
with hdfs.open('FILE') as f:
    model_AOB = f.read()

I am getting the following error:

---------------------------------------------------------------------------
OSError                                   Traceback (most recent call last)
<ipython-input-1-d44f943ebe4e> in <module>()
      1 import hdfs3
      2 from hdfs3 import HDFileSystem
----> 3 hdfs=HDFileSystem(host='HOST',port=PORT)
      4 with hdfs.open('FILE') as f:
      5     model_AOB = f.read()

~\AppData\Local\Continuum\anaconda3\lib\site-packages\hdfs3\core.py in __init__(self, host, port, connect, autoconf, pars, **kwargs)
     86 
     87         if connect:
---> 88             self.connect()
     89 
     90     def __getstate__(self):

~\AppData\Local\Continuum\anaconda3\lib\site-packages\hdfs3\core.py in connect(self)
    104         This happens automatically at startup
    105         """
--> 106         get_lib()
    107         conf = self.conf.copy()
    108         if self._handle:

~\AppData\Local\Continuum\anaconda3\lib\site-packages\hdfs3\core.py in get_lib()
    668     global _lib
    669     if _lib is None:
--> 670         from .lib import _lib as l
    671         _lib = l
    672 

~\AppData\Local\Continuum\anaconda3\lib\site-packages\hdfs3\lib.py in <module>()
     15 for name in ['libhdfs3.so', 'libhdfs3.dylib']:
     16     try:
---> 17         _lib = ct.cdll.LoadLibrary(name)
     18         break
     19     except OSError as e:

~\AppData\Local\Continuum\anaconda3\lib\ctypes\__init__.py in LoadLibrary(self, name)
    432 
    433     def LoadLibrary(self, name):
--> 434         return self._dlltype(name)
    435 
    436 cdll = LibraryLoader(CDLL)

~\AppData\Local\Continuum\anaconda3\lib\ctypes\__init__.py in __init__(self, name, mode, handle, use_errno, use_last_error)
    354 
    355         if handle is None:
--> 356             self._handle = _dlopen(self._name, mode)
    357         else:
    358             self._handle = handle

OSError: [WinError 126] The specified module could not be found

I have also tried passing the argument pars = {"hadoop.security.authentication": "kerberos"} to the HDFileSystem constructor, as I believe the Hadoop cluster is kerberized.

Can anyone help with this issue? Apologies for the lengthy question; I'm new to Python, so I didn't want to accidentally leave out anything relevant from the error.

Thanks

TUIQ
  • 1
  • 1

0 Answers0