I'm using a python script where I'm using a shell command to copy from local to hdfs.
import os
import logging
import subprocess
filePath = "/tmp"
keyword = "BC10^Dummy-Segment"
for root, dirs, files in os.walk(filePath):
for file in files:
if keyword in file:
subprocess.call(["hadoop fs -copyFromLocal /tmp/BC10%5EDummy-Segment* /user/app"], shell=True)
subprocess.call(["hadoop fs -rm /tmp/BC10%5EDummy-Segment*"], shell=True)
I'm seeing this error:
copyFromLocal: `/tmp/BC10^Dummy-Segment*': No such file or directory
rm: `/tmp/BC10^Dummy-Segment_2019': No such file or directory
Updated code:
import glob
import subprocess
import os
from urllib import urlencode, quote_plus
filePath = "/tmp"
keyword = "BC10^Dummy-Segment"
wildcard = os.path.join(filePath, '{0}*'.format(keyword))
print(wildcard)
files = [urlencode(x, quote_via=quote_plus) for x in glob.glob(wildcard)]
subprocess.check_call(["hadoop", "fs", "-copyFromLocal"] + files + ["/user/app"])
#subprocess.check_call(["hadoop", "fs", "-rm"] + files)
Seeing error when I run:
Traceback (most recent call last):
File "ming.py", line 11, in <module>
files = [urlencode(x, quote_via=quote_plus) for x in glob.glob(wildcard)]
TypeError: urlencode() got an unexpected keyword argument 'quote_via'