If you are rewriting the script in Python, you could replace wget with urllib.urlretrieve() in this case:
#!/usr/bin/env python
import os
import posixpath
import sys
import urllib
import urlparse
def url2filename(url):
    """Return basename corresponding to url.

    >>> url2filename('http://example.com/path/to/file?opt=1')
    'file'

    Only the path component of the url is considered (query string and
    fragment are ignored) and percent-escapes are decoded before the last
    path segment is taken.  The result is an empty string when the path is
    empty or ends with a slash.

    Raises ValueError if the decoded name still contains a path separator
    according to the local os.path rules.
    """
    # Resolve the helpers locally so the function works on Python 3 as well
    # as on Python 2, whichever top-level imports the file carries.
    try:  # Python 3
        from urllib.parse import unquote, urlsplit
    except ImportError:  # Python 2
        from urllib import unquote
        from urlparse import urlsplit

    urlpath = urlsplit(url).path
    basename = posixpath.basename(unquote(urlpath))
    if os.path.basename(basename) != basename:
        # refuse 'dir%5Cbasename.ext' on Windows
        raise ValueError("unsafe file name in url: %r" % (url,))
    return basename
def reporthook(blocknum, blocksize, totalsize):
    """Report download progress on stderr.

    blocknum  -- number of blocks transferred so far
    blocksize -- size of one block in bytes
    totalsize -- total size in bytes; a value <= 0 means it is unknown

    With a known total the same terminal line is rewritten (leading
    carriage return) as "<percent>% <read> / <total>", and a newline is
    emitted once everything has been read.  With an unknown total one
    "read <n>" line is printed per call.
    """
    readsofar = blocknum * blocksize
    if totalsize > 0:
        # blocknum * blocksize overshoots on the final, partly filled block;
        # clamp it so the display never goes past 100% / totalsize.
        readsofar = min(readsofar, totalsize)
        percent = readsofar * 1e2 / totalsize
        sys.stderr.write("\r%5.1f%% %*d / %d" % (
            percent, len(str(totalsize)), readsofar, totalsize))
        if readsofar >= totalsize:  # everything has been read
            sys.stderr.write("\n")
    else:  # total size is unknown
        sys.stderr.write("read %d\n" % (readsofar,))
# Command-line entry point: download URL to FILENAME (or to the basename
# derived from the URL when FILENAME is omitted), reporting progress.
if __name__ == "__main__":
    try:  # Python 3
        from urllib.request import urlretrieve as _urlretrieve
    except ImportError:  # Python 2
        from urllib import urlretrieve as _urlretrieve

    if len(sys.argv) < 2:
        # Give a usage hint instead of an IndexError traceback.
        sys.exit("usage: %s URL [FILENAME]" % (sys.argv[0],))
    url = sys.argv[1]
    filename = sys.argv[2] if len(sys.argv) > 2 else url2filename(url)
    if not filename:
        # An empty name would be taken as "no file name given" by
        # urlretrieve(), which then saves to a temporary file instead.
        sys.exit("cannot derive a file name from %r; pass FILENAME" % (url,))
    _urlretrieve(url, filename, reporthook)
Example:
$ python download-file.py http://example.com/path/to/file
It downloads the URL to a file. If the filename is not given, it uses the basename from the URL.
You could also run wget itself from Python if you need it:
#!/usr/bin/env python
import sys
from subprocess import Popen, PIPE, STDOUT
def urlretrieve(url, filename=None, width=4):
    """Download url by running wget, echoing its progress on stderr.

    url      -- address passed to wget
    filename -- output file passed via "-O"; when None, wget picks the name
    width    -- number of columns used (and erased with backspaces) for the
                progress field

    wget's stdout and stderr are merged and read line by line.  For every
    line containing "%", the dots are removed and the second
    whitespace-separated field is shown right-justified in `width` columns.
    When wget exits, "DONE" is printed on stdout.  wget's exit status is
    not checked.
    """
    destination = ["-O", filename] if filename is not None else []
    p = Popen(["wget"] + destination + ["--progress=dot", url],
              stdout=PIPE, stderr=STDOUT, bufsize=1)  # line-buffered (out side)
    for line in iter(p.stdout.readline, b''):
        if b'%' in line:  # grep "%"
            line = line.replace(b'.', b'')  # sed -u -e "s,\.,,g"
            fields = line.split(None, 2)
            if len(fields) < 2:
                continue  # no second column on this line; nothing to show
            percents = fields[1].decode()  # awk $2
            sys.stderr.write("\b"*width + percents.rjust(width))
            # No newline is written, so flush explicitly to make the update
            # visible even when stderr is buffered.
            sys.stderr.flush()
    p.communicate()  # close stdout, wait for child's exit
    print("\b"*width + "DONE")
# Command-line entry point: download URL with wget, optionally to FILENAME.
if __name__ == "__main__":
    if len(sys.argv) < 2:
        # Give a usage hint instead of an IndexError traceback.
        sys.exit("usage: %s URL [FILENAME]" % (sys.argv[0],))
    url = sys.argv[1]
    filename = sys.argv[2] if len(sys.argv) > 2 else None
    urlretrieve(url, filename)
I have not noticed any buffering issues with this code.