I've read the base64 python docs and seen examples here on SO and elsewhere, but I'm still having a problem decoding base64 back to the original binary representation.
I'm not getting any exceptions, so I don't think there's a padding or character set issue. I just get a resulting binary file that's smaller than the original binary.
I'm including both the base64 encoding and decoding steps in case there's an issue with either or both steps.
The code must run with python 2.7.
Below are the scripts that reproduce the problem.
b64_encode.py
#!/usr/bin/env python2.7
#
# b64_encode.py - must run with python 2.7
# - must process data in chunks to limit memory consumption
# - base64 data must be JSON compatible, i.e.
# use base64 "modern" interface,
# not base64.encodestring() which contains linefeeds
#
import sys, base64
def write_base64_file_from_file(src_fname, b64_fname, chunk_size=8192):
with open(src_fname, 'rb') as fin, open(b64_fname, 'w') as fout:
while True:
bin_data = fin.read(chunk_size)
if not bin_data:
break
print 'bin %s data len: %d' % (type(bin_data), len(bin_data))
b64_data = base64.b64encode(bin_data)
print 'b64 %s data len: %d' % (type(b64_data), len(b64_data))
fout.write(b64_data)
if len(sys.argv) != 2:
print 'usage: %s <bin_fname>' % sys.argv[0]
sys.exit()
bin_fname = sys.argv[1]
b64_fname = bin_fname + '.b64'
write_base64_file_from_file(bin_fname, b64_fname)
b64_decode.py
#!/usr/bin/env python2.7
#
# b64_decode.py - must run with python 2.7
# - must process data in chunks to limit memory consumption
#
import os, sys, base64
def write_file_from_base64_file(b64_fname, dst_fname, chunk_size=8192):
with open(b64_fname, 'r') as fin, open(dst_fname, 'wb') as fout:
while True:
b64_data = fin.read(chunk_size)
if not b64_data:
break
print 'b64 %s data len: %d' % (type(b64_data), len(b64_data))
bin_data = base64.b64decode(b64_data)
print 'bin %s data len: %d' % (type(bin_data), len(bin_data))
fout.write(bin_data)
if len(sys.argv) != 2:
print 'usage: %s <b64_fname>' % sys.argv[0]
sys.exit()
b64_fname = sys.argv[1]
bin_ext = os.path.splitext(os.path.splitext(b64_fname)[0])[1]
bin_fname = os.path.splitext(b64_fname)[0] + bin_ext
write_file_from_base64_file(b64_fname, bin_fname)
For example, my output for a 19k image file is:
$ ./b64_encode.py img.jpg
bin <type 'str'> data len: 8192
b64 <type 'str'> data len: 10924
bin <type 'str'> data len: 8192
b64 <type 'str'> data len: 10924
bin <type 'str'> data len: 2842
b64 <type 'str'> data len: 3792
$ ./b64_decode.py img.jpg.b64
b64 <type 'str'> data len: 8192
bin <type 'str'> data len: 6144
b64 <type 'str'> data len: 8192
bin <type 'str'> data len: 2048
b64 <type 'str'> data len: 8192
bin <type 'str'> data len: 4097
b64 <type 'str'> data len: 1064
bin <type 'str'> data len: 796
$ ll
19226 Feb 5 14:24 img.jpg
25640 Mar 29 12:12 img.jpg.b64
13085 Mar 29 12:14 img.jpg.jpg