I have 30 gzip files that need to be deserialized. I used the following code to deserialize them:
def deserialize(f, length_format='L'):
    """Read length-prefixed key/value records from a binary stream into a dict.

    Each record is laid out as: key length, UTF-8 encoded key, value length,
    value bytes in ``numpy.save`` format. Both lengths are packed with
    ``length_format``.

    Args:
        f: Binary file-like object supporting ``read(size)``.
        length_format: ``struct`` format of the two length fields. The default
            ``'L'`` uses the *native* size of a C ``unsigned long``, which is
            platform dependent (for example 4 bytes on Windows, 8 bytes on
            64-bit Linux). To read a file produced on a platform with a
            different native size, pass an explicit standard-size format such
            as ``'<I'`` (4 bytes) or ``'<Q'`` (8 bytes). A mismatch makes the
            reader decode payload bytes as lengths, which yields huge bogus
            sizes and can end in ``MemoryError``.

    Returns:
        dict mapping each decoded key to ``ndarray.item()`` of its loaded value.

    Raises:
        EOFError: If the stream ends in the middle of a key or value payload.
        struct.error: If a length field is incomplete.
    """
    # Local import so this function does not rely on a module-level ``io``.
    import io

    header = struct.Struct(length_format)
    retval = {}
    while True:
        content = f.read(header.size)
        if not content:
            break  # clean end of stream: no further records
        k_len = header.unpack(content)[0]
        k_bstr = f.read(k_len)
        if len(k_bstr) != k_len:
            raise EOFError(
                'truncated key: expected {} bytes, got {}'.format(
                    k_len, len(k_bstr)))
        k = k_bstr.decode('utf-8')
        v_len = header.unpack(f.read(header.size))[0]
        v_raw = f.read(v_len)
        if len(v_raw) != v_len:
            raise EOFError(
                'truncated value for key {!r}: expected {} bytes, got {}'.format(
                    k, v_len, len(v_raw)))
        # SECURITY: allow_pickle=True unpickles object arrays and can execute
        # arbitrary code; only use this on files from a trusted source.
        v = numpy.load(io.BytesIO(v_raw), allow_pickle=True)
        # item() unwraps the single stored element into a plain Python object.
        retval[k] = v.item()
    return retval
# Load every serialized company file and print a progress sample of its keys.
# NOTE(review): the accompanying text mentions 30 files, but only indices
# 0-25 are read here -- confirm the intended range.
for i in range(26):
    path = 'Files/company{}.zip'.format(i)
    # The files carry a .zip suffix but are opened as gzip streams.
    with gzip.open(path, 'rb') as f:
        curdic1 = deserialize(f)
    # Print the first key and then every 10000th key after it.
    for n, key in enumerate(curdic1, start=1):
        if n % 10000 == 1:
            print(i, key)
But it gives me the following exception during deserialization:
k_bstr = f.read(k_len) File "/usr/lib/python3.5/gzip.py", line 274, in read return self._buffer.read(size) MemoryError
In addition, each file's size is less than 4 MB! So what is the problem with this code?
Edited: [sample file]
Edited: this is the serialize method, in case it helps to clarify:
def serialize(f, content, length_format='L'):
    """Write a dict to a binary stream as length-prefixed key/value records.

    For every item the stream receives: key length, UTF-8 encoded key, value
    length, value bytes in ``numpy.save`` format. Both lengths are packed with
    ``length_format``.

    Args:
        f: Binary file-like object supporting ``write(bytes)``.
        content: Mapping of ``str`` keys to values accepted by ``numpy.save``.
        length_format: ``struct`` format of the two length fields. The default
            ``'L'`` uses the *native* size of a C ``unsigned long``, which
            differs between platforms (for example 4 bytes on Windows, 8 bytes
            on 64-bit Linux). Pass an explicit standard-size format such as
            ``'<I'`` or ``'<Q'`` to produce files that read back identically
            on any platform; the reader must use the same format.
    """
    header = struct.Struct(length_format)
    for k, v in content.items():
        # Key record: length followed by the UTF-8 encoded key.
        k_bstr = k.encode('utf-8')
        f.write(header.pack(len(k_bstr)))
        f.write(k_bstr)
        # Value record: length followed by the value in numpy.save format.
        memfile = io.BytesIO()
        numpy.save(memfile, v)
        v_bytes = memfile.getvalue()
        f.write(header.pack(len(v_bytes)))
        f.write(v_bytes)