I have a very large zipped file (1.5 GB) that contains 500 sub-folders, and there are 5000 JSON files under each sub-folder.
I would like to read the JSON files into a DataFrame in Python; my code and the error it raises are shown below. Could you please suggest how to fix it? Thanks.
# Parse every JSON file stored in the archive, one member at a time, without
# extracting the archive to disk.
with zipfile.ZipFile('20APIJSON.zip', 'r') as z:
    for filename in z.namelist():
        # namelist() lists the sub-folder entries themselves (names ending in
        # "/") alongside the files. Reading a folder entry yields b"", and
        # json.loads(b"") raises
        # "JSONDecodeError: Expecting value: line 1 column 1 (char 0)".
        if filename.endswith('/'):
            continue
        with z.open(filename) as f:
            data = f.read()
        # json.loads accepts the raw bytes directly (Python 3.6+) and detects
        # the UTF-8/16/32 encoding on its own.
        json_file = json.loads(data)
error:
JSONDecodeError Traceback (most recent call last)
<ipython-input-6-e235c823f732> in <module>()
9 with z.open(filename) as f:
10 data = f.read()
---> 11 json_file = json.loads(data)
12 l.append([json_file['FullStudy']['Study']['ProtocolSection']['IdentificationModule']['NCTId'],
13 json_file['FullStudy']['Study']['ProtocolSection']['StatusModule']['OverallStatus'],
~\Anaconda3\lib\json\__init__.py in loads(s, encoding, cls, object_hook, parse_float, parse_int, parse_constant, object_pairs_hook, **kw)
352 parse_int is None and parse_float is None and
353 parse_constant is None and object_pairs_hook is None and not kw):
--> 354 return _default_decoder.decode(s)
355 if cls is None:
356 cls = JSONDecoder
~\Anaconda3\lib\json\decoder.py in decode(self, s, _w)
337
338 """
--> 339 obj, end = self.raw_decode(s, idx=_w(s, 0).end())
340 end = _w(s, end).end()
341 if end != len(s):
~\Anaconda3\lib\json\decoder.py in raw_decode(self, s, idx)
355 obj, end = self.scan_once(s, idx)
356 except StopIteration as err:
--> 357 raise JSONDecodeError("Expecting value", s, err.value) from None
358 return obj, end
JSONDecodeError: Expecting value: line 1 column 1 (char 0)