I have a set of 10k JSON files that I am trying to parse into a single dataframe.
I started by parsing a single file to make sure I am extracting the values I need correctly:
import pandas as pd
import json
from pandas.io.json import json_normalize

# load one file and inspect its structure
with open("C:/Test/datapush_event_2019-09-25T16-02-30.4Z.json") as f:
    data = json.load(f)
data
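To get at the values I need I then flatten the events list with json_normalize, essentially the same call I use in the loop below:

# flatten the "events" records and keep the device serial number from the metadata
df = json_normalize(data, record_path='events', meta=[['metadata', 'serial_number']])
df.head()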
This worked perfectly. However, when I try to extend it to all the files I have (in a different directory) using a for loop, I get a weird error:
import json
from pandas.io.json import json_normalize
import glob
data = pd.DataFrame()
for file in glob.glob('C:/Data2/*.json'):
    with open(file) as json_file:
        json_data = json.load(json_file)
        df = json_normalize(json_data, record_path='events', meta=[['metadata', 'serial_number']])
        data = data.append(df)
Error traceback:
---------------------------------------------------------------------------
JSONDecodeError Traceback (most recent call last)
<ipython-input-44-9feedda31786> in <module>
5 for file in glob.glob('C:/Data2/*.json'):
6 with open(file) as json_file:
----> 7 json_data = json.load(json_file)
8 df = json_normalize(json_data, record_path='events', meta=[['metadata', 'serial_number']])
9 data =data.append(df)
C:\Program Files (x86)\Python37-32\lib\json\__init__.py in load(fp, cls, object_hook, parse_float, parse_int, parse_constant, object_pairs_hook, **kw)
294 cls=cls, object_hook=object_hook,
295 parse_float=parse_float, parse_int=parse_int,
--> 296 parse_constant=parse_constant, object_pairs_hook=object_pairs_hook, **kw)
297
298
C:\Program Files (x86)\Python37-32\lib\json\__init__.py in loads(s, encoding, cls, object_hook, parse_float, parse_int, parse_constant, object_pairs_hook, **kw)
346 parse_int is None and parse_float is None and
347 parse_constant is None and object_pairs_hook is None and not kw):
--> 348 return _default_decoder.decode(s)
349 if cls is None:
350 cls = JSONDecoder
C:\Program Files (x86)\Python37-32\lib\json\decoder.py in decode(self, s, _w)
335
336 """
--> 337 obj, end = self.raw_decode(s, idx=_w(s, 0).end())
338 end = _w(s, end).end()
339 if end != len(s):
C:\Program Files (x86)\Python37-32\lib\json\decoder.py in raw_decode(self, s, idx)
353 obj, end = self.scan_once(s, idx)
354 except StopIteration as err:
--> 355 raise JSONDecodeError("Expecting value", s, err.value) from None
356 return obj, end
JSONDecodeError: Expecting value: line 1 column 1 (char 0)
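Since the error complains about line 1 column 1 (char 0), my guess is that at least one of the 10k files is empty or otherwise not valid JSON, but I have not confirmed that. A quick way to find the offending file(s) would be something like this (just a sketch; bad_files is an illustrative name):

import json
import glob

bad_files = []
for path in glob.glob('C:/Data2/*.json'):
    with open(path) as fh:
        try:
            json.load(fh)
        except json.JSONDecodeError as exc:
            # record which files fail to parse so they can be inspected or skipped
            bad_files.append((path, str(exc)))

print(len(bad_files), 'files failed to parse')
print(bad_files[:5])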
Here is a sample of one of the JSON files:
{
  "metadata":{
    "timezone":{
      "location":"Atlanta",
      "offset":120
    },
    "serial_number":"12345",
    "device_type":"Test"
  },
  "from":"2019-09-25T16:02:30.400Z",
  "events":[
    {
      "timestamp":"2019-09-25T16:02:30.400Z",
      "type":"ZONE_EXIT",
      "tracked_object":{
        "id":53778,
        "type":"PERSON",
        "position":{
          "x":-3878,
          "y":-1606,
          "type":"FOOT",
          "coordinate_system":"REAL_WORLD_IN_MILLIMETER"
        },
        "person_data":{
          "height":1687
        }
      },
      "element":{
        "id":"4bea9786-a840-4895-b5ea-4216753854d7",
        "name":"Count Zone Left",
        "type":"ZONE_GEOMETRY"
      }
    },
    {
      "timestamp":"2019-09-25T16:02:30.400Z",
      "type":"ZONE_EXIT",
      "tracked_object":{
        "id":53778,
        "type":"PERSON",
        "position":{
          "x":-3878,
          "y":-1606,
          "type":"FOOT",
          "coordinate_system":"REAL_WORLD_IN_MILLIMETER"
        },
        "person_data":{
          "height":1687
        }
      },
      "element":{
        "id":"e6311a10-5b0e-4aca-a97b-ad0e2f3827d3",
        "name":"ZoneOfInterest-Count01",
        "type":"ZONE_GEOMETRY"
      }
    },
    {
      "timestamp":"2019-09-25T16:02:30.400Z",
      "type":"ZONE_EXIT",
      "tracked_object":{
        "id":53787,
        "type":"PERSON",
        "position":{
          "x":-7124,
          "y":-2329,
          "type":"FOOT",
          "coordinate_system":"REAL_WORLD_IN_MILLIMETER"
        },
        "person_data":{
          "height":1978
        }
      },
      "element":{
        "id":"e6311a10-5b0e-4aca-a97b-ad0e2f3827d3",
        "name":"ZoneOfInterest-Count01",
        "type":"ZONE_GEOMETRY"
      }
    }
  ]
}
Any suggestions on how to resolve this? Or is there a better approach for loading multiple JSON files into a single dataframe in Python?
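For the second part, I was considering collecting the per-file frames in a list and concatenating once at the end instead of appending inside the loop, roughly like this (a sketch using the same json_normalize arguments), but I am not sure whether that is the right approach or whether it avoids the error:

import glob
import json
import pandas as pd
from pandas.io.json import json_normalize

frames = []
for path in glob.glob('C:/Data2/*.json'):
    with open(path) as fh:
        payload = json.load(fh)
    frames.append(json_normalize(payload,
                                 record_path='events',
                                 meta=[['metadata', 'serial_number']]))

# a single concat at the end instead of growing the dataframe file by file
data = pd.concat(frames, ignore_index=True)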