This question has been probably asked several times here. I've been trying to make my nested JSON file flat and convert it to CSV, however the closest I could get was to list field names: MyCount, from, Mysize, Allhits, aggs
, but with no values:
Output.csv:
""
Mycount
from
Mysize
Allhits
aggs
I've been trying this code to convert JSON to CSV:
import json
import csv
def get_leaves(item, key=None):
if isinstance(item, dict):
leaves = {}
for i in item.keys():
leaves.update(get_leaves(item[i], i))
return leaves
elif isinstance(item, list):
leaves = {}
for i in item:
leaves.update(get_leaves(i, key))
return leaves
else:
return {key : item}
with open('path/to/my/file.json') as f_input:
json_data = json.load(f_input)
# Paresing all entries to get the complete fieldname list
fieldnames = set()
for entry in json_data:
fieldnames.update(get_leaves(entry).keys())
with open('/path/to/myoutput.csv', 'w', newline='') as f_output:
csv_output = csv.DictWriter(f_output, fieldnames=sorted(fieldnames))
csv_output.writeheader()
csv_output.writerows(get_leaves(entry) for entry in json_data)
The JSON structure is like this:
{"Mycount":538,
"from":0,
"Mysize":1000,
"Allhits":[{
"isVerified":true,
"backgroundColor":"FF720B",
"name":"yourShop",
"Id":"12345678",
"ActionItems":[{
"subtitle":"Click here to start",
"body":null,
"language":"de",
"title":"Start here",
"isDefault":true}],
"tintColor":"FFFFFF",
"shoppingHours":[{"hours":[{"day":["SUNDAY"],"timeRange":[{"allDay":false,"from":25200,"to":68400}]},{"day":["MONDAY"],"timeRange":[{"allDay":false,"from":25200,"to":68400}]},{"day":["SATURDAY"],"timeRange":[{"allDay":false,"from":25200,"to":68400}]},{"day":["FRIDAY"],"timeRange":[{"allDay":false,"from":25200,"to":68400}]},{"day":["THURSDAY"],"timeRange":[{"allDay":false,"from":25200,"to":68400}]},{"day":["WEDNESDAY"],"timeRange":[{"allDay":false,"from":25200,"to":68400}]},{"day":["TUESDAY"],"timeRange":[{"allDay":false,"from":25200,"to":68400}]}]}],
"LogoUrl":"https://url/to/my/logo.png",
"coverage":[{
"country":"*",
"language":"*",
"ratio":1}],
"shoppingHours2":[{"hours":[{"day":["SUNDAY"],"timeRange":[{"allDay":false,"from":25200,"to":68400}]},{"day":["MONDAY"],"timeRange":[{"allDay":false,"from":25200,"to":68400}]},{"day":["SATURDAY"],"timeRange":[{"allDay":false,"from":25200,"to":68400}]},{"day":["FRIDAY"],"timeRange":[{"allDay":false,"from":25200,"to":68400}]},{"day":["THURSDAY"],"timeRange":[{"allDay":false,"from":25200,"to":68400}]},{"day":["WEDNESDAY"],"timeRange":[{"allDay":false,"from":25200,"to":68400}]},{"day":["TUESDAY"],"timeRange":[{"allDay":false,"from":25200,"to":68400}]}]}],
"group":"shop_open",
"timeZone":"CET",
"phone":"+1234567890",
"modTime":1234567890,
"intId":"+123456789",
"Logo2Url":"https://link/to/my/logo.png"}],
"aggs":{}}
Would this be easy to accomplish using pandas module? I'm still learning python so I would appreciate any guidance with this. The minimum what I need from this json file is to extract id, intId, name, ratio
and values for this field names into CSV.
The desired output should be (alternatively it could have all field names and values and I would then be able to extract the fields I need from CSV directly):
id intId name ratio
12345678 123456789 yourShop 1
This is just version for one record, but my output file has to include rows for all IDs present in JSON file.
Thanks for your help in advance!
EDIT I've tried also the following:
import json
import csv
x = '/path/to/myrecords.json'
x = json.loads(x)
f.writerow(["name", "id", "intId", "ratio"])
f = csv.writer(open("/path/to/my/output.csv", "w", newline=''))
for x in x:
f.writerow([x["Allhits"]["name"],
x["Allhits"]["id"],
x["Allhits"]["ActionItems"]["intId"],
x["Allhits"]["ActionItems"]["ratio"]])
but received this error for x = json.loads(x)
step:
Traceback (most recent call last):
File "<stdin>", line 1, in <module>
File "/Users/myusername/anaconda3/lib/python3.6/json/__init__.py", line 354, in loads
return _default_decoder.decode(s)
File "/Users/myusername/anaconda3/lib/python3.6/json/decoder.py", line 339, in decode
obj, end = self.raw_decode(s, idx=_w(s, 0).end())
File "/Users/myusername/anaconda3/lib/python3.6/json/decoder.py", line 357, in raw_decode
raise JSONDecodeError("Expecting value", s, err.value) from None
json.decoder.JSONDecodeError: Expecting value: line 1 column 1 (char 0)