I am trying to convert nested JSON to CSV using python. There can be multiple values for some attributes: like phone1, phone2,phone3 for a single individual.
I wrote a python code for this and its doing the job perfectly. But I am getting the values of multiple attributes in multiple columns but I want them in multiple rows,
like for example: my output: name phone1 phone2 phone3 xyx 98 34 56
required output:
name phone xyx 98 xyx 34 xyx 56
The code for this is:
import sys
import json
import csv
import io
import pandas as pd
##
# Convert to string keeping encoding in mind...
##
processed_data = []
def to_string(s):
try:
return str(s)
except:
# Change the encoding type if needed
return s.encode('utf-8')
def reduce_item(key, value):
global reduced_item
global res
# Reduction Condition 1
if type(value) is list:
i = 0
for sub_item in value:
reduce_item(key + '_' + to_string(i), sub_item)
i = i + 1
# Reduction Condition 2
elif type(value) is dict:
sub_keys = value.keys()
for sub_key in sub_keys:
reduce_item(key+'_'+to_string(sub_key), value[sub_key])
# Base Condition
else:
#if reduced_item.get(to_string(key)):
#reduced_item[to_string(key)] = to_string(value)
#processed_data.append(reduced_item)
#else:
reduced_item[to_string(key)]=to_string(value)
if __name__ == "__main__":
if len(sys.argv) != 4:
print("\nUsage: python json_to_csv.py <node> <json_in_file_path> <csv_out_file_path>\n")
else:
# Reading arguments
node = sys.argv[1]
json_file_path = sys.argv[2]
csv_file_path = sys.argv[3]
with io.open(json_file_path, 'r', encoding='utf-8-sig') as fp:
json_value = fp.read()
raw_data = json.loads(json_value)
try:
data_to_be_processed = raw_data[node]
except:
data_to_be_processed = raw_data
header = []
for item in data_to_be_processed:
reduced_item={}
reduce_item(node, item)
header += reduced_item.keys()
processed_data.append(reduced_item)
header = list(set(header))
header.sort()
with open(csv_file_path, 'w+') as f:
writer = csv.DictWriter(f, header,quoting=csv.QUOTE_ALL)
writer.writeheader()
for row in processed_data:
writer.writerow(row)
print("Just completed writing csv file with %d columns" % len(header))
I tried changing the code but was not able to achieve the desired result.It would be of great help if anyone can suggest the changes for this code
Thanks in advance