0

I am trying to convert nested JSON to CSV using python. There can be multiple values for some attributes: like phone1, phone2,phone3 for a single individual.

I wrote a python code for this and its doing the job perfectly. But I am getting the values of multiple attributes in multiple columns but I want them in multiple rows,

like for example: my output: name phone1 phone2 phone3 xyx 98 34 56

required output:

name phone xyx 98 xyx 34 xyx 56

The code for this is:

 import sys
import json
import csv
import io
import pandas as pd


##
# Convert to string keeping encoding in mind...
##
processed_data = []
def to_string(s):
    try:
        return str(s)
    except:
        # Change the encoding type if needed
        return s.encode('utf-8')


def reduce_item(key, value):
    global reduced_item
    global res

    # Reduction Condition 1
    if type(value) is list:
        i = 0
        for sub_item in value:
            reduce_item(key + '_' + to_string(i), sub_item)
            i = i + 1


    # Reduction Condition 2
    elif type(value) is dict:
        sub_keys = value.keys()
        for sub_key in sub_keys:
            reduce_item(key+'_'+to_string(sub_key), value[sub_key])

    # Base Condition
    else:
        #if reduced_item.get(to_string(key)):
            #reduced_item[to_string(key)] = to_string(value)
            #processed_data.append(reduced_item)
        #else:
        reduced_item[to_string(key)]=to_string(value)


  if __name__ == "__main__":
    if len(sys.argv) != 4:
        print("\nUsage: python json_to_csv.py <node> <json_in_file_path> <csv_out_file_path>\n")
    else:
        # Reading arguments
        node = sys.argv[1]
        json_file_path = sys.argv[2]
        csv_file_path = sys.argv[3]

        with io.open(json_file_path, 'r', encoding='utf-8-sig') as fp:
            json_value = fp.read()
            raw_data = json.loads(json_value)

        try:
            data_to_be_processed = raw_data[node]
        except:
            data_to_be_processed = raw_data

        header = []
        for item in data_to_be_processed:
            reduced_item={}
            reduce_item(node, item)
            header += reduced_item.keys()

            processed_data.append(reduced_item)

        header = list(set(header))
        header.sort()

        with open(csv_file_path, 'w+') as f:
            writer = csv.DictWriter(f, header,quoting=csv.QUOTE_ALL)
            writer.writeheader()
            for row in processed_data:
                writer.writerow(row)

        print("Just completed writing csv file with %d columns" % len(header))

I tried changing the code but was not able to achieve the desired result.It would be of great help if anyone can suggest the changes for this code

Thanks in advance

0 Answers0