Python CSV multivalued columns to multiple rows

Question

I am trying to convert nested JSON to CSV using python. There can be multiple values for some attributes: like phone1, phone2,phone3 for a single individual.

I wrote a python code for this and its doing the job perfectly. But I am getting the values of multiple attributes in multiple columns but I want them in multiple rows,

like for example: my output: name phone1 phone2 phone3 xyx 98 34 56

required output:

name phone xyx 98 xyx 34 xyx 56

The code for this is:

 import sys
import json
import csv
import io
import pandas as pd


##
# Convert to string keeping encoding in mind...
##
processed_data = []
def to_string(s):
    try:
        return str(s)
    except:
        # Change the encoding type if needed
        return s.encode('utf-8')


def reduce_item(key, value):
    global reduced_item
    global res

    # Reduction Condition 1
    if type(value) is list:
        i = 0
        for sub_item in value:
            reduce_item(key + '_' + to_string(i), sub_item)
            i = i + 1


    # Reduction Condition 2
    elif type(value) is dict:
        sub_keys = value.keys()
        for sub_key in sub_keys:
            reduce_item(key+'_'+to_string(sub_key), value[sub_key])

    # Base Condition
    else:
        #if reduced_item.get(to_string(key)):
            #reduced_item[to_string(key)] = to_string(value)
            #processed_data.append(reduced_item)
        #else:
        reduced_item[to_string(key)]=to_string(value)


  if __name__ == "__main__":
    if len(sys.argv) != 4:
        print("\nUsage: python json_to_csv.py <node> <json_in_file_path> <csv_out_file_path>\n")
    else:
        # Reading arguments
        node = sys.argv[1]
        json_file_path = sys.argv[2]
        csv_file_path = sys.argv[3]

        with io.open(json_file_path, 'r', encoding='utf-8-sig') as fp:
            json_value = fp.read()
            raw_data = json.loads(json_value)

        try:
            data_to_be_processed = raw_data[node]
        except:
            data_to_be_processed = raw_data

        header = []
        for item in data_to_be_processed:
            reduced_item={}
            reduce_item(node, item)
            header += reduced_item.keys()

            processed_data.append(reduced_item)

        header = list(set(header))
        header.sort()

        with open(csv_file_path, 'w+') as f:
            writer = csv.DictWriter(f, header,quoting=csv.QUOTE_ALL)
            writer.writeheader()
            for row in processed_data:
                writer.writerow(row)

        print("Just completed writing csv file with %d columns" % len(header))

I tried changing the code but was not able to achieve the desired result.It would be of great help if anyone can suggest the changes for this code

Thanks in advance

Did you try using [pandas](https://pypi.org/project/pandas/)? check out [this question](https://stackoverflow.com/questions/40588852) — M.A. Heshmat Khah, Jun 11 '21 at 11:34
In that example also we are getting data across multiple columns but we want it across multiple rows. — harry_12, Jun 13 '21 at 13:32

Python CSV multivalued columns to multiple rows

0 Answers0