1

Please help me here in reading the below json and convert it to dataframe. This contains multiple json objects and I want to store each array as individual dataframe.

with open(r"C:\filelocation\file.json","rb") as read_file: data = json.load(read_file)

The above code gives me one complete dataframe , I need to store each array as an dataframe. Below is the json which i am trying to convert to dataframe

{ "q1": "a1", "q2": "a2", "q3": "a3", "q4": "a4", "q5": "a5", "q6": "a6", "Details": [{ "q7": "a7", "q8": "a8", "q9": "a9", "q10": "a10", "q11": [] }, { "q7": "a11", "q8": "a12", "q9": "a13", "q10": "a14", "q11": [] }, { "q7": "a15", "q8": "a16", "q9": "a17", "q10": "a18", "q11": [] }, { "q7": "a19", "q8": "a20", "q9": "a21", "q10": "a22", "q11": [] }] }

Coder
  • 39
  • 7

2 Answers2

1

You could try this approach.

import pandas as pd
import flatten_json 
from flatten_json import flatten

d = { "q1": "a1", "q2": "a2", "q3": "a3", "q4": "a4", "q5": "a5", "q6": "a6", "Details": [{ "q7": "a7", "q8": "a8", "q9": "a9", "q10": "a10", "q11": [] }, { "q7": "a11", "q8": "a12", "q9": "a13", "q10": "a14", "q11": [] }, { "q7": "a15", "q8": "a16", "q9": "a17", "q10": "a18", "q11": [] }, { "q7": "a19", "q8": "a20", "q9": "a21", "q10": "a22", "q11": [] }] }

create a df first

df = pd.json_normalize(d)

which gives:

q1  q2  q3  q4  q5  q6                                            Details
0  a1  a2  a3  a4  a5  a6  [{'q7': 'a7', 'q8': 'a8', 'q9': 'a9', 'q10': '...

and define the following function

def flatten_nested_json_df(df):
    df = df.reset_index()
    s = (df.applymap(type) == list).all()
    list_columns = s[s].index.tolist()
    
    s = (df.applymap(type) == dict).all()
    dict_columns = s[s].index.tolist()

    
    while len(list_columns) > 0 or len(dict_columns) > 0:
        new_columns = []

        for col in dict_columns:
            horiz_exploded = pd.json_normalize(df[col]).add_prefix(f'{col}.')
            horiz_exploded.index = df.index
            df = pd.concat([df, horiz_exploded], axis=1).drop(columns=[col])
            new_columns.extend(horiz_exploded.columns) # inplace

        for col in list_columns:
            
            df = df.drop(columns=[col]).join(df[col].explode().to_frame())
            new_columns.append(col)

        s = (df[new_columns].applymap(type) == list).all()
        list_columns = s[s].index.tolist()

        s = (df[new_columns].applymap(type) == dict).all()
        dict_columns = s[s].index.tolist()
    return df

Finally, flatten the df:

print(flatten_nested_json_df(df))

which returns:

 index  q1  q2  q3  q4  q5  q6 Details.q7 Details.q8 Details.q9 Details.q10  \
0      0  a1  a2  a3  a4  a5  a6         a7         a8         a9         a10   
0      0  a1  a2  a3  a4  a5  a6         a7         a8         a9         a10   
0      0  a1  a2  a3  a4  a5  a6         a7         a8         a9         a10   
0      0  a1  a2  a3  a4  a5  a6         a7         a8         a9         a10   
0      0  a1  a2  a3  a4  a5  a6        a11        a12        a13         a14   
0      0  a1  a2  a3  a4  a5  a6        a11        a12        a13         a14   
0      0  a1  a2  a3  a4  a5  a6        a11        a12        a13         a14   
0      0  a1  a2  a3  a4  a5  a6        a11        a12        a13         a14   
0      0  a1  a2  a3  a4  a5  a6        a15        a16        a17         a18   
0      0  a1  a2  a3  a4  a5  a6        a15        a16        a17         a18   
0      0  a1  a2  a3  a4  a5  a6        a15        a16        a17         a18   
0      0  a1  a2  a3  a4  a5  a6        a15        a16        a17         a18   
0      0  a1  a2  a3  a4  a5  a6        a19        a20        a21         a22   
0      0  a1  a2  a3  a4  a5  a6        a19        a20        a21         a22   
0      0  a1  a2  a3  a4  a5  a6        a19        a20        a21         a22   
0      0  a1  a2  a3  a4  a5  a6        a19        a20        a21         a22   

  Details.q11  
0         NaN  
0         NaN  
0         NaN  
0         NaN  
0         NaN  
0         NaN  
0         NaN  
0         NaN  
0         NaN  
0         NaN  
0         NaN  
0         NaN  
0         NaN  
0         NaN  
0         NaN  
0         NaN  
1

IIUC, you can try:

df = pd.json_normalize(
    json_data, meta=['q1', 'q2', 'q3', 'q4', 'q5', 'q6'], record_path='Details')

Nk03
  • 14,699
  • 2
  • 8
  • 22