0

I have a dataframe with the following schema:

Schema of the dataframe

I try to fetch all the data from this dataframe. I use df.collect() method to iterate through the entire dataframe and then pulling the values out of the columns one-by-one. But it seems like its not iterating through the entire tree and just pulling through the initial parent row only.

def parseCol(landing_df,data):
  for i in landing_df.collect():
    parent_id = i["parent_id"]
    shared = "null"
    if (len(i["children"]))>1:
#       print(len(i["children"]))
#     if(len(i["children"])>1):

      data.append([i["project_id"],i["id"],i["name"],i["order"],i["pid"],i["created_date"],i["last_modified_date"], str(parent_id),i["description"],i["recursive"],i["links"][0][0],str(shared)])
      for j in i["children"]:
        if(('shared') not in (i)):
          shared = 'null'
        else:
          shared = i['shared']
          
        if(('project_id') not in (j)):
          project_id = "null"  
        else:
          project_id = j['project_id']
        data.append([project_id,j["id"],j["name"],j["order"],j["pid"],j["created_date"],j["last_modified_date"],str(j["parent_id"]),j["description"],j["recursive"],j["links"][0][0],str(shared)])
          
#           print(-1)
        
    elif(len(i["children"])==0):
             
      data.append([i["project_id"],i["id"],i["name"],i["order"],i["pid"],i["created_date"],i["last_modified_date"],"null",i["description"],i["recursive"],i["links"][0][0],str(shared)])
  return data  

Can someone suggest some better way to do this.

Vincent Doba
  • 4,343
  • 3
  • 22
  • 42
tathagat
  • 9
  • 4

0 Answers0