I have a DataFrame with about 300,000 rows like the ones below, but I only need the id (`vid`) and the email address from each row. The DataFrame looks like this:
# Sample data: two rows; each `col2` cell holds a list of record dicts, and
# every record carries its own `identities` list.
_first_row_lead = {
    'vid': 1201,
    'saved-at-timestamp': 1638824550030,
    'deleted-changed-timestamp': 0,
    'identities': [
        {
            'type': 'EMAIL',
            'value': 'abc@gmaill.com',
            'timestamp': 1548608578090,
            'is-primary': True,
        },
        {
            'type': 'LEAD_GUID',
            'value': '69c4f6ec-e0e9-4632-8d16-cbc204a57b22',
            'timestamp': 1548608578106,
        },
    ],
}

# The remaining records of the first row differ only in `vid`; each one gets
# its own fresh, empty `identities` list.
_first_row_rest = [
    {
        'vid': other_vid,
        'saved-at-timestamp': 1638824550030,
        'deleted-changed-timestamp': 0,
        'identities': [],
    }
    for other_vid in (
        314479851,
        183374504,
        17543251,
        99700201,
        65375052,
        17525601,
        238128701,
    )
]

_second_row_lead = {
    'vid': 1202,
    'saved-at-timestamp': 1548608578109,
    'deleted-changed-timestamp': 0,
    'identities': [
        {
            'type': 'EMAIL',
            'value': 'xyz@gmaill.com',
            'timestamp': 1548608578088,
            'is-primary': True,
        },
        {
            'type': 'LEAD_GUID',
            'value': 'fe6c2628-b1db-47c5-91f6-258e79ea58f0',
            'timestamp': 1548608578106,
        },
    ],
}

d = {
    'vid': [1201, 1202],
    'col2': [
        [_first_row_lead, *_first_row_rest],
        [_second_row_lead],
    ],
}
df = pd.DataFrame(d)
df
vid col2
1201 [{'vid': 1201, 'saved-at-timestamp': 1638824550030........
1202 [{'vid': 1202, 'saved-at-timestamp': 1548608578109......
Expected output (only these two fields, but for every row):
vid email
1201 abc@gmaill.com
1202 xyz@gmaill.com
.. ..