I have a list of of dictionaries (MyList), which have nested arrays as values in some cases. I am trying to iterate through the list and apply a recursive function (flat) on each dictionary to flatten them before appending the new flat dictionaries to another list.
My problem is that the resulting dictionaries seem to build upon the iteration before and retain the uncommon key value pairs from previously flattened dicts. I would like to have just original key value pairs for each dictionary in my output.
Any pointers would be greatly appreciated.
MyList = [
{
"altLabel_attr": [
"hA20",
"IMMU-106",
"Veltuzumab",
"VELTUZUMAB"
],
"chemblId_attr": "CHEMBL1743088",
"description_attr": "Veltuzumab (humanized mab)",
"hasExternalLink": False,
"internalDocument": True,
"isBiotherapeutic_attr": True,
"isImageMap": False,
"label": "VELTUZUMAB",
"label_attr": "VELTUZUMAB",
"prefLabel_attr": "VELTUZUMAB",
"score": 1,
"subType": "",
"subjectUris": [
"http://rdf.ebi.ac.uk/resource/chembl/molecule/CHEMBL1743088"
],
"substanceType_attr": "Antibody",
"type": "target"
},
{
"altLabel_attr": [
"PRONASE",
"Pronase"
],
"chemblId_attr": "CHEMBL2108476",
"hasExternalLink": False,
"internalDocument": True,
"isImageMap": False,
"label": "PRONASE",
"label_attr": "PRONASE",
"prefLabel_attr": "PRONASE",
"score": 1,
"subType": "",
"subjectUris": [
"http://rdf.ebi.ac.uk/resource/chembl/molecule/CHEMBL2108476"
],
"substanceType_attr": "Enzyme",
"type": "target"
},
{
"altLabel_attr": "MDX-1342",
"chemblId_attr": "CHEMBL2109337",
"hasExternalLink": False,
"internalDocument": True,
"isBiotherapeutic_attr": True,
"isImageMap": False,
"label": "MDX-1342",
"label_attr": "MDX-1342",
"prefLabel_attr": "MDX-1342",
"score": 1,
"subType": "",
"subjectUris": [
"http://rdf.ebi.ac.uk/resource/chembl/molecule/CHEMBL2109337"
],
"substanceType_attr": "Antibody",
"type": "target"
}
]
This is my flatten function:
def flat(structure, key="", path="", flattened={}):
if not isinstance(structure,(dict, list)):
flattened[((path + "_") if path else "") + key] = structure
elif isinstance(structure, list):
for i, item in enumerate(structure):
flat(item,str(i), "_".join(filter(None,[path,key])), flattened)
else:
for new_key, value in structure.iteritems():
flat(value, new_key, "*".join(filter(None,[path,key])), flattened)
return flattened
ListFlatDicts = []
for i in MyList:
i = dict(flat(i))
ListFlatDicts.append(i)
for i in ListFlatDicts:
print i
Current Output:
{'altLabel_attr_3': 'VELTUZUMAB', 'altLabel_attr_2': 'Veltuzumab', 'altLabel_attr_1': 'IMMU-106', 'isImageMap': 'false', 'subjectUris_0': 'CHEMBL1743088', 'subType': '', 'internalDocument': 'true', 'label_attr': 'VELTUZUMAB', 'prefLabel_attr': 'VELTUZUMAB', 'substanceType_attr': 'Antibody', 'isBiotherapeutic_attr': 'true', 'altLabel_attr_0': 'hA20', 'hasExternalLink': 'false', 'label': 'VELTUZUMAB', 'score': 1, 'description_attr': 'Veltuzumab (humanized mab)', 'chemblId_attr': 'CHEMBL1743088', 'type': 'target'}
{'altLabel_attr_3': 'VELTUZUMAB', 'altLabel_attr_2': 'Veltuzumab', 'altLabel_attr_1': 'Pronase', 'isImageMap': 'false', 'subjectUris_0': 'CHEMBL2108476', 'subType': '', 'internalDocument': 'true', 'label_attr': 'PRONASE', 'prefLabel_attr': 'PRONASE', 'substanceType_attr': 'Enzyme', 'isBiotherapeutic_attr': 'true', 'altLabel_attr_0': 'PRONASE', 'hasExternalLink': 'false', 'label': 'PRONASE', 'score': 1, 'description_attr': 'Veltuzumab (humanized mab)', 'chemblId_attr': 'CHEMBL2108476', 'type': 'target'}
{'altLabel_attr_3': 'VELTUZUMAB', 'altLabel_attr_2': 'Veltuzumab', 'altLabel_attr_1': 'Pronase', 'isImageMap': 'false', 'subjectUris_0': 'CHEMBL2109337', 'subType': '', 'internalDocument': 'true', 'label_attr': 'MDX-1342', 'prefLabel_attr': 'MDX-1342', 'substanceType_attr': 'Antibody', 'isBiotherapeutic_attr': 'true', 'altLabel_attr_0': 'PRONASE', 'hasExternalLink': 'false', 'label': 'MDX-1342', 'altLabel_attr': 'MDX-1342', 'score': 1, 'description_attr': 'Veltuzumab (humanized mab)', 'chemblId_attr': 'CHEMBL2109337', 'type': 'target'}
Desired Output:
{'altLabel_attr_3': 'VELTUZUMAB', 'altLabel_attr_2': 'Veltuzumab', 'altLabel_attr_1': 'IMMU-106', 'isImageMap': 'false', 'subjectUris_0': 'CHEMBL1743088', 'subType': '', 'internalDocument': 'true', 'label_attr': 'VELTUZUMAB', 'prefLabel_attr': 'VELTUZUMAB', 'substanceType_attr': 'Antibody', 'isBiotherapeutic_attr': 'true', 'altLabel_attr_0': 'hA20', 'hasExternalLink': 'false', 'label': 'VELTUZUMAB', 'score': 1, 'description_attr': 'Veltuzumab (humanized mab)', 'chemblId_attr': 'CHEMBL1743088', 'type': 'target'},
{'prefLabel_attr': 'PRONASE', 'substanceType_attr': 'Enzyme', 'altLabel_attr_1': 'Pronase', 'isImageMap': 'false', 'subjectUris_0': 'CHEMBL2108476', 'altLabel_attr_0': 'PRONASE', 'hasExternalLink': 'false', 'label': 'PRONASE', 'subType': '', 'score': 1, 'internalDocument': 'true', 'chemblId_attr': 'CHEMBL2108476', 'type': 'target', 'label_attr': 'PRONASE'}
{'prefLabel_attr': 'MDX-1342', 'substanceType_attr': 'Antibody', 'isImageMap': 'false', 'isBiotherapeutic_attr': 'true', 'hasExternalLink': 'false', 'label': 'MDX-1342', 'altLabel_attr': 'MDX-1342', 'subType': '', 'score': 1, 'subjectUris_0': 'CHEMBL2109337', 'internalDocument': 'true', 'chemblId_attr': 'CHEMBL2109337', 'type': 'target', 'label_attr': 'MDX-1342'}