I have following JSON
{
"FileResults": [
{
"FileName": "gtg.0.wav",
"FileUrl": null,
"Results": [
{
"Status": "Success",
"ChannelNumber": null,
"SpeakerId": null,
"Offset": 90200000,
"Duration": 25600000,
"NBest": [
{
"Confidence": 0.9415368,
"Lexical": "",
"ITN": "",
"MaskedITN": "",
"Display": ".",
"Sentiment": null,
"Words": [
{
"Word": "ask",
"Offset": 944400000,
"Duration": 3500000
},
{
"Word": "everybody",
"Offset": 94000000,
"Duration": 4400000
},
{
"Word": "to",
"Offset": 98400000,
"Duration": 1200000
},
{
"Word": "please",
"Offset": 99600000,
"Duration": 3000000
},
{
"Word": "take",
"Offset": 102600000,
"Duration": 2400000
},
{
"Word": "their",
"Offset": 105000000,
"Duration": 2400000
},
{
"Word": "seats",
"Offset": 107400000,
"Duration": 8200000
}
]
}
]
},
{
"Status": "Success",
"ChannelNumber": null,
"SpeakerId": null,
"Offset": 90200000,
"Duration": 25600000,
"NBest": [
{
"Confidence": 0.9415368,
"Lexical": "",
"ITN": "",
"MaskedITN": "",
"Display": ".",
"Sentiment": null,
"Words": [
{
"Word": "ask",
"Offset": 90500000,
"Duration": 3500000
},
{
"Word": "everybody",
"Offset": 94000000,
"Duration": 4400000
},
{
"Word": "to",
"Offset": 98400000,
"Duration": 1200000
},
{
"Word": "please",
"Offset": 99600000,
"Duration": 3000000
},
{
"Word": "take",
"Offset": 102600000,
"Duration": 2400000
},
{
"Word": "their",
"Offset": 105000000,
"Duration": 2400000
},
{
"Word": "seats",
"Offset": 107400000,
"Duration": 8200000
}
]
}
]
},
{
"Status": "Success",
"ChannelNumber": null,
"SpeakerId": null,
"Offset": 169400000,
"Duration": 157500000,
"NBest": [
{
"Confidence": 0.944001734,
"Lexical": "",
"ITN": "",
"MaskedITN": "",
"Display": "",
"Sentiment": null,
"Words": [
{
"Word": "welcome",
"Offset": 169700000,
"Duration": 4500000
},
{
"Word": "to",
"Offset": 174200000,
"Duration": 2600000
},
{
"Word": "the",
"Offset": 176800000,
"Duration": 8600000
},
{
"Word": "scheduled",
"Offset": 186500000,
"Duration": 7900000
},
{
"Word": "special",
"Offset": 194400000,
"Duration": 6000000
},
{
"Word": "budget",
"Offset": 200400000,
"Duration": 4400000
},
{
"Word": "hearings",
"Offset": 204800000,
"Duration": 6400000
},
{
"Word": "meeting",
"Offset": 211400000,
"Duration": 4800000
},
{
"Word": "of",
"Offset": 216200000,
"Duration": 1600000
},
{
"Word": "the",
"Offset": 217800000,
"Duration": 1300000
},
{
"Word": "los",
"Offset": 219100000,
"Duration": 2300000
},
{
"Word": "lm",
"Offset": 221400000,
"Duration": 3600000
},
{
"Word": "mk",
"Offset": 225000000,
"Duration": 5500000
},
{
"Word": "board",
"Offset": 231800000,
"Duration": 4600000
},
{
"Word": "of",
"Offset": 236400000,
"Duration": 1000000
},
{
"Word": "supervisors",
"Offset": 237400000,
"Duration": 9200000
},
{
"Word": "seems",
"Offset": 246600000,
"Duration": 3000000
},
{
"Word": "like",
"Offset": 249600000,
"Duration": 2400000
},
{
"Word": "we",
"Offset": 252000000,
"Duration": 1400000
},
{
"Word": "were",
"Offset": 253400000,
"Duration": 1600000
},
{
"Word": "just",
"Offset": 255000000,
"Duration": 3400000
},
{
"Word": "here",
"Offset": 258400000,
"Duration": 5500000
},
{
"Word": "but",
"Offset": 270200000,
"Duration": 4000000
},
{
"Word": "no",
"Offset": 274200000,
"Duration": 3000000
},
{
"Word": "it's",
"Offset": 277200000,
"Duration": 1600000
},
{
"Word": "wednesday",
"Offset": 278800000,
"Duration": 6700000
},
{
"Word": "may",
"Offset": 288600000,
"Duration": 3800000
},
{
"Word": "sixteenth",
"Offset": 292400000,
"Duration": 8800000
},
{
"Word": "full",
"Offset": 307200000,
"Duration": 4600000
},
{
"Word": "complement",
"Offset": 311800000,
"Duration": 6600000
},
{
"Word": "not",
"Offset": 318400000,
"Duration": 3000000
},
{
"Word": "quite",
"Offset": 321400000,
"Duration": 5300000
}
]
}
]
}
]
}
]
}
I would like to remove duplicates from the JSON only
For instance "Word": "ask" came twice; I would like to retain first occurrence of "Word": "ask" and remove second.
{
"Word": "welcome",
"Offset": 169700000,
"Duration": 4500000
},
I have tried various dedup techniques but nothing is helping
Here is my sample code:
import json
with open('example1.json') as json_data:
obj = json.load(json_data)
#attr = lambda x: x['hdfs:batchprocessing'][0]['application']['app_id']+x['hdfs:batchprocessing'][0]['application']['app_id']
el_set = set()
el_list = []
for el in obj:
if str(el) not in el_set:
el_set.add(str(el))
el_list.append(el)
open("updated_structure.json", "w").write(
json.dumps(el_list, sort_keys=True, indent=4, separators=(',', ': '))
)
JSON without any duplicate values for "Word"