Example of dictionary:
data_noisy = {'P1': {'age': 'eighty two', 'salary': '60196.0', 'suburb':
'Toorak', 'language': 'English'},
'P2': {'age': '49', 'salary': '-16945514.0', 'suburb': 'St.
Kilda', 'language': 'Chinese'},
'P3': {'age': '54', 'salary': '49775.0', 'suburb':
'Neverland', 'language': 'Italian'}}
Wanted output:
data_clean = {'P1': {'age': 'None', 'salary': '60196.0', 'suburb':
'Toorak', 'language': 'English'},
'P2': {'age': '49', 'salary': 'None', 'suburb': 'St.
Kilda', 'language': 'Chinese'},
'P3': {'age': '54', 'salary': '49775.0', 'suburb': 'None',
'language': 'Italian'}}
MAX_SALARY = 200000
VALID_SUBURBS = ["Richmond", "Southbank", "Fitzroy",
"Docklands", "St. Kilda", "Footscray",
"Hawthorn", "Parkville", "Toorak", "Brunswick",
"Kensington", "Flemington", "Frankston", "Dandenong",
"Caulfield", "Collingwood"]
def clean_data(data):
data_dict = {}
data_dict = data
for key, value in data.items():
for val in value.items():
age = value['age']
if not age.isdigit():
data_dict['age'] = 'None'
else:
data_dict['age'] = value['age']
salary = float(value['salary'])
if salary < 0 or salary > MAX_SALARY:
data_dict['salary'] = 'None'
else:
data_dict['salary'] = value['salary']
suburb = value['suburb']
if suburb not in VALID_SUBURBS:
data_dict['suburb'] = 'None'
else:
data_dict['suburb'] = value['suburb']
print(data_dict)
I'm wanting to not change the original dictionary so tried to copy it, then iterate to "clean" the data. Seems like I just get a RuntimeError: dictionary changed size during iteration.
Any assistance with syntax etc. with working with these nested dictionaries would be greatly appreciated.
Thanks.