I have a dataset of medical insurance variables, and am interested in understanding how the proportion of smokers ('yes', 'no') differ between regions ('northwest', 'northeast', 'southwest', 'southeast').
I have used a for loop to iterate over each instance of smoker and non-smoker for each region, adding to a smoker/non-smoker variable for each region respectively. I then used each of these variables to compute the proportion of smokers and non-smokers for each region. However, the code feels seriously cumbersome. How can I make this code more efficient? This is my second week using python, so I am hoping someone can teach me some useful tricks to aid the learning process.
Here is the code I have, it works but is super inefficient:
smoker_region = list(zip(smoker_status, region))
def smoker_region_diff(smoker_region):
northwest_smokers = 0
northeast_smokers = 0
southwest_smokers = 0
southeast_smokers = 0
northwest_non_smokers = 0
northeast_non_smokers = 0
southwest_non_smokers = 0
southeast_non_smokers = 0
for smoker in smoker_region:
if smoker[0] == 'yes' and smoker[1] == 'northwest':
northwest_smokers += 1
elif smoker[0] == 'yes' and smoker[1] == 'northeast':
northeast_smokers += 1
elif smoker[0] == 'yes' and smoker[1] == 'southwest':
southwest_smokers += 1
elif smoker[0] == 'yes' and smoker[1] == 'southeast':
southeast_smokers += 1
elif smoker[0] == 'no' and smoker[1] == 'northwest':
northwest_non_smokers += 1
elif smoker[0] == 'no' and smoker[1] == 'northeast':
northeast_non_smokers += 1
elif smoker[0] == 'no' and smoker[1] == 'southwest':
southwest_non_smokers += 1
elif smoker[0] == 'no' and smoker[1] == 'southeast':
southeast_non_smokers += 1
prop_smokers_northwest = northwest_smokers / len(smoker_status)
prop_smokers_northeast = northeast_smokers / len(smoker_status)
prop_smokers_southwest = southwest_smokers / len(smoker_status)
prop_smokers_southeast = southeast_smokers / len(smoker_status)
prop_non_smokers_northwest = northwest_non_smokers / len(smoker_status)
prop_non_smokers_northeast = northeast_non_smokers / len(smoker_status)
prop_non_smokers_southwest = southwest_non_smokers / len(smoker_status)
prop_non_smokers_southeast = northwest_non_smokers / len(smoker_status)
print(f'Proportion of smokers in the northwest:{prop_smokers_northwest}%')
print(f'Proportion of smokers in the northeast:{prop_smokers_northeast}%')
print(f'Proportion of smokers in the southwest:{prop_smokers_southwest}%')
print(f'Proportion of smokers in the southeast:{prop_smokers_southeast}%')
print(f'Proportion of non-smokers in the northwest:{prop_non_smokers_northwest}%')
print(f'Proportion of non-smokers in the northeast:{prop_non_smokers_northeast}%')
print(f'Proportion of non-smokers in the southwest:{prop_non_smokers_southwest}%')
print(f'Proportion of non-smokers in the southeast:{prop_non_smokers_southeast}%')
smoker_region_diff(smoker_region)