# Sample dataset: five parallel lists of 20 values each, one entry per row.
data = {
    'score': [
        91, 93, 72, 87, 86, 73, 68, 87, 78, 99,
        95, 76, 84, 96, 76, 80, 83, 84, 73, 74,
    ],
    'hours': [
        16, 6, 3, 1, 2, 3, 2, 5, 2, 5,
        2, 3, 4, 3, 3, 3, 4, 3, 4, 4,
    ],
    'prep': [
        3, 4, 0, 3, 4, 0, 1, 2, 1, 2,
        3, 3, 3, 2, 2, 2, 3, 3, 2, 2,
    ],
    'grade': [
        70, 88, 80, 83, 88, 84, 78, 94, 90, 93,
        89, 82, 95, 94, 81, 93, 93, 90, 89, 89,
    ],
    # School indicator: the first 11 rows are 0, the remaining 9 rows are 1.
    'school': [0] * 11 + [1] * 9,
}

# The dict's insertion order already gives the desired column order
# (score, hours, prep, grade, school); pass it explicitly all the same.
df = pd.DataFrame(data, columns=list(data))

# Bare expression: shows the frame when run in a notebook/REPL cell.
df
In my data frame, each row represents a student. I would like to create a new column called 'Average Score Difference'. Each value in this column should be the difference between the average score for that student's school and the student's own score, where the value 0 or 1 in the 'school' column indicates which school the student is in.
Any help would be greatly appreciated.