I want do a mean of my variable in a DataFrame where I have grouped the element by column 'A'. The problem is that when I print the result the output is the mean only for the first variable, column, why do this ?
Code;
import pandas as pd
file = open('C:/Users/Andre/Desktop/Python/introduction-datascience-python-book-master/files/ch03/adult.data', 'r')
def chr_int(a):
if a.isdigit(): return int(a)
else:
return a
data = []
for line in file:
data1 = line.split(',')
if len(data1) == 15:
data.append([chr_int(data1[0]), data1[1], chr_int(data1[2]), data1[3], chr_int(data1[4]), data1[5], data1[6],
data1[7], data1[8], data1[9], chr_int(data1[10]), chr_int(data1[11]),
chr_int(data1[12]), data1[13], data1[14]])
df = pd.DataFrame(data)
df.columns = [ 'age', 'type_employer', 'fnlwgt', 'education',
'education_num', 'marital', 'occupation',
'relationship', 'race', 'sex', 'capital_gain',
'capital_loss', 'hr_per_week', 'country', 'income' ]
#print(df)
counts = df.groupby('country').mean()
print(counts.head())
OUTPUT;
age
country
? 38.725557
Cambodia 37.789474
Canada 42.545455
China 42.533333
Columbia 39.711864