0

I need some small help. I have data containing hospital names and Birth weights in kilograms. Now I do want to group and count weights below 1kg and above 1kg per individual hospitals . Here is how my data looks like

# intialise data of lists. 
data = {'Hospital':['Ruack', 'Ruack', 'Pens', 'Rick','Pens', 'Rick'],'Birth_weight':['1.0', '0.1', '2.1', '0.9', '2.19', '0.88']} 

# Create DataFrame 
dfy = pd.DataFrame(data) 

# Print the output. 
print(dfy)

Here is what I tried

#weight below 1kg
weight_count=pd.DataFrame(dfy.groupby('Hospital')['Birth_weight'] < 1.value_counts())
weight_count = weight_count.rename({'Birth_weight': 'weight_count'}, axis='columns')
weight_final = weight_count.reset_index()

#weight above 1kg
weight_count=pd.DataFrame(dfy.groupby('Hospital')['Birth_weight'] > 1.value_counts())
weight_count = weight_count.rename({'Birth_weight': 'weight_count'}, axis='columns')
weight_final = weight_count.reset_index()

end results Expected result is a table with weight counts of birth weights under 1kg and above 1kg grouped per hospital.

EXPECTED TABLE

# intialise data of lists. 
data = {'Hospital':['Ruack' , 'Rick','pens'],'< 1kg_count':['1', '2' , 'NAN'], '>1kg_count':['1','NAN' ,'2']} 

# Create DataFrame 
df_final = pd.DataFrame(data) 

# Print the output. 
print(df_final)
LivingstoneM
  • 1,088
  • 10
  • 28

3 Answers3

2

Use numpy.where for catagorize to new column and then GroupBy.size with Series.unstack:

#if encessary convert to floats
dfy['Birth_weight'] = dfy['Birth_weight'].astype(float)

dfy['group'] = np.where(dfy['Birth_weight'] < 1,'< 1kg_count','>1kg_count')
df = dfy.groupby(['Hospital', 'group']).size().unstack().reset_index()
print (df)
group Hospital  < 1kg_count  >1kg_count
0         Pens          NaN         2.0
1         Rick          2.0         NaN
2        Ruack          1.0         1.0

Another idea with DataFrame.pivot_table:

dfy['Birth_weight'] = dfy['Birth_weight'].astype(float)

g = np.where(dfy['Birth_weight'] < 1,'< 1kg_count','>1kg_count')
df = dfy.pivot_table(index='Hospital', columns=g, aggfunc='size').reset_index()
print (df)
  Hospital  < 1kg_count  >1kg_count
0     Pens          NaN         2.0
1     Rick          2.0         NaN
2    Ruack          1.0         1.0

EDIT: If want binning of column use cut:

dfy['Birth_weight'] = dfy['Birth_weight'].astype(float)

bins = np.arange(0, 5.5, 0.5)
labels = ['{}-{}kg_count'.format(i, j) for i, j in zip(bins[:-1], bins[1:])] 
#print (bins)
#print (labels)

g = pd.cut(dfy['Birth_weight'], bins=bins, labels=labels)
df = dfy.pivot_table(index='Hospital', columns=g, aggfunc='size')
print (df)
Birth_weight  0.0-0.5kg_count  0.5-1.0kg_count  2.0-2.5kg_count
Hospital                                                       
Pens                      NaN              NaN              2.0
Rick                      NaN              2.0              NaN
Ruack                     1.0              1.0              NaN
jezrael
  • 822,522
  • 95
  • 1,334
  • 1,252
0

Are you looking for something like this?

a=(dfy['Birth_weight'].astype(float)<1).map({True: 'Less than 1kg', False: 'More than 1kg'})
dfy.groupby(['Hospital',a])['Birth_weight'].count().reset_index(name='Count')

Output

Hospital    Birth_weight    Count
0   Pens    More than 1kg   2
1   Rick    Less than 1kg   2
2   Ruack   Less than 1kg   1
3   Ruack   More than 1kg   1
moys
  • 7,747
  • 2
  • 11
  • 42
0
import pandas as pd
import numpy as np

# intialise data of lists. 
data = {'Hospital':['Ruack', 'Ruack', 'Pens', 'Rick','Pens', 'Rick'],'Birth_weight': 
                   ['1.0', '0.1', '2.1', '0.9', '2.19', '0.88']} 

# Create DataFrame

dfy = pd.DataFrame(data) 
dfy['Birth_weight'] = dfy['Birth_weight'].astype(float)
df1 = dfy.groupby(['Hospital','Birth_weight'])
df1.filter(lambda x: x['Birth_weight']>1)
df1.filter(lambda x: x['Birth_weight']<1)