I think here is main problem read csv
to DataFrame
, for parsing I use yaml:
import pandas as pd
from pandas.compat import StringIO
import yaml, csv
temp=u""""NAME, FOOD
John", {"Apple":10, "Banana":10, "Mango":5}
"Jack", {"Apple":5, "Mango":10}
"Jill", {"Apple":7, "Banana":15}"""
#after testing replace 'StringIO(temp)' to 'filename.csv'
df = pd.read_csv(StringIO(temp),
header=None,
sep='{',
quoting =csv.QUOTE_NONE,
skiprows=1,
names=['a','b'])
df['b'] = ('{' + df['b']).apply(yaml.load)
df['a'] = df['a'].str.strip(', "')
print (df)
a b
0 John {'Apple': 10, 'Mango': 5, 'Banana': 10}
1 Jack {'Apple': 5, 'Mango': 10}
2 Jill {'Apple': 7, 'Banana': 15}
print (type(df.loc[0,'b']))
<class 'dict'>
Then create new DataFrame
by constructor and get idxmax
and max
values per rows:
df1 = pd.DataFrame(df['b'].values.tolist())
df['b'] = df1.idxmax(1)
df['c'] = df1.max(1).astype(int)
print (df)
a b c
0 John Apple 10
1 Jack Mango 10
2 Jill Banana 15
EDIT:
Maybe also works json
parser:
import json, csv
temp=u""""NAME, FOOD
John", {"Apple":10, "Banana":10, "Mango":5}
"Jack", {"Apple":5, "Mango":10}
"Jill", {"Apple":7, "Banana":15}"""
#after testing replace 'StringIO(temp)' to 'filename.csv'
df = pd.read_csv(StringIO(temp),
header=None,
sep='{',
quoting =csv.QUOTE_NONE,
skiprows=1,
names=['a','b'])
df['b'] = ('{' + df['b']).apply(json.loads)
df['a'] = df['a'].str.strip(', "')
print (df)
a b
0 John {'Mango': 5, 'Apple': 10, 'Banana': 10}
1 Jack {'Mango': 10, 'Apple': 5}
2 Jill {'Apple': 7, 'Banana': 15}
print (type(df.loc[0,'b']))
<class 'dict'>