I am trying to split multiple columns into multiple rows in few lines rather than writing a big 'def function' for it.
I have 2 columns that need splitting by ;
. I am trying to follow this: Split cell into multiple rows in pandas dataframe
from pandas import DataFrame
import numpy as np
from itertools import chain
import pandas as pd
a = DataFrame([{'var1': 'a;b;c', 'var2': 1,'var3':'apples;mango'},
{'var1': 'd;e;f', 'var2': 2,'var3':'kiwi;pineapple'},
{'var1': 'g;e;a', 'var2': 15,'var3':'pinneapple'},
{'var1': 'm', 'var2': 12,'var3':'orange'}])
a
# return list from series of comma-separated strings
def chainer(s):
return list(chain.from_iterable(s.str.split(';')))
# calculate lengths of splits
lens = a['var1'].str.split(';').map(len)
# create new dataframe, repeating or chaining as appropriate
new_df = pd.DataFrame({'var1': chainer(a['var1']),
'var2': np.repeat(a['var2'], lens),
'var3': chainer(a['var3'])
})
print(new_df)
#ERROR: ValueError: arrays must all be same length