You can use the version proposed by @Anshul Jindal, but there is also another alternative, which significantly differs in the output and you may find it useful if you have nan
s in your data.
import io
df = pd.DataFrame({'a': ['a', 'b', np.nan],
'b': [np.nan, 'e', 'f'],
'c': ['g', 'h', 'i'],
'd': ['j', np.nan, 'l']})
cols = ['a', 'b' ,'d']
# another approach, using temporary text buffer
with io.StringIO() as output:
df[cols].to_csv(output, sep=',', index=False, header=False)
output.seek(0)
df = df.assign(new_col=output.readlines())
df.new_col = df.new_col.str.strip()
# approach proposed earlier
df = df.assign(new_col_2 = df[cols].apply(
lambda x: ','.join(x.dropna().astype(str)),
axis=1
))
print(df)
a b c d new_col new_col_2
0 a NaN g j a,,j a,j
1 b e h NaN b,e, b,e
2 NaN f i l ,f,l f,l
Plus quite surprising timing of the approaches:
import io
import timeit
df = pd.DataFrame({'a': ['a', 'b', np.nan],
'b': [np.nan, 'e', 'f'],
'c': ['g', 'h', 'i'],
'd': ['j', np.nan, 'l']})
cols = ['a', 'b' ,'d']
def buffer_approach(df, cols_to_merge):
with io.StringIO() as output:
df[cols_to_merge].to_csv(output, sep=',', index=False, header=False)
output.seek(0)
df = df.assign(new_col=output.readlines())
df.new_col = df.new_col.str.strip()
return df
def pandas_approach(df, cols_to_merge):
df = df.assign(new_col = df[cols_to_merge].apply(
lambda x: ','.join(x.dropna().astype(str)),
axis=1
))
return df
print(timeit.repeat("buffer_approach(df, cols)", globals=globals(), repeat=5, number=1000))
print(timeit.repeat("pandas_approach(df, cols)", globals=globals(), repeat=5, number=1000))
[2.5745794447138906, 2.556944037321955, 2.5482078031636775, 2.2512022089213133, 2.0038619451224804]
[3.6452969149686396, 3.326099018100649, 3.5136850751005113, 3.9479835461825132, 3.4149401267059147]