I wrote a very fast version of the transform to get these results. You can do the np.ushort inside the generator as well, and it's still fast but much faster outside:
import time
df = pd.DataFrame(
np.random.randn(8, 4**7),
index=[np.array(['bar', 'bar', 'baz', 'baz', 'foo', 'foo', 'qux', 'qux']),
np.array(['one', 'two', 'one', 'two', 'one', 'two', 'one', 'two'])])
start = time.time()
df.loc[:,] = np.ushort(df)
df = df.transform(lambda x: [ i if i> 10 else namednumber2numbername[x.name[1]][i] for i in x], axis=1)
end = time.time()
print(end - start)
# 1.150895118713379
Here's the time's on the original:
df = pd.DataFrame( np.random.randn(8, 4),
index=[np.array(['bar', 'bar', 'baz', 'baz', 'foo', 'foo', 'qux', 'qux']),
np.array(['one', 'two', 'one', 'two', 'one', 'two', 'one', 'two'])])
start = time.time()
df.loc[:,] = np.ushort(df)
df = df.transform(lambda x: [ i if i> 10 else namednumber2numbername[x.name[1]][i] for i in x], axis=1)
end = time.time()
print(end - start)
# 0.005067110061645508
In [453]: df
Out[453]:
0 1 2 3
bar one zero zero one zero
two i i i i
baz one zero zero zero zero
two i i ii i
foo one 65535 zero 65535 zero
two i i i i
qux one zero zero zero zero
two i i i ii
I got it to a one liner:
df.transform(lambda x: [ np.ushort(value) if np.ushort(value) > 10 else namednumber2numbername[pos[1]][np.ushort(value)] for pos, value in x.items()])
0 1 2 3
bar one zero zero zero zero
two i i ii i
baz one 65534 zero 65535 zero
two ii i 65535 i
foo one zero zero zero zero
two ii i i ii
qux one 65535 zero zero zero
two i i i i
Ok a version without .items():
def what(x):
if type(x[0]) == np.float64:
if np.ushort(x[0])>10:
return np.ushort(x[0])
else:
return(namednumber2numbername[x.index[0][1]][np.ushort(x[0])])
df.groupby(level=[0,1]).transform(what)
0 1 2 3
bar one zero one zero zero
two i ii 65535 i
baz one zero zero 65535 zero
two i i i i
foo one zero one zero zero
two i i i i
qux one two zero zero 65534
two i i i ii
and one liner!!!! no .items per your request! We groupby Levels 0 and 1 and then perform the calculations to determine the values::
df.groupby(level=[0,1]).transform(lambda x: np.ushort(x[0]) if type(x[0]) == np.float64 and np.ushort(x[0]) >10 else namednumber2numbername[x.index[0][1]][np.ushort(x[0])])
0 1 2 3
bar one zero one zero zero
two i ii 65535 i
baz one zero zero 65535 zero
two i i i i
foo one zero one zero zero
two i i i i
qux one two zero zero 65534
two i i i ii
To get the other values i did this:
df.transform(lambda x: [ str(x.name[0]) + '_' + str(x.name[1]) + '_' + str( pos)+ '_' +str(value) for pos,value in x.items()])
print('Transformed DataFrame:\n',
df.transform(what), sep='')
Transformed DataFrame:
α ... ω ε
f a b c ... b c j
one α_a_one_79.96465755359696 α_b_one_31.32938096131651 α_c_one_2.61444370203201 ... ω_b_one_35.7457972161041 ω_c_one_40.224465043054195 ε_j_one_43.527184108357496
two α_a_two_42.66244395377804 α_b_two_65.92020941618344 α_c_two_77.26467264185487 ... ω_b_two_40.91908469505522 ω_c_two_50.395561828234555 ε_j_two_71.67418483119914
one α_a_one_47.9769845681328 α_b_one_38.90671671550259 α_c_one_67.13601594352508 ... ω_b_one_23.23799084164898 ω_c_one_63.551178212994465 ε_j_one_16.975582723809303
Here's one without .items:
df.transform(lambda x: ['_'.join((x.name[0], x.name[1], x.index[0], str(i) if type(i) == float else 0)) for i in list(x)])
output
α ... ω ε
f a b c ... b c j
one α_a_one_79.96465755359696 α_b_one_31.32938096131651 α_c_one_2.61444370203201 ... ω_b_one_35.7457972161041 ω_c_one_40.224465043054195 ε_j_one_43.527184108357496
two α_a_two_42.66244395377804 α_b_two_65.92020941618344 α_c_two_77.26467264185487 ... ω_b_two_40.91908469505522 ω_c_two_50.395561828234555 ε_j_two_71.67418483119914
one α_a_one_47.9769845681328 α_b_one_38.90671671550259 α_c_one_67.13601594352508 ... ω_b_one_23.23799084164898 ω_c_one_63.551178212994465 ε_j_one_16.975582723809303
I did it also with no groupings:
df.T.apply(lambda x: x.name[0] + '_'+ x.name[1] + '_' + df.T.eq(x).columns + '_' + x.astype(str) , axis=1).T
or even better and most simple:
df.T.apply(lambda x: x.name[0] + '_'+ x.name[1] + '_' + x.index + '_' + x.astype(str) , axis=1).T
or
df.T.transform(lambda x: x.name[0] + '_'+ x.name[1] + '_' + x.index + '_' + x.astype(str) , axis=1).T
or with no .T:
df.transform(lambda x: x.index[0][0] + '_'+ x.index[0][1] + '_' + x.name + '_' + x.astype(str) , axis=1)
α ... ω ε
f a b c ... b c j
one α_a_one_79.96465755359696 α_b_one_31.32938096131651 α_c_one_2.61444370203201 ... ω_b_one_35.7457972161041 ω_c_one_40.224465043054195 ε_j_one_43.527184108357496
two α_a_two_42.66244395377804 α_b_two_65.92020941618344 α_c_two_77.26467264185487 ... ω_b_two_40.91908469505522 ω_c_two_50.395561828234555 ε_j_two_71.67418483119914
one α_a_one_47.9769845681328 α_b_one_38.90671671550259 α_c_one_67.13601594352508 ... ω_b_one_23.23799084164898 ω_c_one_63.551178212994465 ε_j_one_16.975582723809303