Try this:
import pandas as pd
# Example table: each column label is a comma-separated string of words.
table = {
    'a, en': [1, 0, 0],
    'a, ha': [0, 1, 0],
    'a, padam': [0, 0, 1],
    'aa, aala': [1, 0, 0],
    'aaa, accountinte': [0, 1, 0],
    'aaaa,adhamanaya': [0, 0, 1],
    'aaab,adhamanaya': [0, 0, 1],
}
tf1_bigram = pd.DataFrame(table)

# One-row frame of words; row 0 of ``list_tf_words`` is the list searched below.
table = {0: ['aa'], 1: ['aaa'], 2: ['aaaa'], 3: ['aaan'], 4: ['aaanu'], 5: ['aada'], 6: ['aadhyam']}
tf_words = pd.DataFrame(table)
list_tf_words = tf_words.values.tolist()

print(tf1_bigram)
print('\n\n-------------BREAK-----------\n\n')


def func(x):
    """Halve a column once if its label contains any of the words.

    Args:
        x: A column of ``tf1_bigram`` as passed by ``DataFrame.apply`` with
            ``axis=0``; ``x.name`` is the column label (a string).

    Returns:
        ``x * 0.5`` if at least one entry of ``list_tf_words[0]`` occurs as a
        substring of ``x.name``; otherwise ``x`` unchanged.
    """
    # Substring test, not exact-word test: 'aa' also matches 'aaab,adhamanaya'.
    if any(word in x.name for word in list_tf_words[0]):
        return x * 0.5
    return x


tf1_bigram = tf1_bigram.apply(func, axis=0)
print(tf1_bigram)
OUTPUT
a, en a, ha a, padam ... aaa, accountinte aaaa,adhamanaya aaab,adhamanaya
0 1 0 0 ... 0 0 0
1 0 1 0 ... 1 0 0
2 0 0 1 ... 0 1 1
[3 rows x 7 columns]
-------------BREAK-----------
a, en a, ha a, padam ... aaa, accountinte aaaa,adhamanaya aaab,adhamanaya
0 1 0 0 ... 0.0 0.0 0.0
1 0 1 0 ... 0.5 0.0 0.0
2 0 0 1 ... 0.0 0.5 0.5
[3 rows x 7 columns]
If you want the column to be multiplied by 0.5 once for every matching word (instead of only once), use the code below:
import pandas as pd
# Example table: each column label is a comma-separated string of words.
table = {
    'a, en': [1, 0, 0],
    'a, ha': [0, 1, 0],
    'a, padam': [0, 0, 1],
    'aa, aala': [1, 0, 0],
    'aaa, aaanu, accountinte': [0, 1, 0],
    'aaaa,adhamanaya': [0, 0, 1],
}
tf1_bigram = pd.DataFrame(table)

# One-row frame of words; row 0 of ``list_tf_words`` is the list searched below.
table = {0: ['aa'], 1: ['aaa'], 2: ['aaaa'], 3: ['aaan'], 4: ['aaanu'], 5: ['aada'], 6: ['aadhyam']}
tf_words = pd.DataFrame(table)
list_tf_words = tf_words.values.tolist()

print(tf1_bigram)
print('\n\n-------------BREAK-----------\n\n')


def func(x):
    """Halve a column once for every word that occurs in its label.

    Args:
        x: A column of ``tf1_bigram`` as passed by ``DataFrame.apply`` with
            ``axis=0``; ``x.name`` is the column label (a string).

    Returns:
        ``x`` multiplied by 0.5 once per entry of ``list_tf_words[0]`` that is
        a substring of ``x.name``; ``x`` itself if no entry matches.
    """
    for word in list_tf_words[0]:
        # No early return: every matching word compounds the scaling.
        if word in x.name:
            x = x * 0.5
    return x


tf1_bigram = tf1_bigram.apply(func, axis=0)
print(tf1_bigram)
OUTPUT
a, en a, ha a, padam aa, aala aaa, aaanu, accountinte aaaa,adhamanaya
0 1 0 0 1 0 0
1 0 1 0 0 1 0
2 0 0 1 0 0 1
-------------BREAK-----------
a, en a, ha a, padam aa, aala aaa, aaanu, accountinte aaaa,adhamanaya
0 1 0 0 0.5 0.0000 0.000
1 0 1 0 0.0 0.0625 0.000
2 0 0 1 0.0 0.0000 0.125
Try this if you need to compare each word of the column name exactly against the contents of tf_words:
import pandas as pd
# Example table: each column label is a "first, second" word pair.
table = {
    'a, en': [1, 0, 0],
    'a, ha': [0, 1, 0],
    'a, padam': [0, 0, 1],
    'aa, aala': [1, 0, 0],
    'aaa, accountinte': [0, 1, 0],
    'aaaa,adhamanaya': [0, 0, 1],
    'aaab,adhamanaya': [0, 0, 1],
}
tf1_bigram = pd.DataFrame(table)

# One-row frame of words; row 0 of ``list_tf_words`` is the list searched below.
table = {0: ['a'], 1: ['en'], 2: ['aaaa'], 3: ['aaan'], 4: ['aaanu'], 5: ['aada'], 6: ['aadhyam']}
tf_words = pd.DataFrame(table)
list_tf_words = tf_words.values.tolist()

print(tf1_bigram)
print('\n\n-------------BREAK-----------\n\n')


def func(x):
    """Halve a column if both words of its label are in the word list.

    The label ``x.name`` is split on commas and the first two parts are
    stripped of surrounding whitespace, then each is compared for exact
    membership in ``list_tf_words[0]`` (no substring matching).

    Args:
        x: A column of ``tf1_bigram`` as passed by ``DataFrame.apply`` with
            ``axis=0``; ``x.name`` is the column label (a string).

    Returns:
        ``x * 0.5`` if both words are present, otherwise ``x`` unchanged.

    Raises:
        IndexError: If the label contains no comma, so there is no second part.
    """
    parts = x.name.split(',')
    words = list_tf_words[0]
    # Use ``or`` instead of ``and`` if matching only one of the two words
    # should be enough.
    if parts[0].strip() in words and parts[1].strip() in words:
        return x * 0.5
    return x


tf1_bigram = tf1_bigram.apply(func, axis=0)
print(tf1_bigram)
OUTPUT
a, en a, ha a, padam ... aaa, accountinte aaaa,adhamanaya aaab,adhamanaya
0 1 0 0 ... 0 0 0
1 0 1 0 ... 1 0 0
2 0 0 1 ... 0 1 1
[3 rows x 7 columns]
-------------BREAK-----------
a, en a, ha a, padam ... aaa, accountinte aaaa,adhamanaya aaab,adhamanaya
0 0.5 0 0 ... 0 0 0
1 0.0 1 0 ... 1 0 0
2 0.0 0 1 ... 0 1 1
[3 rows x 7 columns]
Solution for column names that are tuples:
import pandas as pd
# Example table keyed by (first, second) tuples, which pandas turns into
# two-level column labels.
table = {
    ('a', 'en'): [1, 0, 0],
    ('a', 'ha'): [0, 1, 0],
    ('a', 'padam'): [0, 0, 1],
    ('aa', 'aala'): [1, 0, 0],
    ('aaa', 'accountinte'): [0, 1, 0],
    ('aaaa', 'adhamanaya'): [0, 0, 1],
    ('aaab', 'adhamanaya'): [0, 0, 1],
}
tf1_bigram = pd.DataFrame(table)

# One-row frame of words; row 0 of ``list_tf_words`` is the list searched below.
table = {0: ['a'], 1: ['en'], 2: ['aaaa'], 3: ['aaan'], 4: ['aaanu'], 5: ['aada'], 6: ['aadhyam']}
tf_words = pd.DataFrame(table)
list_tf_words = tf_words.values.tolist()

print(tf1_bigram)
print('\n\n-------------BREAK-----------\n\n')


def func(x):
    """Halve a column if both elements of its tuple label are in the word list.

    Args:
        x: A column of ``tf1_bigram`` as passed by ``DataFrame.apply`` with
            ``axis=0``; ``x.name`` is the column's tuple label.

    Returns:
        ``x * 0.5`` if the first two label elements (whitespace-stripped) are
        both exact members of ``list_tf_words[0]``, otherwise ``x`` unchanged.
    """
    label = x.name
    words = list_tf_words[0]
    # Use ``or`` instead of ``and`` if matching only one of the two words
    # should be enough.
    if label[0].strip() in words and label[1].strip() in words:
        return x * 0.5
    return x


tf1_bigram = tf1_bigram.apply(func, axis=0)
print(tf1_bigram)
OUTPUT
a aa aaa aaaa aaab
en ha padam aala accountinte adhamanaya adhamanaya
0 1 0 0 1 0 0 0
1 0 1 0 0 1 0 0
2 0 0 1 0 0 1 1
-------------BREAK-----------
a aa aaa aaaa aaab
en ha padam aala accountinte adhamanaya adhamanaya
0 0.5 0 0 1 0 0 0
1 0.0 1 0 0 1 0 0
2 0.0 0 1 0 0 1 1