how to apply on a column with many rows and not only on the first row? it works only for df[0] for example but not on all the rows. What can I do instead of split? and how to apply?
I currantly get a list of words of only the first cell (which is a long text). I need to get the list of all the rows
I get error:
AttributeError Traceback (most recent call last)
<ipython-input-400-30a976cb67ef> in <module>
73
74
---> 75 new_data=Clean_stop_words(df["Text"])
76
77 print(new_data, end=",")
<ipython-input-400-30a976cb67ef> in Clean_stop_words(data)
54 # for i in range(0:500):
55 # data=data.apply(str)
---> 56 data_split = data.split(' ')
57 #print(data_split)
58 # for word in data:
~\anaconda\lib\site-packages\pandas\core\generic.py in __getattr__(self, name)
5128 if self._info_axis._can_hold_identifiers_and_holds_name(name):
5129 return self[name]
-> 5130 return object.__getattribute__(self, name)
5131
5132 def __setattr__(self, name: str, value) -> None:
AttributeError: 'Series' object has no attribute 'split'
def Clean_stop_words(data):
stemmer = PorterStemmer()
stop_words=stopwords.words('english')
new_data=[]
#print(data)
# for i in range(0:500):
# data=data.apply(str)
data_split = data.split(' ')
#print(data_split)
# for word in data:
# print(word)
for word in data_split:
np.char.lower(word)
#print(data_split)
word = re.sub('[^A-Za-z0-9]+', '', word)
for word in data_split:
if word not in stop_words:
word1=stemmer.stem(word)
#print(word1)
new_data.append(word1)
symbols = "!\"#$%&()*+-./:;<=>?@[\]^_`{|}~\n"
for i in symbols:
new_data= np.char.replace(new_data, i, ' ')
return new_data
new_data=Clean_stop_words(df["Text"])
#new_data=Clean_stop_words(df["Text"][0])
print(new_data, end=",")