Removing stop words from SMS text

import re

import nltk
import pandas as pd
nltk.download('stopwords')
[nltk_data] Downloading package stopwords to
[nltk_data]     /home/aishwarya/nltk_data...
[nltk_data]   Unzipping corpora/stopwords.zip.





True
# Sample SMS messages used to demonstrate stop-word removal.
Sms_content = ['hi,how are you', 'I am fine', 'myself aishwarya?']
# Column labels must be an ordered sequence. A set literal such as {'sms'}
# has no defined order and is rejected by recent pandas versions
# ("columns cannot be a set"), so a list is used instead.
df = pd.DataFrame(Sms_content, columns=['sms'])
df
sms
0 hi,how are you
1 I am fine
2 myself aishwarya?
# Load NLTK's English stop-word list (relies on the 'stopwords' corpus
# fetched by nltk.download('stopwords') earlier in the file).
stopwords=nltk.corpus.stopwords.words('english')
# Preview the first five entries.
stopwords[:5]
['i', 'me', 'my', 'myself', 'we']
def remove_stopwords(text, stop_words=None):
    """Return the words of *text* that are not stop words.

    The text is split into word tokens (runs of word characters, with
    optional internal apostrophes so contractions such as "don't" stay
    whole). Each token is lowercased before being checked against the
    stop-word collection, so "I" is treated the same as "i".

    Args:
        text: The sentence or message to clean.
        stop_words: Optional iterable of lowercase stop words. When omitted,
            the module-level ``stopwords`` collection is used.

    Returns:
        A list of the remaining tokens, in their original order and casing.
    """
    if stop_words is None:
        stop_words = stopwords
    # Set membership is O(1) per token instead of scanning the whole list.
    blocked = set(stop_words)
    # Iterating over the string directly would yield single characters,
    # so tokenize into words before filtering.
    tokens = re.findall(r"\w+(?:'\w+)*", text)
    return [token for token in tokens if token.lower() not in blocked]
# Run the stop-word filter over every message in the 'sms' column and store
# the per-row result in a new 'clean_text' column next to the original text.
df['clean_text'] = df['sms'].apply(remove_stopwords)
# Show the first rows to inspect the outcome.
df.head()
sms clean_text
0 hi,how are you [h, ,, h, w, , r, e, , u]
1 I am fine [I, , , f, n, e]
2 myself aishwarya? [e, l, f, , h, w, r, ?]