import pandas as pd
import nltk
nltk.download('stopwords')
[nltk_data] Downloading package stopwords to
[nltk_data] /home/aishwarya/nltk_data...
[nltk_data] Unzipping corpora/stopwords.zip.
True
Sms_content=['hi,how are you','I am fine','myself aishwarya?']
df=pd.DataFrame(Sms_content,columns={'sms'})
df
|
sms |
0 |
hi,how are you |
1 |
I am fine |
2 |
myself aishwarya? |
stopwords=nltk.corpus.stopwords.words('english')
stopwords[:5]
['i', 'me', 'my', 'myself', 'we']
def remove_stopwords(text):
clean_text=[word for word in text if word not in stopwords]
return clean_text
df['clean_text'] = df['sms'].apply(lambda row : remove_stopwords(row))
df.head()
|
sms |
clean_text |
0 |
hi,how are you |
[h, ,, h, w, , r, e, , u] |
1 |
I am fine |
[I, , , f, n, e] |
2 |
myself aishwarya? |
[e, l, f, , h, w, r, ?] |