Remove_stopwords

10 Aug 2020

import pandas as pd
import nltk

# Download the NLTK 'stopwords' corpus that the English stopword list is loaded from below.
nltk.download('stopwords')

[nltk_data] Downloading package stopwords to
[nltk_data]     /home/aishwarya/nltk_data...
[nltk_data]   Unzipping corpora/stopwords.zip.





True

# Tiny sample corpus: one SMS message per DataFrame row.
Sms_content = ['hi,how are you', 'I am fine', 'myself aishwarya?']
# Column labels are given as a list: a set has no defined order, and newer
# pandas versions reject a set passed as `columns`.
df = pd.DataFrame(Sms_content, columns=['sms'])
df

# Load NLTK's English stopword list into a module-level name;
# remove_stopwords() below reads this name when filtering.
stopwords=nltk.corpus.stopwords.words('english')
# Peek at the first five entries.
stopwords[:5]

['i', 'me', 'my', 'myself', 'we']

def remove_stopwords(text, stop_words=None):
    """Return the words of *text* that are not stopwords.

    Args:
        text: Either a raw string, which is split into word tokens
            (punctuation is dropped, in-word apostrophes such as in
            "don't" are kept), or an already-tokenized iterable of words.
        stop_words: Optional iterable of words to drop. When omitted, the
            module-level ``stopwords`` list is used.

    Returns:
        A list of the remaining words, in their original order and casing.
    """
    import re  # local import so this definition does not rely on file-level imports

    if stop_words is None:
        stop_words = stopwords
    # Build the lookup once per call; lower-casing makes matching
    # case-insensitive, so "I" is dropped by the stopword "i".
    blocked = {word.lower() for word in stop_words}

    if isinstance(text, str):
        # Iterating a str yields single characters, so tokenize into words first.
        words = re.findall(r"\w+(?:'\w+)*", text)
    else:
        words = text

    return [word for word in words if word.lower() not in blocked]

# Run remove_stopwords on every message and store the result in a new column.
# The function is passed directly to apply; a lambda that only forwards its
# argument adds nothing.
df['clean_text'] = df['sms'].apply(remove_stopwords)
df.head()