# Lemmatize the text word by word.
# BUG FIX: WordNetLemmatizer.lemmatize() operates on a SINGLE word; the
# original called it on the whole `text` string, which treats the entire
# text as one token and returns it essentially unchanged.  Tokenize on
# whitespace, lemmatize each word, and rejoin instead.
lem = nltk.stem.wordnet.WordNetLemmatizer()
text = ' '.join(lem.lemmatize(word) for word in text.split())
# Plot a word cloud of the 100 most frequent words in the corpus.
mostcommon = FreqDist(allwords).most_common(100)  # top-100 (word, count) pairs
# BUG FIX: the original passed str(mostcommon) to generate(), i.e. the string
# repr of a list of (word, count) tuples -- counts, quotes and brackets were
# tokenized into the cloud.  Feed the actual frequencies instead.
wordcloud = WordCloud(width=1600, height=800, background_color='white',
                      stopwords=STOPWORDS).generate_from_frequencies(dict(mostcommon))
fig = plt.figure(figsize=(30, 10), facecolor='white')
plt.imshow(wordcloud, interpolation="bilinear")
plt.axis('off')
plt.title('Top 100 Most Common Words in cleaned_label', fontsize=50)
plt.tight_layout(pad=0)
plt.show()
# Print the files in the Kaggle input directory, then stop after the
# first directory level (the trailing `break` exits the outer walk).
for dirname, _, filenames in os.walk('/kaggle/input'):
    for filename in filenames:
        print(os.path.join(dirname, filename))
    break
# Rows of labelext_res_df whose 'human_label' is missing (NaN/None).
labelext_res_df[labelext_res_df['human_label'].isna()]
import re

# Start offset of every non-overlapping 'test' match in the sample string.
[match.start() for match in re.compile('test').finditer('test test test test')]