遍历文档中的字/词语,并整理其字频和词频

import pyecharts,jieba,random
import pandas as pd

# Read the entire source text into memory as one string.
# The context manager closes the file handle as soon as the read finishes,
# instead of leaving it open until the interpreter happens to collect it.
with open(r"E:\桌面\不小心卖了自己[重生].txt", "r", encoding='UTF-8') as text_file:
    ss = text_file.read()
# NOTE: the triple-quoted string below is a bare expression statement that is
# never assigned or used, so the code inside it does not run. It holds a
# disabled variant of the analysis that counts every single character of `ss`
# (instead of jieba tokens), sorts the counts in descending order and drops
# common punctuation/whitespace labels.
'''
# 遍历所有的字,并整理其每一个字的字频
aa ={}
for i in ss:
    aa[i] = aa.get(i,0) + 1
aa = pd.Series(aa)
# print(aa)
bb = aa.sort_values(ascending=False)
bb = bb.drop([',',"。",'”','“',':','?','\n','、'," ","!",";"])
# print(bb)
# print(bb[:9])
'''


# Segment the text with jieba in full mode (cut_all=True), then count how many
# times each resulting token occurs.
cc = jieba.lcut(ss, cut_all=True)
# print(cc)
ff = {}
for token in cc:
    ff[token] = ff.get(token, 0) + 1

# Turn the counts into a Series and order it from most to least frequent.
ff = pd.Series(ff)
ff = ff.sort_values(ascending=False)

# Remove punctuation / whitespace labels from the ranking.
# errors="ignore" is required: with the default errors="raise", drop() raises
# KeyError as soon as one of these labels is missing from the index, which
# happens whenever the segmented text does not contain that exact token.
ff = ff.drop(
    [',', "。", '”', '“', ':', '?', '\n', '、', " ", "!", ";", " "],
    errors="ignore",
)

# Show ranks 3..52 by position (same rows as the plain ff[2:52] slice).
# NOTE(review): the first two entries are skipped, presumably because they are
# noise tokens such as empty strings -- confirm against real output.
print(ff.iloc[2:52])

 

© 版权声明
THE END
喜欢就支持一下吧
点赞60
分享
相关推荐
  • 暂无相关文章
  • 评论 抢沙发