Python实现微信好友的数据分析
基于微信个人号接口的 Python 库 itchat,获取微信好友信息,并对好友的省份、性别和个性签名进行数据分析。
效果:
直接上代码:在运行目录下新建三个空文本文件 stopwords.txt、newdit.txt、unionWords.txt,并下载字体文件 simhei.ttf(或删除代码中指定字体的部分),即可直接运行。
# wxfriends.py  2018-07-09
import sys
from collections import Counter
from os import path

import itchat
import jieba
import jieba.posseg as pseg
import matplotlib.pyplot as plt
import pandas as pd
from wordcloud import WordCloud

try:
    from scipy.misc import imread
except ImportError:
    # scipy.misc.imread was removed in SciPy 1.2; fall back to matplotlib's
    # reader so the script still imports on current SciPy releases.
    from matplotlib.pyplot import imread

plt.rcParams['font.sans-serif'] = ['SimHei']  # let charts render Chinese text
plt.rcParams['axes.unicode_minus'] = False  # keep the minus sign renderable with that font
# Work around encoding problems: a str.translate table that maps every code
# point above the Basic Multilingual Plane to U+FFFD (replacement character).
non_bmp_map = {
    code_point: 0xFFFD
    for code_point in range(0x10000, sys.maxunicode + 1)
}
# Fetch the friend list and normalise the fields used by the analysis.
def getFriends():
    """Return one record per WeChat contact reported by itchat.

    Calls ``itchat.get_friends(update=True)`` and builds, for every contact,
    a dict with the keys ``NickName``, ``Sex``, ``Province``, ``City`` and
    ``Signature``.

    Returns:
        list[dict]: the records, in the order itchat returned them.
    """
    # itchat reports sex as 1 / 2; anything else gets the fallback label.
    sex_labels = {1: '男', 2: '女'}

    flists = []
    for friend in itchat.get_friends(update=True):
        flists.append({
            # Characters outside the BMP are replaced via non_bmp_map.
            'NickName': (friend.get('NickName') or '').translate(non_bmp_map),
            'Sex': sex_labels.get(friend.get('Sex'), '雌雄同体'),
            # An empty (or missing) province is reported as "unknown".
            'Province': friend.get('Province') or '未知',
            'City': friend.get('City', ''),
            'Signature': friend.get('Signature', ''),
        })
    return flists
# Save the friend records as CSV and hand back the DataFrame.
def saveCSV(lists):
    """Build a DataFrame from *lists* and write it to ``wxfriends.csv``.

    The file is written to the current working directory with the row index
    included and ``gb18030`` encoding. Writing is best-effort: a failed write
    is printed and the DataFrame is still returned.

    Args:
        lists: list of per-friend dicts (one row each).

    Returns:
        pandas.DataFrame: the frame built from *lists*.
    """
    df = pd.DataFrame(lists)
    try:
        df.to_csv("wxfriends.csv", index=True, encoding='gb18030')
    except (OSError, UnicodeError) as ret:
        # e.g. file locked by another program, unwritable directory,
        # or text the codec cannot encode.
        print(ret)
    return df
# Count the sex and province columns and isolate the signatures.
def anysys(df, top_n=15):
    """Summarise the friend DataFrame for charting and text analysis.

    Args:
        df: DataFrame with at least the columns ``Sex``, ``Province`` and
            ``Signature``.
        top_n: how many of the most frequent provinces to keep (default 15).

    Returns:
        tuple: ``(df_sex, df_province, df_signature)`` where the first two are
        one-column DataFrames of value counts (most frequent first, indexed by
        the value) and the third is a one-column DataFrame of the signatures.
    """
    df_sex = df['Sex'].value_counts().to_frame()
    df_province = df['Province'].value_counts().head(top_n).to_frame()
    df_signature = df[['Signature']]
    return df_sex, df_province, df_signature
# Draw a bar chart of the counts and save it.
def draw_chart(df_list, x_feature):
    """Plot *df_list* as a bar chart and save it under the name *x_feature*.

    Args:
        df_list: DataFrame whose index supplies the bar labels and whose
            values, flattened row by row, supply the bar heights.
        x_feature: used both as the legend label and as the file name passed
            to ``plt.savefig``.

    Drawing is best-effort: any failure is printed (with its cause) instead of
    being raised.
    """
    try:
        x = list(df_list.index)
        y = [value for row in df_list.values for value in row]
        plt.bar(x, y, label=x_feature)
        plt.legend()
        plt.savefig(x_feature)
    except Exception as err:
        print("绘图失败", err)
    finally:
        # Always close the figure; otherwise a failed save would leave these
        # bars on the current figure and they would leak into the next chart.
        plt.close()
# Flatten the signature DataFrame into a plain list of strings.
def getSignList(signature):
    """Return every string cell of *signature* as a flat list.

    Args:
        signature: DataFrame (or any object whose ``.values`` iterates rows of
            cells) holding the signatures.

    Returns:
        list[str]: the cells in row order, with characters outside the BMP
        replaced via ``non_bmp_map``. Non-string cells (e.g. NaN or None) are
        skipped because they cannot be translated.
    """
    return [
        cell.translate(non_bmp_map)
        for row in signature.values
        for cell in row
        if isinstance(cell, str)
    ]
# Tokenise the texts, then apply stop words, custom words and synonym merging.
def segmentWords(txtlist):
    """Segment each text with jieba and return the filtered, merged words.

    Reads three UTF-8 text files from the current working directory:

    * ``stopwords.txt``  - one stop word per line;
    * ``newdit.txt``     - custom words, loaded with ``jieba.load_userdict``;
    * ``unionWords.txt`` - one synonym group per line, entries separated by
      ``*``; every later entry is replaced by the first one.

    Args:
        txtlist: iterable of strings to segment.

    Returns:
        list[str]: words tagged ``n`` or ``eng`` that are not stop words,
        after synonym merging, in order of appearance.
    """
    # utf-8-sig transparently drops a leading BOM (\ufeff) if present.
    with open('stopwords.txt', encoding='utf-8-sig') as stop_file:
        stop_words = {line.strip() for line in stop_file}

    # Register the custom words before segmenting.
    jieba.load_userdict("newdit.txt")

    kept_flags = {'n', 'eng'}
    # Presumably residue of HTML markup embedded in signatures - TODO confirm.
    markup_words = {'span', 'class'}

    newslist = []
    for subject in txtlist:
        if not subject.strip():
            continue
        for word, flag in pseg.cut(subject):
            # All three conditions must hold. The un-parenthesised and/or
            # chain used previously let 'eng' stop words slip through.
            if (flag in kept_flags
                    and word not in stop_words
                    and word not in markup_words):
                newslist.append(word)

    # Merge the configured synonyms, one group (line) at a time so that later
    # groups see the result of earlier ones.
    with open('unionWords.txt', encoding='utf-8-sig') as union_file:
        for line in union_file:
            # strip() removes the trailing newline, which otherwise prevents
            # the last entry of every line from ever matching.
            canonical, *aliases = [part.strip() for part in line.split("*")]
            alias_set = {alias for alias in aliases if alias}
            if not canonical or not alias_set:
                continue
            newslist = [
                canonical if word in alias_set else word
                for word in newslist
            ]
    return newslist
# Print the most frequent words.
def countWords(newslist, top_n=100):
    """Print the most frequent words of *newslist*, one ``word:count`` per line.

    Words are listed by descending count; words with equal counts keep their
    first-seen order. Fewer than *top_n* distinct words (including none at
    all) simply produce fewer lines.

    Args:
        newslist: iterable of words.
        top_n: maximum number of words to print (default 100).
    """
    for word, count in Counter(newslist).most_common(top_n):
        print("{}:{}".format(word, count))
# Draw the word cloud.
def drawPlant(newslist):
    """Render *newslist* as a word cloud, save it and display it.

    The cloud is generated with the ``simhei.ttf`` font on a white
    1300x620 canvas (at most 200 words), written to ``wordcloud.jpg`` and
    shown in a matplotlib window. No mask image is applied; pass ``mask=`` to
    ``WordCloud`` to shape the cloud.

    Args:
        newslist: list of words; an empty list prints a notice and returns
            without drawing, since there is nothing to plot.
    """
    if not newslist:
        print("没有可用于生成词云的词语")
        return

    # Words must be separated by whitespace; joined without a separator they
    # would be counted as one giant token.
    content = ' '.join(newslist)
    wordcloud = WordCloud(
        font_path='simhei.ttf',
        background_color="white",
        width=1300,
        height=620,
        max_words=200,
    ).generate(content)

    # Display the generated image.
    plt.imshow(wordcloud)
    plt.axis("off")
    wordcloud.to_file('wordcloud.jpg')
    plt.show()
def main():
    """Run the whole pipeline: log in, export friends, chart and word-cloud them."""
    # Log in to WeChat. Per the original note, passing hotReload=True to
    # auto_login avoids having to scan the QR code again on later runs.
    itchat.auto_login()
    flists = getFriends()
    fdf = saveCSV(flists)
    df_sex, df_province, df_signature = anysys(fdf)
    draw_chart(df_sex, "性别")
    draw_chart(df_province, "省份")
    wordList = segmentWords(getSignList(df_signature))
    countWords(wordList)
    drawPlant(wordList)


# Run the pipeline only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
以上就是本文的全部内容,希望对大家的学习有所帮助,也希望大家多多支持毛票票。
声明:本文内容来源于网络,版权归原作者所有,内容由互联网用户自发贡献自行上传,本网站不拥有所有权,未作人工编辑处理,也不承担相关法律责任。如果您发现有涉嫌版权的内容,欢迎发送邮件至:czq8825#qq.com(发邮件时,请将#更换为@)进行举报,并提供相关证据,一经查实,本站将立刻删除涉嫌侵权内容。