先展示一下实物图吧
不知道有没有人能猜出来这是哪个视频的弹幕
直接上代码
先是爬取视频弹幕
import re
import requests
def main():
    """Prompt for a Bilibili video URL and save that video's danmaku to 弹幕.txt.

    The video page is downloaded, the first ``"cid":<value>,`` occurrence in
    its text is taken as the danmaku id, the matching XML document is fetched
    from comment.bilibili.com, and its comments are written out by
    ``savelist``.

    Raises:
        SystemExit: if no ``"cid"`` field can be found in the page text.
    """
    print("输入想爬取的b站视频网址:")
    url = input()
    # Go through get_Html so the page request carries the same User-Agent
    # header as the danmaku request below.
    res = get_Html(url)
    # cid is the id that the danmaku XML is keyed by.
    cids = re.findall(r'"cid":(.*?),', res.text)
    if not cids:
        # Fail with a readable message instead of an IndexError from [0].
        raise SystemExit("未能在页面中找到cid,请检查视频网址是否正确")
    cid = cids[0]
    dmurl = f'https://comment.bilibili.com/{cid}.xml'
    datalist = get_Html(dmurl)
    danmu_xml = datalist.content.decode("utf-8")
    savelist(danmu_xml)
def get_Html(url, timeout=10):
    """Send a GET request for *url* with a desktop-browser User-Agent.

    Args:
        url: Address to download.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout a stalled connection would block the program forever.

    Returns:
        The ``requests.Response`` object for the request.
    """
    # Replace this value with your own browser's User-Agent if you like.
    headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/79.0.3945.130 Safari/537.36'}
    response = requests.get(url, headers=headers, timeout=timeout)
    return response
def savelist(list):
    """Extract every danmaku comment from the XML text and write it to 弹幕.txt.

    Each ``<d p="...">text</d>`` element contributes one output line. XML
    character entities (``&amp;``, ``&lt;`` ...) are decoded so the saved text
    matches what viewers actually typed.

    Args:
        list: The danmaku XML document as a string. The name shadows the
            builtin; it is kept unchanged so existing callers keep working.
    """
    import html  # local import: only this function needs it

    danmu = re.compile(r'<d p=".*?">(.*?)</d>')
    comments = danmu.findall(list)
    # The context manager closes the file even if a write fails.
    with open("弹幕.txt", "w", encoding="utf-8") as out:
        out.writelines(html.unescape(text) + "\n" for text in comments)
# Run the scraper only when this file is executed directly as a script.
if __name__ == "__main__":
    main()
    print("爬取完毕")
然后是制作词云代码
可能出现的报错:
缺少stoplist.txt文件,自行创建即可。它的作用是在制作词云时排除你不想要的词,每个词占一行
如果你想制作一个特定图案的词云,就可以将第六步中的两行代码的注释去掉,并添加你的图片文件,最好是颜色相差较大的图片,不然制作出来的词云效果不好,如果效果不好的话可以用PS把图片背景去掉,留下你想要的图案
# 1. Imports
import warnings

import pandas as pd
import jieba
from wordcloud import WordCloud
import matplotlib.pyplot as plt
from imageio import imread  # only used when the optional mask lines in step 6 are enabled

warnings.filterwarnings("ignore")

# 2. Read the danmaku text and tokenize every whitespace-separated chunk with jieba.lcut()
with open("弹幕.txt", encoding="utf-8") as f:
    txt = f.read()
txt = txt.split()
txt = [i.upper() for i in txt]
data_cut = [jieba.lcut(x) for x in txt]

# 3. Read the stop words
with open("stoplist.txt", encoding="utf-8") as f:
    stop = f.read()
# A set makes the per-token membership test below O(1) instead of a linear scan.
stop = {" ", *stop.split()}

# 4. Drop the stop words from every tokenized comment
s_data_cut = pd.Series(data_cut)
all_words_after = s_data_cut.apply(lambda x: [i for i in x if i not in stop])

# 5. Count word frequencies
all_words = []
for i in all_words_after:
    all_words.extend(i)
word_count = pd.Series(all_words).value_counts()

# 6. Draw the word cloud
# 1) Optional: read a background picture to use as the shape mask.
#    Uncomment the next line AND the `mask=` argument below to enable it.
# back_picture = imread("1.jpg")
# 2) Configure the word cloud
wc = WordCloud(
    font_path="simhei.ttf",
    background_color="white",
    max_words=1000,
    # mask=back_picture,
    max_font_size=150,
    random_state=42,
)
# fit_words is documented to take a word -> frequency dict.
wc2 = wc.fit_words(word_count.to_dict())
# 3) Show the image, then save it
plt.figure(figsize=(16, 8))
plt.imshow(wc2)
plt.axis("off")
plt.show()
wc.to_file("ciyun.png")
之后就要制作ui界面了
可以跟着这个视频学习一下点我
我这个只是一个示例,以作参考,如果需要做出一个界面的话还是得自己学习一下
文件类型为 .ui
<?xml version="1.0" encoding="UTF-8"?>
<!--
  Qt Designer form (ui version 4.0) describing a QWidget named "Form" whose
  window title is "弹幕词云".
  The vertical layout holds, top to bottom: an expanding spacer, a row with the
  QTextEdit "textEdit" (the URL input, with placeholder text), another spacer,
  a row with the QPushButton "pushButton", and a final spacer.
  The style sheet sets a hover font and colour for QPushButton and applies
  url(bi.jpg) as the background image of every widget.
  NOTE(review): bi.jpg is a relative path, so the image presumably has to sit
  in the working directory of the program that loads this form; confirm.
-->
<ui version="4.0"><class>Form</class><widget class="QWidget" name="Form"><property name="geometry"><rect><x>0</x><y>0</y><width>645</width><height>496</height></rect></property><property name="minimumSize"><size><width>0</width><height>0</height></size></property><property name="windowTitle"><string>弹幕词云</string></property><property name="styleSheet"><string notr="true">QPushButton:hover{font-family:微软雅黑;font-size:15px;color:#1d649c;
}
*{background-image:url(bi.jpg);
}</string></property><layout class="QVBoxLayout" name="verticalLayout" stretch="0,0,0,0,0"><item><spacer name="verticalSpacer"><property name="orientation"><enum>Qt::Vertical</enum></property><property name="sizeType"><enum>QSizePolicy::Expanding</enum></property><property name="sizeHint" stdset="0"><size><width>20</width><height>150</height></size></property></spacer></item><item><layout class="QHBoxLayout" name="horizontalLayout_2" stretch="1,0,20"><item><spacer name="horizontalSpacer"><property name="orientation"><enum>Qt::Horizontal</enum></property><property name="sizeHint" stdset="0"><size><width>40</width><height>20</height></size></property></spacer></item><item><widget class="QTextEdit" name="textEdit"><property name="minimumSize"><size><width>280</width><height>10</height></size></property><property name="maximumSize"><size><width>16777215</width><height>30</height></size></property><property name="placeholderText"><string>请输入b站视频网址:</string></property></widget></item><item><spacer name="horizontalSpacer_2"><property name="orientation"><enum>Qt::Horizontal</enum></property><property name="sizeHint" stdset="0"><size><width>40</width><height>20</height></size></property></spacer></item></layout></item><item><spacer name="verticalSpacer_2"><property name="orientation"><enum>Qt::Vertical</enum></property><property name="sizeType"><enum>QSizePolicy::Expanding</enum></property><property name="sizeHint" stdset="0"><size><width>20</width><height>89</height></size></property></spacer></item><item><layout class="QHBoxLayout" name="horizontalLayout" stretch="2,1,7"><item><spacer name="horizontalSpacer_4"><property name="orientation"><enum>Qt::Horizontal</enum></property><property name="sizeHint" stdset="0"><size><width>40</width><height>20</height></size></property></spacer></item><item><widget class="QPushButton" name="pushButton"><property name="sizePolicy"><sizepolicy hsizetype="Maximum" 
vsizetype="Fixed"><horstretch>0</horstretch><verstretch>0</verstretch></sizepolicy></property><property name="layoutDirection"><enum>Qt::LeftToRight</enum></property><property name="text"><string>制作词云</string></property></widget></item><item><spacer name="horizontalSpacer_3"><property name="orientation"><enum>Qt::Horizontal</enum></property><property name="sizeHint" stdset="0"><size><width>40</width><height>20</height></size></property></spacer></item></layout></item><item><spacer name="verticalSpacer_3"><property name="orientation"><enum>Qt::Vertical</enum></property><property name="sizeType"><enum>QSizePolicy::Expanding</enum></property><property name="sizeHint" stdset="0"><size><width>20</width><height>89</height></size></property></spacer></item></layout></widget><resources/><connections/>
</ui>
最后是汇总的代码
这里如果你直接复制粘贴的话必出错,就算你用了我上面的 .ui文件。因为我在 .ui文件里还设置了背景图片,所以这段代码也是用于参考。
汇总的原因是方便打包文件,制作成 .exe文件,可以发送给别人运行,即使电脑上没有python环境。可以根据这篇文章制作 .exe点我
from PySide2.QtWidgets import QApplication,QMessageBox
from PySide2.QtUiTools import QUiLoader
from PySide2.QtGui import QIcon
import re
import requests
import pandas as pd
import jieba
from wordcloud import WordCloud
import matplotlib.pyplot as plt
from imageio import imread
import warnings
warnings.filterwarnings("ignore")


class Stats:
    """Controller for the danmaku word-cloud window loaded from bilibili.ui.

    Clicking the form's ``pushButton`` reads a video URL from ``textEdit``,
    downloads that video's danmaku, saves them to 弹幕.txt and renders a word
    cloud to ciyun.png.
    """

    def __init__(self):
        """Load the Designer form and connect the button to ``handleCalc``."""
        self.ui = QUiLoader().load('bilibili.ui')
        self.ui.pushButton.clicked.connect(self.handleCalc)

    def handleCalc(self):
        """Fetch the danmaku for the URL typed in the form and draw the word cloud.

        Network failures and pages without a ``"cid"`` field are reported in a
        message box instead of surfacing as an uncaught exception in the slot.
        """
        url = self.ui.textEdit.toPlainText().strip()
        try:
            # Use get_Html so the page request carries the User-Agent header too.
            res = self.get_Html(url)
            # cid is the id that the danmaku XML is keyed by.
            cids = re.findall(r'"cid":(.*?),', res.text)
            if not cids:
                QMessageBox.warning(self.ui, "错误", "未能在页面中找到cid,请检查视频网址是否正确")
                return
            dmurl = f'https://comment.bilibili.com/{cids[0]}.xml'
            datalist = self.get_Html(dmurl)
        except requests.RequestException as err:
            QMessageBox.warning(self.ui, "错误", f"请求失败:{err}")
            return
        danmu_xml = datalist.content.decode("utf-8")
        self.savelist(danmu_xml)
        self.ciyun()

    def get_Html(self, url, timeout=10):
        """Send a GET request for *url* with a desktop-browser User-Agent.

        Args:
            url: Address to download.
            timeout: Seconds to wait before giving up, so a stalled
                connection cannot block the GUI forever.

        Returns:
            The ``requests.Response`` object for the request.
        """
        # Replace this value with your own browser's User-Agent if you like.
        headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/79.0.3945.130 Safari/537.36'}
        response = requests.get(url, headers=headers, timeout=timeout)
        return response

    def savelist(self, list):
        """Write the text of every ``<d p="...">`` element in *list* to 弹幕.txt.

        XML character entities are decoded so the saved text matches what
        viewers typed.

        Args:
            list: The danmaku XML document as a string. The name shadows the
                builtin; it is kept unchanged for existing callers.
        """
        import html  # local import: only this method needs it

        danmu = re.compile(r'<d p=".*?">(.*?)</d>')
        comments = danmu.findall(list)
        # The context manager closes the file even if a write fails.
        with open("弹幕.txt", "w", encoding="utf-8") as out:
            out.writelines(html.unescape(text) + "\n" for text in comments)

    def ciyun(self):
        """Build a word cloud from 弹幕.txt, show it and save it as ciyun.png.

        Words listed in stoplist.txt (whitespace-separated) are excluded.
        """
        with open("弹幕.txt", encoding="utf-8") as f:
            txt = f.read()
        txt = txt.split()
        txt = [i.upper() for i in txt]
        data_cut = [jieba.lcut(x) for x in txt]

        with open("stoplist.txt", encoding="utf-8") as f:
            stop = f.read()
        # A set makes the per-token membership test O(1).
        stop = {" ", *stop.split()}

        s_data_cut = pd.Series(data_cut)
        all_words_after = s_data_cut.apply(lambda x: [i for i in x if i not in stop])
        all_words = []
        for i in all_words_after:
            all_words.extend(i)
        word_count = pd.Series(all_words).value_counts()

        # 1) Optional: read a background picture to use as the shape mask.
        #    Uncomment the next line AND the `mask=` argument below to enable it.
        # back_picture = imread("EDG.jpg")
        # 2) Configure the word cloud
        wc = WordCloud(
            font_path="simhei.ttf",
            background_color="white",
            max_words=1000,
            # mask=back_picture,
            max_font_size=150,
            random_state=42,
        )
        # fit_words is documented to take a word -> frequency dict.
        wc2 = wc.fit_words(word_count.to_dict())
        # 3) Show the image, then save it
        plt.figure(figsize=(16, 8))
        plt.imshow(wc2)
        # Hide the pixel axes around the image.
        plt.axis("off")
        plt.show()
        wc.to_file("ciyun.png")


app = QApplication([])
app.setWindowIcon(QIcon('li.jpg'))
stats = Stats()
stats.ui.show()
app.exec_()
文章到这里就结束了,后面是一些废话
之前因为个人习惯原因丢失的代码现在基本上补回来了(能想起来的),之后如果能想起来还有哪些代码的话随缘补上吧,这次其实不是很想补,得花几天的时间,但想着顺便复习一下Qt designer的用法,就补了这次代码