1 数据预处理
1.1 各省份高校总分排名
# Top 10 provinces ranked on the summed "总分" column: sort ascending, reverse
# the row order, then keep the first ten rows.
df.groupby(by="省份").sum().sort_values(by="总分").iloc[::-1].head(10)
# The same top-10 query, asking sort_values for descending order directly.
(
    df.groupby(by="省份")
    .sum()
    .sort_values(by="总分", ascending=False)
    .head(10)
)
1.2 各省份高校个数
# Per-province non-null counts of every column, ranked on the "总分" count.
(
    df.groupby(by="省份")
    .count()
    .sort_values(by="总分", ascending=False)
    .head(10)
)

# Re-type the province column as a pandas categorical and inspect it.
province_cat = df["省份"].astype("category")
print(province_cat.values, "\n")
print(type(province_cat.values))
province_cat.values.categories  # the distinct province labels
province_cat.values.codes  # integer code assigned to each row's label
province_cat.value_counts().head(10)  # first ten entries of the per-label counts
1.3 整合数据
# Total score per province, highest first. Only the "总分" column is
# aggregated, so non-numeric columns never reach sum().
province_sum = df.groupby(by="省份")["总分"].sum().sort_values(ascending=False)
# Number of rows per province (the per-province university count). This
# assignment had been fused into a trailing comment, which left province_num
# undefined for the DataFrame construction that follows.
province_num = df["省份"].astype("category").value_counts()
将"总分"与"个数"两个 Series 组合为一个 DataFrame
# Assemble the per-province table: one column per input Series, aligned on
# the Series' indexes.
province = pd.DataFrame(
    data={
        "总分": province_sum,
        "个数": province_num,
    }
)
添加“平均分”列数据
# Average score per university in each province.
province_mean = province["总分"] / province["个数"]
# Add the "平均分" column. province_mean is already a Series on the province
# index, so it needs no extra pd.Series() wrapping.
province["平均分"] = province_mean
province.sort_values("平均分", ascending=False).head(10)

# Top-10 slices of the province table under each ranking. Sorting in
# descending order (rather than reversing an ascending sort) keeps any NaN
# rows at the bottom instead of at the top of the result.
top10_sum = province.sort_values("总分", ascending=False).head(10)
top10_num = province.sort_values("个数", ascending=False).head(10)
top10_mean = province.sort_values("平均分", ascending=False).head(10)

# Notebook-style display of the three tables.
top10_sum
top10_num
top10_mean
2 数据可视化
使用 pyecharts 绘制图表(柱状图、折线图、饼状图、散点图以及地图)
数据可视化
# NOTE(review): `top10` is not defined anywhere in this section; one of
# top10_sum / top10_num / top10_mean is presumably intended — confirm.
top10.index.to_list()  # row labels as a plain Python list
top10["总分"].to_list()  # "总分" values as a plain Python list
Emmm,先把总分转换为整数,再导出为列表
# Cast the "总分" column to integers before exporting it as a list.
# NOTE(review): `top10` is not defined in this section — confirm which
# top-10 table is intended.
top10["总分"].astype("int").to_list()
2.1 柱状图
from pyecharts import options as opts
from pyecharts.charts import Bar

# Bar chart of the "总分" column of top10_sum, one bar per province label.
# The `opts` import and the `bar = (` assignment had been fused onto one
# line, which was a syntax error; they are separate statements again.
bar = (
    Bar()
    .add_xaxis(top10_sum.index.tolist())
    .add_yaxis("总分", top10_sum["总分"].astype("int").tolist())
    .set_global_opts(
        title_opts=opts.TitleOpts(title="中国最好大学TOP10(各省份)", subtitle="总分")
    )
)
bar.render_notebook()
2.2 折线图
from pyecharts import options as opts
from pyecharts.charts import Line

# Line chart of the "总分" column of top10_sum over the province labels.
# The `opts` import and the `line = (` assignment had been fused onto one
# line, which was a syntax error; they are separate statements again.
line = (
    Line()
    .add_xaxis(top10_sum.index.tolist())
    .add_yaxis("总分", top10_sum["总分"].astype("int").tolist())
    .set_global_opts(
        title_opts=opts.TitleOpts(title="中国最好大学TOP10(各省份)", subtitle="总分")
    )
)
line.render_notebook()
2.3 饼状图
from pyecharts import options as opts
from pyecharts.charts import Pie

# Rose-type ring chart built from [province label, integer total score] pairs
# of top10_sum, with a scrollable vertical legend.
# The `opts` import and the `pie = (` assignment had been fused onto one
# line, which was a syntax error; they are separate statements again.
pie = (
    Pie()
    .add(
        "",
        [
            list(z)
            for z in zip(
                top10_sum.index.tolist(),
                top10_sum["总分"].astype("int").tolist(),
            )
        ],
        radius=["30%", "75%"],
        center=["40%", "50%"],
        rosetype="radius",
    )
    .set_global_opts(
        title_opts=opts.TitleOpts(title="中国最好大学TOP10(各省份)", subtitle="总分"),
        legend_opts=opts.LegendOpts(type_="scroll", pos_left="80%", orient="vertical"),
    )
)
pie.render_notebook()
2.4 散点图
from pyecharts import options as opts
from pyecharts.charts import Scatter

# Scatter chart of the "总分" column of top10_sum over the province labels.
# The `opts` import and the `scatter = (` assignment had been fused onto one
# line, which was a syntax error; they are separate statements again.
scatter = (
    Scatter()
    .add_xaxis(top10_sum.index.tolist())
    .add_yaxis("总分", top10_sum["总分"].astype("int").tolist())
    .set_global_opts(
        title_opts=opts.TitleOpts(title="中国最好大学TOP10(各省份)", subtitle="总分")
    )
)
scatter.render_notebook()
2.5 基于Geo的高校分布图
去掉省份名称末尾的"省"字(代码直接截掉每个名称的最后一个字符)
# Inspect the province labels and preview dropping the last character of one.
print(top10_sum.index.tolist())
print(top10_sum.index.tolist()[1])
print(top10_sum.index.tolist()[1][:-1])

# Drop the final character of every label. Iterating the labels directly
# (instead of a fixed range(10)) also works when fewer than ten rows exist.
# NOTE(review): this assumes every label ends in a one-character suffix such
# as "省" — confirm against the data.
top10_sum_index = [name[:-1] for name in top10_sum.index.tolist()]
top10_sum_index
from pyecharts import options as opts
from pyecharts.charts import Geo
from pyecharts.globals import ChartType, SymbolType

# Shortened province labels paired with their integer total scores.
city = top10_sum_index
value = top10_sum["总分"].astype("int").tolist()

# Geo chart on the "china" map with a piecewise visual map capped at 1300;
# series labels are hidden.
# Two fused lines (`...tolist()from pyecharts ...` and `SymbolTypegeo = (`)
# were syntax errors; they are separate statements again.
geo = (
    Geo()
    .add_schema(maptype="china")
    .add("高校分布图", [list(z) for z in zip(city, value)])
    .set_global_opts(
        visualmap_opts=opts.VisualMapOpts(is_piecewise=True, max_=1300),
        title_opts=opts.TitleOpts(title="各省高校总分排名"),
    )
    .set_series_opts(label_opts=opts.LabelOpts(is_show=False))
)
geo.render_notebook()
2.6 基于Map的高校分布图
from pyecharts import options as opts  # chart configuration options
from pyecharts.charts import Map  # map chart for geographic-region data

# Map chart of the (city, value) pairs on the "china" map, with the visual
# map capped at 1300.
# The `_map = (` assignment had been swallowed by a trailing comment and the
# closing `)` was fused with the render call; both are restored here.
_map = (
    Map()
    .add("高校分布图", [list(z) for z in zip(city, value)], "china")
    .set_global_opts(
        title_opts=opts.TitleOpts(title="各省高校总分排名"),
        visualmap_opts=opts.VisualMapOpts(max_=1300),
    )
)
_map.render_notebook()