1. Apache ECharts
说到pyecharts,就不得不先介绍一下ECharts。
ECharts是一个百度开源项目,是百度为数不多的良心产品之一。它是一个使用JavaScript 实现的开源可视化库,底层依托了开源渲染引擎 ZRender,支持 Canvas 和 SVG 等多种方式的渲染,提供直观,交互丰富,可高度个性化定制的数据可视化图表,可以流畅地运行在 PC 和移动设备上,兼容当前绝大部分浏览器。
在2018年,ECharts成功进入了Apache 孵化器,成为百度首个进入国际顶级开源社区的项目。
而pyecharts 是一个用于生成 ECharts 图表的类库,其实就是Python和ECharts的对接。
2. pyecharts的特性
简洁的 API 设计,使用如丝滑般流畅,支持链式调用
囊括了 30+ 种常见图表,应有尽有
支持主流 Notebook 环境,Jupyter Notebook 和 JupyterLab
可轻松集成至 Flask,Django 等主流 Web 框架
高度灵活的配置项,可轻松搭配出精美的图表
详细的文档和示例,帮助开发者更快地上手项目
多达 400+ 地图文件以及原生的百度地图,为地理数据可视化提供强有力的支持
二、环境搭建
1. 搭建Python3环境
请参考:
https://www.runoob.com/python3/python3-install.html
2. 安装pyecharts
pyecharts 分为 v0.5.X 和 v1 两个大版本,v0.5.X 和 v1 间不兼容。因为v0.5.X已经不再维护,因此推荐安装v1最新版本。
使用pip安装
$ pip install -U pyecharts
或者使用源码安装
$ git clone https://github.com/pyecharts/pyecharts.git
$ cd pyecharts
$ pip install -r requirements.txt
$ python setup.py install
from pyecharts import options as opts
from pyecharts.charts import Pie
from pyecharts.faker import Faker
# Basic pie chart built from Faker's random sample data, rendered to HTML.
c = (
    Pie()
    # Each data item is a [name, value] pair.
    .add("", [list(z) for z in zip(Faker.choose(), Faker.values())])
    .set_global_opts(title_opts=opts.TitleOpts(title="Pie-基本示例"))
    # Label template: {b} is the data item name, {c} is its value.
    .set_series_opts(label_opts=opts.LabelOpts(formatter="{b}: {c}"))
    .render("饼状图基本示例.html")
)
其中[Faker.choose()](#1. Faker函数库)和[Faker.values()](#1. Faker函数库)为pyecharts提供的随机假数据生成方法。
2. 柱状图 ( Bar )
from pyecharts import options as opts
from pyecharts.charts import Bar
from pyecharts.faker import Faker
# Basic bar chart: one category axis and two series taken from Faker.values().
c = (
    Bar()
    .add_xaxis(Faker.choose())
    .add_yaxis("商家A", Faker.values())
    .add_yaxis("商家B", Faker.values())
    .set_global_opts(title_opts=opts.TitleOpts(title="Bar-基本示例", subtitle="我是副标题"))
    .render("柱状图基本示例.html")
)
3. 折线图 ( Line )
import pyecharts.options as opts
from pyecharts.charts import Line
from pyecharts.faker import Faker
# Basic line chart: one category axis and two series taken from Faker.values().
c = (
    Line()
    .add_xaxis(Faker.choose())
    .add_yaxis("商家A", Faker.values())
    .add_yaxis("商家B", Faker.values())
    .set_global_opts(title_opts=opts.TitleOpts(title="Line-基本示例"))
    .render("折线图基本示例.html")
)
4. 3D柱状图 ( Bar3D )
import random
from pyecharts import options as opts
from pyecharts.charts import Bar3D
from pyecharts.faker import Faker
# (i, j, value) triples for i in 0..5 and j in 0..23, with a random value in 0..12.
data = [(i, j, random.randint(0, 12)) for i in range(6) for j in range(24)]
c = (
    Bar3D()
    .add(
        # Series name (left empty in this example).
        "",
        # Reorder each triple to [j, i, value] before handing it to the chart.
        [[d[1], d[0], d[2]] for d in data],
        xaxis3d_opts=opts.Axis3DOpts(Faker.clock, type_="category"),
        yaxis3d_opts=opts.Axis3DOpts(Faker.week_en, type_="category"),
        zaxis3d_opts=opts.Axis3DOpts(type_="value"),
    )
    .set_global_opts(
        visualmap_opts=opts.VisualMapOpts(max_=20),
        title_opts=opts.TitleOpts(title="Bar3D-基本示例"),
    )
    .render("3D柱状图基本示例.html")
)
其中[Faker.clock](#1. Faker函数库)和[Faker.week_en](#1. Faker函数库)为pyecharts提供的假数据。
5. 日历图 ( Calendar )
import datetime
import random
from pyecharts import options as opts
from pyecharts.charts import Calendar
# The generated dates use the same year as CalendarOpts(range_=...) and the
# title below, so that the data lines up with the calendar being drawn.
begin = datetime.date(2020, 1, 1)
end = datetime.date(2020, 12, 31)
# One [ISO date string, random count in 1000..25000] entry per day, end inclusive.
data = [
    [str(begin + datetime.timedelta(days=i)), random.randint(1000, 25000)]
    for i in range((end - begin).days + 1)
]
c = (
    Calendar()
    .add("", data, calendar_opts=opts.CalendarOpts(range_="2020"))
    .set_global_opts(
        title_opts=opts.TitleOpts(title="Calendar-2020年微信步数情况"),
        visualmap_opts=opts.VisualMapOpts(
            max_=20000,
            min_=500,
            orient="horizontal",
            is_piecewise=True,
            pos_top="230px",
            pos_left="100px",
        ),
    )
    .render("日历图基本示例.html")
)
6. 仪表盘 ( Gauge )
from pyecharts import options as opts
from pyecharts.charts import Gauge
# Basic gauge showing a single (label, value) pair.
c = (
    Gauge()
    .add("", [("完成率", 66.6)])
    .set_global_opts(title_opts=opts.TitleOpts(title="Gauge-基本示例"))
    .render("仪表盘基本示例.html")
)
7. 地理坐标 ( Geo )
from pyecharts import options as opts
from pyecharts.charts import Geo
from pyecharts.globals import ChartType, SymbolType
# Geo chart on the China map: an effect-scatter series of city values plus a
# lines series drawn from 广州 to four other cities.
c = (
    Geo()
    .add_schema(maptype="china")
    .add(
        # Series name (left empty for the scatter series).
        "",
        [("广州", 55), ("北京", 66), ("杭州", 77), ("重庆", 88)],
        type_=ChartType.EFFECT_SCATTER,
        color="white",
    )
    .add(
        "geo",
        # (source city, target city) pairs.
        [("广州", "上海"), ("广州", "北京"), ("广州", "杭州"), ("广州", "重庆")],
        type_=ChartType.LINES,
        effect_opts=opts.EffectOpts(
            symbol=SymbolType.ARROW, symbol_size=6, color="blue"
        ),
        linestyle_opts=opts.LineStyleOpts(curve=0.2),
    )
    .set_series_opts(label_opts=opts.LabelOpts(is_show=False))
    .set_global_opts(title_opts=opts.TitleOpts(title="Geo-Lines"))
    .render("地理坐标示例.html")
)
8. 关系图 ( Graph )
from pyecharts import options as opts
from pyecharts.charts import Graph
# Graph nodes: each has a display name and a symbol size.
nodes = [
    {"name": "结点1", "symbolSize": 10},
    {"name": "结点2", "symbolSize": 20},
    {"name": "结点3", "symbolSize": 30},
    {"name": "结点4", "symbolSize": 40},
    {"name": "结点5", "symbolSize": 50},
    {"name": "结点6", "symbolSize": 40},
    {"name": "结点7", "symbolSize": 30},
    {"name": "结点8", "symbolSize": 20},
]
# Fully connected link list: one link for every ordered pair of nodes,
# including each node paired with itself.
links = [
    {"source": src.get("name"), "target": dst.get("name")}
    for src in nodes
    for dst in nodes
]
# Basic relationship graph drawn from the nodes and links defined above.
c = (
    Graph()
    .add("", nodes, links, repulsion=8000)
    .set_global_opts(title_opts=opts.TitleOpts(title="Graph-基本示例"))
    .render("关系图基本示例.html")
)
9. 水球图 ( Liquid )
from pyecharts import options as opts
from pyecharts.charts import Liquid
# Liquid-fill chart with three values and the outline switched off.
c = (
    Liquid()
    .add("lq", [0.6, 0.7, 0.8], is_outline_show=False)
    .set_global_opts(title_opts=opts.TitleOpts(title="Liquid-无边框"))
    .render("无边框水球图示例.html")
)
10. 表格组件 ( Table )
from pyecharts.components import Table
from pyecharts.options import ComponentTitleOpts
# Table component: a header row plus data rows, rendered to HTML.
table = Table()
headers = ["City name", "Area", "Population", "Annual Rainfall"]
# Each row supplies one value per header column, in header order.
rows = [
    ["Brisbane", 5905, 1857594, 1146.4],
    ["Adelaide", 1295, 1158259, 600.5],
    ["Darwin", 112, 120900, 1714.7],
    ["Hobart", 1357, 205556, 619.5],
    ["Sydney", 2058, 4336374, 1214.8],
    ["Melbourne", 1566, 3806092, 646.9],
    ["Perth", 5386, 1554769, 869.4],
]
table.add(headers, rows)
table.set_global_opts(
    title_opts=ComponentTitleOpts(title="Table-基本示例", subtitle="我是副标题支持换行哦")
)
table.render("表格组件示例.html")
11. 组合组件 ( Grid )
from pyecharts import options as opts
from pyecharts.charts import Bar, Grid, Line
# Month labels "1月" .. "12月" used as the category axis.
x_data = ["{}月".format(i) for i in range(1, 13)]
# Bar chart with two series, each bound to its own y axis through yaxis_index.
bar = (
    Bar()
    .add_xaxis(x_data)
    .add_yaxis(
        "蒸发量",
        [2.0, 4.9, 7.0, 23.2, 25.6, 76.7, 135.6, 162.2, 32.6, 20.0, 6.4, 3.3],
        yaxis_index=0,
        color="#d14a61",
    )
    .add_yaxis(
        "降水量",
        [2.6, 5.9, 9.0, 26.4, 28.7, 70.7, 175.6, 182.2, 48.7, 18.8, 6.0, 2.3],
        yaxis_index=1,
        color="#5793f3",
    )
    # First additional y axis.
    .extend_axis(
        yaxis=opts.AxisOpts(
            name="蒸发量",
            type_="value",
            min_=0,
            max_=250,
            position="right",
            axisline_opts=opts.AxisLineOpts(
                linestyle_opts=opts.LineStyleOpts(color="#d14a61")
            ),
            axislabel_opts=opts.LabelOpts(formatter="{value} ml"),
        )
    )
    # Second additional y axis.
    .extend_axis(
        yaxis=opts.AxisOpts(
            type_="value",
            name="温度",
            min_=0,
            max_=25,
            position="left",
            axisline_opts=opts.AxisLineOpts(
                linestyle_opts=opts.LineStyleOpts(color="#675bba")
            ),
            axislabel_opts=opts.LabelOpts(formatter="{value} °C"),
            splitline_opts=opts.SplitLineOpts(
                is_show=True, linestyle_opts=opts.LineStyleOpts(opacity=1)
            ),
        )
    )
    .set_global_opts(
        # offset=80 is applied to this right-positioned axis; the axis added by
        # the first extend_axis call is also positioned on the right.
        yaxis_opts=opts.AxisOpts(
            name="降水量",
            min_=0,
            max_=250,
            position="right",
            offset=80,
            axisline_opts=opts.AxisLineOpts(
                linestyle_opts=opts.LineStyleOpts(color="#5793f3")
            ),
            axislabel_opts=opts.LabelOpts(formatter="{value} ml"),
        ),
        title_opts=opts.TitleOpts(title="Grid-多 Y 轴示例"),
        tooltip_opts=opts.TooltipOpts(trigger="axis", axis_pointer_type="cross"),
    )
)
# Line series bound to the y axis at index 2, with point labels hidden.
line = (
    Line()
    .add_xaxis(x_data)
    .add_yaxis(
        "平均温度",
        [2.0, 2.2, 3.3, 4.5, 6.3, 10.2, 20.3, 23.4, 23.0, 16.5, 12.0, 6.2],
        yaxis_index=2,
        color="#675bba",
        label_opts=opts.LabelOpts(is_show=False),
    )
)
# Overlay the line on the bar chart, then place the combined chart in a grid
# with left/right margins and render it.
bar.overlap(line)
grid = Grid()
grid.add(bar, opts.GridOpts(pos_left="5%", pos_right="20%"), is_control_axis_index=True)
grid.render("组合组件示例.html")
12. 时间轴组件 ( Timeline )
from pyecharts import options as opts
from pyecharts.charts import Bar, Timeline
from pyecharts.faker import Faker
# One category list shared by every frame of the timeline.
x = Faker.choose()
tl = Timeline()
# Build one bar chart per year from 2015 through 2019 and add it to the
# timeline under a "<year>年" label.
for i in range(2015, 2020):
    bar = (
        Bar()
        .add_xaxis(x)
        .add_yaxis("商家A", Faker.values())
        .add_yaxis("商家B", Faker.values())
        .set_global_opts(title_opts=opts.TitleOpts("某商店{}年营业额".format(i)))
    )
    tl.add(bar, "{}年".format(i))
tl.render("时间轴组件示例.html")
# Sample datasets used by the examples (e.g. Faker.clock, Faker.week_en).
clothes = ["衬衫", "毛衣", "领带", "裤子", "风衣", "高跟鞋", "袜子"]
drinks = ["可乐", "雪碧", "橙汁", "绿茶", "奶茶", "百威", "青岛"]
phones = ["小米", "三星", "华为", "苹果", "魅族", "VIVO", "OPPO"]
fruits = ["草莓", "芒果", "葡萄", "雪梨", "西瓜", "柠檬", "车厘子"]
animal = ["河马", "蟒蛇", "老虎", "大象", "兔子", "熊猫", "狮子"]
cars = ["宝马", "法拉利", "奔驰", "奥迪", "大众", "丰田", "特斯拉"]
dogs = ["哈士奇", "萨摩耶", "泰迪", "金毛", "牧羊犬", "吉娃娃", "柯基"]
# Eleven hex color strings.
visual_color = [
    "#313695", "#4575b4", "#74add1", "#abd9e9", "#e0f3f8", "#ffffbf",
    "#fee090", "#fdae61", "#f46d43", "#d73027", "#a50026",
]
week = ["周一", "周二", "周三", "周四", "周五", "周六", "周日"]
week_en = "Saturday Friday Thursday Wednesday Tuesday Monday Sunday".split()
# 24 hour labels, "12a" .. "11p"; the two adjacent literals form one string.
clock = (
    "12a 1a 2a 3a 4a 5a 6a 7a 8a 9a 10a 11a 12p "
    "1p 2p 3p 4p 5p 6p 7p 8p 9p 10p 11p".split()
)
months = ["{}月".format(i) for i in range(1, 13)]
# Note: the day labels start at "0天" because range(30) starts at 0.
days_attrs = ["{}天".format(i) for i in range(30)]
days_values = [random.randint(1, 30) for _ in range(30)]
provinces = ["广东", "北京", "上海", "江西", "湖南", "浙江", "江苏"]
guangdong_city = ["汕头市", "汕尾市", "揭阳市", "阳江市", "肇庆市", "广州市", "惠州市"]
country = ["China", "Canada", "Brazil", "Russia", "United States", "Africa", "Germany"]
随机生成数据的方法
choose:随机元素
def choose(self) -> list:
    """Return one of the sample category lists, picked at random.

    The candidates are the ``clothes``, ``drinks``, ``phones``, ``fruits``,
    ``animal``, ``dogs`` and ``week`` attributes of ``self``.
    """
    # random.choice takes a single sequence, so the candidates are wrapped
    # in one list rather than passed as separate arguments.
    return random.choice(
        [
            self.clothes,
            self.drinks,
            self.phones,
            self.fruits,
            self.animal,
            self.dogs,
            self.week,
        ]
    )
values:随机值
@staticmethod
def values(start: int = 20, end: int = 150) -> list:
    """Return seven random integers, each in the inclusive range [start, end]."""
    return [random.randint(start, end) for _ in range(7)]
rand_color:随机颜色
@staticmethod
def rand_color() -> str:
    """Return one hex color string picked at random from a fixed palette."""
    # random.choice takes a single sequence, so the palette is one list.
    return random.choice(
        [
            "#c23531", "#2f4554", "#61a0a8", "#d48265", "#749f83", "#ca8622", "#bda29a", "#6e7074", "#546570",
            "#c4ccd3", "#f05b72", "#444693", "#726930", "#b2d235", "#6d8346", "#ac6767", "#1d953f", "#6950a1",
        ]
    )
# height: height of the chart canvas
# renderer: rendering style, "canvas" or "svg"
Bar(
    init_opts=opts.InitOpts(
        width="1200px",
        height="800px",
        renderer=RenderType.CANVAS,
        page_title="网页标题",
        bg_color="#24c92c",
    )
)
2.2 标题
# pos_left: distance between the title component and the left side of the
# container. It is an option of TitleOpts, so it is passed there rather than
# to set_global_opts.
Bar().set_global_opts(
    title_opts=opts.TitleOpts(
        title="主标题",
        subtitle="副标题",
        title_link="主标题链接",
        subtitle_link="副标题链接",
        pos_left="20%",
    )
)
2.3 图例
# Legend configuration: scroll-type legend, laid out vertically.
Bar().set_global_opts(
    legend_opts=opts.LegendOpts(
        type_="scroll",
        is_show=True,
        orient="vertical",
        pos_left="20%",
    )
)
2.4 提示框
# trigger: trigger type - 'item' fires on a data item graphic, 'axis' fires on an axis
# trigger_on: trigger condition - 'mousemove', 'click', or 'mousemove|click' for both
# axis_pointer_type: indicator type - line, shadow, cross, none
# hide_delay: delay before the tooltip layer is hidden, in ms
Bar().set_global_opts(
    tooltip_opts=opts.TooltipOpts(
        is_show=True,
        trigger="item",
        trigger_on="mousemove|click",
        axis_pointer_type="line",
        hide_delay=100,
    )
)
2.5 工具箱
# Toolbox configuration; `feature` selects which tools the toolbox offers.
Bar().set_global_opts(
    toolbox_opts=opts.ToolboxOpts(
        is_show=True,
        orient="vertical",
        pos_left="20%",
        feature=opts.ToolBoxFeatureOpts(),
    )
)
class ToolBoxFeatureOpts(
# 保存为图片
save_as_image: Union[ToolBoxFeatureSaveAsImageOpts, dict] = ToolBoxFeatureSaveAsImageOpts(),
# 配置项还原
restore: Union[ToolBoxFeatureRestoreOpts, dict] = ToolBoxFeatureRestoreOpts(),
# 数据视图工具,可以展现当前图表所用的数据,编辑后可以动态更新
data_view: Union[ToolBoxFeatureDataViewOpts, dict] = ToolBoxFeatureDataViewOpts(),
# 数据区域缩放。(目前只支持直角坐标系的缩放)
data_zoom: Union[ToolBoxFeatureDataZoomOpts, dict] = ToolBoxFeatureDataZoomOpts(),
# 动态类型切换。
magic_type: Union[ToolBoxFeatureMagicTypeOpts, dict] = ToolBoxFeatureMagicTypeOpts(),
# 选框组件的控制按钮。
brush: Union[ToolBoxFeatureBrushOpts, dict] = ToolBoxFeatureBrushOpts(),
3. 系统配置项
3.1 文字样式配置项
# Text style applied to the main title below.
title_textstyle_opts = opts.TextStyleOpts(
    color="red",
    font_style="italic",
    font_weight="bold",
    font_family="Arial",
    font_size=11,
)
Bar().set_global_opts(
    title_opts=opts.TitleOpts(title="主标题", title_textstyle_opts=title_textstyle_opts)
)
3.2 标签配置项
# rotate: label rotation, from -90 to 90 degrees; positive values rotate counter-clockwise
# formatter: label content template - {a} series name, {b} data item name, {c} value, {d} percentage
Bar().set_series_opts(
    label_opts=opts.LabelOpts(
        is_show=True,
        color=None,
        font_size=12,
        rotate=0,
        formatter="{b} : {d}%",
    )
)
3.3 线样式配置项
# opacity: transparency, a number from 0 to 1; the shape is not drawn when it is 0
# curve: curvature of the line; 0 means not curved at all
# type_: line type - 'solid', 'dashed', 'dotted'
Bar().set_series_opts(
    linestyle_opts=opts.LineStyleOpts(
        is_show=True,
        width=10,
        opacity=0.5,
        curve=0,
        type_="solid",
        color=None,
    )
)
五、pyecharts与Web框架整合
pyecharts支持与Flask、Sanic、Tornado和Django框架整合在一起。
有兴趣的同学们可以去官网学习一下。
六、数据分析之pandas
1. 数据结构
Series:一维数组,与Numpy中的一维array类似。二者与Python基本的数据结构List也很相近。Series如今能保存不同种数据类型,字符串、boolean值、数字等都能保存在Series中。
Time-Series:以时间为索引的Series。
DataFrame:二维的表格型数据结构。很多功能与R中的data.frame类似。可以将DataFrame理解为Series的容器。
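Panel :三维的数组,可以理解为DataFrame的容器。
Panel4D:是像Panel一样的4维数据容器。
PanelND:拥有factory集合,可以创建像Panel4D一样N维命名容器的模块。
注意:Panel、Panel4D、PanelND 在新版 pandas 中均已被移除(其中 Panel 自 0.25.0 起移除),多维数据建议使用带 MultiIndex 的 DataFrame 或 xarray。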
2. 数据读取/输出
csv : pd.read_csv('data.csv')
excel : pd.read_excel('data.xlsx', index_col=0)
json : pd.read_json("data.json", encoding='utf-8', lines=True)
clipboard : pd.read_clipboard()
html : pd.read_html('data.html')
xml : pd.read_xml('data.xml')
sql : pd.read_sql('SELECT name, age FROM student', conn)
3. 数据处理
# Sample frame with five rows; the name 'zhao' appears twice.
df = pd.DataFrame(
    {
        'name': ['zhao', 'qian', 'sun', 'li', 'zhao'],
        'age': [10, 15, 22, 24, 33],
    }
)
去重 : df.drop_duplicates(subset=['name'])
过滤 : df[df.age>15]
统计 : df.value_counts(subset=['name'])
排序 : df.sort_values(by='name', ascending=False)
选取 : 某几行、某几列(at、iat、loc、iloc)
分组 : df.groupby(by="age").mean()
合并和拼接 : merge/concat