数据集下载:数据集
运行工具:jupyter notebook
可视化部分需要用到 pyecharts==1.9.0。
如果已安装更低版本,需要先升级;如果尚未安装过 pyecharts,直接用 pip 安装即可(注意:pip 默认安装最新版本,如需与本文环境保持一致,请指定 pyecharts==1.9.0)。
!pip install --upgrade pyecharts

import pandas as pd
from pyecharts.charts import *
from pyecharts import options as opts
from pyecharts.globals import ThemeType
from pyecharts.globals import SymbolType
from pyecharts.commons.utils import JsCode
from pyecharts.charts import Line,Bar,Page,Map
# Load the marriage-registration table; it is read with the GB2312 codec.
marry_csv = '/home/mw/input/202108196091/结婚登记(万对).csv'
df_marry = pd.read_csv(marry_csv, encoding='gb2312')
df_marry.head()

# Load the divorce-registration table from the same directory, same codec.
divorce_csv = '/home/mw/input/202108196091/离婚登记(万对).csv'
df_divorce = pd.read_csv(divorce_csv, encoding='gb2312')
df_divorce.head()

# Per-column flag: True where the marriage table has at least one missing value.
df_marry.isna().any()

结婚登记数据没有缺失值。
# Per-column flag: True where the divorce table has at least one missing value.
df_divorce.isna().any()

离婚登记数据没有缺失值。
# Per-region 2019 figures: raw counts, each type's share of all registrations,
# and the divorce-to-marriage ratio (离结率).
# NOTE(review): rows of the two tables are paired by DataFrame index, which
# assumes both CSVs list the regions in the same order — confirm against the data.
marry_2019 = df_marry['2019年']
divorce_2019 = df_divorce['2019年']
total_2019 = marry_2019 + divorce_2019

df_tmp = pd.DataFrame()
df_tmp['地区'] = df_marry['地区']
df_tmp['结婚'] = marry_2019
df_tmp['离婚'] = divorce_2019
# Shares are expressed in percent and rounded to two decimals for display.
df_tmp['结婚占比'] = (marry_2019 * 100 / total_2019).round(2)
df_tmp['离婚占比'] = (divorce_2019 * 100 / total_2019).round(2)
# Divide the raw counts instead of the already-rounded percentages so the
# ratio does not inherit their rounding error.
df_tmp['离结率'] = (divorce_2019 / marry_2019).round(2)
df_tmp.head()

# Drop-shadow item style, nested under the 'normal' state key.
itemstyle = {
    'normal': dict(
        shadowColor='rgba(0, 0, 0, .5)',  # shadow colour
        shadowBlur=5,  # shadow blur size
        shadowOffsetY=2,  # shadow offset along the Y axis
        shadowOffsetX=2,  # shadow offset along the X axis
        borderColor='#fff',
    ),
}
# Stacked bar chart of the marriage and divorce shares per region, with the
# axes swapped so the bars run horizontally.
area = df_tmp['地区'].tolist()
marry_count = df_tmp['结婚占比'].tolist()
divorce_count = df_tmp['离婚占比'].tolist()

marry_bar_style = opts.ItemStyleOpts(color='#ed1941')
divorce_bar_style = opts.ItemStyleOpts(color='#009ad6')

b1 = Bar(init_opts=opts.InitOpts(width='800px', height='600px'))
b1.add_xaxis(area)
# Both series use the same stack id, so they are stacked into one bar.
b1.add_yaxis('结婚占比', marry_count, stack='stack1', itemstyle_opts=marry_bar_style)
b1.add_yaxis('离婚占比', divorce_count, stack='stack1', itemstyle_opts=divorce_bar_style)
b1.set_series_opts(
    label_opts=opts.LabelOpts(is_show=True, position='inside'),
)
b1.set_global_opts(
    legend_opts=opts.LegendOpts(
        pos_right='10%',
        pos_top='2%',
        orient='horizontal',
    ),
    title_opts=opts.TitleOpts(
        title='2019年各地区结婚离婚占比',
        pos_top='2%',
    ),
)
# Axes are swapped after the options are set, as in the original call order.
b1.reversal_axis()
b1.render_notebook()

# Order the regions from the highest divorce-to-marriage ratio to the lowest,
# then display the table with in-cell bars on the ratio column.
df_tmp = df_tmp.sort_values(by='离结率', ascending=False)
df_tmp.style.bar(subset=['离结率'], color='#ed1941')

# Horizontal bar chart of the 20 regions with the highest divorce-to-marriage
# ratio. Sorting ascending and keeping the last 20 rows puts the largest value last.
sort_info = df_tmp.sort_values(by='离结率', ascending=True)
top20_regions = sort_info['地区'].tolist()[-20:]
top20_ratios = sort_info['离结率'].tolist()[-20:]

# Drop shadow passed when the series is added.
top20_shadow_style = {
    'normal': {
        'shadowColor': 'rgba(0, 0, 0, .5)',  # shadow colour
        'shadowBlur': 5,  # shadow blur size
        'shadowOffsetY': 2,  # shadow offset along the Y axis
        'shadowOffsetX': 2,  # shadow offset along the X axis
        'borderColor': '#fff'
    }
}

# JavaScript snippet for a linear gradient from '#ed1941' to '#009ad6'.
top20_gradient_js = """new echarts.graphic.LinearGradient(0, 0, 0, 1, [{
offset: 0,
color: '#ed1941'
}, {
offset: 1,
color: '#009ad6'
}], false)"""

# Series-level style: gradient fill with rounded bar corners.
top20_bar_style = {
    "normal": {
        "color": JsCode(top20_gradient_js),
        "barBorderRadius": [30, 30, 30, 30],
        "shadowColor": "rgb(0, 160, 221)",
    }
}

# Labels sit inside the bars and read "<region>: <ratio>".
top20_label = opts.LabelOpts(
    position="insideLeft",
    font_size=10,
    vertical_align='middle',
    horizontal_align='left',
    font_weight='bold',
    formatter='{b}: {c}',
)

b1 = Bar(init_opts=opts.InitOpts(width='800px', height='600px'))
b1.add_xaxis(top20_regions)
b1.add_yaxis(
    '',
    top20_ratios,
    category_gap='30%',
    itemstyle_opts=top20_shadow_style,
)
b1.reversal_axis()
b1.set_global_opts(
    # Both axes are hidden; the in-bar labels carry the information.
    xaxis_opts=opts.AxisOpts(is_show=False),
    yaxis_opts=opts.AxisOpts(
        is_show=False,
        axisline_opts=opts.AxisLineOpts(is_show=False),
        axistick_opts=opts.AxisTickOpts(is_show=False),
    ),
    title_opts=opts.TitleOpts(
        title='离结率 TOP20',
        pos_left='9%',
        pos_top='4%',
        title_textstyle_opts=opts.TextStyleOpts(color='#ed1941', font_size=16),
    ),
    visualmap_opts=opts.VisualMapOpts(
        is_show=False,
        max_=20,
        series_index=0,
    ),
)
b1.set_series_opts(itemstyle_opts=top20_bar_style, label_opts=top20_label)
b1.render_notebook()

# Count the regions whose divorce-to-marriage ratio is above 0.5 and report it.
high_ratio_count = (df_tmp['离结率'] > 0.5).sum()
print('全国共有{}个省市地区离结率超过50%,这已经是相当高了'.format(high_ratio_count))
全国共有15个省市地区离结率超过50%,这已经是相当高了
离结率超过50%的省市地区:
# Names of the regions whose divorce-to-marriage ratio is above 0.5.
df_tmp.loc[df_tmp['离结率'] > 0.5, '地区']

可以把下面代码中的 city 改成自己所在的地区来查看数据,这里以离结率排名第一的天津市为例。
# Yearly marriage vs. divorce registrations of one region, drawn as grouped bars.
# Change `city` to plot another region; both series and the axis title follow it.

# Linear gradients used to fill the bars of each series.
color_js0 = """new echarts.graphic.LinearGradient(0, 1, 0, 0,
[{offset: 0, color: '#FFFFFF'}, {offset: 1, color: '#ed1941'}], false)"""
color_js1 = """new echarts.graphic.LinearGradient(0, 1, 0, 0,
[{offset: 0, color: '#FFFFFF'}, {offset: 1, color: '#009ad6'}], false)"""
city = '天津市'


def _region_yearly_values(frame, region):
    """Return the yearly figures of ``region`` from ``frame`` as a plain list.

    The first column holds the region name and is skipped.

    Raises:
        ValueError: if ``region`` does not appear in the '地区' column.
    """
    matched = frame[frame['地区'] == region]
    if matched.empty:
        raise ValueError(f'region not found in the 地区 column: {region!r}')
    return matched.values[0][1:].tolist()


def _extreme_markpoints():
    """Build mark points that flag a series' maximum and minimum values."""
    return opts.MarkPointOpts(
        data=[
            opts.MarkPointItem(type_="max", name="最大值"),
            opts.MarkPointItem(type_="min", name="最小值"),
        ]
    )


b2 = (
    Bar()
    .add_xaxis(df_divorce.columns[1:].values.tolist())
    .add_yaxis(
        series_name="结婚登记",
        y_axis=_region_yearly_values(df_marry, city),
        markpoint_opts=_extreme_markpoints(),
        itemstyle_opts=opts.ItemStyleOpts(color=JsCode(color_js0)),
    )
    .add_yaxis(
        series_name="离婚登记",
        # Use the same region as the marriage series; this was previously
        # hard-coded to '北京市', so the chart mixed two different regions.
        y_axis=_region_yearly_values(df_divorce, city),
        markpoint_opts=_extreme_markpoints(),
        itemstyle_opts=opts.ItemStyleOpts(color=JsCode(color_js1)),
    )
    .set_global_opts(
        title_opts=opts.TitleOpts(title=""),
        tooltip_opts=opts.TooltipOpts(trigger="axis"),
        xaxis_opts=opts.AxisOpts(
            name="年份",
            type_="category",
            boundary_gap=True,
            axisline_opts=opts.AxisLineOpts(
                is_show=True,
                linestyle_opts=opts.LineStyleOpts(width=4, color='#DB7093'),
            ),
            axislabel_opts=opts.LabelOpts(rotate=45),
        ),
        yaxis_opts=opts.AxisOpts(
            axislabel_opts=opts.LabelOpts(formatter="{value} /万对"),
            name=f'{city}历年结婚/离婚登记',
            is_scale=True,
            name_textstyle_opts=opts.TextStyleOpts(
                font_size=14, font_weight='bold', color='#FF1493'
            ),
            splitline_opts=opts.SplitLineOpts(
                is_show=True,
                linestyle_opts=opts.LineStyleOpts(type_='dashed'),
            ),
            axisline_opts=opts.AxisLineOpts(
                is_show=False,
                linestyle_opts=opts.LineStyleOpts(width=2, color='#DB7093'),
            ),
        ),
        legend_opts=opts.LegendOpts(is_show=True, pos_top='2%', legend_icon='roundRect'),
    )
)
b2.render_notebook()

# Choropleth map of the divorce-to-marriage ratio per region.
# df_tmp is built from the '2019年' columns only, so the title names 2019
# rather than a 2001-2019 range.

# Linear gradient; not referenced by the chart below, kept in case other cells use it.
color_js = """new echarts.graphic.LinearGradient(0, 0, 1, 0,
[{offset: 0, color: '#009ad6'}, {offset: 1, color: '#ed1941'}], false)"""

# Strip administrative suffixes and ethnic-group qualifiers from the region
# names, in the same order as before.
# NOTE(review): presumably needed so the names match those of the 'china' map — confirm.
for name_token in ('省', '市', '自治区', '壮族', '维吾尔', '回族'):
    df_tmp = df_tmp.replace(name_token, '', regex=True)

map_chart = Map(
    init_opts=opts.InitOpts(theme='light', width='800px', height='600px')
)
map_chart.add(
    '离婚/结婚',
    # One [region, ratio] pair per row.
    [list(z) for z in zip(df_tmp['地区'].values.tolist(), df_tmp['离结率'].values.tolist())],
    maptype='china',
    is_map_symbol_show=False,
    itemstyle_opts={
        'normal': {
            'shadowColor': 'rgba(0, 0, 0, .5)',  # shadow colour
            'shadowBlur': 5,  # shadow blur size
            'shadowOffsetY': 0,  # shadow offset along the Y axis
            'shadowOffsetX': 0,  # shadow offset along the X axis
            'borderColor': '#fff'
        }
    },
)
map_chart.set_global_opts(
    visualmap_opts=opts.VisualMapOpts(
        is_show=True,
        is_piecewise=True,
        min_=0,
        max_=1,
        split_number=5,
        series_index=0,
        pos_top='70%',
        pos_left='10%',
        range_text=['离结率', ''],
        # Five bands of width 0.2, darker colour for a higher ratio.
        pieces=[
            {'max': 1.0, 'min': 0.8, 'label': '0.8-1.0', 'color': '#990000'},
            {'max': 0.8, 'min': 0.6, 'label': '0.6-0.8', 'color': '#CD5C5C'},
            {'max': 0.6, 'min': 0.4, 'label': '0.4-0.6', 'color': '#F08080'},
            {'max': 0.4, 'min': 0.2, 'label': '0.2-0.4', 'color': '#FFCC99'},
            {'max': 0.2, 'min': 0.0, 'label': '0.0-0.2', 'color': '#FFE4E1'},
        ],
    ),
    legend_opts=opts.LegendOpts(is_show=False),
    tooltip_opts=opts.TooltipOpts(
        is_show=True,
        trigger='item',
        formatter='{b}:{c}'
    ),
    # Title is passed as a raw option dict.
    title_opts=dict(
        text='2019年各地区离结率',
        left='center',
        top='5%',
        textStyle=dict(color='#DC143C'),
    ),
)
map_chart.render_notebook()

# Append a nationwide total row (index label "总计", region name "全国") to both tables.
# Any existing "总计" row is left out of the sum, so re-running this cell does
# not fold an earlier total into the new one.
df_marry.loc["总计"] = df_marry.drop(index="总计", errors="ignore").apply(lambda col: col.sum())
df_divorce.loc["总计"] = df_divorce.drop(index="总计", errors="ignore").apply(lambda col: col.sum())
# The column-wise sum also concatenates the region names; replace that with a label.
df_marry.loc['总计', '地区'] = '全国'
df_divorce.loc['总计', '地区'] = '全国'
df_marry[df_marry['地区'] == '全国']

# Filter the divorce table with its own mask; it was previously filtered with
# a mask built from df_marry.
df_divorce[df_divorce['地区'] == '全国']

# Nationwide yearly trend of marriage and divorce registrations (area line chart).

# Background colour: radial gradient evaluated by ECharts.
background_color_js = """
new echarts.graphic.RadialGradient(0.3, 0.3, 0.8, [{offset: 0,color: '#f7f8fa'},
{offset: 1,color: '#cdd0d5'}])
"""
# Line style: thick line with a drop shadow.
linestyle_dic = {
    'normal': {
        'width': 4,
        'shadowColor': '#696969',
        'shadowBlur': 10,
        'shadowOffsetY': 10,
        'shadowOffsetX': 10,
    }
}

# The x axis lists the year columns in reverse column order, so the totals are
# reversed the same way; otherwise each value is plotted under the wrong year.
year_labels = df_divorce.columns[1:][::-1].tolist()
marry_totals = [round(v, 2) for v in df_marry.loc["总计"].values.tolist()[1:][::-1]]
divorce_totals = [round(v, 2) for v in df_divorce.loc["总计"].values.tolist()[1:][::-1]]

l1 = (
    Line(init_opts=opts.InitOpts(bg_color=JsCode(background_color_js)))
    .add_xaxis(xaxis_data=year_labels)
    .add_yaxis(
        series_name="结婚登记",
        y_axis=marry_totals,
        symbol_size=8,
        is_smooth=True,
        color="#009ad6",
    )
    .add_yaxis(
        series_name="离婚登记",
        y_axis=divorce_totals,
        symbol_size=8,
        is_smooth=True,
        color="#ed1941",
    )
    # Series options
    .set_series_opts(
        linestyle_opts=linestyle_dic,
        areastyle_opts=opts.AreaStyleOpts(opacity=0.6),
        label_opts=opts.LabelOpts(is_show=False),
        markline_opts=opts.MarkLineOpts(data=[opts.MarkLineItem(type_="average")]),
        markpoint_opts=opts.MarkPointOpts(
            data=[opts.MarkPointItem(type_="max"), opts.MarkPointItem(type_="min")],
            symbol_size=[65, 50],
            label_opts=opts.LabelOpts(position="inside", color="#fff", font_size=10),
        ),
    )
    # Global options
    .set_global_opts(
        title_opts=opts.TitleOpts(
            title='历年全国结/离婚登记趋势',
            pos_top='2%',
            title_textstyle_opts=opts.TextStyleOpts(color='#4169E1', font_size=20),
        ),
        tooltip_opts=opts.TooltipOpts(trigger="axis"),
        xaxis_opts=opts.AxisOpts(
            name="",
            type_="category",
            boundary_gap=True,
            axisline_opts=opts.AxisLineOpts(
                is_show=True,
                linestyle_opts=opts.LineStyleOpts(width=2, color='#DB7093'),
            ),
            axislabel_opts=opts.LabelOpts(rotate=45),
        ),
        yaxis_opts=opts.AxisOpts(
            axislabel_opts=opts.LabelOpts(formatter="{value} /万对"),
            is_scale=True,
            name_textstyle_opts=opts.TextStyleOpts(
                font_size=12, font_weight='bold', color='#FF1493'
            ),
            splitline_opts=opts.SplitLineOpts(
                is_show=True,
                linestyle_opts=opts.LineStyleOpts(type_='dashed'),
            ),
            axisline_opts=opts.AxisLineOpts(
                is_show=False,
                linestyle_opts=opts.LineStyleOpts(width=2, color='#DB7093'),
            ),
        ),
        # Legend style
        legend_opts=opts.LegendOpts(
            is_show=True, pos_right='1%', pos_top='2%', legend_icon='roundRect'
        ),
    )
)
l1.render_notebook()

1.结婚登记数量前五地区:河南、广东、四川、江苏、安徽
2.离婚登记数量前五地区:河南、四川、江苏、山东、河北
3.全国结婚登记数量从2007年开始出现持续下降趋势
4.北方地区离结率较南方地区高出不少
待续……