数据分析小案例：招聘数据可视化，查看领域最需技术~-百木园

前言

嗨喽~大家好呀，这里是魔王呐

在前一章《让我们用 Python 来采集数据，看看找工作都要会什么吧~》中，

我们讲了如何采集zhaopin网站数据，现在~

我们来把数据可视化，更好地查看自己所在领域最需要的技术是什么~

下面，我们直接上代码~

代码提供者：青灯教育-自游老师

代码

import pandas as pd
from pyecharts.charts import *
from pyecharts import options as opts
import re
from pyecharts.globals import ThemeType
from pyecharts.commons.utils import JsCode

# Load the scraped job postings from a CSV file given by a relative path,
# then take a quick look at the first rows and the column dtypes.
df = pd.read_csv("招聘数据.csv")
df.head()

df.info()

# Inspect the distinct raw salary strings before parsing them.
df['薪资'].unique()

# Lower bound: the integer the string starts with.
# Upper bound: the integer that follows the first "-".
df['bottom'] = df['薪资'].str.extract(r'^(\d+).*', expand=False)
df['top'] = df['薪资'].str.extract(r'^.*?-(\d+).*', expand=False)
# Strings without a "-" have no upper bound; fall back to the lower bound.
# Assign the result back instead of calling fillna(inplace=True) on the
# column selection, which is chained assignment and may not update df.
df['top'] = df['top'].fillna(df['bottom'])

# Two-digit number between "·" and "薪"; rows without it default to 12.
# Dividing by 12 turns it into a multiplier relative to 12.
# NOTE(review): the column name 'commision_pct' is kept as-is because other
# code may reference it; presumably it means "months of pay per year".
df['commision_pct'] = df['薪资'].str.extract(r'^.*?·(\d{2})薪', expand=False)
df['commision_pct'] = df['commision_pct'].fillna(12)
df['commision_pct'] = df['commision_pct'].astype('float64') / 12

# Drops every row that has a missing value in ANY column, not only the
# salary-derived ones.
df.dropna(inplace=True)

df['bottom'] = df['bottom'].astype('int64')
df['top'] = df['top'].astype('int64')
# Midpoint of the range scaled by the multiplier, truncated to an integer.
df['平均薪资'] = (df['bottom'] + df['top']) / 2 * df['commision_pct']
df['平均薪资'] = df['平均薪资'].astype('int64')

df.head()

# NOTE(review): this section recomputes 'bottom', 'top' and '平均薪资' and so
# overwrites the values derived just above. It looks like the parser for a
# different salary format ("x-y万/月"); confirm which of the two sections
# matches the dataset actually being loaded.

# Any salary quoted in "千/月" is replaced wholesale by the fixed range
# "0.3-0.7万/月" so that every row uses the same unit.
df['薪资'] = df['薪资'].str.replace(r'.*千/月', '0.3-0.7万/月', regex=True)
df["薪资"].unique()

# Lower bound: everything before the first "-".
df['bottom'] = df['薪资'].str.extract(r'^(.*?)-.*?', expand=False)
# Upper bound: the full number after the first "-". The pattern accepts
# multi-digit and decimal values, so "10-15万/月" yields 15 rather than 1.
df['top'] = df['薪资'].str.extract(r'^.*?-(\d+(?:\.\d+)?)', expand=False)
# Drops every row with a missing value in any column.
df.dropna(inplace=True)
df['bottom'] = df['bottom'].astype('float64')
df['top'] = df['top'].astype('float64')
# Midpoint of the range times 10.
# NOTE(review): the factor presumably converts 万 to 千 — confirm.
df['平均薪资'] = (df['bottom'] + df['top']) / 2 * 10
df.head()

# Mean of '平均薪资' per '学历' group, sorted ascending so the bars grow from
# left to right.
mean = df.groupby('学历')['平均薪资'].mean().sort_values()
x = mean.index.tolist()
y = mean.values.tolist()
c = (
    Bar()
    .add_xaxis(x)
    .add_yaxis("学历", y)
    .set_global_opts(
        title_opts=opts.TitleOpts(title="不同学历的平均薪资"),
        datazoom_opts=opts.DataZoomOpts(),
    )
    # Hide the per-bar value labels.
    .set_series_opts(label_opts=opts.LabelOpts(is_show=False))
)
c.render_notebook()

# JavaScript snippets, wrapped in JsCode below so they are emitted as code
# rather than as quoted strings. color_js is used as the bar fill and
# color_js1 as the chart background.
color_js = """new echarts.graphic.LinearGradient(0, 1, 0, 0,
    [{offset: 0, color: '#63e6be'}, {offset: 1, color: '#0b7285'}], false)"""

color_js1 = """new echarts.graphic.LinearGradient(0, 0, 0, 1, [{
                            offset: 0,
                            color: '#ed1941'
                        }, {
                            offset: 1,
                            color: '#009ad6'
                        }], false)"""

# Number of non-null '职位' entries per city, largest first.
dq = (
    df.groupby('城市')['职位']
    .count()
    .to_frame('数量')
    .sort_values(by='数量', ascending=False)
    .reset_index()
)
# Keep only the 20 cities with the most postings.
x_data = dq['城市'].values.tolist()[:20]
y_data = dq['数量'].values.tolist()[:20]
b1 = (
    Bar(
        init_opts=opts.InitOpts(
            theme=ThemeType.DARK,
            bg_color=JsCode(color_js1),
            width='1000px',
            height='600px',
        )
    )
    .add_xaxis(x_data)
    .add_yaxis(
        '',
        y_data,
        category_gap="50%",
        label_opts=opts.LabelOpts(
            font_size=12,
            color='yellow',
            font_weight='bold',
            font_family='monospace',
            position='insideTop',
            # Category name on the first line, value on the second.
            formatter='{b}\n{c}',
        ),
    )
    .set_series_opts(
        itemstyle_opts={
            "normal": {
                "color": JsCode(color_js),
                "barBorderRadius": [15, 15, 0, 0],
                "shadowColor": "rgb(0, 160, 221)",
            }
        }
    )
    .set_global_opts(
        title_opts=opts.TitleOpts(
            title='招 聘 数 量 前 20 的 城 市 区 域',
            title_textstyle_opts=opts.TextStyleOpts(color="yellow"),
            pos_top='7%',
            pos_left='center',
        ),
        legend_opts=opts.LegendOpts(is_show=False),
        xaxis_opts=opts.AxisOpts(axislabel_opts=opts.LabelOpts(rotate=-15)),
        yaxis_opts=opts.AxisOpts(
            name="",
            name_location='middle',
            name_gap=40,
            name_textstyle_opts=opts.TextStyleOpts(font_size=16),
        ),
        # The zoom slider starts at the 1%-50% window of the x axis.
        datazoom_opts=[opts.DataZoomOpts(range_start=1, range_end=50)],
    )
)
b1.render_notebook()

def _dark_pie(counts, title):
    """Build a pie chart on a dark background from a ``value_counts()`` result.

    Args:
        counts: Series whose index holds the category labels and whose values
            hold the number of rows per category.
        title: Text used both as the series name and as the chart title.

    Returns:
        The configured ``Pie`` chart (not yet rendered).
    """
    data_pair = [
        list(pair)
        for pair in zip(counts.index.tolist(), counts.values.tolist())
    ]
    return (
        Pie(init_opts=opts.InitOpts(width="1000px", height="600px", bg_color="#2c343c"))
        .add(
            series_name=title,
            data_pair=data_pair,
            label_opts=opts.LabelOpts(
                is_show=False, position="center", color="rgba(255, 255, 255, 0.3)"
            ),
        )
        # NOTE(review): label_opts is supplied again here; presumably this
        # later value replaces the one passed to add() — confirm.
        .set_series_opts(
            tooltip_opts=opts.TooltipOpts(
                trigger="item", formatter="{a} <br/>{b}: {c} ({d}%)"
            ),
            label_opts=opts.LabelOpts(color="rgba(255, 255, 255, 0.3)"),
        )
        .set_global_opts(
            title_opts=opts.TitleOpts(
                title=title,
                pos_left="center",
                pos_top="20",
                title_textstyle_opts=opts.TextStyleOpts(color="#fff"),
            ),
            legend_opts=opts.LegendOpts(is_show=False),
        )
        .set_colors(["#D53A35", "#334B5C", "#61A0A8", "#D48265", "#749F83"])
    )


# Share of postings per education requirement.
boss = df['学历'].value_counts()
c = _dark_pie(boss, "学历需求占比")
c.render_notebook()

# Share of postings per experience requirement.
boss = df['经验'].value_counts()
c = _dark_pie(boss, "经验需求占比")
c.render_notebook()

# Share of postings per company field.
boss = df['公司领域'].value_counts()
c = _dark_pie(boss, "公司领域占比")
c.render_notebook()

from pyecharts import options as opts
from pyecharts.charts import Pie
from pyecharts.faker import Faker  # not referenced in the code shown here

# Number of postings per experience requirement, as [label, count] pairs.
boss = df['经验'].value_counts()
x = boss.index.tolist()
y = boss.values.tolist()
data_pair = [list(z) for z in zip(x, y)]

c = (
    Pie()
    .add("", data_pair)
    .set_colors(["blue", "green", "yellow", "red", "pink", "orange", "purple"])
    .set_global_opts(title_opts=opts.TitleOpts(title="经验要求占比"))
    # Label each slice as "name: count".
    .set_series_opts(label_opts=opts.LabelOpts(formatter="{b}: {c}"))
)
c.render_notebook()

from pyecharts import options as opts
from pyecharts.charts import Pie
from pyecharts.faker import Faker  # not referenced in the code shown here

# Number of postings per experience requirement, as [label, count] pairs.
boss = df['经验'].value_counts()
x = boss.index.tolist()
y = boss.values.tolist()
data_pair = [list(z) for z in zip(x, y)]

# Style fragments referenced by name ({a|...}, {abg|}, {hr|}, {b|...},
# {per|...}) from the label formatter below.
rich_styles = {
    "a": {"color": "#999", "lineHeight": 22, "align": "center"},
    "abg": {
        "backgroundColor": "#e3e3e3",
        "width": "100%",
        "align": "right",
        "height": 22,
        "borderRadius": [4, 4, 0, 0],
    },
    "hr": {
        "borderColor": "#aaa",
        "width": "100%",
        "borderWidth": 0.5,
        "height": 0,
    },
    "b": {"fontSize": 16, "lineHeight": 33},
    "per": {
        "color": "#eee",
        "backgroundColor": "#334455",
        "padding": [2, 4],
        "borderRadius": 2,
    },
}

c = (
    Pie()
    .add(
        "",
        data_pair,
        # Inner and outer radius differ, so the pie is drawn as a ring.
        radius=["40%", "55%"],
        label_opts=opts.LabelOpts(
            position="outside",
            # Three lines: series name, a rule, then "name: count  percent".
            formatter="{a|{a}}{abg|}\n{hr|}\n {b|{b}: }{c}  {per|{d}%}  ",
            background_color="#eee",
            border_color="#aaa",
            border_width=1,
            border_radius=4,
            rich=rich_styles,
        ),
    )
    .set_global_opts(title_opts=opts.TitleOpts(title="python招聘经验要求"))
)
c.render_notebook()

# The ten most frequent company fields (value_counts sorts descending).
gsly = df['公司领域'].value_counts().iloc[:10]
x1 = gsly.index.tolist()
y1 = gsly.values.tolist()
c = (
    Bar()
    .add_xaxis(x1)
    .add_yaxis("公司领域", y1)
    .set_global_opts(
        title_opts=opts.TitleOpts(title="公司领域"),
        datazoom_opts=opts.DataZoomOpts(),
    )
    # Hide the per-bar value labels.
    .set_series_opts(label_opts=opts.LabelOpts(is_show=False))
)
c.render_notebook()

# Company sizes ranked 2nd to 10th by frequency.
# NOTE(review): the slice starts at position 1, so the most frequent value is
# left out of the chart — confirm this is intentional.
gsgm = df['公司规模'].value_counts().iloc[1:10]
x2 = gsgm.index.tolist()
y2 = gsgm.values.tolist()
c = (
    Bar()
    .add_xaxis(x2)
    .add_yaxis("公司规模", y2)
    .set_global_opts(
        title_opts=opts.TitleOpts(title="公司规模"),
        datazoom_opts=opts.DataZoomOpts(),
    )
    # Hide the per-bar value labels.
    .set_series_opts(label_opts=opts.LabelOpts(is_show=False))
)
c.render_notebook()

import stylecloud
from PIL import Image

# Benefit strings of all postings, skipping missing values.
welfares = df['福利'].dropna().values.tolist()
# Each string holds several tags separated by a full-width comma; flatten
# them into a single list of tags.
welfares_list = [tag for welfare in welfares for tag in welfare.split('，')]

pic_name = '福利词云.png'
stylecloud.gen_stylecloud(
    text=' '.join(welfares_list),
    # NOTE(review): 'msyh.ttc' must be resolvable on the machine running this;
    # presumably a font with CJK glyphs is required — confirm.
    font_path='msyh.ttc',
    palette='cartocolors.qualitative.Bold_5',
    max_font_size=100,
    icon_name='fas fa-yen-sign',
    background_color='#212529',
    output_name=pic_name,
)
# Open the image that was just written so the notebook can display it.
Image.open(pic_name)