前言
QS世界大学排名(QS World University Rankings)是由英国国际教育市场咨询公司Quacquarelli Symonds(简称QS)发布的年度世界大学排名。
采集全球大学排名数据(源码已分享,求点赞)
import csv
import re
from pathlib import Path

# Rankings data endpoint; the response body is parsed as JSON.
URL = 'https://www.qschina.cn/sites/default/files/qs-rankings-data/cn/2057712_indicators.txt'
CSV_PATH = 'rank.csv'
YEAR = "2021"  # value written to the `year` column of every row
HEADER = ['country', 'rank', 'region', 'score_1', 'score_2', 'score_3', 'score_4',
          'score_5', 'score_6', 'total_score', 'stars', 'university', 'year']

# JSON keys of the HTML-wrapped score cells, in CSV column order
# (score_1 .. score_6, total_score).
SCORE_KEYS = (
    'ind_76',   # academic reputation
    'ind_77',   # employer reputation
    'ind_36',   # faculty/student ratio
    'ind_73',   # citations per faculty
    'ind_18',   # international faculty
    'ind_14',   # international students
    'overall',  # total score
)

# Compiled once instead of on every cell.
_CELL_RE = re.compile('<div class="td-wrap"><div class="td-wrap-in">(.*?)</div></div>')
_UNI_RE = re.compile(
    '<div class="td-wrap"><div class="td-wrap-in">'
    '<a href=".*?" class="uni-link">(.*?)</a></div></div>'
)


def replace(str_):
    """Return the text inside the first ``td-wrap``/``td-wrap-in`` cell of *str_*.

    Returns an empty string when *str_* is empty/None or does not contain the
    expected markup, so one malformed cell does not abort the whole scrape
    (the original indexed ``findall(...)[0]`` and raised ``IndexError``).
    """
    if not str_:
        return ''
    match = _CELL_RE.search(str_)
    return match.group(1) if match else ''


def parse_row(item, year=YEAR):
    """Convert one entry of the JSON ``data`` list into a CSV row (see HEADER).

    Raises:
        KeyError: if *item* lacks one of the expected keys.
    """
    uni_match = _UNI_RE.search(item['uni'] or '')
    university = uni_match.group(1) if uni_match else ''
    return [
        item['location'],      # country / territory
        item['overall_rank'],  # rank
        item['region'],        # continent
        *(replace(item[key]) for key in SCORE_KEYS),
        item['stars'],         # star rating
        university,
        year,
    ]


def append_rows(rows, path=CSV_PATH):
    """Append *rows* to the CSV at *path*, writing HEADER only if the file is new or empty.

    The file is opened once for the whole batch. The original re-opened it for
    every row and appended the header on every run, which left header lines in
    the middle of the data after a second run.
    NOTE: data rows are still appended, so running the scrape twice duplicates them.
    """
    target = Path(path)
    needs_header = not target.exists() or target.stat().st_size == 0
    with target.open(mode='a', encoding='utf-8', newline='') as f:
        csv_writer = csv.writer(f)
        if needs_header:
            csv_writer.writerow(HEADER)
        csv_writer.writerows(rows)


def fetch_rankings(url=URL, timeout=30):
    """Download the rankings JSON and return its ``data`` list.

    Raises:
        requests.HTTPError: on a non-2xx response.
        requests.Timeout: if the server does not answer within *timeout* seconds.
    """
    # Imported lazily so the parsing/writing helpers stay usable without requests.
    import requests

    response = requests.get(url, timeout=timeout)
    response.raise_for_status()
    return response.json()['data']


def main():
    """Fetch the rankings, echo every row and append them all to rank.csv."""
    rows = [parse_row(item) for item in fetch_rankings()]
    for row in rows:
        print(*row)
    append_rows(rows)


if __name__ == '__main__':
    main()
可视化展示
导入所需模块
from pyecharts.charts import * from pyecharts import options as opts from pyecharts.commons.utils import JsCode from pyecharts.components import Table import re import pandas as pd
导入数据
# Load the scraped rankings written by the scraper step.
df = pd.read_csv('rank.csv')

# Hong Kong, Macau, mainland China, etc. are recorded as separate locations in
# the ranking; here they are all grouped under "China". The original location
# is kept in `loc` before the merge.
df['loc'] = df['country']
# Assign the result back instead of calling replace(..., inplace=True) on the
# column selection: that chained in-place call operates on an intermediate
# object, emits a chained-assignment warning on recent pandas and leaves `df`
# unchanged when Copy-on-Write is enabled.
df['country'] = df['country'].replace(
    ['China (Mainland)', 'Hong Kong SAR', 'Taiwan', 'Macau SAR'],
    'China',
)
2021年世界大学排名(QS) TOP 100
# 2021 QS World University Rankings, TOP 100: horizontal bar chart of the
# total score of each university.
#
# NOTE(review): the original cell read `university` and `score` without
# defining them anywhere in this file. They are rebuilt here from `df` on the
# assumption that the chart plots `total_score` per `university` for the rows
# with year == 2021 and rank <= 100 (same filter style as the continent
# chart) — confirm against the intended data preparation.
top_100 = df[(df.year == 2021) & (df['rank'] <= 100)]
# Ascending order: after reversal_axis() the first category is expected at the
# bottom, so the highest scores sit at the top, inside the initial 70-100 %
# zoom window.
top_100 = top_100.sort_values(by='total_score', ascending=True)
university = top_100['university'].tolist()
score = top_100['total_score'].tolist()

bar = (
    Bar()
    .add_xaxis(university)
    .add_yaxis('', score, category_gap='30%')
    .set_global_opts(
        title_opts=opts.TitleOpts(
            title="2021年世界大学排名(QS) TOP 100",
            pos_left="center",
            title_textstyle_opts=opts.TextStyleOpts(font_size=20),
        ),
        datazoom_opts=opts.DataZoomOpts(range_start=70, range_end=100, orient='vertical'),
        # dimension=0 colours the bars by the value axis once the axes are swapped.
        visualmap_opts=opts.VisualMapOpts(
            is_show=False, max_=100, min_=60, dimension=0,
            range_color=['#00FFFF', '#FF7F50'],
        ),
        legend_opts=opts.LegendOpts(is_show=False),
        xaxis_opts=opts.AxisOpts(is_show=False, is_scale=True),
        yaxis_opts=opts.AxisOpts(
            axistick_opts=opts.AxisTickOpts(is_show=False),
            axisline_opts=opts.AxisLineOpts(is_show=False),
            axislabel_opts=opts.LabelOpts(font_size=12),
        ),
    )
    .set_series_opts(
        label_opts=opts.LabelOpts(is_show=True, position='right', font_style='italic'),
        itemstyle_opts={
            "normal": {
                "barBorderRadius": [30, 30, 30, 30],
                'shadowBlur': 10,
                'shadowColor': 'rgba(120, 36, 50, 0.5)',
                'shadowOffsetY': 5,
            }
        },
    )
    .reversal_axis()
)

# The Grid wrapper only adds left/right margins around the bar chart.
grid = (
    Grid(init_opts=opts.InitOpts(theme='purple-passion', width='1000px', height='1200px'))
    .add(bar, grid_opts=opts.GridOpts(pos_right='10%', pos_left='20%'))
)
grid.render_notebook()
TOP 500中的中国大学
# Chinese universities inside the TOP 500: horizontal bar chart of the total
# score of each university.
#
# NOTE(review): the original cell read `university` and `score` without
# defining them anywhere in this file. They are rebuilt here from `df` on the
# assumption that the chart plots `total_score` per `university` for rows with
# year == 2021, rank <= 500 and country == 'China' (the merged label created
# when the data is loaded) — confirm against the intended data preparation.
china_top_500 = df[(df.year == 2021) & (df['rank'] <= 500) & (df['country'] == 'China')]
# Ascending order: after reversal_axis() the first category is expected at the
# bottom, so the highest scores sit at the top, inside the initial 50-100 %
# zoom window.
china_top_500 = china_top_500.sort_values(by='total_score', ascending=True)
university = china_top_500['university'].tolist()
score = china_top_500['total_score'].tolist()

bar = (
    Bar()
    .add_xaxis(university)
    .add_yaxis('', score, category_gap='30%')
    .set_global_opts(
        title_opts=opts.TitleOpts(
            title="TOP 500中的中国大学",
            pos_left="center",
            title_textstyle_opts=opts.TextStyleOpts(font_size=20),
        ),
        datazoom_opts=opts.DataZoomOpts(range_start=50, range_end=100, orient='vertical'),
        # dimension=0 colours the bars by the value axis once the axes are swapped.
        visualmap_opts=opts.VisualMapOpts(
            is_show=False, max_=90, min_=20, dimension=0,
            range_color=['#00FFFF', '#FF7F50'],
        ),
        legend_opts=opts.LegendOpts(is_show=False),
        xaxis_opts=opts.AxisOpts(is_show=False, is_scale=True),
        yaxis_opts=opts.AxisOpts(
            axistick_opts=opts.AxisTickOpts(is_show=False),
            axisline_opts=opts.AxisLineOpts(is_show=False),
            axislabel_opts=opts.LabelOpts(font_size=12),
        ),
    )
    .set_series_opts(
        label_opts=opts.LabelOpts(is_show=True, position='right', font_style='italic'),
        itemstyle_opts={
            "normal": {
                "barBorderRadius": [30, 30, 30, 30],
                'shadowBlur': 10,
                'shadowColor': 'rgba(120, 36, 50, 0.5)',
                'shadowOffsetY': 5,
            }
        },
    )
    .reversal_axis()
)

# The Grid wrapper only adds left/right margins around the bar chart.
grid = (
    Grid(init_opts=opts.InitOpts(theme='purple-passion', width='1000px', height='1200px'))
    .add(bar, grid_opts=opts.GridOpts(pos_right='10%', pos_left='20%'))
)
grid.render_notebook()
TOP 1000高校按大洲分布
# TOP 1000 universities by continent: count the 2021 rows with rank <= 1000 per
# `region` and draw one bar per region, largest count first.
in_top_1000 = (df.year == 2021) & (df['rank'] <= 1000)
t_data = df[in_top_1000].groupby(['region'])['university'].count().reset_index()
t_data.columns = ['region', 'num']
t_data = t_data.sort_values(by="num", ascending=False)

regions = t_data['region'].tolist()
counts = t_data['num'].tolist()

# Rounded bar corners with a soft drop shadow.
bar_item_style = {
    "normal": {
        "barBorderRadius": [30, 30, 30, 30],
        'shadowBlur': 10,
        'shadowColor': 'rgba(120, 36, 50, 0.5)',
        'shadowOffsetY': 5,
    }
}

bar = Bar(init_opts=opts.InitOpts(theme='purple-passion', width='1000px', height='600px'))
bar.add_xaxis(regions)
bar.add_yaxis('出现次数', counts, category_gap='50%')
bar.set_global_opts(
    title_opts=opts.TitleOpts(
        title="TOP 1000高校按大洲分布",
        pos_left="center",
        title_textstyle_opts=opts.TextStyleOpts(font_size=20),
    ),
    # dimension=1 colours each bar by its count.
    visualmap_opts=opts.VisualMapOpts(
        is_show=False, max_=300, min_=0, dimension=1,
        range_color=['#00FFFF', '#FF7F50'],
    ),
    legend_opts=opts.LegendOpts(is_show=False),
    xaxis_opts=opts.AxisOpts(
        axistick_opts=opts.AxisTickOpts(is_show=False),
        axisline_opts=opts.AxisLineOpts(is_show=False),
        axislabel_opts=opts.LabelOpts(font_size=15),
    ),
    yaxis_opts=opts.AxisOpts(is_show=False),
)
bar.set_series_opts(
    label_opts=opts.LabelOpts(is_show=True, position='top', font_size=15, font_style='italic'),
    itemstyle_opts=bar_item_style,
)
bar.render_notebook()
TOP 1000高校按国家分布
# TOP 1000 universities by country: a world map shaded by the number of
# universities, with a transparent Geo layer on top that labels the countries
# kept in `country_list`.
#
# NOTE(review): `data_pair`, `country_list` and `loc` are read below but are not
# defined in this block — presumably prepared by an earlier cell; confirm.

# Label text for the Geo layer: "<name>: <third element of the point's value>".
fmt_js = """function (params) {return params.name+': '+Number(params.value[2]);}"""

mp = (
    Map()
    .add("高校数量", data_pair, "world", is_map_symbol_show=False, is_roam=False)
    .set_series_opts(
        label_opts=opts.LabelOpts(is_show=False),
        itemstyle_opts={
            'normal': {
                'areaColor': '#191970',
                'borderColor': '#1773c3',
                'shadowColor': '#1773c3',
                'shadowBlur': 20,
                'opacity': 0.8,
            }
        },
    )
    .set_global_opts(
        title_opts=opts.TitleOpts(
            title="TOP 1000高校按国家分布",
            pos_left='center',
            title_textstyle_opts=opts.TextStyleOpts(font_size=18),
        ),
        legend_opts=opts.LegendOpts(is_show=False),
        visualmap_opts=opts.VisualMapOpts(
            is_show=False,
            max_=100,
            is_piecewise=False,
            dimension=0,
            range_color=['rgba(255,228,225,0.6)', 'rgba(255,0,0,0.9)', 'rgba(255,0,0,1)'],
        ),
    )
)

# Only the countries listed in `country_list` get a text label.
data_pair = [[name, value] for name, value in data_pair if name in country_list]

geo = Geo()
# The coordinates of these countries have to be registered with geo first.
for place, coords in loc.items():
    geo.add_coordinate(place, coords[0], coords[1])
# The geo base map is made fully transparent (opacity 0).
geo.add_schema(maptype="world", is_roam=False, itemstyle_opts={'normal': {'opacity': 0}})
geo.add("", data_pair, symbol_size=1)
# Label display settings.
geo.set_series_opts(
    label_opts=opts.LabelOpts(
        is_show=True,
        position='right',
        color='white',
        font_size=12,
        font_weight='bold',
        formatter=JsCode(fmt_js),
    ),
)

# Stack the shaded map and the label layer in one canvas.
grid = Grid(init_opts=opts.InitOpts(theme='chalk', width='1000px', height='600px'))
grid.add(mp, grid_opts=opts.GridOpts(pos_top="12%"))
grid.add(geo, grid_opts=opts.GridOpts(pos_bottom="12%"))
grid.render_notebook()
大洲-国家分布
# Continent -> country distribution of the TOP 1000 universities as a sunburst.
#
# NOTE(review): the original cell passed `data_pair` without building the
# nested structure a sunburst needs (the last assignment in this file is a flat
# list of [name, value] pairs). It is rebuilt here from `df` on the assumption
# that the inner ring is `region`, the outer ring is `country`, and each value
# is the number of universities with year == 2021 and rank <= 1000 — confirm
# against the intended data preparation.
top_1000 = df[(df.year == 2021) & (df['rank'] <= 1000)]
data_pair = []
for region, in_region in top_1000.groupby('region'):
    per_country = in_region.groupby('country')['university'].count()
    data_pair.append({
        "name": region,
        # Plain ints so the chart options serialise to JSON cleanly.
        "value": int(per_country.sum()),
        "children": [
            {"name": country, "value": int(num)}
            for country, num in per_country.items()
        ],
    })

c = (
    Sunburst(
        init_opts=opts.InitOpts(theme='purple-passion', width="1000px", height="1000px")
    )
    .add(
        "",
        data_pair=data_pair,
        highlight_policy="ancestor",
        radius=[0, "100%"],
        sort_='null',
        # One entry per ring: centre, first data level, second data level.
        levels=[
            {},
            {
                "r0": "20%",
                "r": "48%",
                "itemStyle": {"borderColor": 'rgb(220,220,220)', "borderWidth": 2},
            },
            {
                "r0": "50%",
                "r": "80%",
                "label": {"align": "right"},
                "itemStyle": {"borderColor": 'rgb(220,220,220)', "borderWidth": 1},
            },
        ],
    )
    .set_global_opts(
        visualmap_opts=opts.VisualMapOpts(
            is_show=False,
            max_=300,
            min_=0,
            is_piecewise=False,
            range_color=['#4285f4', '#34a853', '#fbbc05', '#ea4335', '#ea4335'],
        ),
        # The title is centred so it sits in the hole of the sunburst.
        title_opts=opts.TitleOpts(
            title="TOP 1000\n\n大学地理分布",
            pos_left="center",
            pos_top="center",
            title_textstyle_opts=opts.TextStyleOpts(font_style='oblique', font_size=20),
        ),
    )
    .set_series_opts(label_opts=opts.LabelOpts(font_size=14, formatter="{b}: {c}"))
)
c.render_notebook()
对于本篇文章有疑问的同学可以加【资料白嫖、解答交流群:753182387】
来源:https://www.cnblogs.com/qshhl/p/16469988.html
本站部分图文来源于网络,如有侵权请联系删除。