import urllib import requests import time import random from bs4 import BeautifulSoup from lxml import etree #xpath from requests.exceptions import RequestException import xlwt #excel操作
import pandas as pd from pyecharts.charts import Geo from pyecharts.charts import Bar from pyecharts import options as opts import re from pyecharts.globals import GeoType from pyecharts.render import make_snapshot from snapshot_selenium import snapshot
def city_counter():
    """Build the nationwide heat map of job postings per city.

    Reads ``jobData.xls`` from the current working directory, reduces every
    ``工作地点`` value to the text before its first space, counts the postings
    per resulting city and plots those counts on a pyecharts ``Geo`` chart
    using the ``china`` map.

    Returns:
        The configured ``Geo`` chart. Nothing is written to disk here; call
        ``.render(<path>)`` on the returned chart to produce the HTML file.
    """
    # No ``encoding`` argument: it has no meaning for Excel workbooks and
    # newer pandas versions raise ``TypeError`` when it is passed.
    datas = pd.read_excel('jobData.xls')

    # Derive the city names in a separate Series so the loaded frame is not
    # modified; rows without a work location are skipped.
    cities = datas['工作地点'].dropna().astype(str).str.split(' ').str[0]

    # Number of postings per city, ordered by city name (groupby default).
    city_counts = cities.groupby(cities).size()
    attr = city_counts.index.tolist()
    # Plain Python ints, taken directly from the counts instead of being
    # parsed back out of a tuple's string representation.
    value = [int(count) for count in city_counts]

    geo = (
        Geo()
        .add_schema(maptype="china")
        .add("岗位数", [list(z) for z in zip(attr, value)], symbol_size=20)
        # Hide the per-point labels.
        .set_series_opts(label_opts=opts.LabelOpts(is_show=False))
        .set_global_opts(
            visualmap_opts=opts.VisualMapOpts(min_=0, max_=40),
            title_opts=opts.TitleOpts(title="自动驾驶职位招聘地理位置"),
        )
    )
    return geo
import pandas as pd from pyecharts.charts import Bar from pyecharts import options as opts import re from pyecharts.render import make_snapshot from snapshot_selenium import snapshot
def deal_salary(salary_data):
    """Convert a salary string such as ``"15-25K"`` to its midpoint in yuan.

    The text in front of the first ``K`` is split on ``-``; the first two
    parts are read as the lower and upper bound in thousands. Anything after
    the ``K`` (for example ``"·13薪"``) is ignored.

    Args:
        salary_data: Salary description string.

    Returns:
        The truncated integer average of the two bounds multiplied by 1000.
        When only one figure precedes the ``K`` (``"20K"``) that figure is
        used for both bounds. ``None`` when the string contains no ``K``
        reachable from its start.

    Raises:
        ValueError: If a bound in front of the ``K`` is not a number.
    """
    matched = re.match(r'(.*?)K', salary_data)
    if matched is None:
        return None

    bounds = matched.group(1).split('-')
    low = float(bounds[0]) * 1000
    # A single figure has no upper bound; treat it as a degenerate range
    # instead of failing on the missing second element.
    high = float(bounds[1]) * 1000 if len(bounds) > 1 else low
    return int((low + high) / 2)
def city_salary():
    """Build a bar chart of the average salary per city.

    Reads ``jobData.xls`` from the current working directory, discards rows
    without a salary or work location and rows whose salary text contains
    ``天`` (marked as internship data by the original author), converts the
    remaining salaries with ``deal_salary`` and averages them per city. The
    city is the part of ``工作地点`` before its first space.

    Returns:
        A pyecharts ``Bar`` chart with one bar per city, an average mark line
        and max/min mark points.
    """
    # No ``encoding`` argument: newer pandas versions reject it for Excel.
    datas = pd.read_excel('jobData.xls')

    # Missing values must be removed *before* the substring test, otherwise
    # ``str.contains`` yields NaN entries that cannot be used as a mask.
    rows = datas.dropna(subset=['工资', '工作地点'])
    salary_text = rows['工资'].astype(str)
    salary_text = salary_text[~salary_text.str.contains('天')]

    cities = rows['工作地点'].astype(str).str.split(' ').str[0]

    # deal_salary returns None for text without a 'K'; coerce to numbers and
    # drop those rows so they cannot disturb the mean.
    salaries = pd.to_numeric(salary_text.apply(deal_salary), errors='coerce')
    salaries = salaries.dropna()

    # Grouping by the ``cities`` Series aligns on the row index, so only the
    # rows that survived the filters above contribute.
    salary_month = salaries.groupby(cities).mean()

    attr = salary_month.index.tolist()
    # Values are handed over as strings, as in the original implementation,
    # whose author noted the chart is not displayed otherwise.
    value = [str(mean) for mean in salary_month]

    bar = (
        Bar()
        .add_xaxis(attr)
        .add_yaxis("工资(元)", value)
        .set_global_opts(title_opts=opts.TitleOpts(title="城市平均工资统计图"))
        .set_series_opts(
            label_opts=opts.LabelOpts(is_show=False),
            markline_opts=opts.MarkLineOpts(
                data=[
                    opts.MarkLineItem(type_="average", name="平均值"),
                ]
            ),
            markpoint_opts=opts.MarkPointOpts(
                data=[
                    opts.MarkPointItem(type_="max", name="最大值"),
                    opts.MarkPointItem(type_="min", name="最小值"),
                ]
            ),
        )
    )
    return bar
import pandas as pd from pyecharts.charts import Page, Pie from pyecharts import options as opts import re from pyecharts.render import make_snapshot from snapshot_selenium import snapshot
def edu_require():
    """Build a ring-shaped pie chart of the education requirements.

    Reads ``jobData.xls`` from the current working directory, discards rows
    whose ``学历要求`` value is missing or contains ``个月`` (marked as
    internship postings by the original author) and counts the postings per
    remaining value.

    Returns:
        A pyecharts ``Pie`` chart with one slice per education requirement,
        labelled ``"<name>: <count>"``.
    """
    # No ``encoding`` argument: newer pandas versions reject it for Excel.
    datas = pd.read_excel('jobData.xls')

    # Drop missing values first; ``str.contains`` would otherwise return NaN
    # for them and the negated result could not be used as a mask.
    education = datas['学历要求'].dropna().astype(str)
    education = education[~education.str.contains('个月')]

    # Postings per requirement, ordered by requirement (groupby default).
    counts = education.groupby(education).size()
    attr = counts.index.tolist()
    # Values are handed over as strings, as in the original implementation,
    # whose author noted the chart is not displayed otherwise.
    value = [str(count) for count in counts]

    pie = (
        Pie()
        .add(
            "",
            [list(z) for z in zip(attr, value)],
            radius=["40%", "75%"],
        )
        .set_global_opts(
            title_opts=opts.TitleOpts(title="学历要求"),
            legend_opts=opts.LegendOpts(
                orient="vertical", pos_top="15%", pos_left="2%"
            ),
        )
        .set_series_opts(label_opts=opts.LabelOpts(formatter="{b}: {c}"))
    )
    return pie
def exp_require():
    """Build a ring-shaped pie chart of the work-experience requirements.

    Reads ``jobData.xls`` from the current working directory, discards rows
    whose ``工作经验要求`` value is missing or contains ``天/周`` (marked as
    internship postings by the original author) and counts the postings per
    remaining value.

    Returns:
        A pyecharts ``Pie`` chart with one slice per experience requirement,
        labelled ``"<name>: <count>"``.
    """
    # No ``encoding`` argument: newer pandas versions reject it for Excel.
    datas = pd.read_excel('jobData.xls')

    # Drop missing values first; ``str.contains`` would otherwise return NaN
    # for them and the negated result could not be used as a mask.
    experience = datas['工作经验要求'].dropna().astype(str)
    experience = experience[~experience.str.contains('天/周')]

    # Postings per requirement, ordered by requirement (groupby default).
    counts = experience.groupby(experience).size()
    attr = counts.index.tolist()
    # Values are handed over as strings, as in the original implementation,
    # whose author noted the chart is not displayed otherwise.
    value = [str(count) for count in counts]

    pie = (
        Pie()
        .add(
            "",
            [list(z) for z in zip(attr, value)],
            radius=["40%", "75%"],
        )
        .set_global_opts(
            title_opts=opts.TitleOpts(title="工作经验要求"),
            legend_opts=opts.LegendOpts(
                orient="vertical", pos_top="15%", pos_left="2%"
            ),
        )
        .set_series_opts(label_opts=opts.LabelOpts(formatter="{b}: {c}"))
    )
    return pie
import pandas as pd from pyecharts import options as opts from pyecharts.charts import Page, WordCloud from pyecharts.globals import SymbolType import jieba import re from collections import Counter
def job_title():
    """Build a word cloud of the 50 most frequent words in the job titles.

    Reads ``jobData.xls`` from the current working directory, segments every
    ``岗位`` value with jieba in full mode (``cut_all=True``) and counts the
    resulting words across all titles.

    Returns:
        A pyecharts ``WordCloud`` chart fed with the 50 most common
        ``(word, count)`` pairs.
    """
    # No ``encoding`` argument: newer pandas versions reject it for Excel.
    datas = pd.read_excel('jobData.xls')

    # Missing titles are skipped rather than breaking the segmentation.
    titles = datas['岗位'].dropna().astype(str)

    requirements_top = Counter()
    for title in titles:
        # Each title is segmented on its own so that no word can be formed
        # from the end of one title and the start of the next.
        for word in jieba.cut(title, cut_all=True):
            # Blank tokens carry no meaning and would otherwise be counted
            # like any other word.
            if word.strip():
                requirements_top[word] += 1

    # most_common returns (word, count) tuples, highest count first.
    requirements_top50 = requirements_top.most_common(50)

    c = (
        WordCloud()
        .add(
            "",
            requirements_top50,
            word_size_range=[20, 100],
            shape=SymbolType.DIAMOND,
        )
        .set_global_opts(title_opts=opts.TitleOpts(title="主要岗位名称"))
    )
    return c
[1] LI D, MEI H, SHEN Y, et al. ECharts: A declarative framework for rapid construction of web-based visualization[J]. Visual Informatics, 2018, 2(2): 136-146.