- 阅读权限
- 255
- 威望
- 0 级
- 论坛币
- 18999 个
- 通用积分
- 1029.7754
- 学术水平
- 146 点
- 热心指数
- 166 点
- 信用等级
- 135 点
- 经验
- 36357 点
- 帖子
- 541
- 精华
- 0
- 在线时间
- 887 小时
- 注册时间
- 2015-9-25
- 最后登录
- 2025-12-4
|
你的代码太麻烦了，感觉你应该是从 Python 阵营转过来的。我今天也编写了一个，比你的简单些：
- # Scrape ten result pages of a zhipin.com job listing and show them in a table.
- #
- # Each page is parsed into its own data frame by scrape_page(); the pages are
- # bound together once at the end, so a page whose fields do not line up fails
- # immediately instead of silently shifting rows.
- library(rvest)
- library(stringr)
-
- # URL template; both %s slots receive the page number.
- site1 <- 'https://www.zhipin.com/c101280600-p100104/?page=%s&ka=page-%s'
- n_pages <- 10
-
- # Salary range embedded in the listing title, e.g. "10K-20K".
- salary_pattern <- '\\d{1,}K-\\d{1,}K'
- # Experience requirement; the text before it becomes the location column and
- # the text after it the education column.
- exper_pattern <- '\\d-\\d{1,}年|经验不限|1年以内'
- # Built with paste() so the selector stays a single-line string.
- demand_selector <- paste(
-   '.job-list > ul:nth-child(2) > li> a:nth-child(1) >',
-   'div> span'
- )
-
- # Collapse the tag spans of every listing into one space-separated string.
- #
- # A span whose str_length() exceeds 22 is treated as the separator that closes
- # a listing (heuristic kept from the original script; presumably the length of
- # the serialized <span> markup is what is measured -- TODO confirm against the
- # live page). Spans after the last separator are ignored.
- #
- # Returns a character vector with one element per separator; a listing with no
- # tag spans yields NA instead of picking up the separator spans themselves.
- group_demands <- function(spans) {
-   sep_pos <- which(str_length(spans) > 22)
-   # First listing starts at span 1, every later one right after a separator.
-   starts <- c(1, sep_pos + 1)[seq_along(sep_pos)]
-   ends <- sep_pos - 1
-   vapply(seq_along(sep_pos), function(k) {
-     if (ends[k] < starts[k]) {
-       return(NA_character_)
-     }
-     spans[starts[k]:ends[k]] %>%
-       html_text() %>%
-       str_c(collapse = ' ')
-   }, character(1))
- }
-
- # Parse one result page (an object returned by read_html()) into a data frame
- # with one row per listing. Stops with an explicit message when the scraped
- # columns do not all have the same length.
- scrape_page <- function(web) {
-   # Title text holds both the job name and the salary range.
-   titles <- web %>%
-     html_nodes('.info-primary>.name') %>%
-     html_text()
-   salary <- str_extract(titles, salary_pattern)
-   job <- titles %>%
-     str_replace(salary_pattern, '') %>%
-     str_trim()
-
-   # "<location><experience><education>" in a single paragraph.
-   primary <- web %>%
-     html_nodes('.info-primary>p') %>%
-     html_text()
-   primary_parts <- str_split(primary, exper_pattern)
-   adr <- vapply(primary_parts, function(p) p[1], character(1))
-   # p[2] is NA when the experience pattern did not match.
-   ed <- vapply(primary_parts, function(p) p[2], character(1))
-   exper <- str_extract(primary, exper_pattern)
-
-   demand <- group_demands(html_nodes(web, demand_selector))
-
-   # HR name and title are cut out of the raw markup of the author paragraph:
-   # the name sits between "p>" and "<em", the title between "em>" and "<img".
-   author_html <- web %>%
-     html_nodes('.job-author>p') %>%
-     as.character()
-   hr_name <- author_html %>%
-     str_extract('p>[\\s\\S]*<em') %>%
-     str_sub(3, -4)
-   hr_job <- author_html %>%
-     str_extract('em>[\\s\\S]*<img') %>%
-     str_sub(4, -5)
-
-   com_name <- web %>%
-     html_nodes('.company-text>h3') %>%
-     html_text() %>%
-     str_trim()
-   # Company paragraph markup split on the vertical-line separator element.
-   com_parts <- web %>%
-     html_nodes('.company-text>p') %>%
-     as.character() %>%
-     str_split('<em class="vline"></em>')
-   # First piece minus the leading "<p>".
-   hangye <- vapply(com_parts, function(p) str_sub(p[1], 4, -1), character(1))
-   # Last piece minus the trailing "</p>".
-   guimo <- vapply(
-     com_parts,
-     function(p) str_sub(p[length(p)], 1, -5),
-     character(1)
-   )
-   # The middle piece exists only when there are more than two pieces.
-   ziben <- vapply(
-     com_parts,
-     function(p) if (length(p) == 2) NA_character_ else p[2],
-     character(1)
-   )
-
-   fields <- list(
-     公司 = com_name, 职位 = job, 薪资 = salary, 学历 = ed, 经验 = exper,
-     要求 = demand, 地点 = adr, HR姓名 = hr_name, HR职位 = hr_job,
-     行业 = hangye, 类型 = ziben, 规模 = guimo
-   )
-   sizes <- lengths(fields)
-   if (length(unique(sizes)) > 1) {
-     stop(
-       'Scraped columns have different lengths: ',
-       paste(names(fields), sizes, sep = '=', collapse = ', '),
-       call. = FALSE
-     )
-   }
-   # Keep text columns as character regardless of the R version's default.
-   data.frame(fields, stringsAsFactors = FALSE)
- }
-
- pages <- lapply(seq_len(n_pages), function(page) {
-   site <- sprintf(site1, page, page)
-   scrape_page(read_html(site))
- })
- data <- do.call(rbind, pages)
-
- DT::datatable(data)
复制代码
|
-
总评分: 学术水平 + 1
热心指数 + 1
查看全部评分
|