Python requests 通过接口爬取拉勾网职位信息

来源:互联网 发布:linux 大文件中查找 编辑:程序博客网 时间:2024/05/17 17:15
# -*- coding:utf-8 -*-
"""Crawl job postings from lagou.com through its JSON search endpoint.

For every result page the script POSTs a search form to the
``positionAjax.json`` endpoint, formats each returned posting as one line of
text, and finally appends the numbered lines to ``OUTPUT_PATH``.

Reference: http://docs.python-requests.org/zh_CN/latest/user/quickstart.html

The code runs on both Python 2.7 and Python 3: all text is kept in unicode
literals and the output file is opened with an explicit encoding, so the
``reload(sys); sys.setdefaultencoding('utf-8')`` hack is no longer needed.
"""
import io
import json  # unused here; retained from the original import list
import sys

import requests

__author__ = 'binsen'

SEARCH_URL = 'https://www.lagou.com/jobs/positionAjax.json'
OUTPUT_PATH = 'C:/Users/bin.sun/Desktop/lagou.txt'
FIRST_PAGE = 1
LAST_PAGE = 14  # inclusive; the original loop was range(1, 15)
REQUEST_TIMEOUT = 10  # seconds; without a timeout a stalled server hangs the crawl

# Headers copied from a browser session, with three deliberate changes:
#   * 'Content-Length' is omitted: requests computes it from the encoded form,
#     and a fixed value is wrong as soon as the payload changes.
#   * 'Accept-Encoding' is omitted: advertising 'br' invites a Brotli-compressed
#     reply, which requests can only decode when optional Brotli support is
#     installed; the library's own default only lists what it can decode.
#   * Stray backslashes were removed from 'Referer' and 'User-Agent'. In the
#     User-Agent, '\40' is an octal escape that silently became a space.
# NOTE(review): the Referer still points at a different search (another
# keyword and city) than the payload below -- confirm whether that matters.
HEADERS = {
    'Accept': 'application/json, text/javascript, */*; q=0.01',
    'Accept-Language': 'zh-CN,zh;q=0.8',
    'Connection': 'keep-alive',
    'Content-Type': 'application/x-www-form-urlencoded; charset=UTF-8',
    # Placeholder: paste the Cookie header of a logged-in browser session.
    'Cookie': '~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~',
    'Host': 'www.lagou.com',
    'Origin': 'https://www.lagou.com',
    'Referer': 'https://www.lagou.com/jobs/list_%E8%BD%AF%E4%BB%B6%E6%B5%8B%E8%AF%95?city=%E4%B8%8A%E6%B5%B7&cl=false&fromSearch=true&labelWords=&suginput=',
    'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/54.0.2840.99 Safari/537.36',
    'X-Anit-Forge-Code': '0',
    'X-Anit-Forge-Token': 'None',
    'X-Requested-With': 'XMLHttpRequest',
}

POSITION_TEMPLATE = u"岗位:%s,公司名:%s,规模:%s,业务:%s,工作年限:%s,待遇:%s"
# Keys read from each posting, in the order the template consumes them.
POSITION_FIELDS = (
    'positionName',
    'companyFullName',
    'companySize',
    'industryField',
    'workYear',
    'salary',
)


def fetch_positions(page, keyword=u'python', city=u'杭州'):
    """Return the list of postings found on one result page.

    Args:
        page: Page number sent as the ``pn`` form field.
        keyword: Search keyword sent as the ``kd`` form field.
        city: City filter sent as the ``city`` form field.

    Returns:
        The list stored under ``content.positionResult.result`` in the
        decoded JSON response.

    Raises:
        requests.RequestException: On network failure, timeout, or a
            non-2xx HTTP status.
        ValueError: If the body is not JSON or lacks the expected structure.
    """
    payload = {
        'px': 'default',
        'city': city,
        'needAddtionalResult': 'false',
        'first': 'true',
        'pn': page,
        'kd': keyword,
    }
    # For endpoints that require authentication, also pass
    # auth=HTTPBasicAuth(...); to go through a proxy, pass proxies={...}.
    response = requests.post(
        SEARCH_URL,
        data=payload,
        headers=HEADERS,
        timeout=REQUEST_TIMEOUT,
    )
    response.raise_for_status()
    # .json() decodes the body into nested dicts/lists
    # (equivalent to json.loads(response.text)).
    body = response.json()
    try:
        return body['content']['positionResult']['result']
    except (KeyError, TypeError):
        # Report the payload actually received instead of a bare KeyError.
        raise ValueError(
            'unexpected response for page %s: %r' % (page, body)
        )


def format_position(position):
    """Render one posting dict as a single summary line."""
    return POSITION_TEMPLATE % tuple(
        position[field] for field in POSITION_FIELDS
    )


def crawl(first_page=FIRST_PAGE, last_page=LAST_PAGE, keyword=u'python'):
    """Fetch pages ``first_page..last_page`` (inclusive) and return all lines."""
    lines = []
    for page in range(first_page, last_page + 1):
        positions = fetch_positions(page, keyword)
        print(u'正在爬取拉钩网第 %s 页の %s 职位的信息...' % (page, keyword))
        lines.extend(format_position(position) for position in positions)
    return lines


def save_lines(lines, path=OUTPUT_PATH):
    """Append ``lines`` to ``path``, each prefixed with a 3-digit counter."""
    # io.open with an explicit encoding writes UTF-8 on Python 2 and 3 alike,
    # independent of the platform's default encoding.
    with io.open(path, 'a', encoding='utf-8') as out:
        out.writelines(
            u'%03d %s\n' % (number, line)
            for number, line in enumerate(lines, start=1)
        )


def main():
    """Crawl every configured page and append the results to OUTPUT_PATH."""
    save_lines(crawl())


if __name__ == '__main__':
    sys.exit(main())
原创粉丝点击