基于 Python requests 包的代理 IP + 请求头(header)+ 延时爬取的简单实现

来源:互联网 发布:Java毫秒和日期的换算 编辑:程序博客网 时间:2024/06/06 03:33

代码如下。访问 http://httpbin.org/ip 会返回当前请求的来源 IP,可用来确认代理是否生效。

# coding=utf-8
"""Request http://httpbin.org/ip through a list of HTTP proxies.

Every request carries a browser-like User-Agent header and is followed by a
random 1-3 second pause, so the proxies are tried one at a time at a
throttled pace.
"""
import random
import time

import requests

# Target URL requested through each proxy.
CHECK_URL = 'http://httpbin.org/ip'

# Header sent with every request so it looks like it comes from a browser.
HEADERS = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64)'}

# Candidate proxies in "host:port" form, tried in order.
PROXY_ADDRESSES = (
    '114.239.3.149:808',
    '61.232.254.39:3128',
    '218.18.232.29:8080',
)

# Seconds to wait on a proxy before giving up; without a timeout a dead
# proxy can block the script indefinitely.
REQUEST_TIMEOUT = 10


def dl():
    """Try each proxy in PROXY_ADDRESSES once and print the outcome.

    For every proxy the address is printed, then CHECK_URL is requested
    through it. On success 'finish' and the response body are printed; on
    any requests-level failure (connection error, timeout, bad proxy, ...)
    'no proxies' is printed instead. After each attempt the function sleeps
    a random 1-3 seconds and reports the delay.

    Returns:
        None. All results are written to standard output.
    """
    for address in PROXY_ADDRESSES:
        # Only plain-HTTP traffic is routed through the proxy, which is
        # sufficient because CHECK_URL is an http:// URL.
        proxies_l = {'http': address}
        print(proxies_l['http'])
        try:
            req = requests.get(
                CHECK_URL,
                headers=HEADERS,
                proxies=proxies_l,
                timeout=REQUEST_TIMEOUT,
            )
        except requests.RequestException:
            # Catch only network/HTTP failures so Ctrl-C and genuine
            # programming errors are not silently swallowed.
            print('no proxies')
        else:
            print('finish')
            print(req.text)

        # Random pause between attempts to throttle the request rate.
        sleep_time = random.randint(1, 3)
        time.sleep(sleep_time)
        print('Wait%ds' % sleep_time)


if __name__ == '__main__':
    dl()
原创粉丝点击