Python Proxy Implementation




I've spent the past couple of days looking at proxy IP handling in Python, and I've finally nailed it down, haha. Here's the code:

from bs4 import BeautifulSoup
import requests
import random
import urllib2  # Python 2; on Python 3 the equivalent module is urllib.request


def get_ip_list(url, headers):
    """Scrape the proxy list page and return entries as 'ip:port' strings."""
    web_data = requests.get(url, headers=headers)
    soup = BeautifulSoup(web_data.text, 'lxml')
    ips = soup.find_all('tr')
    ip_list = []
    for i in range(1, len(ips)):  # start at 1 to skip the table header row
        ip_info = ips[i]
        tds = ip_info.find_all('td')
        # On xicidaili the second and third cells hold the IP and the port
        ip_list.append(tds[1].text + ':' + tds[2].text)
    return ip_list


def get_random_ip(ip_list):
    """Pick one proxy at random and wrap it in the proxies-dict format."""
    proxy_list = []
    for ip in ip_list:
        proxy_list.append('http://' + ip)
    proxy_ip = random.choice(proxy_list)
    proxies = {'http': proxy_ip}
    return proxies


if __name__ == '__main__':
    url = 'http://www.xicidaili.com/nn/'
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/53.0.2785.143 Safari/537.36'
    }
    ip_list = get_ip_list(url, headers=headers)
    proxies = get_random_ip(ip_list)
    print(proxies)

    # Route a urllib2 request through the chosen proxy
    enable_proxy = True
    proxy_handler = urllib2.ProxyHandler(proxies)
    null_proxy_handler = urllib2.ProxyHandler({})
    if enable_proxy:
        opener = urllib2.build_opener(proxy_handler)
    else:
        opener = urllib2.build_opener(null_proxy_handler)
    # install_opener must run in both branches, not only inside the else
    urllib2.install_opener(opener)
    response = urllib2.urlopen('http://www.baidu.com', timeout=10)
    print(response.read())
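The same proxies dict also plugs straight into requests, which the script already imports. A minimal sketch of that route, reusing get_random_ip and ip_list from above (the httpbin.org test URL is my choice, not from the original post):

import requests

# httpbin.org/ip echoes back the client IP the server sees, so it is a
# quick way to confirm the traffic really went through the proxy.
proxies = get_random_ip(ip_list)
try:
    resp = requests.get('http://httpbin.org/ip', proxies=proxies, timeout=10)
    print(resp.text)
except requests.exceptions.RequestException as e:
    # Free proxies drop offline all the time, so treat failures as routine.
    print('proxy request failed:', e)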


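One caveat: at any given moment most entries on a free list like xicidaili are dead, so a randomly chosen proxy will often just time out. A rough pre-filter helps; this sketch is my own addition (the probe URL and the 3-second timeout are arbitrary choices):

import requests

def filter_alive(ip_list, timeout=3):
    """Keep only the proxies that answer a quick probe request."""
    alive = []
    for ip in ip_list:
        proxies = {'http': 'http://' + ip}
        try:
            # Any cheap page works as a probe; the short timeout keeps
            # the scan from hanging on dead hosts.
            requests.get('http://www.baidu.com', proxies=proxies, timeout=timeout)
            alive.append(ip)
        except requests.exceptions.RequestException:
            pass
    return alive

Running filter_alive(ip_list) before get_random_ip means the random pick comes from proxies that were alive a moment ago.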