爬虫05 BeautifulSoup4初体验

来源:互联网 发布:国际通用聊天软件 编辑:程序博客网 时间:2024/06/05 03:54
# -*- coding: utf-8 -*-
"""Download one listing page from qiushibaike.com and save its post texts.

This is a Python 2 script (it depends on ``urllib2``).  It requests a single
page with a browser-like User-Agent header, parses the HTML with
BeautifulSoup's ``html.parser`` backend, prints the text of every
``<div class="content">`` element and writes the same text to
``糗事百科<page>.txt`` in the current working directory.
"""
import io
import sys

# NOTE(review): Python 2-only hack that changes the interpreter-wide default
# codec.  The file write below no longer relies on it (the file is opened
# with an explicit encoding), but printing unicode to a non-tty stdout may
# still need it, so it is kept to preserve the original behaviour.
reload(sys)
sys.setdefaultencoding("utf-8")

import urllib
import urllib2
import re

from bs4 import BeautifulSoup

# Page number; used both in the request URL and in the output file name.
page = 1
url = 'http://www.qiushibaike.com/8hr/page/%d/?s=4908781' % page
user_agent = "Mozilla/5.0 (Windows NT 6.1; WOW64; rv:47.0) Gecko/20100101 Firefox/47.0"
headers = {'User-Agent': user_agent}

request = urllib2.Request(url, headers=headers)
response = urllib2.urlopen(request)
try:
    back = response.read()
finally:
    # Python 2 urlopen() results are not context managers; close explicitly.
    response.close()

soup = BeautifulSoup(back, 'html.parser', from_encoding='utf-8')

# A plain string as the second positional argument of find_all() is matched
# against the CSS class, i.e. this selects <div class="content"> elements.
contents = soup.find_all("div", "content")

# io.open gives an explicit UTF-8 text file on Python 2, and the with-block
# guarantees the file is flushed and closed even if a write fails.
with io.open("糗事百科" + str(page) + ".txt", "w", encoding="utf-8") as f:
    for content in contents:
        text = content.get_text()
        print(text)
        f.write(text)
0 0
原创粉丝点击