python 爬虫demo

来源:互联网 发布:muse for mac 编辑:程序博客网 时间:2024/06/06 00:50

Python 3.3 版本的代码如下:
import re
from urllib import request

def getHtml(url):
    """Fetch *url* and return the response body decoded as UTF-8 text.

    The request is sent with a fixed set of browser-like headers
    (User-Agent, Referer, Connection).

    Args:
        url: Address of the page to download.

    Returns:
        The response body as a ``str``, decoded with UTF-8.

    Raises:
        urllib.error.URLError: If the request fails.
        UnicodeDecodeError: If the body is not valid UTF-8.
    """
    headers = {
        'User-Agent': r'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) ',
        'Referer': r'http://www.lagou.com/zhaopin/Python/?labelWords=label',
        'Connection': 'keep-alive',
    }
    req = request.Request(url, headers=headers)
    # The response object is a context manager; using it guarantees the
    # underlying connection is closed even if read() raises.
    with request.urlopen(req) as response:
        page = response.read()
    return page.decode('utf-8')

def getImg(html):
    """Return every ``.jpg`` URL found in ``src="..." pic_ext`` attributes.

    Args:
        html: Page markup to search.

    Returns:
        A list of the captured ``src`` values, in document order; empty
        when nothing matches.
    """
    # The dot before "jpg" is escaped so that only a literal ".jpg"
    # suffix matches (an unescaped dot would also accept e.g. "xjpg").
    reg = r'src="(.+?\.jpg)" pic_ext'
    imgre = re.compile(reg)
    imglist = imgre.findall(html)
    return imglist

# Download the thread page, print the image URLs it contains and save
# them to f.txt as a single space-separated line.
html = getHtml("http://tieba.baidu.com/p/2460150866")
# Run the extraction once and reuse the result for both outputs.
imglist = getImg(html)
print(imglist)
# Join the list into one string before writing it out.
html2 = " ".join(imglist)
# "with" closes the file even if write() raises; the encoding is pinned
# so the output does not depend on the platform default.
with open('f.txt', 'w', encoding='utf-8') as f:
    f.write(html2)


Python 2.7 版本的代码如下:
import re
import urllib

def getHtml(url):
    """Fetch *url* and return the undecoded response body (Python 2.7).

    Args:
        url: Address of the page to download.

    Returns:
        Whatever ``page.read()`` yields for the opened URL; no decoding
        is applied.
    """
    page = urllib.urlopen(url)
    # The Python 2 response object is not a context manager, so close it
    # explicitly to release the connection even if read() raises.
    try:
        html = page.read()
    finally:
        page.close()
    return html

def getImg(html):
    """Return every ``.jpg`` URL found in ``src="..." pic_ext`` attributes.

    Args:
        html: Page markup to search.

    Returns:
        A list of the captured ``src`` values, in document order; empty
        when nothing matches.
    """
    # The dot before "jpg" is escaped so that only a literal ".jpg"
    # suffix matches (an unescaped dot would also accept e.g. "xjpg").
    reg = r'src="(.+?\.jpg)" pic_ext'
    imgre = re.compile(reg)
    imglist = imgre.findall(html)
    return imglist

# Download the thread page, print the image URLs it contains and save
# them to f.txt as a single space-separated line.
html = getHtml("http://tieba.baidu.com/p/2460150866")
# Run the extraction once and reuse the result for both outputs.
imglist = getImg(html)
# Single-argument print(...) behaves the same under Python 2.7's print
# statement, so this line stays valid for the 2.7 interpreter.
print(imglist)
html2 = " ".join(imglist)  # convert the list to a string
# "with" closes the file even if write() raises.
with open('f.txt', 'w') as f:
    f.write(html2)

原创粉丝点击