Python 爬取糗事百科

来源:互联网 发布:mac air使用教程视频 编辑:程序博客网 时间:2024/06/05 18:58

转载:静觅 »Python爬虫实战一之爬取糗事百科段子

#!/usr/bin/env python
# -*- coding: utf-8 -*-
# @Time     :2017/8/21 23:32
# @Author   :luoyu_bie
# @File     :QsBaike.py
# @Software :PyCharm Community Edition
"""Fetch one page of "hot" posts from qiushibaike.com and print them.

For every post matched on the page the script prints the author, the post
text and the number captured from the post's stats section.
"""
import re

try:
    import urllib2  # Python 2
except ImportError:
    # Python 3: urllib.request exposes the same Request/urlopen names.
    import urllib.request as urllib2

page = 1
url = "http://www.qiushibaike.com/hot/page/" + str(page)
agent = ("Mozilla/5.0 (Windows NT 6.3; WOW64; rv:54.0) "
         "Gecko/20100101 Firefox/54.0")
# The request is sent with a browser User-Agent header.
headers = {"User-Agent": agent}

# Compiled once at import time.  Capture groups, in order:
#   1. text of the <h2> inside the "author clearfix" div
#   2. text of the first <span> inside the "content" div
#   3. digits of the first <i class="number"> after the "stats" div
#      (presumably the like count -- the script labels it as such)
# re.S lets "." cross line breaks, since one post spans many lines.
# Group 3 uses \d+ so multi-digit counts are captured whole; a single \d
# would make the lazy ".*?" skip forward to some later one-digit counter.
pattern = re.compile(
    r'<div class="author clearfix">.*?<h2>(.*?)</h2>'
    r'.*?<div class="content">.*?<span>(.*?)</span>'
    r'.*?<div class="stats">.*?<i class="number">(\d+)</i>',
    re.S,
)


def fetch_html(page_url=None, request_headers=None):
    """Download a page and return its body as a native ``str``.

    Args:
        page_url: URL to fetch; defaults to the module-level ``url``.
        request_headers: HTTP headers to send; defaults to the module-level
            ``headers``.

    Returns:
        The response body.  On Python 2 this is the raw byte string that
        ``read()`` returns; on Python 3 the bytes are decoded as UTF-8
        (assumes the site serves UTF-8 -- TODO confirm).

    Raises:
        Whatever ``urlopen`` raises on network or HTTP errors.
    """
    if page_url is None:
        page_url = url
    if request_headers is None:
        request_headers = headers

    req = urllib2.Request(page_url, headers=request_headers)
    response = urllib2.urlopen(req)
    # Python 2 responses are not context managers, so close explicitly.
    try:
        html = response.read()
    finally:
        response.close()

    if not isinstance(html, str):
        # Python 3 returns bytes; the regex and output strings are text.
        html = html.decode("utf-8", "replace")
    return html


def parse_items(html):
    """Extract ``(author, content, likes)`` tuples from page HTML.

    Args:
        html: page markup as a native ``str``.

    Returns:
        A list with one tuple per matched post.  ``author`` and ``content``
        have surrounding whitespace stripped, ``<br/>`` tags in ``content``
        are turned into newlines, and ``likes`` is the matched digit string.
        The list is empty when nothing matches.
    """
    return [
        (author.strip(), content.strip().replace("<br/>", "\n"), likes)
        for author, content, likes in pattern.findall(html)
    ]


def format_item(item):
    """Render one ``(author, content, likes)`` tuple as display text.

    The result has no trailing newline; ``print`` supplies it.
    """
    author, content, likes = item
    return "\n".join([
        "发布者:" + author,
        "段子:",
        content,
        "点赞数:" + likes,
        "*" * 10,
    ])


def main():
    """Fetch the configured page and print every post found on it."""
    for item in parse_items(fetch_html()):
        # A single parenthesised argument prints identically on Python 2
        # (print statement) and Python 3 (print function).
        print(format_item(item))


if __name__ == "__main__":
    main()