爬虫06 代码封装(以爬虫04为基础)

来源:互联网 发布:章淘客cms 编辑:程序博客网 时间:2024/05/20 05:54

爬虫04的代码对于 coder 来说更易读一些,但从面向对象的角度来看还需要进行封装,这里仅以此作为示范。

# -*- coding: utf-8 -*-
import urllib
import urllib2
import re


class QSBK:
    """Fetch one listing page from qiushibaike.com and save the matched text.

    NOTE: this is Python 2 code; it depends on the ``urllib2`` module.
    """

    def __init__(self):
        """Set the page number, the listing URL built from it, and the headers."""
        # Page number substituted into the listing URL below.
        self.page = 1
        self.url = 'http://www.qiushibaike.com/8hr/page/%d/?s=4908781' % self.page
        # Firefox User-Agent string sent with every request.
        self.user_agent = "Mozilla/5.0 (Windows NT 6.1; WOW64; rv:47.0) Gecko/20100101 Firefox/47.0"
        self.headers = {'User-Agent': self.user_agent}

    def get_response(self, url, headers):
        """Request ``url`` with ``headers`` and return the raw response body.

        Args:
            url: Address to fetch.
            headers: Dict of HTTP request headers.

        Returns:
            The body exactly as returned by ``response.read()``.

        Raises:
            urllib2.URLError: Propagated from ``urllib2.urlopen`` on failure.
        """
        # Use the arguments rather than self.url / self.headers so the
        # parameters actually take effect; the in-class caller passes the
        # instance values, so its behaviour is unchanged.
        request = urllib2.Request(url, headers=headers)
        response = urllib2.urlopen(request)
        try:
            return response.read()
        finally:
            # Release the connection even when read() raises.
            response.close()

    def get_joke(self):
        """Download ``self.url`` and return every capture of the content regex.

        Returns:
            A list with the text captured after each ``class="content"`` div
            opening tag, in page order (empty when nothing matches).
        """
        back = self.get_response(self.url, self.headers)
        imglist = re.findall(r'<div[^>]class="content">\n\n([^<]+)<[^>]+.+\n\n[^<]', back)
        return imglist

    def write_joke(self):
        """Print each extracted fragment and write it to ``糗事百科.txt``.

        The file is opened in ``'w'`` mode, so earlier content is replaced.
        Fragments are written back to back with no separator added.
        """
        imglist = self.get_joke()
        # The context manager guarantees the file is flushed and closed,
        # including when printing or writing raises part-way through.
        with open('糗事百科' + '.txt', 'w') as f:
            for joke in imglist:
                # Parenthesised single argument: prints the same text as the
                # Python 2 ``print joke`` statement.
                print(joke)
                f.write(joke)

    def _main_(self):
        """Entry point: run the fetch, print and save pipeline once."""
        self.write_joke()


# Executed at module level, as in the original script.
spyider = QSBK()
spyider._main_()





0 0
原创粉丝点击