来源:互联网 发布:手机红外线水平仪软件 编辑:程序博客网 时间:2024/04/29 07:18
# coding: utf-8
"""Console reader for the "hot" list of m.qiushibaike.com.

A background thread keeps up to two parsed pages buffered while the main
thread prints one entry at a time. Press Enter for the next entry or type
``quit`` to stop.

The module runs on Python 2 (the original target) and on Python 3; the
small import shims below paper over the renamed standard-library modules.
"""
import re
import time
import urllib
from contextlib import closing

try:  # Python 2
    from urllib2 import urlopen
except ImportError:  # Python 3
    from urllib.request import urlopen

try:  # Python 2
    import thread
except ImportError:  # Python 3
    import _thread as thread

try:  # Python 2
    read_input = raw_input
except NameError:  # Python 3
    read_input = input


class HTML_Tool:
    """Strip layout noise from an HTML fragment."""

    # Tabs, newlines, spaces and <a ...>/<img ...> tags are deleted outright.
    BgnCharToNoneRex = re.compile("(\t|\n| |<a.*?>|<img.*?>)")
    # <br/> is turned into a real line break.
    CharToNewLineRex = re.compile("(<br/>)")

    def Replace_Char(self, x):
        """Return *x* with noise removed and ``<br/>`` replaced by newlines."""
        x = self.BgnCharToNoneRex.sub("", x)
        x = self.CharToNewLineRex.sub("\n", x)
        return x


class HTML_Model:
    """Fetch, buffer and interactively display pages of entries.

    Attributes:
        page: number of the next page the loader will request.
        pages: buffer of parsed pages waiting to be shown (at most two).
        myTool: an ``HTML_Tool`` instance.
        enable: ``True`` while the loader and display loops should run.
    """

    # Captures the ``title`` attribute and the inner text of every
    # ``<div class="content" ...>`` element; DOTALL because entries span lines.
    ItemRex = re.compile(
        '<div.*?class="content".*?title="(.*?)">(.*?)</div>', re.S
    )

    def __init__(self):
        self.page = 1
        self.pages = []
        # NOTE(review): myTool is created but never applied to the scraped
        # text anywhere in this file -- confirm whether Replace_Char should
        # be used on the entry bodies before printing.
        self.myTool = HTML_Tool()
        self.enable = False

    def ParsePage(self, html):
        """Extract ``[title, body]`` pairs from the decoded page *html*.

        Newlines are removed from both fields. Returns an empty list when
        nothing matches.
        """
        return [
            [title.replace("\n", ""), body.replace("\n", "")]
            for title, body in self.ItemRex.findall(html)
        ]

    def GetPage(self, page, timeout=10):
        """Download hot-list page *page* and return its parsed entries.

        Args:
            page: page number, as a string or an int.
            timeout: socket timeout in seconds, so a stalled connection
                cannot block the loader thread forever.

        Returns:
            A list of ``[title, body]`` lists, see ``ParsePage``.

        Raises:
            Whatever ``urlopen`` raises on network failure, or
            ``UnicodeDecodeError`` if the response is not valid UTF-8.
        """
        myUrl = "http://m.qiushibaike.com/hot/page/" + str(page)
        # closing() guarantees the connection is released even if read()
        # fails, and works with Python 2 responses that are not context
        # managers.
        with closing(urlopen(myUrl, timeout=timeout)) as myResponse:
            myPage = myResponse.read()
        unicodePage = myPage.decode("utf-8")
        return self.ParsePage(unicodePage)

    def LoadPage(self):
        """Loader loop: keep up to two pages buffered while ``enable`` is set.

        Intended to run in a background thread. Returns as soon as
        ``enable`` is found to be false.
        """
        while self.enable:
            if len(self.pages) < 2:
                try:
                    myPage = self.GetPage(str(self.page))
                except Exception:
                    # Thread boundary: report and retry, but let
                    # KeyboardInterrupt/SystemExit propagate and pause so
                    # a dead link is not hammered in a tight loop.
                    print(u"无法链接!")
                    time.sleep(1)
                else:
                    self.page += 1
                    self.pages.append(myPage)
            else:
                time.sleep(1)

    def ShowPage(self, q, page):
        """Print the entries of one page, waiting for Enter after each.

        Args:
            q: list of ``[title, body]`` entries.
            page: page number shown in front of each title.

        Typing ``quit`` (or closing stdin) clears ``enable`` and stops.
        """
        for items in q:
            print(u"第%d页 %s" % (page, items[0]))
            print(items[1])
            try:
                myInput = read_input()
            except EOFError:
                # Input stream closed: treat it like an explicit quit.
                myInput = "quit"
            if myInput == "quit":
                self.enable = False
                break

    def Start(self):
        """Start the background loader and run the display loop."""
        self.enable = True
        page = self.page
        print(u'正在加载中请稍候......')
        thread.start_new_thread(self.LoadPage, ())
        while self.enable:
            if self.pages:
                nowPage = self.pages.pop(0)
                self.ShowPage(nowPage, page)
                page += 1
            else:
                # Yield instead of spinning while the loader is fetching.
                time.sleep(0.1)


if __name__ == "__main__":
    print(u'请按下回车浏览内容:')
    read_input(' ')
    myModel = HTML_Model()
    myModel.Start()

0 0
原创粉丝点击