渲染类

来源:互联网 发布:mac上编写c语言的软件 编辑:程序博客网 时间:2024/06/12 22:23
# -*- coding: utf-8 -*-
# Drive a Qt WebKit view to load a page, interact with its DOM and scrape
# the rendered result.
import csv
import re
import time

try:
    from PySide.QtGui import QApplication
    from PySide.QtCore import QUrl, QEventLoop, QTimer
    from PySide.QtWebKit import QWebView
except ImportError:
    # Fall back to the PyQt4 bindings, which expose the same names.
    from PyQt4.QtGui import QApplication
    from PyQt4.QtCore import QUrl, QEventLoop, QTimer
    from PyQt4.QtWebKit import QWebView
import lxml.html


class BrowserRender(QWebView):
    """A QWebView that owns its QApplication and adds scraping helpers."""

    def __init__(self, display=True):
        """Create the Qt application and the web view.

        Args:
            display: when true, show the browser window.
        """
        # Qt requires the application object to exist before any widget.
        self.app = QApplication([])
        QWebView.__init__(self)
        if display:
            self.show()  # show the browser

    def open(self, url, timeout=60):
        """Load ``url`` and block until it finishes or the timeout expires.

        Args:
            url: address to load.
            timeout: maximum number of seconds to wait.

        Returns:
            The page HTML if ``loadFinished`` fired before the timer did;
            otherwise a message is printed and ``None`` is returned.
        """
        loop = QEventLoop()
        timer = QTimer()
        timer.setSingleShot(True)
        # Whichever fires first -- the timer or loadFinished -- ends the
        # local event loop below.
        timer.timeout.connect(loop.quit)
        self.loadFinished.connect(loop.quit)
        self.load(QUrl(url))
        timer.start(timeout * 1000)  # QTimer intervals are in milliseconds
        loop.exec_()  # delay here until download finished
        if timer.isActive():
            # Timer still pending, so the loop was ended by loadFinished.
            timer.stop()
            return self.html()
        # Timed out.  Single-argument print() emits the same text on
        # Python 2 and Python 3.
        print('Request timed out: %s' % (url,))
        return None

    def html(self):
        """Shortcut to return the current HTML of the main frame."""
        return self.page().mainFrame().toHtml()

    def find(self, pattern):
        """Return all elements of the main frame matching ``pattern``."""
        return self.page().mainFrame().findAllElements(pattern)

    def attr(self, pattern, name, value):
        """Set attribute ``name`` to ``value`` on every matching element."""
        for e in self.find(pattern):
            e.setAttribute(name, value)

    def text(self, pattern, value):
        """Set the plain text of every matching element to ``value``."""
        for e in self.find(pattern):
            e.setPlainText(value)

    def click(self, pattern):
        """Click every matching element by running ``this.click()`` on it."""
        for e in self.find(pattern):
            e.evaluateJavaScript("this.click()")

    def wait_load(self, pattern, timeout=60):
        """Wait for ``pattern`` to appear in the page and return the matches.

        Args:
            pattern: selector passed to :meth:`find`.
            timeout: maximum number of seconds to keep polling.

        Returns:
            The matches as soon as :meth:`find` yields a truthy result;
            otherwise a message is printed and ``None`` is returned.
        """
        deadline = time.time() + timeout
        while time.time() < deadline:
            # Let Qt process pending events so the page can keep updating
            # between polls.
            self.app.processEvents()
            matches = self.find(pattern)
            if matches:
                return matches
        print('Wait load timed out')
        return None


def main():
    """Run the example search and write each result's text to countries.csv."""
    br = BrowserRender()
    br.open('http://example.webscraping.com/search')
    br.attr('#search_term', 'value', '.')
    br.text('#page_size option:checked', '1000')
    br.click('#search')
    elements = br.wait_load('#results a')
    # The context manager closes (and therefore flushes) the file even if
    # writing a row raises.
    with open('countries.csv', 'w') as output:
        writer = csv.writer(output)
        for country in [e.toPlainText().strip() for e in elements]:
            writer.writerow([country])


if __name__ == '__main__':
    main()
原创粉丝点击