批量下载matplotlib网站实例源码python脚本

来源:互联网 发布:丁丁软件 编辑:程序博客网 时间:2024/06/05 15:46
模块功能描述:
该模块是为了批量下载matplotlib网站实例源码而设计。
getUrlList():获取每个实例的url列表。

GetDemoDownload():下载每个实例的源码,包括py与ipynb文件。

# coding=utf8
"""Batch-download the example source files from the matplotlib website.

Author: ewang
Date: 2017/8/14

getUrlList() collects the URL of every gallery example, and
GetDemoDownload() saves the source files each example page links to
(.py and .ipynb) into a ``sourceCode`` directory next to this script.
"""
import contextlib
import os
import re

try:
    import urllib2  # Python 2
except ImportError:
    # Python 3 moved urlopen() here; alias it so the call sites stay the same.
    import urllib.request as urllib2

from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait

# Matches the gallery links on the pyplot.subplots API page; group 1 is the
# example path relative to the gallery root, group 2 is the link caption.
_GALLERY_LINK_RE = re.compile(
    'class="reference internal" href="../../gallery/(.*?)">'
    '<span class="std std-ref">(.*?)</span></a>',
    re.S,
)


def PATH(p):
    """Return the absolute form of *p*, resolved relative to this script."""
    return os.path.abspath(os.path.join(os.path.dirname(__file__), p))


def _readUrl(url):
    """Return the raw body of *url*, always closing the connection."""
    with contextlib.closing(urllib2.urlopen(url)) as response:
        return response.read()


class downMatplotlibDemo():
    """Download every example linked from the pyplot.subplots API page.

    Instantiating the class runs the whole job: it builds the URL list,
    opens Chrome, downloads the files and shuts the browser down again.
    """

    def __init__(self):
        self.urlList = self.getUrlList()
        self.driver = webdriver.Chrome()
        try:
            self.driver.maximize_window()
            self.GetDemoDownload()
        finally:
            # quit() (rather than close()) also ends the chromedriver
            # process, and the finally makes sure it runs on errors too.
            self.driver.quit()

    def getUrlList(self):
        """Return the list of gallery example page URLs.

        Returns:
            A list of absolute URLs; empty when the index page cannot be
            fetched or contains no gallery links.
        """
        url = ("http://matplotlib.org/devdocs/api/_as_gen/"
               "matplotlib.pyplot.subplots.html#matplotlib.pyplot.subplots")
        matutl = "http://matplotlib.org/devdocs/gallery/"
        try:
            pageContent = _readUrl(url)
        except Exception as e:
            print("Create UrlList Error: %s" % (e,))
            return []
        if not pageContent:
            return []
        # urlopen() yields bytes; decode so the text pattern can be applied
        # on Python 3 as well as Python 2.
        pageText = pageContent.decode("utf-8", "replace")
        return [matutl + href for href, _caption in
                _GALLERY_LINK_RE.findall(pageText)]

    def GetDemoDownload(self):
        """Visit every example page and save the files behind its
        "Download" links.

        Prints one progress line per page: the page counter and the last
        download URL seen on that page (``None`` when there was none).
        """
        targetDir = PATH('./sourceCode/')
        for count, url in enumerate(self.urlList, 1):
            self.driver.get(url)
            # Scroll far down so the download links at the page bottom load.
            self.driver.execute_script("var q=document.body.scrollTop=200000")
            downurl = None
            try:
                downLoadBtnList = WebDriverWait(self.driver, 5).until(
                    lambda driver: driver.find_elements(
                        By.PARTIAL_LINK_TEXT, 'Download'))
            except Exception as e:
                print("Download not exist: %s" % (e,))
                # Do not reuse the links found on the previous page.
                downLoadBtnList = []
            for downLoad in downLoadBtnList:
                # Handle each link on its own so one failed file does not
                # abort the remaining downloads of this page.
                try:
                    downurl = downLoad.get_attribute("href")
                    self._saveDemoFile(downurl, targetDir)
                except Exception as e:
                    print("Download List: %s" % (e,))
            print("%d \t url= %s" % (count, downurl))

    def _saveDemoFile(self, downurl, targetDir):
        """Download *downurl* into *targetDir* unless the file already exists."""
        if not downurl:
            print("the download url is null!")
            return
        fileName = downurl.split("/")[-1]
        if not fileName:
            print("The file name is null!")
            return
        fileWithPath = os.path.join(targetDir, fileName)
        if os.path.exists(fileWithPath):
            print("the file with path is exists....")
            return
        # Fetch before opening the target so a failed download does not
        # leave an empty file that later runs would skip as "exists".
        pageConet = _readUrl(downurl)
        if not os.path.isdir(targetDir):
            os.makedirs(targetDir)
        with open(fileWithPath, "wb") as FH:
            FH.write(pageConet)


if __name__ == "__main__":
    downMatplotlibDemo()


原创粉丝点击