GitHub目录下载

来源:互联网 发布:php 单双引号转义 编辑:程序博客网 时间:2024/06/05 10:42

有时只想下载GitHub上某个大项目下面的一个文件夹,却要把整个项目clone下来,真心麻烦;当然也可以使用svn的项目检出功能。本人不喜欢安装太多软件,就用Python写了一个脚本来完成。

# Download a single directory of a GitHub repository by scraping its web pages.
import os
import sys
import time
from urllib.parse import urljoin

import requests
from bs4 import BeautifulSoup


class DownloadDir(object):
    """Recursively download one directory of a GitHub repository.

    The directory listing pages are fetched over HTTP and parsed with
    BeautifulSoup; every file that is found is written below ``storDir``
    using the path shown in the page's breadcrumb.

    NOTE(review): the CSS selectors used here describe the GitHub page
    markup at the time the script was written -- confirm them against the
    current site before relying on this.
    """

    def __init__(self, storDir, repoUrl, timeout=30):
        """Store the configuration and open an HTTP session.

        Args:
            storDir: Local directory under which downloaded files are saved.
            repoUrl: URL of the GitHub directory page to start from.
            timeout: Seconds to wait for each HTTP request.
        """
        self.repoUrl = repoUrl
        self.storDir = storDir
        self.timeout = timeout
        self.sess = requests.Session()

    def run(self):
        """Download everything reachable from ``repoUrl``."""
        self._run(self.repoUrl)

    def _run(self, url):
        """Fetch ``url``; recurse into it if it is a directory, else save it.

        Raises:
            requests.HTTPError: If the page cannot be fetched successfully.
        """
        response = self.sess.get(url, timeout=self.timeout)
        response.raise_for_status()
        res = self.parseStruct(response.text)
        if isinstance(res, list):
            # Directory page: each entry is [name, isFile, currDir].
            base = url.rstrip('/')
            for entry in res:
                self._run(base + '/' + str(entry[0]))
        else:
            # File page: a one-item dict {filePath: [fileType, content]}.
            filePath, (fileType, content) = res.popitem()
            if fileType == 'url':
                # The link in the page may be relative; make it absolute.
                content = urljoin(response.url, content)
            self.writeContent(filePath, fileType, content)
        print(url)

    def parseContent(self, html):
        """Parse a file page.

        Returns:
            A dict with a single item mapping the breadcrumb path to
            ``[fileType, content]``. ``fileType`` is ``'url'`` (``content``
            is the link to the raw image) or ``'text'`` (``content`` is the
            text of the file view).

        Raises:
            IndexError: If the expected elements are not present in the page.
        """
        self.bs = BeautifulSoup(html, "html.parser")
        filePath = self.bs.select(".file-navigation .breadcrumb")[0].text.strip()
        images = self.bs.select(".file .data .image")
        if images:
            # select() returns a list of tags; take the first image container
            # and read the href attribute of its first link.
            content = images[0].select("a")[0]["href"]
            fileType = 'url'
        else:
            content = self.bs.select(".file .data")[0].text
            fileType = 'text'
        return {filePath: [fileType, content]}

    def parseStruct(self, html):
        """Parse a page that is either a directory listing or a file view.

        Returns:
            For a file page, the dict produced by ``parseContent``. For a
            directory page, a list of ``[name, isFile, currDir]`` entries.

        Raises:
            IndexError: If the breadcrumb is missing from a directory page.
        """
        self.bs = BeautifulSoup(html, "html.parser")
        if self.bs.select(".file .data"):
            return self.parseContent(html)

        currDir = self.bs.select(".file-navigation .breadcrumb")[0].text.strip()
        struct = []
        for node in self.bs.select(".file-wrap .files tr.js-navigation-item"):
            if not node.select("td.content"):
                continue
            name = node.select(".content")[0].text.strip()
            # Skip blank rows and parent-directory links so the recursion
            # can never climb out of the requested directory.
            if not name or name in ('.', '..'):
                continue
            isFile = not node.select(".octicon-file-directory")
            struct.append([name, isFile, currDir])
        return struct

    def writeContent(self, filePath, fileType, content):
        """Save one file below ``storDir``.

        Args:
            filePath: Repository-relative path taken from the breadcrumb.
            fileType: ``'url'`` to download ``content`` as binary data,
                anything else to write ``content`` as UTF-8 text.
            content: The URL to download or the text to write.

        Raises:
            ValueError: If ``filePath`` contains no usable path component.
            requests.HTTPError: If a binary download fails.
        """
        # The breadcrumb text can carry whitespace around the separators;
        # dropping empty, '.' and '..' parts also keeps the result inside
        # storDir.
        parts = [p.strip() for p in filePath.replace('\\', '/').split('/')]
        parts = [p for p in parts if p and p not in ('.', '..')]
        if not parts:
            raise ValueError("empty file path: %r" % (filePath,))
        filePath = os.path.join(self.storDir, *parts)
        print(filePath)

        if fileType == 'url':
            res = self.sess.get(content, timeout=self.timeout)
            res.raise_for_status()
            data, mode, encoding = res.content, 'wb', None
        else:
            # Explicit encoding: the platform default may be unable to
            # represent every character of the source file.
            data, mode, encoding = content, 'w', 'utf-8'

        parent = os.path.dirname(filePath)
        if parent:
            os.makedirs(parent, exist_ok=True)
        with open(filePath, mode, encoding=encoding) as f:
            f.write(data)


if __name__ == "__main__":
    dd = DownloadDir("c:\\", "https://github.com/baoboa/pyqt5/tree/master/examples")
    dd.run()
原创粉丝点击