BeautifulSoup 小用

来源:互联网 发布:mac 卸载 编辑:程序博客网 时间:2024/04/29 14:37

用 BeautifulSoup 爬取了伯克利大学 CS164(Programming Languages and Compilers)课程的课件。


"""Download the lecture PDFs linked from the Berkeley CS164 (fa11) lectures page.

Each PDF is saved in the current working directory as
``<href without .pdf>-<lecture title>.pdf``, where the lecture title is the
text of the element preceding the link's parent, with ``:`` replaced by
spaces.
"""
import re

import requests
from bs4 import BeautifulSoup

# Directory URL that both the index page and the relative PDF links live under.
BASE_URL = "http://inst.eecs.berkeley.edu/~cs164/fa11/lectures/"

# Escaped dot and end anchor: the code below strips the extension with
# [:-4], so the href must really end in ".pdf".
LECTURE_HREF = re.compile(r"lecture[0-9]*\.pdf$")

index_page = requests.get(BASE_URL + "index.html")
# Fail loudly instead of parsing an HTTP error page.
index_page.raise_for_status()
soup = BeautifulSoup(index_page.text, "html.parser")

for link in soup.find_all(name="a", attrs={"href": LECTURE_HREF}):
    href = link["href"]

    # NOTE(review): assumes the lecture title sits in the sibling element
    # immediately before the link's parent -- confirm against the page markup.
    title_text = link.find_parent().find_previous_sibling().get_text()
    # ':' is not allowed in file names on some platforms; swap it for a space.
    # (Equivalent to the original reduce(), which is not a builtin on Python 3.)
    title = " ".join(title_text.split(":"))
    file_name = href[:-4] + "-" + title + ".pdf"

    response = requests.get(BASE_URL + href, stream=True)
    # Do not save an HTTP error body under a .pdf name.
    response.raise_for_status()

    with open(file_name, "wb") as out_file:
        for chunk in response.iter_content(chunk_size=1024):
            if chunk:  # skip keep-alive chunks, which arrive empty
                out_file.write(chunk)


0 0
原创粉丝点击