利用 requests 和 BeautifulSoup 抓取豆瓣读书首页的热门图书数据

来源:互联网 发布:香港奥海城mac在哪 编辑:程序博客网 时间:2024/06/16 09:27
# Scrape the "popular books" section of the Douban Books home page, print a
# record for every book found, then print the title and author of each book
# whose rating is above 8.
from bs4 import BeautifulSoup
import requests

url = 'https://book.douban.com/'

# Tag hierarchy (CSS path) of one book's info box in the popular-books list.
# Relative to that box the script reads:
#   h4                                        -> title
#   p.author                                  -> author
#   p.entry-star-small > span.average-rating  -> rating
book_selector = ('#content > div > div.article > div.section.popular-books'
                 ' > div.bd > ul > li > div.info')

# A timeout keeps a stalled connection from hanging the script forever.
# NOTE(review): a browser-like User-Agent is sent on the assumption that the
# site may reject the default python-requests one -- confirm it is needed.
headers = {'User-Agent': 'Mozilla/5.0'}
wb_data = requests.get(url, headers=headers, timeout=10)
# Fail loudly on an HTTP error instead of parsing an error page.
wb_data.raise_for_status()
soup = BeautifulSoup(wb_data.text, 'lxml')

info = []
# Walk the books one by one and read all three fields from the SAME box.
# Selecting titles, authors and ratings as three independent lists and
# zipping them would pair fields from different books as soon as one book
# lacks an author or a rating.
for book in soup.select(book_selector):
    title = book.select_one('h4')
    author = book.select_one('p.author')
    grade = book.select_one('p.entry-star-small > span.average-rating')
    if title is None or author is None or grade is None:
        # Incomplete entry: skip it rather than emit a half-filled record.
        continue
    data = {
        'title': '《' + title.get_text().strip() + '》',
        'author': list(author.stripped_strings),
        'grade': grade.get_text().strip(),
    }
    print(data)
    info.append(data)

for i in info:
    try:
        rating = float(i['grade'])
    except ValueError:
        # Rating text is empty or not a number; it cannot be compared.
        continue
    if rating > 8:
        print(i['title'], i['author'])
阅读全文
0 0
原创粉丝点击