Python 爬虫学习1

来源:互联网 发布:圣思园java视频 编辑:程序博客网 时间:2024/05/21 10:34
from bs4 import BeautifulSoup
import requests
import time

# First page of the New York City attractions listing.
# Not used by the crawl loop below, which iterates over `urls`.
url = 'http://www.tripadvisor.cn/Attractions-g60763-Activities-New_York_City_New_York.html'

# Paginated listing pages: the `oa{}` placeholder takes the offsets
# 0, 30, 60, ..., 1050.
urls = [
    'http://www.tripadvisor.cn/Attractions-g60763-Activities-oa{}-New_York_City_New_York.html#ATTRACTION_LIST'.format(str(i))
    for i in range(0, 1080, 30)
]


def get_attractions(url, data=None, timeout=10, delay=4):
    """Fetch one listing page and print a dict for each attraction found.

    Args:
        url: Address of the listing page to download.
        data: Unused; kept only so existing callers that pass it keep working.
        timeout: Seconds to wait for the server before ``requests`` gives up.
            Without it a stalled connection would block the crawl forever.
        delay: Seconds to sleep after the request, to throttle the crawl.

    Raises:
        requests.RequestException: Propagated from ``requests.get`` on
            connection failure or timeout.
    """
    wb_data = requests.get(url, timeout=timeout)
    time.sleep(delay)

    soup = BeautifulSoup(wb_data.text, 'lxml')
    titles = soup.select('div.property_title > a[target="_blank"]')
    imgs = soup.select('img[width="160"]')
    cates = soup.select('div.p13n_reasoning_v2 > a')

    # NOTE: zip() stops at the shortest list, so if the three selectors match
    # different numbers of elements, items are dropped or paired by position
    # only -- verify against the page markup.
    for title, img, cate in zip(titles, imgs, cates):
        record = {
            'title': title.get_text(),
            'img': img.get('src'),
            'cate': list(cate.stripped_strings)
        }
        print(record)


# Crawl every listing page in order.
for i in urls:
    get_attractions(i)


 

0 0
原创粉丝点击