58批量数据爬取

来源:互联网 发布:广东人年轻 知乎 编辑:程序博客网 时间:2024/05/14 06:53

爬取58上100页的某商品分类信息,代码如下:

from bs4 import BeautifulSoup
import requests
import time
import lxml  # imported only to fail fast if the 'lxml' parser backend is missing


def _first_text(soup, selector):
    """Return the text of the first node matching *selector*, or '' if none.

    The original code indexed ``select(...)[0]`` directly, which raised
    IndexError on delisted/expired item pages and aborted the whole crawl;
    returning '' lets the loop continue past bad pages.
    """
    nodes = soup.select(selector)
    return nodes[0].text if nodes else ''


def get_links_from(who_sells, page=1):
    """Collect item detail-page URLs from one 58.com listing page.

    who_sells: 0 = personal-seller listings, any other value = merchant listings
    page: 1-based listing page index
    Returns a list of detail-page URLs with query strings stripped.
    """
    list_view = 'http://xa.58.com/pbdn/' + str(who_sells) + '/pn{}/'.format(str(page))
    # timeout so one dead server can't hang the 100-page crawl forever
    web_data = requests.get(list_view, timeout=10)
    soup = BeautifulSoup(web_data.text, 'lxml')
    # a[onclick] keeps real listing anchors and skips rows without the tracking attr
    return [link.get('href').split('?')[0]
            for link in soup.select('td.t > a[onclick]')]


def get_item_info(who_sells=0, page=1):
    """Fetch every item linked from one listing page and print its fields."""
    for url in get_links_from(who_sells, page):
        web_data = requests.get(url, timeout=10)
        time.sleep(1)  # be polite: at most ~1 request/second to the site
        soup = BeautifulSoup(web_data.text, 'lxml')
        data = {
            'title': _first_text(soup, 'div.box_left_top > h1'),
            'price': _first_text(soup, 'div.price_li > span > i'),
            # NOTE(review): 'palce_li' looks like a typo for 'place_li', but the
            # selector must match the site's actual class name — verify against
            # the live HTML before "fixing" it.
            'area': _first_text(soup, 'div.palce_li > span > i'),
            'look_time': _first_text(soup, 'div.box_left_top > p > span.look_time'),
            'want_person': _first_text(soup, 'div.box_left_top > p > span.want_person'),
            'cate': '个人' if who_sells == 0 else '商家',
        }
        print(data)


if __name__ == '__main__':
    # Guard keeps the 100-page crawl from firing on import.
    for page in range(1, 101):
        get_item_info(who_sells=0, page=page)

结果如图:
这里写图片描述