诸葛找房房源信息爬取

来源:互联网 发布:数学建模算法程序 编辑:程序博客网 时间:2024/04/28 16:39
"""Scrape per-city housing statistics from the zhugefang.com city home pages.

For every city sub-domain in ``a`` the script downloads the home page,
extracts eight text fields with CSS selectors, prints them (raw and as a
pandas DataFrame) and derives the digits-only average price.  Writing the
result to MongoDB is implemented in ``save_stat`` but switched off by default,
matching the original script where that code was disabled.
"""
import datetime
import re

import pandas
import pymongo
import requests
from bs4 import BeautifulSoup

# Small demonstration of the digit-extraction idiom used for the price below.
lg = '15001927982ttcc'
lgttcc = re.sub(r"\D", "", lg)

# The standard header name is "User-Agent" (hyphenated); the original
# 'UserAgent' key was also never passed to requests.get().
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 '
                  '(KHTML, like Gecko) Chrome/62.0.3202.94 Safari/537.36',
}

# City sub-domain codes.  'hz' and 'sz' are listed twice; the duplicates are
# kept here but skipped when fetching so each URL is requested only once.
a = [
    'bj', 'sh', 'dl', 'dg', 'gz', 'jn', 'jh', 'lz', 'qd', 'tj',
    'xa', 'zz', 'zh', 'zs', 'cd', 'cq', 'cz', 'cs', 'fz', 'hz', 'heb',
    'hf', 'hn', 'hz', 'km', 'nj', 'nc', 'nb', 'sz', 'sy', 'sz', 'sjz',
    'wh', 'wx', 'yt',
]


def ad(i):
    """Return the home-page URL for the city sub-domain code ``i``."""
    cc = 'http://' + i + '.zhugefang.com/'
    return cc


b = list(map(ad, a))

proxies = {
    "http": "http://192.168.0.103:3234",
}

# Flip to True to store each city's figures in MongoDB via save_stat().
SAVE_TO_MONGO = False

# Seconds to wait for a response before giving up on one city.
REQUEST_TIMEOUT = 30

# Common prefix of every statistics selector on the city home page.
_BOX = ('body > div.index_content.content_1200 > div:nth-of-type(1)'
        ' > div.zhuge_exponent_wrap > div.zhuge_exponent_box')


def _build_selectors():
    """Return the eight CSS selectors in the order the fields are stored.

    Order: area, then (label, count) pairs for list items 1, 2 and 3 of the
    statistics box, then the average-price paragraph.
    """
    selectors = ['.banner_left']
    for position in (1, 2, 3):
        for tag in ('p', 'h5'):
            selectors.append(
                _BOX + ' > ul > li:nth-of-type({}) > a > {}'.format(position, tag)
            )
    selectors.append(_BOX + ' > div > div.show_box > ul > li.line > p')
    return selectors


SELECTORS = _build_selectors()


def parse_city(html):
    """Extract the eight statistic strings from one city home page.

    Args:
        html: The page markup as text.

    Returns:
        A list of eight stripped strings ordered like ``SELECTORS``:
        ``[area, xinshang, xinshang_tao, jiangjia, jiangjia_tao,
        zhangjia, zhangjia_tao, junjia]``.

    Raises:
        LookupError: If a selector matches nothing, instead of the opaque
            IndexError that ``select(...)[0]`` would give.
    """
    soup = BeautifulSoup(html, 'html.parser')
    total = []
    for selector in SELECTORS:
        matches = soup.select(selector)
        if not matches:
            raise LookupError('no element matches selector: ' + selector)
        total.append(matches[0].text.strip())
    return total


def save_stat(total, pp, date="2017-12-20"):
    """Insert one city's figures into the ``fangjia_stat.zgzf_stat`` collection.

    This is the MongoDB write that the original script carried as disabled
    code.  Its ``try`` and bare ``except`` branches were identical, so a
    single copy is kept; ``Collection.save`` is replaced by ``insert_one``
    because ``save`` no longer exists in PyMongo 4.

    Args:
        total: The eight-item list produced by ``parse_city``.
        pp: Digits-only average price string.
        date: Value stored as both ``update_date`` and ``stat_date``.

    Raises:
        ValueError: If one of the count fields or ``pp`` is not an integer
            string (for example when the page showed no number).
    """
    now = datetime.datetime.now()
    client1 = pymongo.MongoClient('192.168.0.136', 27017)
    try:
        stat = client1.fangjia_stat.zgzf_stat
        stat.insert_one({
            "city": total[0],
            "add_house_count": int(total[2]),
            "cut_price_house_count": int(total[4]),
            "increase_price_houseCount": int(total[6]),
            "update_date": date,
            'city_avg_price': int(pp),
            "stat_date": date,
            "c_date": now,
        })
    finally:
        # Release the connection even when the conversion or insert fails.
        client1.close()


def main():
    """Fetch every distinct city page, print its figures, optionally save."""
    seen = set()
    for k in b:
        if k in seen:
            continue
        seen.add(k)

        try:
            res = requests.get(
                k, headers=headers, proxies=proxies, timeout=REQUEST_TIMEOUT
            )
            res.raise_for_status()
            # A fresh list per city: index 7 must be this city's price.
            total = parse_city(res.text)
        except (requests.RequestException, LookupError) as err:
            # One unreachable or differently laid-out city should not stop
            # the remaining cities from being processed.
            print('skipping {}: {}'.format(k, err))
            continue

        print(total)
        deal = pandas.DataFrame(total)
        print(deal)

        price = total[7]
        pp = re.sub(r"\D", "", price)

        if SAVE_TO_MONGO:
            save_stat(total, pp)


if __name__ == "__main__":
    main()

这里写图片描述
截图

原创粉丝点击