python selenium 获取动态网页数据

来源：互联网 | 发布：在线监测数据造假 | 编辑：程序博客网 | 时间：2024/05/09 10:41

# -*- coding:utf-8 -*-
"""Scrape the title/value fields of a dynamically rendered detail page.

The page is opened in Chrome through Selenium, its rendered HTML is parsed
with BeautifulSoup, and every title/value pair found is appended as its own
JSON object to ``<curpath>/<name>.json``.

Requires Python 3. The Python 2 ``reload(sys)`` /
``sys.setdefaultencoding("utf-8")`` hack is intentionally gone: the output
file is opened with an explicit UTF-8 encoding instead.
"""
import json
import os
import sys
import time

import requests
from bs4 import BeautifulSoup
from selenium import webdriver

# Directory the output files are written to: the first entry of sys.path
# (the script's own directory when the file is run as a script).
curpath = sys.path[0]
print(curpath)


def getData(url):
    """Load a detail page in Chrome and append its fields to a JSON file.

    Args:
        url: Two-item sequence ``[page_url, output_name]``. ``page_url`` is
            opened in the browser; every extracted field is appended to
            ``<curpath>/<output_name>.json`` via :func:`jsonDump`.

    Raises:
        ValueError: If the rendered page contains no
            ``<div class="unit_loan_prj_detail">`` element.
    """
    driver = webdriver.Chrome()
    try:
        driver.set_page_load_timeout(40)
        driver.get(url[0])
        # The content is filled in by scripts after the initial load, so the
        # pause has to come *after* get() for page_source to include it.
        time.sleep(3)
        html = driver.page_source
    finally:
        # Always shut the browser down, even when loading fails; otherwise
        # each run leaves a Chrome/chromedriver process behind.
        driver.quit()

    soup = BeautifulSoup(html, 'lxml')
    table = soup.find('div', class_="unit_loan_prj_detail")
    if table is None:
        raise ValueError(
            'no div.unit_loan_prj_detail found on page: %s' % url[0])

    titles = [
        span.get_text()
        for span in table.find_all('span', class_="prolist_info_title")
    ]
    details = table.find_all('span', class_="prolist_info_detail")

    # Titles and details are matched by position. zip() stops at the shorter
    # list, so a surplus detail span no longer raises IndexError.
    # get_text() already includes the text of nested <span> elements, so no
    # separate handling of child spans is needed.
    for title, detail in zip(titles, details):
        jsonDump({title: detail.get_text()}, url[1])


def jsonDump(_json, name):
    """Append ``_json`` as one record to ``<curpath>/<name>.json``.

    Each record is written as a JSON document followed by a comma and a
    newline, so the file is a comma-terminated list of objects rather than
    a single valid JSON document.

    Args:
        _json: JSON-serialisable object to append.
        name: Output file name without the ``.json`` extension.
    """
    path = os.path.join(curpath, name + '.json')
    # ensure_ascii=False emits non-ASCII text verbatim, hence the explicit
    # UTF-8 encoding on the file.
    with open(path, 'a', encoding='utf-8') as outfile:
        json.dump(_json, outfile, ensure_ascii=False)
        outfile.write(',\n')


if __name__ == '__main__':
    url = ['http://www.powerec.net/gdwz-web/html/xjxx/inquiry_detail.html?inq_h_id=ZGFmNTM2ZjctOWFlYi00ZDEyLWEyZjItNDFjNjAxYmY4MTZj', 'test']
    getData(url)

阅读全文

0 0