Selenium模拟登录知乎

来源:互联网 发布:淘宝鹊桥是什么意思 编辑:程序博客网 时间:2024/05/18 00:28
import time

from bs4 import BeautifulSoup
from selenium import webdriver
from selenium.common.exceptions import WebDriverException
# Log in to Zhihu through a Selenium-driven Chrome window, then collect the
# titles shown on the home page feed.
#
# NOTE(review): the credentials are hard-coded as in the original post; load
# them from the environment or a config file before using this for real.
# NOTE(review): the find_element_by_* helpers were removed in Selenium 4.3;
# on newer Selenium switch to find_element(By.<KIND>, value).
ZHIHU_ACCOUNT = '18251552002'
ZHIHU_PASSWORD = '000189'

browser = webdriver.Chrome()
try:
    browser.get('http://www.zhihu.com#signin')
    # Presumably switches from the QR-code panel to the account/password form.
    browser.find_element_by_class_name('qrcode-signin-cut-button').click()
    browser.find_element_by_name('account').send_keys(ZHIHU_ACCOUNT)
    browser.find_element_by_name('password').send_keys(ZHIHU_PASSWORD)
    time.sleep(5)  # pause so the captcha can be typed in by hand
    browser.find_element_by_class_name('sign-button').click()
    time.sleep(2)

    # The original URL was 'http:www.zhihu.com' (missing the '//').
    browser.get('http://www.zhihu.com')
    # Scroll to the bottom so the AJAX-loaded feed items get fetched; repeat
    # this scroll + sleep pair to load more items.
    browser.execute_script("window.scrollTo(0,document.body.scrollHeight)")
    time.sleep(2)
    page_source = browser.page_source
finally:
    # Always release the browser and its driver process, even on failure.
    browser.quit()

soup = BeautifulSoup(page_source, 'lxml')
titles = soup.find_all('h2', class_='ContentItem-title')
news = []
for title in titles:
    link = title.find('a')
    if link is None:
        # A heading without a link has no title text to extract; skip it
        # instead of crashing on None.get_text().
        continue
    info = {'title': link.get_text()}
    print(info)
    news.append(info)
print(len(news))
# LOL guide list crawler (Selenium)
def _parse_guide_page(html):
    """Extract the guide entries from the HTML of one list page.

    Returns a list of dicts with the keys 'title', 'read_num' and 'time',
    one per <li> inside the <ul id="list_content"> element. Returns an empty
    list when that element is not present in the page.
    """
    soup = BeautifulSoup(html, 'lxml')
    container = soup.find('ul', id='list_content')
    if container is None:
        return []
    entries = []
    for item in container.find_all('li'):
        entries.append({
            'title': item.find('p', class_='btn-a').get_text(),
            # The first four characters are dropped — presumably a fixed
            # label in front of the count; verify against the live markup.
            'read_num': item.find('p', class_='bfl-playing').get_text()[4:],
            'time': item.find(
                'span', class_='recommend-div-div-raiders-date fr'
            ).get_text(),
        })
    return entries


def seleium_crawl(start_page, need_page):
    """Crawl the LOL guide list with Selenium, printing every entry found.

    Opens http://lol.qq.com/guide/list.shtml in Chrome, jumps to
    ``start_page`` through the page-jump input, then walks forward through
    up to ``need_page`` pages by clicking the "next page" control.

    Args:
        start_page: Page number to jump to before scraping starts.
        need_page: Maximum number of consecutive pages to scrape.

    Returns:
        A list of the scraped entry dicts (keys 'title', 'read_num', 'time')
        in the order they were printed. The original version returned None.

    NOTE(review): the find_element_by_* helpers were removed in Selenium 4.3;
    on newer Selenium switch to find_element(By.<KIND>, value).
    """
    collected = []
    browser = webdriver.Chrome()
    try:
        browser.get("http://lol.qq.com/guide/list.shtml")
        time.sleep(1)
        browser.find_element_by_css_selector('#list_page input').clear()
        time.sleep(1)
        browser.find_element_by_css_selector(
            '#list_page input'
        ).send_keys('%d' % start_page)
        time.sleep(1)
        browser.find_element_by_css_selector('.pagejump').click()
        time.sleep(2)

        for page in range(start_page, start_page + need_page):
            for entry in _parse_guide_page(browser.page_source):
                print(entry)
                collected.append(entry)
            print('第%d页' % page)
            try:
                browser.find_element_by_class_name('pagenext').click()
            except WebDriverException:
                # No usable "next page" control: stop paging. Only Selenium
                # errors are treated this way, so Ctrl-C and programming
                # errors still surface.
                break
            time.sleep(1)
    finally:
        # Release the browser and its driver process even if scraping fails.
        browser.quit()
    return collected

原创粉丝点击