python+selenium搜狗微信主页抓取

来源:互联网 发布:linux黑客帝国代码雨 编辑:程序博客网 时间:2024/05/22 07:57
# coding: utf-8
"""Scrape article titles and summaries from the Sogou WeChat home page.

For every category tab (anchors with ids ``pc_0`` .. ``pc_21``) the script
opens the home page in a fresh PhantomJS session, selects the tab, clicks
"load more" twice and prints one line per article in the form
``<category>  <title>  <summary>``.
"""
import time

from selenium import webdriver
from selenium.webdriver.common.action_chains import ActionChains

URL = 'http://weixin.sogou.com/'
CATEGORY_COUNT = 22           # category tabs carry ids pc_0 .. pc_21
ARTICLES_PER_CATEGORY = 40    # articles read per category after loading more
LOAD_MORE_CLICKS = 2          # number of times "look-more" is clicked
PAUSE_SECONDS = 2             # fixed wait after hovering / loading more


def _scrape_category(category_index):
    """Print the articles listed under one category tab.

    A dedicated PhantomJS session is started for the category and is always
    shut down again, even when an element lookup fails, so no browser
    processes are left behind.

    Args:
        category_index: Zero-based index of the category tab; it is used to
            build the ``pc_<index>`` element id.

    Raises:
        selenium.common.exceptions.NoSuchElementException: If an expected
            element (tab, "look-more" link, article title or summary) is
            not present on the page.
    """
    category_xpath = "//a[@id='pc_%d']" % category_index

    browser = webdriver.PhantomJS()
    try:
        browser.get(URL)
        browser.refresh()

        # Hover over the "more" anchor (presumably this reveals the category
        # tabs that are hidden by default -- confirm against the live page).
        more_anchor = browser.find_element_by_xpath("//a[@id='more_anchor']")
        ActionChains(browser).move_to_element(more_anchor).perform()
        time.sleep(PAUSE_SECONDS)

        # Click the category tab, then look it up again to read its label.
        browser.find_element_by_xpath(category_xpath).click()
        category_link = browser.find_element_by_xpath(category_xpath)

        # Click "load more" to pull in additional articles.
        for _ in range(LOAD_MORE_CLICKS):
            browser.find_element_by_xpath("//a[@id='look-more']").click()
            time.sleep(PAUSE_SECONDS)

        # The label is the same for every article, so read it only once
        # instead of issuing a WebDriver round trip per article.
        category_name = category_link.text

        for article_index in range(ARTICLES_PER_CATEGORY):
            title_xpath = "//a[@uigs='pc_%d_%d_title']" % (
                category_index, article_index)
            title = browser.find_element_by_xpath(title_xpath)
            # The summary is the <p> found under the title's grandparent.
            detail = browser.find_element_by_xpath(title_xpath + "/../..//p")
            print(category_name + '  ' + title.text + '  ' + detail.text)
    finally:
        # Always terminate the PhantomJS process started above.
        browser.quit()


def main():
    """Scrape every category tab in order."""
    for category_index in range(CATEGORY_COUNT):
        _scrape_category(category_index)


if __name__ == '__main__':
    main()
阅读全文
0 0