Python + Selenium：搜狗微信主页抓取

来源：互联网 ｜ 发布：linux黑客帝国代码雨 ｜ 编辑：程序博客网 ｜ 时间：2024/05/22 07:57

# coding: utf-8
"""Scrape article titles and summaries from the Sogou WeChat home page.

For each category tab (element ids ``pc_0`` .. ``pc_21``) the script opens a
fresh PhantomJS session, selects the tab, clicks "load more" twice and prints
one line per article slot: ``<tab label>  <title>  <summary>``.
"""
import time

from selenium import webdriver
from selenium.webdriver.common.action_chains import ActionChains

url = 'http://weixin.sogou.com/'

CATEGORY_COUNT = 22          # tabs are addressed as pc_0 .. pc_21
ARTICLES_PER_CATEGORY = 40   # article slots looked up per tab (0 .. 39)
LOAD_MORE_CLICKS = 2         # number of times "load more" is clicked
PAUSE_SECONDS = 2            # fixed wait after hover / click


def scrape_category(category_index):
    """Print label, title and summary for every article slot of one tab.

    A dedicated PhantomJS browser is started for the tab and is always shut
    down again, even when an element lookup fails.

    :param category_index: integer used to build the tab id
        ``pc_<category_index>`` and the ``uigs`` attributes of its articles.
    :raises: whatever Selenium raises when an element cannot be found or
        clicked; the error is not swallowed.
    """
    browser = webdriver.PhantomJS()
    try:
        browser.get(url)
        browser.refresh()

        # Hover over the "more" anchor before clicking a tab.
        # NOTE(review): presumably this reveals tabs hidden in a dropdown
        # -- confirm against the live page.
        more_anchor = browser.find_element_by_xpath("//a[@id='more_anchor']")
        ActionChains(browser).move_to_element(more_anchor).perform()
        time.sleep(PAUSE_SECONDS)

        # Click the category tab, then look it up again to read its label.
        tab_xpath = "//a[@id='pc_" + str(category_index) + "']"
        browser.find_element_by_xpath(tab_xpath).click()
        tab = browser.find_element_by_xpath(tab_xpath)

        # Click "load more content"; the element is looked up again for
        # each click, followed by a fixed pause.
        for _ in range(LOAD_MORE_CLICKS):
            browser.find_element_by_xpath("//a[@id='look-more']").click()
            time.sleep(PAUSE_SECONDS)

        # The label does not change while articles are read, so fetch it
        # once instead of once per article.
        label = tab.text

        for j in range(0, ARTICLES_PER_CATEGORY):
            title_xpath = ("//a[@uigs='pc_" + str(category_index) + "_"
                           + str(j) + "_title']")
            title = browser.find_element_by_xpath(title_xpath)
            # The summary is the <p> found under the title link's
            # grandparent element.
            detail = browser.find_element_by_xpath(title_xpath + "/../..//p")
            # Single-argument print() behaves the same on Python 2 and 3.
            print(label + '  ' + title.text + '  ' + detail.text)
    finally:
        # Without this every tab would leave a PhantomJS process running.
        browser.quit()


for i in range(0, CATEGORY_COUNT):
    scrape_category(i)

阅读全文

0 0