phantomjs例子1

来源:互联网 发布:qq windows版 编辑:程序博客网 时间:2024/05/16 13:50
#!/usr/bin/env python# -*- coding: utf-8 -*-# created by fhqplzj on 2017/07/15 下午3:38import osimport refrom urlparse import urljoinfrom gensim.utils import to_utf8from scrapy import Selectorfrom selenium import webdriverout_path = '/Users/fhqplzj/WebstormProjects/untitled/haha.html'def check():    with open(out_path) as fin:        data = fin.read()    selector = Selector(text=data)    urls = selector.xpath('//*[@id="J-head-menu-alert"]/ul/li/div[2]/a/@href').extract()    with open(os.path.join('/Users/fhqplzj/data/travel', 'domestic'), 'w') as fout:        for url in urls:            abs_url = urljoin('https://lvyou.baidu.com/scene/', url) + 'jingdian/\n'            abs_url = re.sub(r'^https', 'http', abs_url)            fout.write(abs_url)            print abs_url,    print len(urls)def load():    driver = webdriver.PhantomJS()    driver.get('https://lvyou.baidu.com/scene/')    button = driver.find_element_by_xpath('//*[@id="J-head-menu"]/li[2]')    button.click()    with open(out_path, 'w') as fout:        fout.write(to_utf8(driver.page_source))    driver.close()    driver.quit()if __name__ == '__main__':    flag = 0    if flag:        load()    else:        check()

原创粉丝点击