phantomjs的使用

来源:互联网 发布:数据库编辑器 编辑:程序博客网 时间:2024/05/22 14:28

一、通过 Selenium 使用 PhantomJS。Selenium 封装了 PhantomJS 的一部分功能,同时又提供了 Python 的接口模块,因此在 Python 中可以很方便地使用 Selenium,从而间接地使用 PhantomJS。

例如:

import time

import requests
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC

# PhantomJS command-line switches: do not download images, use the disk cache.
SERVICE_ARGS = ['--load-images=false', '--disk-cache=true']

# NOTE(review): webdriver.PhantomJS was deprecated in later Selenium 3.x
# releases and removed in Selenium 4 -- confirm against the installed version.
driver = webdriver.PhantomJS(
    executable_path=r"E:\phantomjs-2.1.1-windows\bin\phantomjs.exe",
    service_args=SERVICE_ARGS,
)


def login_newrank(url, max_attempts=3):
    """Log in on the page at *url* and print the resulting page source.

    Opens the page with the module-level ``driver``, switches to the
    account/password tab, fills in the credentials and clicks the confirm
    button. Each element lookup waits up to 10 seconds.

    Args:
        url: Address of the login page.
        max_attempts: How many times the whole login sequence is tried
            before giving up. The original code retried by unbounded
            recursion, which ends in ``RecursionError`` when the page keeps
            failing; the retry count is now bounded.

    Returns:
        None. The page source is printed on success; a failure message is
        printed when every attempt failed.
    """
    for attempt in range(1, max_attempts + 1):
        try:
            driver.get(url)

            login = WebDriverWait(driver, 10).until(
                EC.presence_of_element_located(
                    (By.XPATH, '//div[@class="login-normal-tap"]')))
            login.click()
            print('登陆界面')

            user = WebDriverWait(driver, 10).until(
                EC.presence_of_element_located(
                    (By.XPATH, '//input[@id="account_input"]')))
            user.send_keys('13500000000')

            pwd = WebDriverWait(driver, 10).until(
                EC.presence_of_element_located(
                    (By.XPATH, '//input[@id="password_input"]')))
            pwd.send_keys('abc123')

            confirm = WebDriverWait(driver, 10).until(
                EC.element_to_be_clickable(
                    (By.XPATH, '//div[@id="pwd_confirm"]')))
            confirm.click()
            print('登陆')
            break
        except Exception as e:
            # Best-effort retry: report the failure and start over.
            print(e)
    else:
        # Every attempt raised; there is no logged-in page to print.
        print('login failed after %d attempts' % max_attempts)
        return

    # Give the post-login redirect time to finish before reading the page.
    time.sleep(3)
    print(driver.page_source)


if __name__ == '__main__':
    url = 'http://www.newrank.cn/public/login/login.html?back=http%3A//www.newrank.cn/'
    try:
        login_newrank(url)
    finally:
        # Always terminate the PhantomJS process, even when login fails.
        driver.quit()
二、使用 PhantomJS 的 webserver 模块,把 PhantomJS 作为一种 Web 服务(API)来运行,将其与调用它的其他语言(比如 Python)分离开来。
如:

service.js

// PhantomJS script exposing a tiny HTTP rendering service.
//
// Usage: phantomjs <this script> [port]      (port defaults to 8080)
//
// A client POSTs a form body of the shape "<url-encoded target URL>=<token>".
// The service loads the target URL in a headless page and answers with the
// rendered HTML, or with the body "1" when the page could not be loaded.

var system = require('system');
var webserver = require('webserver');
var webPage = require('webpage');

var args = system.args;
// args[0] is the script name; a single extra argument is the listening port.
var port = (args.length === 2) ? Number(args[1]) : 8080;

// Installed once at start-up instead of on every request.
phantom.onError = function (msg, trace) {
    console.log("[Warning]This is phantom.onError");
    var msgStack = ['PHANTOM ERROR: ' + msg];
    if (trace && trace.length) {
        msgStack.push('TRACE:');
        trace.forEach(function (t) {
            msgStack.push(' -> ' + (t.file || t.sourceURL) + ': ' + t.line +
                (t.function ? ' (in function ' + t.function + ')' : ''));
        });
    }
    console.log(msgStack.join('\n'));
    phantom.exit(1);
};

var server = webserver.create();
var service = server.listen(port, function (request, response) {
    var page = null;
    var finished = false;

    // Send exactly one reply per request. onLoadFinished may fire more than
    // once, and writing to an already closed response must be avoided.
    function finish(statusCode, body) {
        if (finished) {
            return;
        }
        finished = true;
        // The webserver response object uses `statusCode`, not `status`.
        response.statusCode = statusCode;
        response.write(body);
        if (page) {
            page.close();
        }
        response.close();
    }

    try {
        // Raw form body: "<encoded url>=<token>". Only the URL part is used.
        var postRaw = request.postRaw;
        var url = decodeURIComponent(postRaw.split("=")[0]);

        page = webPage.create();
        page.settings.userAgent = 'Mozilla/5.0 (Linux; Android 6.0; Nexus 5 Build/MRA58N) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/59.0.3071.115 Mobile Safari/537.36';
        page.settings.resourceTimeout = 20000; // timeout is 20s

        // Script errors inside the loaded page are logged, not fatal.
        page.onError = function (msg, trace) {
            console.log("[Warning]This is page.onError");
            var msgStack = ['ERROR: ' + msg];
            if (trace && trace.length) {
                msgStack.push('TRACE:');
                trace.forEach(function (t) {
                    msgStack.push(' -> ' + t.file + ': ' + t.line +
                        (t.function ? ' (in function "' + t.function + '")' : ''));
                });
            }
            // The original collected the message but never printed it.
            console.log(msgStack.join('\n'));
        };

        // Registered before open() so the handler is in place for the load.
        page.onLoadFinished = function (status) {
            console.log('Status: ' + status);
            if (status == 'success') {
                finish(200, page.content);
            } else {
                // "1" is the failure marker the client receives.
                finish(200, "1");
            }
        };

        page.open(url, function (status) {
            console.log('Target_url is ' + url);
        });
    } catch (e) {
        // NOTE(review): PhantomJS's engine is believed to expose the line as
        // `e.line`; `e.lineNumber` is kept as a fallback -- confirm.
        console.log('[Error]' + e.message + 'happen' + (e.line || e.lineNumber) + 'line');
        // Answer instead of leaving the client waiting for its own timeout
        // (e.g. a request without a POST body makes postRaw undefined).
        finish(500, "1");
    }
});

// listen() returns false when the server could not be started.
if (!service) {
    console.log('[Error]could not start web server on port ' + port);
    phantom.exit(1);
}
requests_test.py

import base64
import hashlib
import re
from multiprocessing.dummy import Pool

import requests


class http_request:
    """Client for a local HTTP service that returns rendered page HTML."""

    def __init__(self, port="8080"):
        """Remember the service address.

        Args:
            port: Port the local service listens on. ``str()`` is applied so
                both ``"8080"`` and ``8080`` are accepted.
        """
        self.url = "http://localhost:" + str(port)

    def getwebbody(self, domain):
        """Fetch the page source of *domain* through the service.

        The request is a form POST whose single field has the target URL as
        its name and the MD5 hex digest of the base64-encoded URL as its
        value.

        Args:
            domain: URL of the page to fetch.

        Returns:
            The response body as ``bytes``, or ``None`` when the request
            failed (the error is printed).
        """
        base_domain = base64.b64encode(domain.encode('utf8'))
        md5_domain = hashlib.md5(base_domain).hexdigest()
        payload = {domain: md5_domain}
        try:
            return requests.post(self.url, data=payload, timeout=30).content
        except requests.exceptions.ConnectionError:
            print("requests connection error")
        except Exception as e:
            print(e)
        return None


if __name__ == "__main__":
    port = "8080"
    cur = http_request(port)
    # Other sample pages:
    # domain="http://app.cntv.cn/special/cportal/newlive/index.html?id=LiveRZy6XP4F1Z2DERFogaLe170917&fromapp=cctvnews&from=singlemessage&isappinstalled=1&btime=1505612965&bauth=4ef2309698028ea2f53824d6bc707cae"
    # domain="http://izhibo.ifeng.com/live.html?liveid=110695&c_from_app=ifengnews&aman=06o208R4ecqe0b8f56fe88Ve54Ub9d1ec9x515aa08"
    # domain="http://wap-live.myzaker.com/?wap_open_type=wap&live_id=21840"
    # domain="https://c.m.163.com/news/l/154715.html?spss=newsapp&spsw=1&from=singlemessage&isappinstalled=1"
    domain = "http://www.newscctv.net/219h5/#/article?videoId=ACBEF932-87FB-A8FB-179D-9BE3CCCEF9DA"
    ctn = cur.getwebbody(domain)
    print(ctn)
    if ctn is None:
        # getwebbody already printed the reason; nothing to search.
        print("no response from the rendering service")
    else:
        matches = re.findall("<video.*?src=\"(http.*?m3u8)", ctn.decode('utf8'))
        if matches:
            print(matches[0])
        else:
            # Not every page exposes an m3u8 address in a <video> tag.
            print("no m3u8 address found in the page")
但在这个例子里,有些页面获取不到 m3u8 地址,具体原因暂时未知。


参考:

https://thief.one/2017/03/31/Phantomjs%E6%AD%A3%E7%A1%AE%E6%89%93%E5%BC%80%E6%96%B9%E5%BC%8F/
https://thief.one/2017/03/01/Phantomjs%E7%88%AC%E8%BF%87%E7%9A%84%E9%82%A3%E4%BA%9B%E5%9D%91/

原创粉丝点击