PhantomJSDriver使用和爬虫实例

来源:互联网 发布:python random.sample 编辑:程序博客网 时间:2024/05/18 11:50
PhantomJSDriver Linux安装
# Install directory used below (presumably where the archive is unpacked - confirm).
/usr/local/share/
# Unpack the downloaded PhantomJS archive (xx.tar.gz is a placeholder name).
 tar   -zxvf   xx.tar.gz
# Put the PhantomJS bin directory on PATH so the binary can be run by name.
export PHANTOMJS_HOME=/usr/local/share/phantomjs-2.1.1-linux-x86_64/bin
export PATH=$PHANTOMJS_HOME:$PATH
# Run the binary (presumably to verify the installation works).
phantomjs
# NOTE(review): bzip2 is presumably needed to extract a .tar.bz2 archive; if so, install it before unpacking.
yum install -y bzip2

创建PhantomJSDriver对象
// Builds a PhantomJSDriver configured with JavaScript on and screenshots off.
// Location of the PhantomJS executable (Windows path shown).
// On Linux use: /usr/local/share/phantomjs-2.1.1-linux-x86_64/bin/phantomjs
final String phantomJsBinary = "D:\\phantomjs-2.1.1-windows\\bin\\phantomjs.exe";

final DesiredCapabilities capabilities = new DesiredCapabilities();
// Enable JavaScript execution in the headless browser.
capabilities.setJavascriptEnabled(true);
// Tell the driver service where the PhantomJS executable lives.
capabilities.setCapability(
        PhantomJSDriverService.PHANTOMJS_EXECUTABLE_PATH_PROPERTY,
        phantomJsBinary);
// Screenshot support is switched off.
capabilities.setCapability("takesScreenshot", false);

return new PhantomJSDriver(capabilities);

使用PhantomJSDriver获取某微博cookie
// Fills in and submits the login form through JavaScript, then returns the
// browser's cookies formatted as a single "name=value;name=value" string.
// Expects in scope: `driver` (a WebDriver already on the login page),
// `username` and `pwd` (the credentials to type in).
// Returns an empty string when the browser holds no cookies.
JavascriptExecutor js = (JavascriptExecutor) driver;
// Set when a pause is interrupted; the flag is restored on exit so the caller
// can still observe the interruption while the login flow runs to completion.
boolean interrupted = false;
try {
    // User-name input (equivalent XPath: //*[@id='loginname']).
    WebElement loginname = driver.findElement(By.id("loginname"));
    // Click and set the value via JavaScript rather than native events.
    js.executeScript("arguments[0].click();", loginname);
    js.executeScript("arguments[0].value=arguments[1]", loginname, username);
    try {
        Thread.sleep(3000);
    } catch (InterruptedException e) {
        interrupted = true;
    }

    // Password input.
    WebElement password = driver.findElement(By.cssSelector("input.W_input[name=password]"));
    js.executeScript("arguments[0].click();", password);
    js.executeScript("arguments[0].value=arguments[1]", password, pwd);
    try {
        // Same total pause as the two consecutive 3 s sleeps it replaces.
        Thread.sleep(6000);
    } catch (InterruptedException e) {
        interrupted = true;
    }

    // Login button.
    WebElement loginbtn = driver.findElement(By.cssSelector("a.W_btn_a.btn_32px"));
    try {
        Thread.sleep(3000);
    } catch (InterruptedException e) {
        interrupted = true;
    }
    js.executeScript("arguments[0].click();", loginbtn);
    try {
        // Pause after submitting (two consecutive 3 s sleeps merged) before reading cookies.
        Thread.sleep(6000);
    } catch (InterruptedException e) {
        interrupted = true;
    }

    // Read the cookies held by the simulated browser and join them with ';'.
    Set<Cookie> cookies = driver.manage().getCookies();
    StringBuilder headerCookie = new StringBuilder();
    for (Cookie cookie : cookies) {
        // Separator goes between entries, so an empty cookie set is safe
        // (no trailing character has to be deleted afterwards).
        if (headerCookie.length() > 0) {
            headerCookie.append(";");
        }
        headerCookie.append(cookie.getName()).append("=").append(cookie.getValue());
    }
    return headerCookie.toString();
} finally {
    if (interrupted) {
        Thread.currentThread().interrupt();
    }
}

HtmlUnitDriver使用及LinkedIn获取cookie
https://www.linkedin.com/directory/people-a linkedin爬虫入口
导入selenium-server-standalone-3.4.0.jar
LinkedIn会在服务器端检测浏览器类型,所以这里使用HtmlUnitDriver。
// Logs in to LinkedIn with an HtmlUnitDriver routed through an HTTP proxy and
// returns the session cookies formatted as "name=value; name=value".
// Expects in scope: `proxy_ip`, `proxy_port` (proxy address) and
// `user`, `pass` (the credentials to type in).
// Returns an empty string when the browser holds no cookies.
Proxy proxy = new Proxy();
// Proxy server address as host:port.
proxy.setHttpProxy(proxy_ip+":"+proxy_port);
String userAgent="Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.36";
// Present the headless browser with a Chrome 58 identity and user agent.
HtmlUnitDriver driver=new HtmlUnitDriver(new BrowserVersion("Chrome","5.0 (Windows NT 6.1; Win64; x64)",userAgent,58));
// Apply the proxy to the driver itself; a capabilities object that is never
// handed to the driver has no effect on its traffic.
driver.setProxySettings(proxy);
// Set when a pause is interrupted; the flag is restored on exit so the caller
// can still observe the interruption while the login flow runs to completion.
boolean interrupted = false;
try {
    // Open the LinkedIn login page.
    driver.get("https://www.linkedin.com/uas/login");
    try {
        // Pause after opening the page (2 s + 5 s sleeps merged).
        Thread.sleep(7000);
    } catch (InterruptedException e) {
        interrupted = true;
    }

    // Type the account name into the user-name field.
    WebElement username=driver.findElement(By.id("session_key-login"));
    username.sendKeys(user);
    try {
        Thread.sleep(5000);
    } catch (InterruptedException e) {
        interrupted = true;
    }

    // Type the password into the password field.
    WebElement password=driver.findElement(By.id("session_password-login"));
    password.sendKeys(pass);
    try {
        Thread.sleep(5000);
    } catch (InterruptedException e) {
        interrupted = true;
    }

    // Submit the login form.
    WebElement submit=driver.findElement(By.id("btn-primary"));
    submit.click();
    try {
        // Pause after submitting (2 s + 3 s sleeps merged) before reading cookies.
        Thread.sleep(5000);
    } catch (InterruptedException e) {
        interrupted = true;
    }

    // Join the session cookies with "; ".
    Set<Cookie> cookies = driver.manage().getCookies();
    StringBuilder headerCookie = new StringBuilder();
    for (Cookie cookie : cookies) {
        // Separator goes between entries, so an empty cookie set is safe
        // (no trailing characters have to be deleted afterwards).
        if (headerCookie.length() > 0) {
            headerCookie.append("; ");
        }
        headerCookie.append(cookie.getName()).append("=").append(cookie.getValue());
    }
    return headerCookie.toString();
} finally {
    // Always shut the browser down, even when an element lookup fails.
    driver.quit();
    if (interrupted) {
        Thread.currentThread().interrupt();
    }
}

阅读全文
1 0
原创粉丝点击