htmlunit 工具类
来源:互联网 发布:网络水军怎么做 编辑:程序博客网 时间:2024/04/28 19:22
package cn.ys.fare.supply.aisle.extend;

import cn.ys.util.CrackUtil;
import cn.ys.util.CryptUtil;
import com.gargoylesoftware.htmlunit.*;
import com.gargoylesoftware.htmlunit.html.HtmlImage;
import com.gargoylesoftware.htmlunit.html.HtmlPage;
import com.gargoylesoftware.htmlunit.util.NameValuePair;
import com.google.common.io.ByteStreams;
import org.apache.logging.log4j.Logger;

import javax.imageio.ImageIO;
import javax.imageio.ImageReader;
import java.awt.Graphics;
import java.awt.image.BufferedImage;
import java.io.*;
import java.net.URL;
import java.net.URLDecoder;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.List;
import java.util.Map;

/**
 * Helper methods for driving the HtmlUnit headless browser: creating a configured
 * {@link WebClient}, issuing GET/POST requests, extracting page content and handling
 * captcha images.
 *
 * <p>Note: the logger is stored in a static field that is overwritten by every call to
 * {@link #getConnection(String, int, Logger)}; it is shared by all callers of this class.
 *
 * Created by ShuPF on 2017/11/17.
 */
public class HtmlUnitUtil {

    protected static final String User_Agent = "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/62.0.3202.89 Safari/537.36";

    /** Directory prefix used by {@link #saveImage(HtmlImage, String)}. */
    private static final String IMAGE_OUTPUT_PREFIX = "D:\\Backup\\Desktop\\";

    /** Logger handed in through {@link #getConnection}; {@code null} until that method is called. */
    private static Logger logger;

    /**
     * Creates a {@link WebClient} with this class's default options applied.
     *
     * @param address proxy host; the proxy is only configured when this is non-null and
     *                {@code port} is positive
     * @param port    proxy port
     * @param logger  logger stored for later use by {@link #getCode(String, HtmlImage)}
     * @return a new, configured client; the caller is responsible for closing it
     */
    public static WebClient getConnection(String address, int port, Logger logger) {
        HtmlUnitUtil.logger = logger;
        WebClient webClient = new WebClient();
        init(webClient);
        if (address != null && port > 0) {
            setProxy(webClient, address, port);
        }
        return webClient;
    }

    /**
     * Sends a GET request with the class's User-Agent header.
     *
     * @param webClient client used to perform the request
     * @param url       absolute URL to fetch
     * @return the page returned by the client
     * @throws Exception if the URL is malformed or the request fails
     */
    public static HtmlPage sendGetRequest(WebClient webClient, String url) throws Exception {
        WebRequest webRequest = new WebRequest(new URL(url));
        webRequest.setAdditionalHeader("User-Agent", User_Agent);
        webRequest.setHttpMethod(HttpMethod.GET);
        return webClient.getPage(webRequest);
    }

    /**
     * Sends a POST request with the class's User-Agent header and the given form parameters.
     *
     * @param webClient client used to perform the request
     * @param url       absolute URL to post to
     * @param params    form parameters; may be {@code null} or empty, in which case none are sent
     * @return the page returned by the client
     * @throws Exception if the URL is malformed or the request fails
     */
    public static HtmlPage sendPostRequest(WebClient webClient, String url, Map<String, String> params) throws Exception {
        WebRequest webRequest = new WebRequest(new URL(url));
        webRequest.setHttpMethod(HttpMethod.POST);
        webRequest.setAdditionalHeader("User-Agent", User_Agent);
        if (params != null && !params.isEmpty()) {
            // Build our own list and install it through the setter rather than mutating the
            // list returned by getRequestParameters(), which is not guaranteed to be modifiable.
            List<NameValuePair> pairs = new ArrayList<>(params.size());
            for (Map.Entry<String, String> param : params.entrySet()) {
                pairs.add(new NameValuePair(param.getKey(), param.getValue()));
            }
            webRequest.setRequestParameters(pairs);
        }
        return webClient.getPage(webRequest);
    }

    /**
     * Returns the page content as a UTF-8 decoded string.
     *
     * @param page page to convert
     * @return the content, or {@code null} when {@link #getPageToByte(HtmlPage)} yields no
     *         content (response status other than 200)
     * @throws IOException if reading the response body fails
     */
    public static String getPageToString(HtmlPage page) throws IOException {
        byte[] content = getPageToByte(page);
        if (content == null) {
            return null;
        }
        return new String(content, StandardCharsets.UTF_8);
    }

    /**
     * Returns the page content as bytes and releases the underlying response.
     *
     * <p>For an HTML page the bytes are the UTF-8 encoding of {@code page.asXml()}; otherwise
     * they are the raw response body.
     *
     * @param page page to convert
     * @return the content bytes, or {@code null} when the response status is not 200
     * @throws IOException if reading the response body fails
     */
    public static byte[] getPageToByte(HtmlPage page) throws IOException {
        WebResponse webResponse = page.getWebResponse();
        try {
            if (webResponse.getStatusCode() != 200) {
                return null;
            }
            if (page.isHtmlPage()) {
                // Encode explicitly as UTF-8 so the result matches getPageToString's decoding
                // regardless of the platform default charset.
                return page.asXml().getBytes(StandardCharsets.UTF_8);
            }
            try (InputStream bodyStream = webResponse.getContentAsStream()) {
                return ByteStreams.toByteArray(bodyStream);
            }
        } finally {
            // Release the response resources whether or not reading succeeded.
            webResponse.cleanUp();
        }
    }

    /**
     * Applies the default client options.
     *
     * <p>The AJAX controller and cookie manager are not touched here and stay at the
     * library defaults.
     */
    private static void init(WebClient webClient) {
        WebClientOptions options = webClient.getOptions();
        // 1. Enable JavaScript.
        options.setJavaScriptEnabled(true);
        // 2. Disable CSS so no follow-up stylesheet requests are made for rendering.
        options.setCssEnabled(false);
        // 3. Follow redirects.
        options.setRedirectEnabled(true);
        // 4. Do not throw when a script on the page fails.
        options.setThrowExceptionOnScriptError(false);
        // 5. Timeout (value passed straight to HtmlUnit's setTimeout).
        options.setTimeout(60000);
        // 6. Accept any SSL certificate.
        options.setUseInsecureSSL(true);
    }

    /**
     * Points the client's proxy configuration at the given host and port.
     */
    private static void setProxy(WebClient webClient, String address, int port) {
        ProxyConfig proxyConfig = webClient.getOptions().getProxyConfig();
        proxyConfig.setProxyHost(address);
        proxyConfig.setProxyPort(port);
    }

    /**
     * Reads the captcha image, re-encodes it as PNG and submits it to {@code CrackUtil.crack}.
     *
     * @param ta    tag passed to the cracking service, suffixed with {@code "<-->"}
     * @param image captcha image element
     * @return the value returned by {@code CrackUtil.crack}, or an empty string when encoding
     *         or cracking fails (the failure is logged if a logger has been set)
     * @throws IOException if the image cannot be read or re-encoded
     */
    public static String getCode(String ta, HtmlImage image) throws IOException {
        ImageReader imageReader = image.getImageReader();
        BufferedImage bufferedImage = imageReader.read(0);

        // ByteArrayOutputStream holds no external resource, so no close handling is needed.
        ByteArrayOutputStream out = new ByteArrayOutputStream();
        ImageIO.write(bufferedImage, "png", out);
        byte[] bytes = out.toByteArray();

        try {
            String encoded = CryptUtil.urlEencode(CryptUtil.base64Eencode(bytes));
            return CrackUtil.crack(ta + "<-->", 1006, encoded, logger);
        } catch (Exception e) {
            // Best effort: report the failure and return an empty code instead of leaking
            // the encoded image payload as if it were the solved captcha.
            Logger log = logger;
            if (log != null) {
                log.warn("Failed to resolve captcha code", e);
            }
            return "";
        }
    }

    /**
     * Scales the image to 160x60 and writes it as a PNG file named {@code name + ".png"}
     * under the fixed output directory.
     *
     * @param img  image element to save
     * @param name file name without extension
     * @throws IOException if the image cannot be read or written
     */
    public void saveImage(HtmlImage img, String name) throws IOException {
        ImageReader imageReader = img.getImageReader();
        BufferedImage bufferedImage = imageReader.read(0);

        BufferedImage inputbig = new BufferedImage(160, 60, BufferedImage.TYPE_INT_BGR);
        Graphics graphics = inputbig.getGraphics();
        try {
            graphics.drawImage(bufferedImage, 0, 0, 160, 60, null);
        } finally {
            // Release the native drawing context promptly.
            graphics.dispose();
        }

        File target = new File(IMAGE_OUTPUT_PREFIX + name + ".png");
        ImageIO.write(inputbig, "png", target);
    }
}
阅读全文
0 0
- htmlunit 工具类
- htmlunit爬虫工具使用
- HtmlUnit
- HTMLUnit
- htmlunit
- htmlunit
- HtmlUnit
- htmlunit
- 模拟网页访问的工具-htmlunit
- 使用Htmlunit工具获取表单中的input
- HtmlUnit基础
- htmlunit 示例
- java htmlunit
- htmlunit学习
- htmlunit示例
- HtmlUnit入门教程
- htmlunit教程
- htmlunit 优化
- 新时代下员工与公司的关系之我见
- Git Flow常用的分支
- c语言 指针
- asd
- 数据分析要用到的
- htmlunit 工具类
- android 之 6.0动态相册权限
- Pandas初学者代码优化指南
- 关于文件上传和下载的Servlet
- java异常处理
- 中断—响应和处理
- Python新手写出漂亮的爬虫代码2——从json获取信息
- NC集成CAS统一认证+单点登录原理
- Linearizability 与 Serializability 对比