Java 自写爬虫中常用方法封装
来源:互联网 发布:软件 互联网 网页 运维 编辑:程序博客网 时间:2024/05/09 22:14
- 通过post方式获取cookie
public static String getCookieByPost(String path, String data, String cookie, String contentType) { URL url = null; try { url = new URL(path); HttpURLConnection httpURLConnection = (HttpURLConnection) url.openConnection(); if (cookie != null && cookie.length() > 0) { httpURLConnection.addRequestProperty("Cookie", cookie); } httpURLConnection.setRequestMethod("POST");// 提交模式 httpURLConnection.addRequestProperty("User-Agent", "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/54.0.2840.71 Safari/537.36"); if (contentType != null && contentType.length() > 0) { httpURLConnection.addRequestProperty("Content-Type", contentType); } httpURLConnection.addRequestProperty("Connection", "Keep-Alive"); httpURLConnection.setConnectTimeout(5000);//连接超时 单位毫秒 httpURLConnection.setReadTimeout(5000);//读取超时 单位毫秒 // 发送POST请求必须设置如下两行 httpURLConnection.setDoOutput(true); httpURLConnection.setDoInput(true); if (data != null && data.length() > 0) { // 获取URLConnection对象对应的输出流 PrintWriter printWriter = new PrintWriter(httpURLConnection.getOutputStream()); // 发送请求参数 printWriter.write(data);// post的参数 xx=xx&yy=yy // flush输出流的缓冲 printWriter.flush(); // 开始获取数据 } Map map = httpURLConnection.getHeaderFields(); String cookieResult = null; if (map.get("Set-Cookie") != null) { cookieResult = map.get("Set-Cookie").toString(); } return cookieResult; } catch (Exception e) { System.out.println(path + "连接失败!"); } return null; }
- 通过get方式获取cookie
public static String getCookieByGet(String path) { URL url = null; try { url = new URL(path); HttpURLConnection httpURLConnection = (HttpURLConnection) url.openConnection(); httpURLConnection.setRequestMethod("GET");// 提交模式 httpURLConnection.addRequestProperty("User-Agent", "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/54.0.2840.71 Safari/537.36"); httpURLConnection.addRequestProperty("Connection", "Keep-Alive"); httpURLConnection.setConnectTimeout(5000);//连接超时 单位毫秒 httpURLConnection.setReadTimeout(5000);//读取超时 单位毫秒 Map map = httpURLConnection.getHeaderFields(); String cookieResult = null; if (map.get("Set-Cookie") != null) { cookieResult = map.get("Set-Cookie").toString(); } return cookieResult; } catch (Exception e) { System.out.println(path + "连接失败!"); } return null; }
- 通过post方式获取请求结果
public static String requestByPost(String path, String data, String cookie, String contentType) { URL url = null; try { url = new URL(path); HttpURLConnection httpURLConnection = (HttpURLConnection) url.openConnection(); httpURLConnection.setRequestMethod("POST");// 提交模式 httpURLConnection.addRequestProperty("Cookie", cookie); httpURLConnection.addRequestProperty("User-Agent", "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/54.0.2840.71 Safari/537.36"); httpURLConnection.setRequestProperty("Content-Type", contentType); httpURLConnection.addRequestProperty("Connection", "Keep-Alive"); httpURLConnection.setConnectTimeout(5000);//连接超时 单位毫秒 httpURLConnection.setReadTimeout(5000);//读取超时 单位毫秒 // 发送POST请求必须设置如下两行 httpURLConnection.setDoOutput(true); httpURLConnection.setDoInput(true); if (data != null && data.length() > 0) { // 获取URLConnection对象对应的输出流 PrintWriter printWriter = new PrintWriter(httpURLConnection.getOutputStream()); // 发送请求参数 printWriter.write(data);// post的参数 xx=xx&yy=yy // flush输出流的缓冲 printWriter.flush(); } // 开始获取数据 BufferedInputStream bis = new BufferedInputStream(httpURLConnection.getInputStream()); ByteArrayOutputStream bos = new ByteArrayOutputStream(); int len; byte[] arr = new byte[1024]; while ((len = bis.read(arr)) != -1) { bos.write(arr, 0, len); bos.flush(); } bos.close(); return bos.toString("utf-8"); } catch (Exception e) { System.out.println(path + "连接失败!"); } return null; }
- 通过get方式获取请求结果
public static String requestByGet(String path, String cookie) { URL url = null; try { url = new URL(path); HttpURLConnection httpURLConnection = (HttpURLConnection) url.openConnection(); httpURLConnection.setRequestMethod("GET");// 提交模式 httpURLConnection.addRequestProperty("Cookie", cookie); httpURLConnection.addRequestProperty("User-Agent", "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/54.0.2840.71 Safari/537.36"); httpURLConnection.addRequestProperty("Connection", "Keep-Alive"); httpURLConnection.setConnectTimeout(5000);//连接超时 单位毫秒 httpURLConnection.setReadTimeout(5000);//读取超时 单位毫秒 // 发送POST请求必须设置如下两行 httpURLConnection.setDoOutput(true); httpURLConnection.setDoInput(true); BufferedInputStream bis = new BufferedInputStream(httpURLConnection.getInputStream()); ByteArrayOutputStream bos = new ByteArrayOutputStream(); int len; byte[] arr = new byte[1024]; while ((len = bis.read(arr)) != -1) { bos.write(arr, 0, len); bos.flush(); } bos.close(); return bos.toString("utf-8"); } catch (Exception e) { System.out.println(path + "连接失败!"); } return null; }
- 下载图片
public static boolean download(String path, String cookie, String filename) { // 构造URL URL url = null; try { url = new URL(path); // 打开连接 URLConnection con = url.openConnection(); con.addRequestProperty("User-Agent", "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/54.0.2840.71 Safari/537.36"); con.setConnectTimeout(5000);//连接超时 单位毫秒 con.setReadTimeout(5000);//读取超时 单位毫秒 if (cookie != null && cookie.length() > 0) { con.addRequestProperty("Cookie", cookie); } // 输入流 InputStream is = con.getInputStream(); // 1K的数据缓冲 byte[] bs = new byte[1024]; // 读取到的数据长度 int len; // 输出的文件流 OutputStream os = new FileOutputStream(filename); // 开始读取 while ((len = is.read(bs)) != -1) { os.write(bs, 0, len); } // 完毕,关闭所有链接 os.close(); is.close(); } catch (IOException e) { System.out.println(path + "连接失败!"); return false; } return true; }
0 0
- java自写爬虫中常用方法封装
- Java语言自写初级爬虫一
- java封装selenium2常用方法
- java封装selenium2常用方法
- java封装selenium2常用方法
- Java中自写Map
- Java中自写HashSet
- ionic中一些常用的方法封装
- java操作hdfs常用方法封装
- Java Hibernate常用方法封装工具类
- Java中自写ArrayList容器
- Java写的爬虫
- 动手写Java爬虫
- java写的爬虫
- java写一个爬虫
- Java写爬虫
- Java中常用代码封装--未完待续
- java 中常用方法
- 提高mysql千万级大数据SQL查询优化30条经验(Mysql索引优化注意)
- mysql 查询动态所属标签下的动态各有多少条
- android使用HttpURLConnection实现带参数文件上传
- 转发和重定向的区别
- STM32 学习方法
- java自写爬虫中常用方法封装
- 堆和栈的区别
- Linux各种实用操作
- C++11随机数
- linux 之常用命令
- Linux服务器查看日志命令
- Linux的mktemp命令
- Linux的tee命令
- es6 javascript的 规格