Java 自写爬虫中常用方法封装

来源:互联网 发布:软件 互联网 网页 运维 编辑:程序博客网 时间:2024/05/09 22:14
  • 通过post方式获取cookie
public static String getCookieByPost(String path, String data, String cookie, String contentType) {        URL url = null;        try {            url = new URL(path);            HttpURLConnection httpURLConnection = (HttpURLConnection) url.openConnection();            if (cookie != null && cookie.length() > 0) {                httpURLConnection.addRequestProperty("Cookie", cookie);            }            httpURLConnection.setRequestMethod("POST");// 提交模式            httpURLConnection.addRequestProperty("User-Agent",                    "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/54.0.2840.71 Safari/537.36");            if (contentType != null && contentType.length() > 0) {                httpURLConnection.addRequestProperty("Content-Type", contentType);            }            httpURLConnection.addRequestProperty("Connection", "Keep-Alive");            httpURLConnection.setConnectTimeout(5000);//连接超时 单位毫秒            httpURLConnection.setReadTimeout(5000);//读取超时 单位毫秒            // 发送POST请求必须设置如下两行            httpURLConnection.setDoOutput(true);            httpURLConnection.setDoInput(true);            if (data != null && data.length() > 0) {                // 获取URLConnection对象对应的输出流                PrintWriter printWriter = new PrintWriter(httpURLConnection.getOutputStream());                // 发送请求参数                printWriter.write(data);// post的参数 xx=xx&yy=yy                // flush输出流的缓冲                printWriter.flush();                // 开始获取数据            }            Map map = httpURLConnection.getHeaderFields();            String cookieResult = null;            if (map.get("Set-Cookie") != null) {                cookieResult = map.get("Set-Cookie").toString();            }            return cookieResult;        } catch (Exception e) {            System.out.println(path + "连接失败!");        }        return null;    }

  • 通过get方式获取cookie
public static String getCookieByGet(String path) {        URL url = null;        try {            url = new URL(path);            HttpURLConnection httpURLConnection = (HttpURLConnection) url.openConnection();            httpURLConnection.setRequestMethod("GET");// 提交模式            httpURLConnection.addRequestProperty("User-Agent",                    "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/54.0.2840.71 Safari/537.36");            httpURLConnection.addRequestProperty("Connection", "Keep-Alive");            httpURLConnection.setConnectTimeout(5000);//连接超时 单位毫秒            httpURLConnection.setReadTimeout(5000);//读取超时 单位毫秒            Map map = httpURLConnection.getHeaderFields();            String cookieResult = null;            if (map.get("Set-Cookie") != null) {                cookieResult = map.get("Set-Cookie").toString();            }            return cookieResult;        } catch (Exception e) {            System.out.println(path + "连接失败!");        }        return null;    }

  • 通过post方式获取请求结果
public static String requestByPost(String path, String data, String cookie, String contentType) {        URL url = null;        try {            url = new URL(path);            HttpURLConnection httpURLConnection = (HttpURLConnection) url.openConnection();            httpURLConnection.setRequestMethod("POST");// 提交模式            httpURLConnection.addRequestProperty("Cookie", cookie);            httpURLConnection.addRequestProperty("User-Agent",                    "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/54.0.2840.71 Safari/537.36");            httpURLConnection.setRequestProperty("Content-Type", contentType);            httpURLConnection.addRequestProperty("Connection", "Keep-Alive");            httpURLConnection.setConnectTimeout(5000);//连接超时 单位毫秒            httpURLConnection.setReadTimeout(5000);//读取超时 单位毫秒            // 发送POST请求必须设置如下两行            httpURLConnection.setDoOutput(true);            httpURLConnection.setDoInput(true);            if (data != null && data.length() > 0) {                // 获取URLConnection对象对应的输出流                PrintWriter printWriter = new PrintWriter(httpURLConnection.getOutputStream());                // 发送请求参数                printWriter.write(data);// post的参数 xx=xx&yy=yy                // flush输出流的缓冲                printWriter.flush();            }            // 开始获取数据            BufferedInputStream bis = new BufferedInputStream(httpURLConnection.getInputStream());            ByteArrayOutputStream bos = new ByteArrayOutputStream();            int len;            byte[] arr = new byte[1024];            while ((len = bis.read(arr)) != -1) {                bos.write(arr, 0, len);                bos.flush();            }            bos.close();            return bos.toString("utf-8");        } catch (Exception e) {            System.out.println(path + "连接失败!");        }        return null;    }

  • 通过get方式获取请求结果
public static String requestByGet(String path, String cookie) {        URL url = null;        try {            url = new URL(path);            HttpURLConnection httpURLConnection = (HttpURLConnection) url.openConnection();            httpURLConnection.setRequestMethod("GET");// 提交模式            httpURLConnection.addRequestProperty("Cookie", cookie);            httpURLConnection.addRequestProperty("User-Agent",                    "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/54.0.2840.71 Safari/537.36");            httpURLConnection.addRequestProperty("Connection", "Keep-Alive");            httpURLConnection.setConnectTimeout(5000);//连接超时 单位毫秒            httpURLConnection.setReadTimeout(5000);//读取超时 单位毫秒            // 发送POST请求必须设置如下两行            httpURLConnection.setDoOutput(true);            httpURLConnection.setDoInput(true);            BufferedInputStream bis = new BufferedInputStream(httpURLConnection.getInputStream());            ByteArrayOutputStream bos = new ByteArrayOutputStream();            int len;            byte[] arr = new byte[1024];            while ((len = bis.read(arr)) != -1) {                bos.write(arr, 0, len);                bos.flush();            }            bos.close();            return bos.toString("utf-8");        } catch (Exception e) {            System.out.println(path + "连接失败!");        }        return null;    }

  • 下载图片
public static boolean download(String path, String cookie, String filename) {        // 构造URL        URL url = null;        try {            url = new URL(path);            // 打开连接            URLConnection con = url.openConnection();            con.addRequestProperty("User-Agent",                    "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/54.0.2840.71 Safari/537.36");            con.setConnectTimeout(5000);//连接超时 单位毫秒            con.setReadTimeout(5000);//读取超时 单位毫秒            if (cookie != null && cookie.length() > 0) {                con.addRequestProperty("Cookie", cookie);            }            // 输入流            InputStream is = con.getInputStream();            // 1K的数据缓冲            byte[] bs = new byte[1024];            // 读取到的数据长度            int len;            // 输出的文件流            OutputStream os = new FileOutputStream(filename);            // 开始读取            while ((len = is.read(bs)) != -1) {                os.write(bs, 0, len);            }            // 完毕,关闭所有链接            os.close();            is.close();        } catch (IOException e) {            System.out.println(path + "连接失败!");            return false;        }        return true;    }
0 0
原创粉丝点击