使用代理IP网络爬虫的三种方式
来源:互联网 发布:遗传算法原理及应用pdf 编辑:程序博客网 时间:2024/05/18 06:25
import java.io.IOException;
import java.io.InputStream;
import java.net.InetSocketAddress;
import java.net.Proxy;
import java.net.SocketAddress;
import java.net.URL;
import java.net.URLConnection;
import java.nio.ByteBuffer;
import java.nio.channels.Channels;
import java.nio.channels.ReadableByteChannel;
import java.nio.channels.SocketChannel;
import java.nio.charset.StandardCharsets;
import java.util.Scanner;
import javax.net.ssl.HostnameVerifier;
import javax.net.ssl.HttpsURLConnection;
import javax.net.ssl.SSLSession;

/**
 * Demonstrates three ways to route an HTTP(S) request through a proxy:
 * an explicit {@link Proxy} object (test1), JVM-wide system properties
 * (test2), and a raw socket speaking HTTP to the proxy (test3).
 *
 * <p>WARNING: {@link #IngoreSSL()} disables all TLS certificate and
 * hostname verification. That is acceptable for throwaway testing only
 * and must never ship in production code.
 */
public class Tester {

    public static void main(String[] args) {
        String uri = "https://fpcy.hb-n-tax.gov.cn:443/WebQuery/yzmQuery";
        //String uri = "http://pv.sohu.com/cityjson?ie=utf-8";
        String host = "52.183.30.241";
        int port = 8888;
        try {
            test1(uri, host, port);
            //test2(uri, host, port);
            //test3(uri, host, port);
        } catch (Exception e) {
            e.printStackTrace();
        }
    }

    /**
     * Way 1: pass an explicit {@link Proxy} to {@code openConnection}.
     * This scopes the proxy to this single connection.
     *
     * @param uri  target URL
     * @param host proxy host
     * @param port proxy port
     */
    static void test1(String uri, String host, Integer port) throws Exception {
        SocketAddress addr = new InetSocketAddress(host, port);
        Proxy proxy = new Proxy(Proxy.Type.HTTP, addr);
        IngoreSSL(); // skip SSL certificate checks for HTTPS; enable as needed
        URLConnection connection = new URL(uri).openConnection(proxy);
        // try-with-resources so the stream is closed even if show() throws
        try (InputStream in = connection.getInputStream()) {
            show(in);
        }
    }

    /**
     * Way 2: configure the proxy via JVM-wide system properties.
     * Note: if the proxy is unreachable this way may silently fall back
     * to a direct (local-IP) connection.
     *
     * @param uri  target URL
     * @param host proxy host
     * @param port proxy port
     */
    static void test2(String uri, String host, Integer port) throws Exception {
        System.setProperty("http.proxySet", "true");
        System.setProperty("http.proxyHost", host);
        System.setProperty("http.proxyPort", String.valueOf(port));
        // BUG FIX: the http.* properties are ignored for https:// URLs
        // (the default uri above is HTTPS), so the original never actually
        // used the proxy here; the https.* properties must be set as well.
        System.setProperty("https.proxyHost", host);
        System.setProperty("https.proxyPort", String.valueOf(port));
        IngoreSSL();
        URLConnection connection = new URL(uri).openConnection();
        try (InputStream in = connection.getInputStream()) {
            show(in);
        }
    }

    /**
     * Way 3: open a raw socket to the proxy and speak plain HTTP to it
     * (absolute-URI request form). Only meaningful for http:// targets;
     * HTTPS would require a CONNECT tunnel.
     *
     * @param uri  target URL
     * @param host proxy host
     * @param port proxy port
     */
    static void test3(String uri, String host, Integer port) throws IOException {
        // try-with-resources: the original leaked the channel on any exception
        try (SocketChannel sc = SocketChannel.open()) {
            // connect timeout
            sc.socket().connect(new InetSocketAddress(host, port), 3000);
            // read timeout
            sc.socket().setSoTimeout(3000);
            // BUG FIX: the original request line ("GET <uri> \r\n\r\n") had no
            // HTTP version token and no Host header; compliant proxies reject
            // that as malformed. Send a well-formed HTTP/1.1 request instead.
            String request = "GET " + uri + " HTTP/1.1\r\n"
                    + "Host: " + new URL(uri).getHost() + "\r\n"
                    + "Connection: close\r\n"
                    + "\r\n";
            sc.write(StandardCharsets.UTF_8.encode(request));
            ByteBuffer buffer = ByteBuffer.allocate(1024);
            InputStream is = sc.socket().getInputStream();
            ReadableByteChannel readCh = Channels.newChannel(is);
            while (readCh.read(buffer) != -1) {
                buffer.flip();
                // print, not println: the original injected a spurious newline
                // after every 1 KB chunk, corrupting the echoed response body
                System.out.print(StandardCharsets.UTF_8.decode(buffer));
                buffer.clear();
            }
        }
    }

    /**
     * Reads the stream line by line and prints all lines concatenated
     * (line separators are dropped, matching the original behavior).
     *
     * @param in stream to consume; closed by this method
     */
    static void show(InputStream in) throws IOException {
        StringBuilder builder = new StringBuilder();
        // BUG FIX: the original paired hasNext() (token lookahead) with
        // nextLine(), which can skip trailing input; hasNextLine() matches
        // nextLine(). Charset pinned to UTF-8 instead of the platform default.
        try (Scanner cin = new Scanner(in, "UTF-8")) {
            while (cin.hasNextLine()) {
                builder.append(cin.nextLine());
            }
        }
        System.out.println(builder.toString());
    }

    /**
     * Disables TLS certificate and hostname verification for all
     * subsequent HTTPS connections. Must be called BEFORE openConnection.
     * (Method name keeps the original "Ingore" typo so existing callers
     * still compile.)
     *
     * <p>WARNING: testing only — this defeats TLS security entirely.
     */
    static void IngoreSSL() throws Exception {
        trustAllHttpsCertificates();
        HostnameVerifier hv = new HostnameVerifier() {
            public boolean verify(String urlHostName, SSLSession session) {
                return true; // accept any hostname
            }
        };
        HttpsURLConnection.setDefaultHostnameVerifier(hv);
    }

    /** Installs an all-trusting SSLSocketFactory as the process default. */
    private static void trustAllHttpsCertificates() throws Exception {
        javax.net.ssl.TrustManager[] trustAllCerts = { new miTM() };
        // BUG FIX: "SSL" requests the long-deprecated SSLv3 family; "TLS"
        // negotiates the best available TLS protocol instead.
        javax.net.ssl.SSLContext sc = javax.net.ssl.SSLContext.getInstance("TLS");
        sc.init(null, trustAllCerts, null);
        javax.net.ssl.HttpsURLConnection.setDefaultSSLSocketFactory(sc.getSocketFactory());
    }

    /**
     * Trust manager that accepts every certificate chain (testing only).
     * Implements X509TrustManager directly; the original's redundant
     * "implements TrustManager" is dropped (X509TrustManager extends it).
     */
    static class miTM implements javax.net.ssl.X509TrustManager {

        public java.security.cert.X509Certificate[] getAcceptedIssuers() {
            // BUG FIX: the interface contract requires a non-null array;
            // returning null can cause NPEs in TLS implementations.
            return new java.security.cert.X509Certificate[0];
        }

        // Legacy helper kept from the original for source compatibility.
        public boolean isServerTrusted(java.security.cert.X509Certificate[] certs) {
            return true;
        }

        // Legacy helper kept from the original for source compatibility.
        public boolean isClientTrusted(java.security.cert.X509Certificate[] certs) {
            return true;
        }

        public void checkServerTrusted(java.security.cert.X509Certificate[] certs,
                String authType) throws java.security.cert.CertificateException {
            // trust everything — intentionally no validation
        }

        public void checkClientTrusted(java.security.cert.X509Certificate[] certs,
                String authType) throws java.security.cert.CertificateException {
            // trust everything — intentionally no validation
        }
    }
}
0 0
- 使用代理IP网络爬虫的三种方式
- 网络爬虫—动态代理的使用
- 关于网络爬虫及ip代理服务的理解
- Jsoup使用代理ip爬虫
- 如何使用ip代理爬虫
- 网络层—IP地址的三种分类方式
- 代理ip的爬虫实现
- Python爬虫IP代理池的建立和使用
- 代理的三种方式
- Scrapy爬虫框架使用IP代理池
- python3实现网络爬虫(7)-- 使用ip代理抓取网页
- Python3网络爬虫(四):使用User Agent和代理IP隐藏身份
- Python3网络爬虫(四):使用User Agent和代理IP隐藏身份
- Python3网络爬虫_使用User Agent和代理IP隐藏身份
- 网络爬虫的六种方式
- 【爬虫学习6】爬虫自动获取并使用代理ip
- 从零开始学网络爬虫之代理IP池
- python 网络爬虫——请求头,ip代理
- 书海月色
- spring xsd 文件找不到问题
- Android--WIFI--Mac--Ip
- jsp四域九对象
- 在安卓中使用VideoView来播放视频
- 使用代理IP网络爬虫的三种方式
- 问题 C: 瑞神要考研(重排链表)
- IDEA破解 2017 IDEA license server 激活
- span 固定宽度,内容自适应容器自动换行
- Navicat 快捷键
- 跟随屏幕滚动后固定导航到顶端
- java小实现map家族
- java----------华为机试------------合并表记录
- Programming over R