网络爬虫使用代理IP的三种方式

来源:互联网 发布:遗传算法原理及应用pdf 编辑:程序博客网 时间:2024/05/18 06:25
import java.io.IOException;
import java.io.InputStream;
import java.io.Reader;
import java.net.InetSocketAddress;
import java.net.Proxy;
import java.net.SocketAddress;
import java.net.URL;
import java.net.URLConnection;
import java.nio.ByteBuffer;
import java.nio.channels.Channels;
import java.nio.channels.ReadableByteChannel;
import java.nio.channels.SocketChannel;
import java.nio.charset.Charset;
import java.nio.charset.CharsetDecoder;
import java.nio.charset.CodingErrorAction;
import java.security.cert.CertificateException;
import java.security.cert.X509Certificate;
import java.util.Scanner;

import javax.net.ssl.HostnameVerifier;
import javax.net.ssl.HttpsURLConnection;
import javax.net.ssl.SSLContext;
import javax.net.ssl.SSLSession;
import javax.net.ssl.TrustManager;
import javax.net.ssl.X509TrustManager;

/**
 * Demonstrates three ways of fetching a URL through an HTTP proxy:
 * an explicit {@link Proxy} object ({@link #test1}), JVM-wide proxy system
 * properties ({@link #test2}) and a hand-written request over a raw socket
 * ({@link #test3}).
 *
 * <p>WARNING: {@link #IngoreSSL()} turns off HTTPS certificate and host name
 * verification for the whole JVM. That is acceptable for a throw-away
 * experiment but must not be used where the authenticity of the server matters.
 */
public class Tester {

    /** Charset used for everything this class sends and prints. */
    private static final Charset UTF_8 = Charset.forName("UTF-8");

    /** Connect and read timeout, in milliseconds, for the raw-socket variant. */
    private static final int TIMEOUT_MILLIS = 3000;

    /** Size of the char buffer used when streaming a response to stdout. */
    private static final int CHUNK_SIZE = 1024;

    /**
     * Runs one of the three proxy variants against a sample URL.
     * Any failure is reported on stderr via its stack trace.
     *
     * @param args ignored
     */
    public static void main(String[] args) {
        String uri = "https://fpcy.hb-n-tax.gov.cn:443/WebQuery/yzmQuery";
        //String uri = "http://pv.sohu.com/cityjson?ie=utf-8";
        String host = "52.183.30.241";
        int port = 8888;
        try {
            test1(uri, host, port);
            //test2(uri, host, port);
            //test3(uri, host, port);
        } catch (Exception e) {
            e.printStackTrace();
        }
    }

    /**
     * Variant 1: opens the connection with an explicit HTTP {@link Proxy}.
     *
     * @param uri  URL to fetch
     * @param host proxy host name or IP address
     * @param port proxy port
     * @throws Exception if the SSL setup, the connection or the read fails
     */
    static void test1(String uri, String host, Integer port) throws Exception {
        SocketAddress addr = new InetSocketAddress(host, port);
        Proxy proxy = new Proxy(Proxy.Type.HTTP, addr);
        // Disables certificate checks for HTTPS requests; call only when needed.
        IngoreSSL();
        URLConnection connection = new URL(uri).openConnection(proxy);
        show(connection.getInputStream());
    }

    /**
     * Variant 2: configures the proxy through JVM-wide system properties.
     *
     * <p>Original author's note: if the proxy fails, this variant falls back
     * to the local IP (a direct connection).
     *
     * <p>The properties stay set after the call returns and therefore affect
     * every later connection opened through the default proxy selector.
     *
     * @param uri  URL to fetch
     * @param host proxy host name or IP address
     * @param port proxy port
     * @throws Exception if the SSL setup, the connection or the read fails
     */
    static void test2(String uri, String host, Integer port) throws Exception {
        String portText = String.valueOf(port);
        // NOTE(review): "http.proxySet" looks like a legacy flag that current
        // JDKs do not document; kept unchanged for backward compatibility.
        System.setProperty("http.proxySet", "true");
        System.setProperty("http.proxyHost", host);
        System.setProperty("http.proxyPort", portText);
        // https:// URLs are routed by the https.* properties, not the http.*
        // ones, so both sets are needed for the proxy to be used either way.
        System.setProperty("https.proxyHost", host);
        System.setProperty("https.proxyPort", portText);
        IngoreSSL();
        URLConnection connection = new URL(uri).openConnection();
        show(connection.getInputStream());
    }

    /**
     * Variant 3: connects to the proxy with a raw {@link SocketChannel},
     * sends a hand-written GET request and streams the reply to stdout,
     * decoded as UTF-8 and followed by a line break.
     *
     * <p>NOTE(review): the request line carries no HTTP version and no Host
     * header, and an https target would normally require a CONNECT tunnel.
     * Some proxies may reject such a request; the bytes sent are deliberately
     * left exactly as in the original - confirm before relying on this variant.
     *
     * @param uri  absolute URL placed in the request line
     * @param host proxy host name or IP address
     * @param port proxy port
     * @throws IOException if connecting, writing or reading fails or times out
     */
    static void test3(String uri, String host, Integer port) throws IOException {
        // try-with-resources closes the channel even when connect/read fails.
        try (SocketChannel sc = SocketChannel.open()) {
            // Connect timeout.
            sc.socket().connect(new InetSocketAddress(host, port), TIMEOUT_MILLIS);
            // Read timeout.
            sc.socket().setSoTimeout(TIMEOUT_MILLIS);

            ByteBuffer request = UTF_8.encode("GET " + uri + " \r\n\r\n");
            while (request.hasRemaining()) {
                sc.write(request);
            }

            // Read through the socket's InputStream (as the original did) so
            // the read timeout configured on the socket above is honoured.
            InputStream is = sc.socket().getInputStream();
            ReadableByteChannel readCh = Channels.newChannel(is);

            // One decoder for the whole stream: a multi-byte character split
            // across two reads is still decoded correctly. Undecodable bytes
            // are replaced instead of aborting the dump.
            CharsetDecoder decoder = UTF_8.newDecoder()
                    .onMalformedInput(CodingErrorAction.REPLACE)
                    .onUnmappableCharacter(CodingErrorAction.REPLACE);
            Reader reader = Channels.newReader(readCh, decoder, -1);

            char[] chunk = new char[CHUNK_SIZE];
            int count;
            while ((count = reader.read(chunk)) != -1) {
                // print, not println: chunk boundaries are not line boundaries.
                System.out.print(String.valueOf(chunk, 0, count));
            }
            System.out.println();
        }
    }

    /**
     * Reads the whole stream as UTF-8 text, prints it to stdout followed by a
     * line break, and closes the stream. Line breaks inside the text are kept.
     *
     * @param in stream to drain; closed before this method returns
     * @throws IOException if reading from the stream failed; whatever was read
     *                     before the failure has already been printed
     */
    static void show(InputStream in) throws IOException {
        try (Scanner cin = new Scanner(in, UTF_8.name())) {
            // "\\A" only matches at the start of input, so the single token
            // returned by next() is the entire content of the stream.
            cin.useDelimiter("\\A");
            String body = cin.hasNext() ? cin.next() : "";
            System.out.println(body);

            // Scanner swallows read errors; surface them to the caller.
            IOException failure = cin.ioException();
            if (failure != null) {
                throw failure;
            }
        }
    }

    /**
     * Disables HTTPS certificate and host name verification for every
     * {@link HttpsURLConnection} created afterwards. Must be called before
     * {@code openConnection}.
     *
     * <p>SECURITY: this removes all protection against man-in-the-middle
     * attacks and changes JVM-wide defaults.
     *
     * @throws Exception if the SSL context cannot be created or initialised
     */
    static void IngoreSSL() throws Exception {
        trustAllHttpsCertificates();
        HostnameVerifier hv = new HostnameVerifier() {
            @Override
            public boolean verify(String urlHostName, SSLSession session) {
                return true;
            }
        };
        HttpsURLConnection.setDefaultHostnameVerifier(hv);
    }

    /**
     * Installs a default SSL socket factory whose only trust manager is
     * {@link miTM}, i.e. one that accepts every certificate chain.
     *
     * @throws Exception if the SSL context cannot be created or initialised
     */
    private static void trustAllHttpsCertificates() throws Exception {
        TrustManager[] trustAllCerts = new TrustManager[] {new miTM()};
        SSLContext sc = SSLContext.getInstance("SSL");
        sc.init(null, trustAllCerts, null);
        HttpsURLConnection.setDefaultSSLSocketFactory(sc.getSocketFactory());
    }

    /** Trust manager that accepts every client and server certificate. */
    static class miTM implements TrustManager, X509TrustManager {

        /**
         * @return an empty array; the interface contract requires a non-null
         *         result, and no issuer is singled out as accepted
         */
        @Override
        public X509Certificate[] getAcceptedIssuers() {
            return new X509Certificate[0];
        }

        /** Not part of {@link X509TrustManager}; always reports trust. */
        public boolean isServerTrusted(X509Certificate[] certs) {
            return true;
        }

        /** Not part of {@link X509TrustManager}; always reports trust. */
        public boolean isClientTrusted(X509Certificate[] certs) {
            return true;
        }

        /** Accepts any server certificate chain without inspecting it. */
        @Override
        public void checkServerTrusted(X509Certificate[] certs, String authType)
                throws CertificateException {
            // Intentionally empty: nothing is ever rejected.
        }

        /** Accepts any client certificate chain without inspecting it. */
        @Override
        public void checkClientTrusted(X509Certificate[] certs, String authType)
                throws CertificateException {
            // Intentionally empty: nothing is ever rejected.
        }
    }
}
0 0
原创粉丝点击