Web Crawler IP Proxy Servers [Sample Program]


A crawler will sometimes get its IP banned. When that happens, you can scrape addresses from a proxy-listing site and rotate through them dynamically (a small rotation sketch follows below), or use a ready-made third-party proxy platform such as crawlera, which does distributed downloading over a pool of proxy IP addresses. [For a detailed introduction, see this post: http://blog.csdn.net/djd1234567/article/details/51741557]
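As a minimal sketch of the rotation idea, before the full program below: a round-robin pool that hands out the next "ip:port" entry on each request. This is not from the original post; the ProxyPool class name and the sample addresses are placeholders you would fill from whatever proxy source you scrape.

package daili;

import java.util.Arrays;
import java.util.List;
import java.util.concurrent.atomic.AtomicInteger;

// Minimal round-robin proxy pool (hypothetical helper class).
public class ProxyPool {
    private final List<String> proxies; // entries in "ip:port" form
    private final AtomicInteger cursor = new AtomicInteger(0);

    public ProxyPool(List<String> proxies) {
        this.proxies = proxies;
    }

    // Hand out the next proxy, wrapping around at the end of the list.
    public String next() {
        int i = Math.floorMod(cursor.getAndIncrement(), proxies.size());
        return proxies.get(i);
    }

    public static void main(String[] args) {
        ProxyPool pool = new ProxyPool(Arrays.asList(
                "183.136.217.74:8080",   // placeholder proxy addresses
                "121.40.199.105:80"));
        for (int k = 0; k < 4; k++) {
            System.out.println(pool.next()); // cycles: first, second, first, second
        }
    }
}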

package daili;

import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.UnsupportedEncodingException;
import java.net.InetSocketAddress;
import java.net.MalformedURLException;
import java.net.Proxy;
import java.net.URL;
import java.net.URLConnection;

/*
 * author: Qian Yang, School of Management, Hefei University of Technology
 * 1563178220@qq.com
 * blog: http://blog.csdn.net/qy20115549/
 */
public class GetHtml {

    public static void main(String[] args) throws UnsupportedEncodingException {
        // Proxy IP, proxy port, and the URL to crawl
        gethtml("183.136.217.74", 8080,
                "http://club.autohome.com.cn/bbs/forum-c-2533-1.html?orderby=dateline&qaType=-1");
    }

    public static String gethtml(String ip, int port, String url)
            throws UnsupportedEncodingException {
        URL url1 = null;
        try {
            url1 = new URL(url);
        } catch (MalformedURLException e1) {
            e1.printStackTrace();
        }
        if (url1 == null)
            return "";
        // Address of the proxy server
        InetSocketAddress addr = new InetSocketAddress(ip, port);
        Proxy proxy = new Proxy(Proxy.Type.HTTP, addr); // HTTP proxy
        InputStream in = null;
        try {
            // Open the connection through the proxy
            URLConnection conn = url1.openConnection(proxy);
            conn.setConnectTimeout(3000);
            in = conn.getInputStream();
        } catch (Exception e) {
            System.out.println("ip " + ip + " is not available"); // bad proxy IP
        }
        String s = convertStreamToString(in);
        System.out.println(s);
        return s;
    }

    public static String convertStreamToString(InputStream is)
            throws UnsupportedEncodingException {
        if (is == null)
            return "";
        // The target site is encoded in gb2312
        BufferedReader reader = new BufferedReader(new InputStreamReader(is, "gb2312"));
        StringBuilder sb = new StringBuilder();
        String line = null;
        try {
            while ((line = reader.readLine()) != null) {
                sb.append(line).append("\n");
            }
        } catch (IOException e) {
            e.printStackTrace();
        } finally {
            try {
                is.close();
            } catch (IOException e) {
                e.printStackTrace();
            }
        }
        return sb.toString();
    }
}
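The catch block above only reports a bad proxy after a request has already failed. When rotating a scraped pool, it can help to probe each address first. A minimal sketch, assuming a plain TCP connect within a short timeout is a good-enough liveness signal (the ProxyCheck class and isAlive helper are hypothetical, not part of the original post):

package daili;

import java.net.InetSocketAddress;
import java.net.Socket;

// Minimal proxy liveness probe (hypothetical helper class).
public class ProxyCheck {
    public static boolean isAlive(String ip, int port, int timeoutMs) {
        try (Socket socket = new Socket()) {
            // A successful TCP connect within the timeout counts as "alive".
            socket.connect(new InetSocketAddress(ip, port), timeoutMs);
            return true;
        } catch (Exception e) {
            return false; // refused, unreachable, or too slow
        }
    }

    public static void main(String[] args) {
        // Skip dead proxies before handing them to the crawler.
        System.out.println(isAlive("183.136.217.74", 8080, 3000));
    }
}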

As shown in the screenshot below, the program fetches and prints the HTML content of the given URL.

[Screenshot: the fetched HTML printed to the console]
