Using HttpClient with a Proxy for Web Crawling


Three jar files are needed here:
commons-logging.jar
commons-codec.jar
commons-httpclient.jar

Add the downloaded jars to the build path.
Since I'm a Shenzhen University (SZU) student, I'll crawl the SZU intranet as the example.

import java.io.FileWriter;

import org.apache.commons.httpclient.HttpClient;
import org.apache.commons.httpclient.HttpStatus;
import org.apache.commons.httpclient.UsernamePasswordCredentials;
import org.apache.commons.httpclient.auth.AuthScope;
import org.apache.commons.httpclient.methods.GetMethod;

public class Crawl {

    public static void crawl() {
        HttpClient httpClient = new HttpClient();
        // Route all requests through the campus proxy
        httpClient.getHostConfiguration().setProxy("proxy.szu.edu.cn", 8080);
        // Send the proxy credentials up front instead of waiting for a 407 challenge
        httpClient.getParams().setAuthenticationPreemptive(true);
        httpClient.getState().setProxyCredentials(AuthScope.ANY,
                new UsernamePasswordCredentials("username", "password"));

        String url = "http://192.168.2.229/newkc/djbprint.aspx?xqh=20151&ykch=MC99000201";
        GetMethod getMethod = new GetMethod(url);
        try {
            int status = httpClient.executeMethod(getMethod);
            if (status != HttpStatus.SC_OK) {
                System.out.println("error: unexpected status " + status);
            }
            // Convert the raw bytes using the platform default charset;
            // pass an explicit charset to new String(...) if the page uses another encoding
            byte[] responseBody = getMethod.getResponseBody();
            String html = new String(responseBody);
            // Dump the page source to a local file
            FileWriter writer = new FileWriter("a.txt");
            writer.write(html);
            writer.close();
        } catch (Exception e) {
            e.printStackTrace();
        } finally {
            // Release the connection when done
            getMethod.releaseConnection();
        }
    }
}
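The class above only defines crawl(), so it needs an entry point to run. A minimal sketch is shown below; the class name CrawlMain and the console message are my own additions, and it assumes the Crawl class and the three jars are on the classpath.

public class CrawlMain {

    public static void main(String[] args) {
        // Fetches the page through the proxy and writes it to a.txt in the working directory
        Crawl.crawl();
        System.out.println("Done, check a.txt for the page source.");
    }
}

Run it with the jars on the classpath, e.g. java -cp .:commons-httpclient.jar:commons-codec.jar:commons-logging.jar CrawlMain, and the fetched HTML should appear in a.txt.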