HttpClient简单操作

来源:互联网 发布:淘宝经营人贷款条件 编辑:程序博客网 时间:2024/06/10 05:47

之前一直使用jsoup来写一些简单的爬虫

但是很多功能似乎无法实现(比如代理),于是尝试着用了一下HttpClient。

package Test;


import java.io.BufferedInputStream;
import java.io.BufferedReader;
import java.io.Closeable;
import java.io.IOException;
import java.io.InputStreamReader;
import java.nio.charset.StandardCharsets;

import org.apache.http.HttpEntity;
import org.apache.http.HttpHost;
import org.apache.http.HttpRequest;
import org.apache.http.HttpResponse;
import org.apache.http.client.ClientProtocolException;
import org.apache.http.client.config.RequestConfig;
import org.apache.http.client.methods.CloseableHttpResponse;
import org.apache.http.client.methods.HttpGet;
import org.apache.http.impl.client.CloseableHttpClient;
import org.apache.http.impl.client.HttpClients;
import org.apache.http.impl.execchain.MainClientExec;


/**
 * Minimal Apache HttpClient example: fetches a page with a GET request routed
 * through a fixed HTTP proxy and returns the response body as a string.
 */
public class HttpclientTest {

    /** Address of the proxy every request is routed through. */
    private static final String PROXY_HOST = "124.88.67.81";

    /** Port of the proxy every request is routed through. */
    private static final int PROXY_PORT = 80;

    /** Value used for the connect, connection-request and socket timeouts (milliseconds). */
    private static final int TIMEOUT_MILLIS = 5000;

    /** Browser-like User-Agent header sent with every request. */
    private static final String USER_AGENT =
            "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/56.0.2924.87 Safari/537.36";

    /**
     * Fetches a sample page and prints the result ({@code null} on failure).
     *
     * @param args unused
     */
    public static void main(String[] args) {
        System.out.println(new HttpclientTest().gethtml("http://www.bilibili.com"));
    }

    /**
     * Downloads the body of the given page.
     *
     * <p>The request goes through the proxy configured in this class, with all three
     * timeouts set to {@link #TIMEOUT_MILLIS}. The body is decoded as UTF-8 and read
     * line by line; line terminators are dropped, so the returned string is the
     * concatenation of the lines without separators.
     *
     * <p>Progress and failure messages are printed to standard output.
     *
     * @param url address of the page to fetch
     * @return the page body, or {@code null} if the request failed, the response
     *         carried no body, or the body could not be read
     */
    public String gethtml(String url) {
        CloseableHttpClient httpclient = HttpClients.createDefault();
        // Declared outside the try block so the finally block can release whatever
        // was actually opened; both stay null if the request fails early.
        CloseableHttpResponse response = null;
        BufferedReader reader = null;
        try {
            // Built inside the try so the client is still closed if the URL is rejected.
            HttpGet getmethod = new HttpGet(url);

            // Route the request through the proxy and bound every phase of it.
            HttpHost proxy = new HttpHost(PROXY_HOST, PROXY_PORT);
            RequestConfig config = RequestConfig.custom()
                    .setProxy(proxy)
                    .setConnectTimeout(TIMEOUT_MILLIS)
                    .setConnectionRequestTimeout(TIMEOUT_MILLIS)
                    .setSocketTimeout(TIMEOUT_MILLIS)
                    .build();
            getmethod.addHeader("User-Agent", USER_AGENT);
            getmethod.setConfig(config);

            // Execute the GET request.
            response = httpclient.execute(getmethod);

            // A response is not required to carry a body.
            HttpEntity entity = response.getEntity();
            if (entity == null) {
                System.out.println("读取失败");
                return null;
            }

            // Decode explicitly instead of relying on the platform default charset.
            // NOTE(review): assumes the page is UTF-8; the charset declared in the
            // response's Content-Type header is not consulted.
            reader = new BufferedReader(
                    new InputStreamReader(entity.getContent(), StandardCharsets.UTF_8));

            StringBuilder sb = new StringBuilder();
            String line;
            while ((line = reader.readLine()) != null) {
                sb.append(line);
            }
            System.out.println("html页面获取成功");
            return sb.toString();
        } catch (ClientProtocolException e) {
            System.out.println("连接失败");
            return null;
        } catch (IOException e) {
            System.out.println("读取失败");
            return null;
        } finally {
            // Close innermost first; each close is independent so one failure
            // cannot prevent the remaining resources from being released.
            closeQuietly(reader);
            closeQuietly(response);
            closeQuietly(httpclient);
        }
    }

    /**
     * Closes a resource if it was opened, reporting (not propagating) a close failure.
     *
     * @param resource the resource to close; may be {@code null}
     */
    private static void closeQuietly(Closeable resource) {
        if (resource == null) {
            return;
        }
        try {
            resource.close();
        } catch (IOException e) {
            System.out.println("流关闭失败");
        }
    }
}

1 0
原创粉丝点击