http请求学习

来源:互联网 发布:ubuntu google输入法 编辑:程序博客网 时间:2024/06/06 09:35

闲暇时,想通过HTTP请求持续获取某个网站的公告文章列表,从中搜索和服务费有关的公告。

刚开始,发送GET请求时一直被aqyun网管过滤掉,返回的是"非法的请求"。

后来尝试在请求头中加上了User-Agent: Apache-HttpClient/4.2.6 (java 1.5),就可以正常获取返回的JSON数据。

猜测只要消息头部有User-Agent就能通过。

以下是我使用Java写的代码,需要的可以参考下。


import java.io.IOException;
import java.io.UnsupportedEncodingException;
import java.net.URLEncoder;
import java.util.List;


import org.apache.commons.io.IOUtils;
import org.apache.http.NameValuePair;
import org.apache.http.client.methods.CloseableHttpResponse;
import org.apache.http.client.methods.HttpGet;
import org.apache.http.impl.client.CloseableHttpClient;
import org.apache.http.impl.client.HttpClients;
import org.apache.http.util.EntityUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.util.CollectionUtils;


public final class HttpUtils {


    private static final Logger logger = LoggerFactory
            .getLogger(HttpUtils.class);


    private final static String ENCODER_UTF8 = "UTF-8";
    
private HttpUtils(){}

/**
* get请求
* 测试发现对某些网站请求需要携带User-Agent头部,值校验不确定有没有
* 以下两个都能够通过:
* User-Agent: Apache-HttpClient/4.2.6 (java 1.5)
* User-Agent:Mozilla/5.0 (Windows NT 6.3; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/49.0.2623.108 Safari/537.36
* @param url
* @param pairs
* @return
*/
public static String get(String url, List<NameValuePair> pairs){
CloseableHttpClient httpClient = HttpClients.custom().build();
HttpGet get = new HttpGet(url);
StringBuilder sb = new StringBuilder();
if (!CollectionUtils.isEmpty(pairs)) {
try {
for (NameValuePair pair : pairs) {
if (sb.length() > 0) {
sb.append("&");
}
sb.append(pair.getName()).append("=").append(URLEncoder.encode(pair.getValue(), ENCODER_UTF8));
}
} catch (UnsupportedEncodingException e) {
logger.error("encoder utf-8 not found", e);
}
}

if (sb.length() > 0){
if (url.contains("?")) {
url = url + "&" + sb.toString();
} else {
url = url + "?" + sb.toString();
}
}

get.addHeader("User-Agent", "Apache-HttpClient/4.2.6 (java 1.5)");
CloseableHttpResponse resp = null;
try {
//logger.info("Debug request url: {}", url);
resp = httpClient.execute(get);
String respContent = EntityUtils.toString(resp.getEntity(), ENCODER_UTF8);
//logger.info("response content: {}", respContent);
return respContent;
} catch (IOException e) {
logger.error("Http get request error", e);
} finally {
IOUtils.closeQuietly(resp);
if (get != null) {
get.releaseConnection();
}
IOUtils.closeQuietly(httpClient);
}

return null;
}
}

0 0