HttpClient抓取【需登录跳转页面】的数据

来源：互联网 ｜ 发布：怎么在淘宝上搜店铺号 ｜ 编辑：程序博客网 ｜ 时间：2024/06/05 18:47

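大家都知道 HttpClient 可以抓取页面数据，但是有的页面需要用户登录后才可以访问。最初我的做法是先用浏览器登录，再把浏览器里的 Cookie 手动放进请求中，这样可以抓取；但是一天后服务器端的 Session 就过期了，必须重新复制 Cookie，非常麻烦。后来在网上查了很多资料，才有了下面的版本。使用时需要设置两个 URL：第一个是登录页的 URL，用来提交登录表单并获取登录后的 Cookie；第二个是真正要抓取的页面的 URL。两次请求使用同一个 HttpClient 实例，登录后得到的 Cookie 会保存在该实例中并自动带到第二次请求里，因此第二次请求就能以已登录的身份访问目标页面。代码很简单，下面直接给出。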

package cn.amazon.http;

import java.util.ArrayList;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;

import org.apache.http.HttpEntity;
import org.apache.http.HttpResponse;
import org.apache.http.NameValuePair;
import org.apache.http.client.CookieStore;
import org.apache.http.client.entity.UrlEncodedFormEntity;
import org.apache.http.client.methods.HttpPost;
import org.apache.http.impl.client.DefaultHttpClient;
import org.apache.http.impl.conn.PoolingClientConnectionManager;
import org.apache.http.message.BasicNameValuePair;
import org.apache.http.util.EntityUtils;

/**
 * Demo of fetching a page that is only reachable after logging in.
 *
 * <p>The flow is: POST the login form to {@code loginUrl}, then POST to
 * {@code searchUrl} with the same client instance. Both requests go through
 * one {@link DefaultHttpClient}, so whatever cookies the login response sets
 * are held by that client and sent along with the second request.
 *
 * <p>{@code loginUrl}, {@code searchUrl} and the form values are left blank
 * on purpose; fill them in before running.
 */
public class getCookie {

    /** URL the login form is posted to. */
    private String loginUrl = "";

    /** URL of the page to fetch once logged in. */
    private String searchUrl = "";

    /** Charset used to encode the form and to decode the response body. */
    private String charset = "UTF-8";

    /**
     * Logs in, then requests the protected page and reads its body.
     *
     * <p>Each response object is printed to standard output. Any failure is
     * reported via its stack trace rather than propagated, so this method
     * never throws.
     */
    public void test() {
        // Form fields sent with the login request.
        Map<String, String> createMap = new HashMap<String, String>();
        createMap.put("userName", "");
        createMap.put("password", "");
        createMap.put("email", "huayanh@sellercs.amazon.com");

        DefaultHttpClient client = null;
        String result = null;
        try {
            client = new DefaultHttpClient(new PoolingClientConnectionManager());

            HttpPost httpPost = new HttpPost(loginUrl);
            // Request header.
            httpPost.setHeader("User-Agent",
                    "Mozilla/5.0 (Windows NT 6.1; WOW64; rv:45.0) Gecko/20100101 Firefox/45.0");

            // Request parameters.
            List<NameValuePair> list = new ArrayList<NameValuePair>();
            for (Entry<String, String> elem : createMap.entrySet()) {
                list.add(new BasicNameValuePair(elem.getKey(), elem.getValue()));
            }
            if (list.size() > 0) {
                httpPost.setEntity(new UrlEncodedFormEntity(list, charset));
            }

            // First request: log in.
            HttpResponse response = client.execute(httpPost);
            System.out.println(response);
            // Drain the login response so its pooled connection is released
            // before the next request; the body itself is not needed.
            EntityUtils.consume(response.getEntity());

            // Second request: the page that requires a logged-in session.
            HttpPost httpPost2 = new HttpPost(searchUrl);
            response = client.execute(httpPost2);
            System.out.println(response);

            // Body of the page fetched after login.
            if (response != null) {
                HttpEntity resEntity = response.getEntity();
                if (resEntity != null) {
                    result = EntityUtils.toString(resEntity, charset);
                }
            }
        } catch (Exception ex) {
            // Best-effort demo: report the failure and carry on.
            ex.printStackTrace();
        } finally {
            // Release every connection held by the pooling manager.
            if (client != null) {
                client.getConnectionManager().shutdown();
            }
        }
    }

    /**
     * Entry point: runs the login-then-fetch demo once.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        getCookie main = new getCookie();
        main.test();
    }
}

1 0