HttpClient抓取【需登录跳转页面】的数据

来源:互联网 发布:怎么在淘宝上搜店铺号 编辑:程序博客网 时间:2024/06/05 18:47

大家都知道 HttpClient 可以抓取页面数据,但有的页面需要用户登录后才能访问。起初我先用浏览器登录,再把浏览器的 Cookie 放进请求里,这样可以抓取;但一天后服务器端的 Session 就过期了,每次都要重新复制 Cookie,很麻烦。后来在网上查了很多资料,才有了下面这个版本。代码中需要设置两个 URL:第一个是登录页的 URL,用来提交登录表单并获取登录后的 Cookie;第二个是真正要抓取的页面 URL,登录之后用同一个 HttpClient 实例去请求它,Cookie 会被自动带上。代码很简单,大家应该都能看懂,这里就不逐行解释了。

package cn.amazon.http;import java.util.ArrayList;import java.util.HashMap;import java.util.Iterator;import java.util.List;import java.util.Map;import java.util.Map.Entry;import org.apache.http.HttpEntity;import org.apache.http.HttpResponse;import org.apache.http.NameValuePair;import org.apache.http.client.CookieStore;import org.apache.http.client.entity.UrlEncodedFormEntity;import org.apache.http.client.methods.HttpPost;import org.apache.http.impl.client.DefaultHttpClient;import org.apache.http.impl.conn.PoolingClientConnectionManager;import org.apache.http.message.BasicNameValuePair;import org.apache.http.util.EntityUtils;//对接口进行测试  public class getCookie {    private String loginUrl = "";    private String SearchUrl = "";    private String charset = "UTF-8";    public void test() {        //存放发送参数        Map<String, String> createMap = new HashMap<String, String>();        createMap.put("userName", "");        createMap.put("password", "");        createMap.put("email", "huayanh@sellercs.amazon.com");        HttpPost httpPost = null;        HttpPost httpPost2 = null;        HttpResponse response = null;        DefaultHttpClient client = null;        String result = null;        try {            client = new DefaultHttpClient(new PoolingClientConnectionManager());            httpPost = new HttpPost(loginUrl);            // 设置请求头            httpPost.setHeader("User-Agent",                    "Mozilla/5.0 (Windows NT 6.1; WOW64; rv:45.0) Gecko/20100101 Firefox/45.0");            // 设置参数            List<NameValuePair> list = new ArrayList<NameValuePair>();            Iterator iterator = createMap.entrySet().iterator();            while (iterator.hasNext()) {                Entry<String, String> elem = (Entry<String, String>) iterator.next();                list.add(new BasicNameValuePair(elem.getKey(), elem.getValue()));            }            if (list.size() > 0) {                UrlEncodedFormEntity entity = new UrlEncodedFormEntity(list, charset);                
httpPost.setEntity(entity);            }            // 第一次请求            response = client.execute(httpPost);            System.out.println(response);            // 第二次请求            httpPost2 = new HttpPost(SearchUrl);            response = client.execute(httpPost2);            System.out.println(response);            // 登录后的请求内容            if (response != null) {                HttpEntity resEntity = response.getEntity();                if (resEntity != null) {                    result = EntityUtils.toString(resEntity, charset);                }            }            // System.out.println(result);        } catch (Exception ex) {            ex.printStackTrace();        }    }    public static void main(String[] args) {        getCookie main = new getCookie();        main.test();    }}
1 0
原创粉丝点击