8 - Using a Crawler to Log into a Backend System and Scrape Data


The example below logs into a backend admin system with Apache HttpClient and scrapes the product list with Jsoup. The flow is: POST the login form (username, password, and the post-login redirect target reURL); a 302 response means the login succeeded and the server is redirecting us; follow the Location header with a GET on the same HttpClient instance, so the session cookie set at login is sent automatically; finally, parse the returned HTML and pull each product's fields out of the hidden form inputs in the item table.

package cn.itcast.spider.login;

import java.util.ArrayList;

import org.apache.http.Header;
import org.apache.http.client.entity.UrlEncodedFormEntity;
import org.apache.http.client.methods.CloseableHttpResponse;
import org.apache.http.client.methods.HttpGet;
import org.apache.http.client.methods.HttpPost;
import org.apache.http.impl.client.CloseableHttpClient;
import org.apache.http.impl.client.HttpClients;
import org.apache.http.message.BasicNameValuePair;
import org.apache.http.util.EntityUtils;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;

public class Login {
    public static void main(String[] args) throws Exception {
        // 1. Create the HttpClient; its built-in cookie store will keep the
        //    session cookie issued by the login response.
        CloseableHttpClient httpclient = HttpClients.createDefault();

        // 2. Build the POST request for the login form
        HttpPost httpPost = new HttpPost("http://shop.itcast.cn/login/login.html");

        // 2.1 Form parameters: username, password, and the redirect target (reURL)
        ArrayList<BasicNameValuePair> paramList = new ArrayList<BasicNameValuePair>();
        paramList.add(new BasicNameValuePair("username", "itcast"));
        paramList.add(new BasicNameValuePair("password", "itcast"));
        paramList.add(new BasicNameValuePair("reURL", "http://shop.itcast.cn/item/itemList.html"));

        // 2.2 Attach the parameters as a URL-encoded form entity
        httpPost.setEntity(new UrlEncodedFormEntity(paramList));

        // 3. Execute the POST request
        CloseableHttpResponse response = httpclient.execute(httpPost);

        // 4. Check the response status
        int code = response.getStatusLine().getStatusCode();
        System.out.println(code);
        if (200 == code) {
            System.out.println("login successful");
        }

        // A 302 means the page redirected, i.e. the login succeeded
        if (302 == code) {
            Header[] locations = response.getHeaders("Location");
            System.out.println(locations[0].getValue());

            // Follow the redirect URL with a GET. Reuse the same client so
            // the session cookie from the login response is sent along;
            // creating a fresh client here would drop the session.
            HttpGet httpGet = new HttpGet(locations[0].getValue());
            CloseableHttpResponse res = httpclient.execute(httpGet);
            if (res.getStatusLine().getStatusCode() == 200) {
                String html = EntityUtils.toString(res.getEntity());
                System.out.println(html);

                // Parse the item list page with Jsoup
                Document doc = Jsoup.parse(html);

                // Each product sits in a table row; its fields are form
                // inputs whose names start with "itemList"
                Elements trs = doc.select("table tr");
                ArrayList<Product> arrayList = new ArrayList<Product>();
                for (Element element : trs) {
                    Product product = new Product();
                    Elements inputs = element.select("input[name^=itemList]");
                    for (Element element2 : inputs) {
                        String name = element2.attr("name");
                        String value = element2.attr("value");
                        if (name.contains("name")) {
                            product.setName(value);
                        } else if (name.contains("price")) {
                            product.setPrice(value);
                        } else if (name.contains("createTime")) {
                            product.setDate(value);
                        } else if (name.contains("detail")) {
                            product.setDes(value);
                        }
                    }
                    // Only keep rows that actually contained a product
                    if (product.getName() != null) {
                        arrayList.add(product);
                    }
                }
                System.out.println(arrayList);
            }
        }
    }
}
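The Product class referenced above is not shown in the original post. Below is a minimal sketch that matches the calls the scraping loop makes (setName, setPrice, setDate, setDes, getName); the field names and the toString format are assumptions, not the original author's class.

package cn.itcast.spider.login;

// Minimal POJO assumed by the scraping loop in Login. The original post does
// not show this class, so everything beyond the setter/getter names used in
// Login is a guess.
public class Product {
    private String name;
    private String price;
    private String date;
    private String des;

    public String getName() { return name; }
    public void setName(String name) { this.name = name; }
    public String getPrice() { return price; }
    public void setPrice(String price) { this.price = price; }
    public String getDate() { return date; }
    public void setDate(String date) { this.date = date; }
    public String getDes() { return des; }
    public void setDes(String des) { this.des = des; }

    @Override
    public String toString() {
        return "Product [name=" + name + ", price=" + price
                + ", date=" + date + ", des=" + des + "]";
    }
}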
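Why reusing the same CloseableHttpClient matters: HttpClients.createDefault() gives each client its own in-memory cookie store, so the session cookie set by the login response is only replayed on requests made through that same client instance. If you want to inspect the cookie, or share it across clients, you can wire in an explicit BasicCookieStore. The following is a minimal sketch, assuming the site identifies the session with a standard Set-Cookie header; the cookie name printed (e.g. JSESSIONID on a Java backend) depends on the server.

package cn.itcast.spider.login;

import java.util.Arrays;

import org.apache.http.client.CookieStore;
import org.apache.http.client.entity.UrlEncodedFormEntity;
import org.apache.http.client.methods.HttpPost;
import org.apache.http.cookie.Cookie;
import org.apache.http.impl.client.BasicCookieStore;
import org.apache.http.impl.client.CloseableHttpClient;
import org.apache.http.impl.client.HttpClients;
import org.apache.http.message.BasicNameValuePair;

// Sketch: attach an explicit cookie store so the session cookie issued at
// login can be inspected or shared across clients.
public class CookieDemo {
    public static void main(String[] args) throws Exception {
        CookieStore cookieStore = new BasicCookieStore();
        CloseableHttpClient client = HttpClients.custom()
                .setDefaultCookieStore(cookieStore)
                .build();

        // Log in exactly as in Login above
        HttpPost login = new HttpPost("http://shop.itcast.cn/login/login.html");
        login.setEntity(new UrlEncodedFormEntity(Arrays.asList(
                new BasicNameValuePair("username", "itcast"),
                new BasicNameValuePair("password", "itcast"))));
        client.execute(login).close();

        // After the POST, the store holds whatever cookies the server set;
        // the actual cookie name depends on the backend.
        for (Cookie cookie : cookieStore.getCookies()) {
            System.out.println(cookie.getName() + " = " + cookie.getValue());
        }
        client.close();
    }
}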