爬取校内网早操刷卡记录

来源:互联网 发布:淘宝店直播怎么开通 编辑:程序博客网 时间:2024/09/21 09:19

HttpClient 方式

package com.jiuzhouchedai.qqhru;import java.io.IOException;import java.io.InputStream;import java.net.HttpURLConnection;import java.net.MalformedURLException;import java.net.ProtocolException;import java.net.URL;import java.net.URLEncoder;import java.util.ArrayList;import java.util.List;import org.apache.http.HttpEntity;import org.apache.http.NameValuePair;import org.apache.http.client.HttpClient;import org.apache.http.client.entity.UrlEncodedFormEntity;import org.apache.http.client.methods.HttpPost;import org.apache.http.conn.ClientConnectionRequest;import org.apache.http.entity.ContentType;import org.apache.http.entity.StringEntity;import org.apache.http.impl.client.DefaultHttpClient;import org.apache.http.message.BasicNameValuePair;import org.w3c.dom.Element;import org.w3c.dom.NodeList;import com.jiuzhouchedai.crawlerWX.Record;public class QDzaocao {    public static void main(String[] args) {        QDUtil2 qdUtil2=new QDUtil2();        //爬虫爬到的数据        String zaocaoData = qdUtil2.zaocaoToWeixin("2014094041","2014094041");        System.out.println(zaocaoData);        InputStream ins = null;        try {             HttpClient httpclient = new DefaultHttpClient();                  HttpPost httpPost = new HttpPost("http://www.jiuzhouchedai.com/weixintest/zaocaoUpdate");                  List<NameValuePair> nvps = new ArrayList<NameValuePair>();                //httpPost.addHeader("Content-Type","text/html;charset=UTF-8");                nvps.add(new BasicNameValuePair("zaocaoData",zaocaoData));                  nvps.add(new BasicNameValuePair("student_id", "2014094041"));                  httpPost.setEntity(new UrlEncodedFormEntity(nvps,"UTF-8"));                  httpclient.execute(httpPost);                  httpclient.getConnectionManager().shutdown();  //          //          //          //          String xx = "2014094041";//          String path = "http://www.jiuzhouchedai.com/weixintest/zaocaoUpdate?zaocaoData='xxxxxx'";//          URL url = new 
URL(path);//          HttpURLConnection connection = (HttpURLConnection) url.openConnection();//          connection.setRequestMethod("GET");////            connection.setRequestProperty("Cookie",////                    "JSESSIONID=4191C1C6BD26564DE7D7D7EF63CBE7B3");//          connection.setReadTimeout(5000);//          connection.setRequestProperty("zaocaoData", xx);//      //  connection.setRequestProperty("zaocaoData", zaocaoToWeixin);//          //connection.setRequestProperty("zaocaoData", zaocaoToWeixin);//          //          //connection.addRequestProperty("zaocaoData", zaocaoToWeixin);//          //          //          int code = connection.getResponseCode();//          if (code == 200) {//              ins = connection.getInputStream();//              System.out.println("===========上传数据成功!");//              //          }        } catch (Exception e) {            e.printStackTrace();        }    }    }
package com.jiuzhouchedai.qqhru;import java.io.InputStream;import java.util.List;import org.w3c.dom.Element;import com.jiuzhouchedai.crawlerWX.Record;public class QDUtil2 {    public   String zaocaoToWeixin(String name,String pwd) {         //这个是微信需要的结果        String zaocaoResult = null;        try {            InputStream sreamHtml = QDUtil.getSreamHtml(name,pwd);            String strHtml = QDUtil.streamToString(sreamHtml);             System.out.println(strHtml);            String zaocaoTable = QDUtil.zaocaoTable(strHtml);            // System.out.println(zaocaoTable);            InputStream zaocaoTableStream = QDUtil.stringToStream(zaocaoTable);            Element rootNode = QDUtil.getRootNode(zaocaoTableStream);            // System.out.println(rootNode.getNodeName());            List<Record> zaocaoPoList = QDUtil.zaocaoToPoList(rootNode);            zaocaoResult = QDUtil.zaocaoResult(zaocaoPoList);        } catch (Exception e) {            e.printStackTrace();        }        return zaocaoResult;    }}
package com.jiuzhouchedai.qqhru;import java.io.ByteArrayInputStream;import java.io.InputStream;import java.net.HttpURLConnection;import java.net.URL;import java.util.ArrayList;import java.util.List;import javax.xml.parsers.DocumentBuilder;import javax.xml.parsers.DocumentBuilderFactory;import org.apache.http.HttpResponse;import org.apache.http.NameValuePair;import org.apache.http.client.HttpClient;import org.apache.http.client.entity.UrlEncodedFormEntity;import org.apache.http.client.methods.HttpPost;import org.apache.http.impl.client.DefaultHttpClient;import org.apache.http.message.BasicNameValuePair;import org.w3c.dom.Document;import org.w3c.dom.Element;import org.w3c.dom.NodeList;import com.jiuzhouchedai.crawlerWX.Record;import com.jiuzhouchedai.crawlerWX.Test001;public class QDUtil {    public static String jssonid=null;    /**     * 爬到的网站写到流     * @return     * @throws Exception     */    public static InputStream getSreamHtml(String name,String pwd) throws Exception {        InputStream inStream = null;            HttpClient httpclient = new DefaultHttpClient();              HttpPost httpPost = new HttpPost("http://172.16.51.37/personQueryZC_personalDetailQuery.html");              List<NameValuePair> nvps = new ArrayList<NameValuePair>();            nvps.add(new BasicNameValuePair("loginName","2014094041"));             nvps.add(new BasicNameValuePair("password","2014094041"));              httpPost.setEntity(new UrlEncodedFormEntity(nvps,"UTF-8"));              HttpResponse response = httpclient.execute(httpPost);            InputStream content = response.getEntity().getContent();            httpclient.getConnectionManager().shutdown();            return content;  //      if(jssonid==null){//          //          String loginName = "2014094041";//          String password = "2014094041";//          //          URL url = new URL("http://172.16.51.37/personQueryZC_personalDetailQuery.html");//          HttpURLConnection connection = (HttpURLConnection) 
url.openConnection();//          connection.setRequestMethod("POST");//          connection.setReadTimeout(5000);//          //          //          connection.setDoOutput(true);// 是否输入参数////          //          StringBuffer params = new StringBuffer();//          // 表单参数与get形式一样//          params.append("loginName").append("=").append(loginName).append("&")//                .append("password").append("=").append(password);//          byte[] bypes = params.toString().getBytes();//          connection.getOutputStream().write(bypes);// 输入参数//          connection.connect();//          //          int code = connection.getResponseCode();//          //          //          if (code == 200) {//              //              jssonid=connection.getHeaderField("Set-Cookie").split(";")[0];//              System.out.println("jssonid:"+jssonid);//                inStream=connection.getInputStream();//              //              //          }//          //          //      }//      //      return inStream;//          String loginName = "2014094041";//          String password = "2014094041";//          //          //          URL url = new URL("http://172.16.51.37/personQueryZC_personalDetailQuery.html");//          //          //          HttpURLConnection conn = (HttpURLConnection) url.openConnection();//          conn.setRequestMethod("POST");// 提交模式//          // conn.setConnectTimeout(10000);//连接超时 单位毫秒//          // conn.setReadTimeout(2000);//读取超时 单位毫秒//          conn.setDoOutput(true);// 是否输入参数////          //          StringBuffer params = new StringBuffer();//          // 表单参数与get形式一样//          params.append("loginName").append("=").append(loginName).append("&")//                .append("password").append("=").append(password);//          byte[] bypes = params.toString().getBytes();//          conn.getOutputStream().write(bypes);// 输入参数//          InputStream inStream=conn.getInputStream();//          //          //          //          //          return 
inStream;//      if(jssonid==null){//          //          //          URL url = new URL("http://172.16.51.37/personQueryZC_personalDetailQuery.html");//          HttpURLConnection connection = (HttpURLConnection) url.openConnection();//          connection.setRequestMethod("POST");//          connection.setReadTimeout(5000);//          connection.set//          connection.connect();//          int code = connection.getResponseCode();//          if (code == 200) {//              //              jssonid=connection.getHeaderField("Set-Cookie").split(";")[0];//              System.out.println("jssonid:"+jssonid);//              //          }//          //          //      }//      InputStream ins = null;//      //       //      String path = "http://172.16.51.37/personQueryZC_personalDetailQuery.html";//      URL url = new URL(path);//      HttpURLConnection connection = (HttpURLConnection) url.openConnection();//      connection.setRequestMethod("POST");//      connection.setRequestProperty("Cookie",jssonid);//      //      connection.setReadTimeout(5000);//      connection.setRequestProperty("loginName", name);//      connection.setRequestProperty("password", pwd);////      int code = connection.getResponseCode();//      if (code == 200) {//          ins = connection.getInputStream();//          //      }//      return ins;    }    /**     * 将流变成字符串     * @param ins     * @return     * @throws Exception     */    public static String streamToString(InputStream ins) throws Exception {        StringBuffer sb = new StringBuffer();        byte[] data = new byte[1024];        int len = 0;        while ((len = ins.read(data)) != -1) {            String ss = new String(data, 0, len, "UTF-8");            sb.append(ss);        }        return sb.toString();    }    public static String zaocaoTable(String strHtml){     String tableHtml;     int begin=strHtml.indexOf("<table id=\"dataTable\"");     int end=strHtml.indexOf("</table>", strHtml.indexOf("<table 
id=\"dataTable\""));     tableHtml = strHtml.substring(begin, end)+"\n</table>";     tableHtml = tableHtml.replaceAll("&nbsp;", "");     return tableHtml;    }    /**     * 将字符串变成流     * @param str     * @return     * @throws Exception     */    public static InputStream stringToStream(String str) throws Exception {        InputStream stream = null;        if (str != null && !str.trim().equals("")) {            stream = new ByteArrayInputStream(str.getBytes("UTF-8"));        }        return stream;    }    /**     * 获取dom根节点     * w3cdom解析     *      * @param is     * @return     * @throws Exception      */    public static Element getRootNode(InputStream is) throws Exception {           //创建Document对象及读取XML文件            DocumentBuilderFactory builderFactory = DocumentBuilderFactory.newInstance();            //解析XML            Document document=null;            DocumentBuilder builder = builderFactory.newDocumentBuilder();            document = builder.parse(is);            Element rootElement = document.getDocumentElement(); //获取根节点            return rootElement;        }    /**     * 封装到POJO     * @param rootNode     * @return     */    public static   List<Record> zaocaoToPoList(Element rootNode){     NodeList trs = rootNode.getElementsByTagName("tr");     List<Record> listRecord=new ArrayList<Record>();     for (int i = 0; i < trs.getLength(); i++) {         NodeList tds = trs.item(i).getChildNodes();         List<String> list=new ArrayList<String>();         for (int j = 0; j < tds.getLength(); j++) {             if("td".equals(tds.item(j).getNodeName())){                String tdValue= tds.item(j).getTextContent().trim();                list.add(tdValue);             }        }         Record record=new Record();         record.setId(list.get(0));         record.setStudentID(list.get(1));         record.setName(list.get(2));         record.setIDNumber(list.get(3));         record.setTime(list.get(4));         record.setIsValid(list.get(5));         
listRecord.add(record);     }    return listRecord;    }    public static String zaocaoResult(List<Record> zaocaoPoList) throws Exception{    StringBuffer result=new StringBuffer();    for (int i = 0; i < zaocaoPoList.size(); i++) {        //System.out.println("=======================");        //result.append("=====================\n");        //System.out.println("记录号:"+zaocaoPoList.get(i).getId());        result.append("记录号:"+zaocaoPoList.get(i).getId()+"\n");        //System.out.println("学号:"+zaocaoPoList.get(i).getStudentID());        result.append("学号:"+zaocaoPoList.get(i).getStudentID()+"\n");        //System.out.println("姓名:"+zaocaoPoList.get(i).getName());        result.append("姓名:"+zaocaoPoList.get(i).getName()+"\n");        //System.out.println("卡号:"+zaocaoPoList.get(i).getIDNumber());        result.append("卡号:"+zaocaoPoList.get(i).getIDNumber()+"\n");        //System.out.println("刷卡时间:"+zaocaoPoList.get(i).getTime());        result.append("刷卡时间:"+zaocaoPoList.get(i).getTime()+"\n");        //System.out.println("是否有效:"+zaocaoPoList.get(i).getIsValid());        result.append("是否有效:"+zaocaoPoList.get(i).getIsValid()+"\n");        //System.out.println("=======================");    }    return result.toString();    }    /**     * 通过HttpURLConnection模拟post表单提交     * 网上找的     * @param path     * @param params 例如"name=zhangsan&age=21"     * @return     * @throws Exception     */    public static InputStream sendPostRequestByForm(String path, String params) throws Exception{        URL url = new URL(path);        HttpURLConnection conn = (HttpURLConnection) url.openConnection();        conn.setRequestMethod("POST");// 提交模式        // conn.setConnectTimeout(10000);//连接超时 单位毫秒        // conn.setReadTimeout(2000);//读取超时 单位毫秒        conn.setDoOutput(true);// 是否输入参数        byte[] bypes = params.toString().getBytes();        conn.getOutputStream().write(bypes);// 输入参数        InputStream inStream=conn.getInputStream();        return inStream;    }}

WebClient(HtmlUnit)方式

//爬取校内网数据,并上传到微信服务器import java.util.ArrayList;import java.util.List;import org.apache.http.NameValuePair;import org.apache.http.client.HttpClient;import org.apache.http.client.entity.UrlEncodedFormEntity;import org.apache.http.client.methods.HttpPost;import org.apache.http.impl.client.DefaultHttpClient;import org.apache.http.message.BasicNameValuePair;import com.gargoylesoftware.htmlunit.BrowserVersion;import com.gargoylesoftware.htmlunit.NicelyResynchronizingAjaxController;import com.gargoylesoftware.htmlunit.ScriptResult;import com.gargoylesoftware.htmlunit.WebClient;import com.gargoylesoftware.htmlunit.html.DomElement;import com.gargoylesoftware.htmlunit.html.DomNodeList;import com.gargoylesoftware.htmlunit.html.HtmlElement;import com.gargoylesoftware.htmlunit.html.HtmlPage;public class TestC {    public static HtmlPage home_index(String name,String pwd ) throws Exception {        WebClient webClient = new WebClient(BrowserVersion.CHROME);        webClient.setJavaScriptTimeout(5000);        webClient.getOptions().setCssEnabled(false);        webClient.getOptions().setJavaScriptEnabled(true);        webClient.getOptions().setThrowExceptionOnFailingStatusCode(false);        webClient.getOptions().setThrowExceptionOnScriptError(false);        webClient.setAjaxController(new NicelyResynchronizingAjaxController());        // WebClient webClient = new WebClient();// 创建WebClient        HtmlPage page = webClient                .getPage("http://172.16.51.37/user_login.html"); // 打开百度        HtmlElement usernameEle = page.getElementByName("loginName");        HtmlElement passwordEle = (HtmlElement) page.getElementById("password");        usernameEle.focus(); // 设置输入焦点        usernameEle.type(name); // 填写值        passwordEle.focus(); // 设置输入焦点        passwordEle.type(pwd); // 填写值        HtmlElement sub = (HtmlElement) page.getElementsByTagName("input")                .item(2);        page = sub.click();        //System.out.println(page.asXml());        return page;    }    
public static void main(String[] args) {        try {            HtmlPage home_index = home_index("2015041001", "2015041001");            StringBuffer sb = new StringBuffer();            ScriptResult zaocao_scriptResult = home_index.executeJavaScript("javascript:f_openTab('tabid_personaldetailZC','早操刷卡明细','personQueryZC_personalDetailQuery.html')");            HtmlPage  zaocao_html = (HtmlPage) zaocao_scriptResult.getNewPage();            //System.out.println(zaocao_html.asXml());            DomElement title_Tag = (DomElement) zaocao_html.getElementsByTagName("title").item(0);            //System.out.println(textContent);            sb.append("title_value:"+title_Tag.getTextContent());  //添加标题            //zaocao_tr_html: 相当于   记录的集合            DomNodeList<HtmlElement> zaocao_tr_html = zaocao_html.getElementById("dataTable").getElementsByTagName("tr");            List<Record> record_list=new ArrayList<>();            for (int i = 0; i < zaocao_tr_html.size(); i++) {                //zaocao_td_html: 相当于  某一条记录的属性的集合                DomNodeList<HtmlElement> zaocao_td_html=    ((DomElement) zaocao_tr_html.item(i)).getElementsByTagName("td");                List<String> each_str=new ArrayList<>();                for (int j = 0; j < zaocao_td_html.size(); j++) {                    String  each= zaocao_td_html.item(j).getTextContent();                    each_str.add(each.trim());                }                Record record=new Record();                record.setId(each_str.get(0));                record.setStudentID(each_str.get(1).substring(0, 10));                record.setName(each_str.get(2).substring(0, each_str.get(2).length()-1));                record.setIDNumber(each_str.get(3).substring(0, each_str.get(3).length()-1));                record.setTime(each_str.get(4).substring(0, each_str.get(4).length()-1));                record.setIsValid(each_str.get(5));                record_list.add(record);                //String textContent = 
zaocao_td_html.item(0).getTextContent();            //System.out.println(textContent);            }            StringBuffer sbs=new StringBuffer();            for (int i = 0; i < record_list.size(); i++) {                Record record = record_list.get(i);                System.out.println("========================");                System.out.println("记录号:"+record.getId());                System.out.println("学号:"+record.getStudentID());                System.out.println("姓名:"+record.getName());                System.out.println("卡号:"+record.getIDNumber());                System.out.println("刷卡时间:"+record.getTime());                System.out.println("是否有效:"+record.getIsValid());                sbs.append("\n========================");                sbs.append("\n记录号:"+record.getId());                sbs.append("\n学号:"+record.getStudentID());                sbs.append("\n姓名:"+record.getName());                sbs.append("\n卡号:"+record.getIDNumber());                sbs.append("\n刷卡时间:"+record.getTime());                sbs.append("\n是否有效:"+record.getIsValid());            }            System.out.println(sbs.toString());            //String zaocao_text = zaocao_html.asText();//          String[] temp = zaocao_text.split("。");//          String temp2=temp[1].split("页")[0];//              //  System.out.println(temp2);            //String replaceAll = temp2.replaceAll(" ", "--");        //System.out.println(replaceAll);            //String[] split = replaceAll.split("--");            //System.out.println("====================");        //System.out.println(split[1].trim());        //System.out.println(split[2].trim());        //System.out.println(split[3].trim());//          DomNodeList<DomElement> span = page.getElementsByTagName("span");//          //          HtmlElement zaocao = (HtmlElement) span.item(3);//          //          page = zaocao.click();//          DomNodeList<DomElement> a = page.getElementsByTagName("a");//          //          
System.out.println(a.item(2).getAttributes().getNamedItem("href"));//          //          //          HtmlElement namedItem = a.item(2).getAttributes().getNamedItem("href");//          HtmlAnchor aa=page.//          //          page=aa.click();//                      //System.out.println(page);            //System.out.println(page.asXml());             HttpClient httpclient = new DefaultHttpClient();                  HttpPost httpPost = new HttpPost("http://www.jiuzhouchedai.com/weixintest/zaocaoUpdate");                  List<NameValuePair> nvps = new ArrayList<NameValuePair>();                //httpPost.addHeader("Content-Type","text/html;charset=UTF-8");                nvps.add(new BasicNameValuePair("zaocaoData",sbs.toString()));                  nvps.add(new BasicNameValuePair("student_id", "2014094041"));                  httpPost.setEntity(new UrlEncodedFormEntity(nvps,"UTF-8"));                  httpclient.execute(httpPost);                  httpclient.getConnectionManager().shutdown();          } catch (Exception e) {            e.printStackTrace();        }    }}//==================================================public class Record {    private String id;    private String studentID;    private String name;    private String IDNumber;    private String time;    private String  isValid;    public String getId() {        return id;    }    public void setId(String id) {        this.id = id;    }    public String getStudentID() {        return studentID;    }    public void setStudentID(String studentID) {        this.studentID = studentID;    }    public String getName() {        return name;    }    public void setName(String name) {        this.name = name;    }    public String getIDNumber() {        return IDNumber;    }    public void setIDNumber(String iDNumber) {        IDNumber = iDNumber;    }    public String getTime() {        return time;    }    public void setTime(String time) {        this.time = time;    }    public String getIsValid() {   
     return isValid;    }    public void setIsValid(String isValid) {        this.isValid = isValid;    }}//微信服务器通过  servlet中写  request.getParamer("xxx") 获取数据。

火狐插件 Selenium IDE

可实现自动化爬虫,并能自动生成代码(支持导出为 Java 代码)。
http://www.cnblogs.com/fnng/archive/2011/10/23/2222157.html

https://addons.mozilla.org/en-US/firefox/addon/selenium-ide-button/

0 0
原创粉丝点击