爬取校内网早操刷卡记录
来源:互联网 发布:淘宝店直播怎么开通 编辑:程序博客网 时间:2024/09/21 09:19
httpClient方式
package com.jiuzhouchedai.qqhru;

import java.io.IOException;
import java.io.InputStream;
import java.net.HttpURLConnection;
import java.net.MalformedURLException;
import java.net.ProtocolException;
import java.net.URL;
import java.net.URLEncoder;
import java.util.ArrayList;
import java.util.List;

import org.apache.http.HttpEntity;
import org.apache.http.NameValuePair;
import org.apache.http.client.HttpClient;
import org.apache.http.client.entity.UrlEncodedFormEntity;
import org.apache.http.client.methods.HttpPost;
import org.apache.http.conn.ClientConnectionRequest;
import org.apache.http.entity.ContentType;
import org.apache.http.entity.StringEntity;
import org.apache.http.impl.client.DefaultHttpClient;
import org.apache.http.message.BasicNameValuePair;
import org.w3c.dom.Element;
import org.w3c.dom.NodeList;

import com.jiuzhouchedai.crawlerWX.Record;

/**
 * Command-line entry point: scrapes the morning-exercise card-swipe records
 * through {@link QDUtil2#zaocaoToWeixin(String, String)} and uploads the
 * resulting text to the WeChat server as a URL-encoded form POST.
 */
public class QDzaocao {

    /** Student id used both as the login name and as the uploaded {@code student_id}. */
    private static final String STUDENT_ID = "2014094041";

    /** Login password; in this sample it is the same value as the student id. */
    private static final String PASSWORD = "2014094041";

    /** Endpoint on the WeChat server that receives the scraped records. */
    private static final String UPLOAD_URL = "http://www.jiuzhouchedai.com/weixintest/zaocaoUpdate";

    /**
     * Scrapes the records, prints them, and uploads them.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        QDUtil2 qdUtil2 = new QDUtil2();
        // Data scraped by the crawler.
        String zaocaoData = qdUtil2.zaocaoToWeixin(STUDENT_ID, PASSWORD);
        System.out.println(zaocaoData);

        // zaocaoToWeixin returns null when scraping failed; do not push an empty value upstream.
        if (zaocaoData == null) {
            System.err.println("No data scraped; upload skipped.");
            return;
        }

        HttpClient httpclient = new DefaultHttpClient();
        try {
            HttpPost httpPost = new HttpPost(UPLOAD_URL);
            List<NameValuePair> nvps = new ArrayList<NameValuePair>();
            nvps.add(new BasicNameValuePair("zaocaoData", zaocaoData));
            nvps.add(new BasicNameValuePair("student_id", STUDENT_ID));
            httpPost.setEntity(new UrlEncodedFormEntity(nvps, "UTF-8"));
            httpclient.execute(httpPost);
        } catch (Exception e) {
            e.printStackTrace();
        } finally {
            // Release the connection even when the request throws.
            httpclient.getConnectionManager().shutdown();
        }
    }
}
package com.jiuzhouchedai.qqhru;

import java.io.InputStream;
import java.util.List;

import org.w3c.dom.Element;

import com.jiuzhouchedai.crawlerWX.Record;

/**
 * Runs the full scrape pipeline built from the {@link QDUtil} helpers and
 * returns the text that is sent to the WeChat server.
 */
public class QDUtil2 {

    /**
     * Fetches the card-swipe page, extracts the data table, parses it into
     * {@link Record} objects and formats them as text.
     *
     * @param name login name passed to {@link QDUtil#getSreamHtml(String, String)}
     * @param pwd  password passed to {@link QDUtil#getSreamHtml(String, String)}
     * @return the formatted records, or {@code null} if any step threw an
     *         exception (the stack trace is printed)
     */
    public String zaocaoToWeixin(String name, String pwd) {
        // This is the result the WeChat side needs.
        String zaocaoResult = null;
        // try-with-resources closes both streams even when a later step fails.
        try (InputStream sreamHtml = QDUtil.getSreamHtml(name, pwd)) {
            String strHtml = QDUtil.streamToString(sreamHtml);
            System.out.println(strHtml);

            String zaocaoTable = QDUtil.zaocaoTable(strHtml);
            try (InputStream zaocaoTableStream = QDUtil.stringToStream(zaocaoTable)) {
                Element rootNode = QDUtil.getRootNode(zaocaoTableStream);
                List<Record> zaocaoPoList = QDUtil.zaocaoToPoList(rootNode);
                zaocaoResult = QDUtil.zaocaoResult(zaocaoPoList);
            }
        } catch (Exception e) {
            e.printStackTrace();
        }
        return zaocaoResult;
    }
}
package com.jiuzhouchedai.qqhru;

import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.net.HttpURLConnection;
import java.net.URL;
import java.util.ArrayList;
import java.util.List;

import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;

import org.apache.http.HttpResponse;
import org.apache.http.NameValuePair;
import org.apache.http.client.HttpClient;
import org.apache.http.client.entity.UrlEncodedFormEntity;
import org.apache.http.client.methods.HttpPost;
import org.apache.http.impl.client.DefaultHttpClient;
import org.apache.http.message.BasicNameValuePair;
import org.w3c.dom.Document;
import org.w3c.dom.Element;
import org.w3c.dom.NodeList;

import com.jiuzhouchedai.crawlerWX.Record;
import com.jiuzhouchedai.crawlerWX.Test001;

/**
 * Static helpers for scraping the morning-exercise card-swipe page: fetch the
 * HTML, cut out the data table, parse it with the W3C DOM API, map the rows to
 * {@link Record} objects and format them as text.
 */
public class QDUtil {

    /** Session cookie holder; not read or written by any method of this class. */
    public static String jssonid = null;

    /** Page that returns the personal card-swipe detail table. */
    private static final String DETAIL_QUERY_URL =
            "http://172.16.51.37/personQueryZC_personalDetailQuery.html";

    /** Opening-tag prefix that marks the start of the data table in the page. */
    private static final String TABLE_OPEN = "<table id=\"dataTable\"";

    /** Number of {@code td} cells a data row must have to be mapped to a {@link Record}. */
    private static final int RECORD_COLUMN_COUNT = 6;

    /**
     * Posts the login form to the detail-query page and returns the response body.
     *
     * <p>The body is read completely into memory before the connection manager
     * is shut down, so the returned stream stays readable after this method
     * returns.
     *
     * @param name value sent as the {@code loginName} form field
     * @param pwd  value sent as the {@code password} form field
     * @return an in-memory stream over the response body (empty if the
     *         response carried no entity)
     * @throws Exception if the request or reading the response fails
     */
    public static InputStream getSreamHtml(String name, String pwd) throws Exception {
        HttpClient httpclient = new DefaultHttpClient();
        try {
            HttpPost httpPost = new HttpPost(DETAIL_QUERY_URL);
            List<NameValuePair> nvps = new ArrayList<NameValuePair>();
            nvps.add(new BasicNameValuePair("loginName", name));
            nvps.add(new BasicNameValuePair("password", pwd));
            httpPost.setEntity(new UrlEncodedFormEntity(nvps, "UTF-8"));

            HttpResponse response = httpclient.execute(httpPost);
            if (response.getEntity() == null) {
                return new ByteArrayInputStream(new byte[0]);
            }
            InputStream content = response.getEntity().getContent();
            try {
                return new ByteArrayInputStream(readFully(content));
            } finally {
                content.close();
            }
        } finally {
            httpclient.getConnectionManager().shutdown();
        }
    }

    /**
     * Reads a stream to its end and decodes the bytes as UTF-8.
     *
     * <p>All bytes are collected before decoding so that a multi-byte character
     * split across two reads is not corrupted. The stream is not closed.
     *
     * @param ins stream to read
     * @return the decoded text
     * @throws Exception if reading fails
     */
    public static String streamToString(InputStream ins) throws Exception {
        return new String(readFully(ins), "UTF-8");
    }

    /**
     * Cuts the {@code dataTable} element out of the page and strips
     * non-breaking spaces so the fragment can be parsed as XML.
     *
     * @param strHtml full page HTML
     * @return the table markup from its opening tag through {@code </table>}
     * @throws IllegalArgumentException if {@code strHtml} is {@code null} or
     *         does not contain a complete {@code dataTable} element
     */
    public static String zaocaoTable(String strHtml) {
        if (strHtml == null) {
            throw new IllegalArgumentException("strHtml must not be null");
        }
        int begin = strHtml.indexOf(TABLE_OPEN);
        if (begin < 0) {
            throw new IllegalArgumentException("No " + TABLE_OPEN + " found in page");
        }
        int end = strHtml.indexOf("</table>", begin);
        if (end < 0) {
            throw new IllegalArgumentException("dataTable is not closed by </table>");
        }
        String tableHtml = strHtml.substring(begin, end) + "\n</table>";
        // Remove only non-breaking spaces (entity or literal). Stripping ordinary
        // spaces would fuse "<table id=" into "<tableid=" and break XML parsing.
        return tableHtml.replace("&nbsp;", "").replace("\u00A0", "");
    }

    /**
     * Wraps a string in a UTF-8 byte stream.
     *
     * @param str text to wrap
     * @return a stream over the UTF-8 bytes, or {@code null} when {@code str}
     *         is {@code null} or blank
     * @throws Exception if the encoding is unsupported
     */
    public static InputStream stringToStream(String str) throws Exception {
        if (str == null || str.trim().equals("")) {
            return null;
        }
        return new ByteArrayInputStream(str.getBytes("UTF-8"));
    }

    /**
     * Parses the stream as XML with the W3C DOM API and returns the document's
     * root element.
     *
     * @param is stream containing the XML document
     * @return the root element
     * @throws Exception if the parser cannot be configured or parsing fails
     */
    public static Element getRootNode(InputStream is) throws Exception {
        DocumentBuilderFactory builderFactory = DocumentBuilderFactory.newInstance();
        // The markup comes from a remote page: refuse DOCTYPE declarations to rule out XXE.
        builderFactory.setFeature("http://apache.org/xml/features/disallow-doctype-decl", true);
        DocumentBuilder builder = builderFactory.newDocumentBuilder();
        Document document = builder.parse(is);
        return document.getDocumentElement();
    }

    /**
     * Maps every {@code tr} under the root to a {@link Record}, using the
     * trimmed text of its direct {@code td} children in column order: id,
     * student id, name, card number, time, validity.
     *
     * <p>Rows with fewer than six {@code td} cells (for example a header row)
     * are skipped.
     *
     * @param rootNode root element of the parsed table
     * @return the records, in document order
     */
    public static List<Record> zaocaoToPoList(Element rootNode) {
        NodeList trs = rootNode.getElementsByTagName("tr");
        List<Record> listRecord = new ArrayList<Record>();
        for (int i = 0; i < trs.getLength(); i++) {
            NodeList tds = trs.item(i).getChildNodes();
            List<String> cells = new ArrayList<String>();
            for (int j = 0; j < tds.getLength(); j++) {
                if ("td".equals(tds.item(j).getNodeName())) {
                    cells.add(tds.item(j).getTextContent().trim());
                }
            }
            if (cells.size() < RECORD_COLUMN_COUNT) {
                continue;
            }
            Record record = new Record();
            record.setId(cells.get(0));
            record.setStudentID(cells.get(1));
            record.setName(cells.get(2));
            record.setIDNumber(cells.get(3));
            record.setTime(cells.get(4));
            record.setIsValid(cells.get(5));
            listRecord.add(record);
        }
        return listRecord;
    }

    /**
     * Formats the records as text, one labelled field per line.
     *
     * @param zaocaoPoList records to format
     * @return the concatenated text (empty for an empty list)
     * @throws Exception never thrown by this implementation; kept for callers
     */
    public static String zaocaoResult(List<Record> zaocaoPoList) throws Exception {
        StringBuilder result = new StringBuilder();
        for (Record record : zaocaoPoList) {
            result.append("记录号:").append(record.getId()).append("\n");
            result.append("学号:").append(record.getStudentID()).append("\n");
            result.append("姓名:").append(record.getName()).append("\n");
            result.append("卡号:").append(record.getIDNumber()).append("\n");
            result.append("刷卡时间:").append(record.getTime()).append("\n");
            result.append("是否有效:").append(record.getIsValid()).append("\n");
        }
        return result.toString();
    }

    /**
     * Simulates a form POST with {@link HttpURLConnection}.
     *
     * <p>No connect or read timeout is set, so the call can block for as long
     * as the server keeps the connection open.
     *
     * @param path   target URL
     * @param params request body, for example {@code "name=zhangsan&age=21"}
     * @return the response body stream; the caller is responsible for closing it
     * @throws Exception if the connection or the write fails
     */
    public static InputStream sendPostRequestByForm(String path, String params) throws Exception {
        URL url = new URL(path);
        HttpURLConnection conn = (HttpURLConnection) url.openConnection();
        conn.setRequestMethod("POST");
        conn.setDoOutput(true); // the request carries a body
        byte[] body = params.getBytes("UTF-8");
        try (OutputStream out = conn.getOutputStream()) {
            out.write(body);
            out.flush();
        }
        return conn.getInputStream();
    }

    /** Reads the stream to its end and returns all bytes; does not close it. */
    private static byte[] readFully(InputStream in) throws IOException {
        ByteArrayOutputStream buffer = new ByteArrayOutputStream();
        byte[] chunk = new byte[1024];
        int len;
        while ((len = in.read(chunk)) != -1) {
            buffer.write(chunk, 0, len);
        }
        return buffer.toByteArray();
    }
}
WebClient(HtmlUnit)方式
//爬取校内网数据,并上传到微信服务器import java.util.ArrayList;import java.util.List;import org.apache.http.NameValuePair;import org.apache.http.client.HttpClient;import org.apache.http.client.entity.UrlEncodedFormEntity;import org.apache.http.client.methods.HttpPost;import org.apache.http.impl.client.DefaultHttpClient;import org.apache.http.message.BasicNameValuePair;import com.gargoylesoftware.htmlunit.BrowserVersion;import com.gargoylesoftware.htmlunit.NicelyResynchronizingAjaxController;import com.gargoylesoftware.htmlunit.ScriptResult;import com.gargoylesoftware.htmlunit.WebClient;import com.gargoylesoftware.htmlunit.html.DomElement;import com.gargoylesoftware.htmlunit.html.DomNodeList;import com.gargoylesoftware.htmlunit.html.HtmlElement;import com.gargoylesoftware.htmlunit.html.HtmlPage;public class TestC { public static HtmlPage home_index(String name,String pwd ) throws Exception { WebClient webClient = new WebClient(BrowserVersion.CHROME); webClient.setJavaScriptTimeout(5000); webClient.getOptions().setCssEnabled(false); webClient.getOptions().setJavaScriptEnabled(true); webClient.getOptions().setThrowExceptionOnFailingStatusCode(false); webClient.getOptions().setThrowExceptionOnScriptError(false); webClient.setAjaxController(new NicelyResynchronizingAjaxController()); // WebClient webClient = new WebClient();// 创建WebClient HtmlPage page = webClient .getPage("http://172.16.51.37/user_login.html"); // 打开百度 HtmlElement usernameEle = page.getElementByName("loginName"); HtmlElement passwordEle = (HtmlElement) page.getElementById("password"); usernameEle.focus(); // 设置输入焦点 usernameEle.type(name); // 填写值 passwordEle.focus(); // 设置输入焦点 passwordEle.type(pwd); // 填写值 HtmlElement sub = (HtmlElement) page.getElementsByTagName("input") .item(2); page = sub.click(); //System.out.println(page.asXml()); return page; } public static void main(String[] args) { try { HtmlPage home_index = home_index("2015041001", "2015041001"); StringBuffer sb = new StringBuffer(); ScriptResult 
zaocao_scriptResult = home_index.executeJavaScript("javascript:f_openTab('tabid_personaldetailZC','早操刷卡明细','personQueryZC_personalDetailQuery.html')"); HtmlPage zaocao_html = (HtmlPage) zaocao_scriptResult.getNewPage(); //System.out.println(zaocao_html.asXml()); DomElement title_Tag = (DomElement) zaocao_html.getElementsByTagName("title").item(0); //System.out.println(textContent); sb.append("title_value:"+title_Tag.getTextContent()); //添加标题 //zaocao_tr_html: 相当于 记录的集合 DomNodeList<HtmlElement> zaocao_tr_html = zaocao_html.getElementById("dataTable").getElementsByTagName("tr"); List<Record> record_list=new ArrayList<>(); for (int i = 0; i < zaocao_tr_html.size(); i++) { //zaocao_td_html: 相当于 某一条记录的属性的集合 DomNodeList<HtmlElement> zaocao_td_html= ((DomElement) zaocao_tr_html.item(i)).getElementsByTagName("td"); List<String> each_str=new ArrayList<>(); for (int j = 0; j < zaocao_td_html.size(); j++) { String each= zaocao_td_html.item(j).getTextContent(); each_str.add(each.trim()); } Record record=new Record(); record.setId(each_str.get(0)); record.setStudentID(each_str.get(1).substring(0, 10)); record.setName(each_str.get(2).substring(0, each_str.get(2).length()-1)); record.setIDNumber(each_str.get(3).substring(0, each_str.get(3).length()-1)); record.setTime(each_str.get(4).substring(0, each_str.get(4).length()-1)); record.setIsValid(each_str.get(5)); record_list.add(record); //String textContent = zaocao_td_html.item(0).getTextContent(); //System.out.println(textContent); } StringBuffer sbs=new StringBuffer(); for (int i = 0; i < record_list.size(); i++) { Record record = record_list.get(i); System.out.println("========================"); System.out.println("记录号:"+record.getId()); System.out.println("学号:"+record.getStudentID()); System.out.println("姓名:"+record.getName()); System.out.println("卡号:"+record.getIDNumber()); System.out.println("刷卡时间:"+record.getTime()); System.out.println("是否有效:"+record.getIsValid()); sbs.append("\n========================"); 
sbs.append("\n记录号:"+record.getId()); sbs.append("\n学号:"+record.getStudentID()); sbs.append("\n姓名:"+record.getName()); sbs.append("\n卡号:"+record.getIDNumber()); sbs.append("\n刷卡时间:"+record.getTime()); sbs.append("\n是否有效:"+record.getIsValid()); } System.out.println(sbs.toString()); //String zaocao_text = zaocao_html.asText();// String[] temp = zaocao_text.split("。");// String temp2=temp[1].split("页")[0];// // System.out.println(temp2); //String replaceAll = temp2.replaceAll(" ", "--"); //System.out.println(replaceAll); //String[] split = replaceAll.split("--"); //System.out.println("===================="); //System.out.println(split[1].trim()); //System.out.println(split[2].trim()); //System.out.println(split[3].trim());// DomNodeList<DomElement> span = page.getElementsByTagName("span");// // HtmlElement zaocao = (HtmlElement) span.item(3);// // page = zaocao.click();// DomNodeList<DomElement> a = page.getElementsByTagName("a");// // System.out.println(a.item(2).getAttributes().getNamedItem("href"));// // // HtmlElement namedItem = a.item(2).getAttributes().getNamedItem("href");// HtmlAnchor aa=page.// // page=aa.click();// //System.out.println(page); //System.out.println(page.asXml()); HttpClient httpclient = new DefaultHttpClient(); HttpPost httpPost = new HttpPost("http://www.jiuzhouchedai.com/weixintest/zaocaoUpdate"); List<NameValuePair> nvps = new ArrayList<NameValuePair>(); //httpPost.addHeader("Content-Type","text/html;charset=UTF-8"); nvps.add(new BasicNameValuePair("zaocaoData",sbs.toString())); nvps.add(new BasicNameValuePair("student_id", "2014094041")); httpPost.setEntity(new UrlEncodedFormEntity(nvps,"UTF-8")); httpclient.execute(httpPost); httpclient.getConnectionManager().shutdown(); } catch (Exception e) { e.printStackTrace(); } }}//==================================================public class Record { private String id; private String studentID; private String name; private String IDNumber; private String time; private String isValid; public String 
getId() { return id; } public void setId(String id) { this.id = id; } public String getStudentID() { return studentID; } public void setStudentID(String studentID) { this.studentID = studentID; } public String getName() { return name; } public void setName(String name) { this.name = name; } public String getIDNumber() { return IDNumber; } public void setIDNumber(String iDNumber) { IDNumber = iDNumber; } public String getTime() { return time; } public void setTime(String time) { this.time = time; } public String getIsValid() { return isValid; } public void setIsValid(String isValid) { this.isValid = isValid; }}//微信服务器通过 servlet中写 request.getParamer("xxx") 获取数据。
火狐(Firefox)插件 Selenium IDE:
可用于实现自动化爬虫,
并能自动生成代码(支持导出为 Java 代码)。
http://www.cnblogs.com/fnng/archive/2011/10/23/2222157.html
https://addons.mozilla.org/en-US/firefox/addon/selenium-ide-button/
0 0
- 爬取校内网早操刷卡记录
- 刷卡,取每一个卡号的最后三次记录的问题
- 刷卡,取每一个卡号的最后三次记录的问题
- 爬虫实战(1)——爬取校内网招聘信息的名称
- mongodb搭建校内搜索引擎——爬取网页文本
- 刷卡
- 校内网的cookie记录与网络信息存储的安全性
- 校内网怎么样呢?
- “校内网”开放平台
- 校内网开放平台
- 校内网简介
- 校内网按钮样式
- 校内网心理分析
- 校内网如何破解
- 解析校内网邮箱
- 校内网项目
- 校内
- 爬取淘宝交易记录的爬虫
- MyBatis浅谈
- Java面向对象之泛型
- .bat文件详解
- 调制:调幅(AM)与调频(FM)
- 数据库常用命令概括
- 爬取校内网早操刷卡记录
- 文章标题
- 清除svn文件(添加注册表方式)
- 结合进退法和抛物线法的一维搜索程序
- POJ3211Washing Clothes(分组01背包)
- S5PV210之UART介绍
- 特征离散化系列(一)方法综述
- 语法分析1
- 2016面试总结