【Java爬虫】爬取南通大学教务系统成绩计算绩点

来源:互联网 发布:坐久了腰疼 知乎 编辑:程序博客网 时间:2024/05/02 04:28

  以前写过一个python版的,但是想做一个jsp网页版的,就又用Java重写了一下。

  具体地址的分析过程在这里,这里简单说一下HttpClient的Get,Post方法的使用

           1.Get请求方法

//创建一个浏览器客户端CloseableHttpClient httpClient = HttpClients.createDefault();//要Get的地址String url1="http://www.baidu.com";//创建一个Get请求HttpGet baidu=new HttpGet(url1);//用上面创建的浏览器客户端执行该请求CloseableHttpResponse res=httpClient.execute(baidu);//用响应创建一个http实体并获得输入流HttpEntity he=res.getEntity();InputStream in=he.getContent();//将获得的流写到本地磁盘FileOutputStream out=new FileOutputStream("baidu.html'");byte[] buffer=new byte[1024];int count=-1;while((count=in.read(buffer))!=-1){out.write(buffer, 0, count);}in.close();out.close();

   2.Post请求方法

CloseableHttpClient httpClient = HttpClients.createDefault();String url="http://××××.××××.com?#";//要提交的参数username,passwordList<NameValuePair> list = new ArrayList<NameValuePair>();list.add(new BasicNameValuePair("Username","Name"));list.add(new BasicNameValuePair("Password","××××××"));//转换编码UrlEncodedFormEntity entity = new UrlEncodedFormEntity(list,"utf-8"); //创建Post请求HttpPost httpPost=new HttpPost(url);//为请求设置参数httpPost.setEntity(entity);//获得响应,输入流并写入本地磁盘CloseableHttpResponse res=httpClient.execute(httpPost);HttpEntity he=res.getEntity();InputStream in=he.getContent();FileOutputStream out=new FileOutputStream("××××.×××");byte[] buffer=new byte[1024];int count=-1;while((count=in.read(buffer))!=-1){out.write(buffer, 0, count);}in.close();out.close();

爬虫的完整代码:

import org.apache.http.impl.client.CloseableHttpClient;import org.apache.http.impl.client.HttpClients;import org.apache.http.message.BasicNameValuePair;import java.io.FileOutputStream;import java.io.IOException;import java.io.InputStream;import java.io.UnsupportedEncodingException;import java.util.ArrayList;import java.util.List;import java.util.Scanner;import java.util.regex.Pattern;import java.util.regex.Matcher;import org.apache.http.HttpEntity;import org.apache.http.NameValuePair;import org.apache.http.client.ClientProtocolException;import org.apache.http.client.entity.UrlEncodedFormEntity;import org.apache.http.client.methods.*;public class spider02 {public static void main(String[] args) throws ClientProtocolException, IOException{@SuppressWarnings("resource")Scanner cin=new Scanner(System.in);doon asd=new doon();asd.getyzm();String yzm=cin.nextLine();//测试String stop="1";while(!stop.equals("#")){stop=cin.nextLine();System.out.println(stop);if(stop.equals("n")){Matcher name=asd.patternname(asd.getname());while(name.find())System.out.println(name.group(1));}if(stop.equals("s")){Matcher score=asd.patternscore(asd.getscore());List<lession>les=asd.workjidian(score);double jdsum=0,xfsum=0;for(int i=0;i<les.size();i++){jdsum+=les.get(i).getKcxfjd();xfsum+=Double.valueOf(les.get(i).getXf()).doubleValue();System.out.println(les.get(i).getKcmc()+"\t"+les.get(i).getZpcj()+"\t"+les.get(i).getXf()+"\t"+les.get(i).getKcxfjd());}System.out.println("所修课程学分:"+xfsum);System.out.println("所修课程学分绩点:"+jdsum);System.out.println("平均学分绩点:"+jdsum/xfsum);}}}}class doon{private CloseableHttpClient httpClient = HttpClients.createDefault();public  void done(String xh,String sfzh,String kl,String yzm) {try {login(xh, sfzh, kl, yzm);//尝试登陆getscore();//获取分数} catch (ClientProtocolException e) {e.printStackTrace();} catch (IOException e) {e.printStackTrace();}}public String getname(){String url="http://jwgl.ntu.edu.cn/cjcx/QueryAll.aspx";//获取个人信息位置String 
information="";//Post请求List<NameValuePair> list=new ArrayList<NameValuePair>();list.add(new BasicNameValuePair("xq","2013-2014-1"));try {UrlEncodedFormEntity entity = new UrlEncodedFormEntity(list,"utf-8");HttpPost post=new HttpPost(url);post.setEntity(entity);CloseableHttpResponse res= httpClient.execute(post);HttpEntity he=res.getEntity();InputStream in=he.getContent();//FileOutputStream out=new FileOutputStream("");byte[] buffer=new byte[1024];int count=-1;while((count=in.read(buffer))!=-1){String inf=new String(buffer,0,count);information+=inf;}in.close();} catch (IOException e) {// TODO Auto-generated catch blocke.printStackTrace();}return information;}public  void getyzm() throws IOException{//获得验证码并写到本地,Get请求String url1="http://jwgl.ntu.edu.cn/cjcx/checkImage.aspx";//验证码页面HttpGet yzm=new HttpGet(url1);CloseableHttpResponse res=httpClient.execute(yzm);HttpEntity he=res.getEntity();InputStream in=he.getContent();FileOutputStream out=new FileOutputStream("yzm.gif");byte[] buffer=new byte[1024];int count=-1;while((count=in.read(buffer))!=-1){out.write(buffer, 0, count);}in.close();out.close();}public  void login(String xh,String sfzh,String kl,String yzm) throws ClientProtocolException, IOException{//Post请求String url="http://jwgl.ntu.edu.cn/cjcx/Default.aspx";//登录页面List<NameValuePair> list = new ArrayList<NameValuePair>();list.add(new BasicNameValuePair("__VIEWSTATE","/wEPDwUJODExMDE5NzY5ZGRgtUdRucUbXsT8g55XmVsTwV6PMw=="));list.add(new BasicNameValuePair("__VIEWSTATEGENERATOR","6C0FF253"));list.add(new BasicNameValuePair("xh",xh));list.add(new BasicNameValuePair("sfzh",sfzh));list.add(new BasicNameValuePair("kl",kl));list.add(new BasicNameValuePair("yzm",yzm));UrlEncodedFormEntity entity = new UrlEncodedFormEntity(list,"utf-8"); HttpPost httpPost=new HttpPost(url);httpPost.setEntity(entity);CloseableHttpResponse res=httpClient.execute(httpPost);HttpEntity he=res.getEntity();InputStream in=he.getContent();FileOutputStream out=new 
FileOutputStream("ans.html");byte[] buffer=new byte[1024];int count=-1;while((count=in.read(buffer))!=-1){out.write(buffer, 0, count);}in.close();out.close();}public  String getscore() throws ClientProtocolException, IOException{//Post请求String url="http://jwgl.ntu.edu.cn/cjcx/Data/ScoreAllData.aspx";  //获取分数List<NameValuePair> list = new ArrayList<NameValuePair>();list.add(new BasicNameValuePair("start","0"));list.add(new BasicNameValuePair("pageSize","80"));UrlEncodedFormEntity entity = new UrlEncodedFormEntity(list,"utf-8"); HttpPost httpPost=new HttpPost(url);httpPost.setEntity(entity);CloseableHttpResponse res=httpClient.execute(httpPost);HttpEntity he=res.getEntity();InputStream in=he.getContent();FileOutputStream out=new FileOutputStream("score.html");byte[] buffer=new byte[1024];int count=-1;String save="";while((count=in.read(buffer))!=-1){out.write(buffer, 0, count);String sav=new String(buffer,0,count);save+=sav;}in.close();out.close();return save;}public Matcher patternscore(String score){//用正则表达式匹配成绩String reg="\"kcmc\":\"(.*?)\",\"jsxm\":\"(.*?)\",\"xq\":\"(.*?)\",\"xs\":\"(.*?)\",\"xf\":\"(.*?)\",\"zpcj\":\"(.*?)\",\"pscj\":\"(.*?)\",\"qmcj\":\"(.*?)\",\"kcsx\":\"(.*?)\",\"cjid\":\"(.*?)\",\"ksfsm\":\"(.*?)\",\"pxcj\":\"(.*?)\"}";Pattern p=Pattern.compile(reg);Matcher m=p.matcher(score);return m;}public Matcher patternname(String name){//匹配个人信息String reg="<b>(.*?)</b>";Pattern p=Pattern.compile(reg);Matcher  m=p.matcher(name);return m;}public List<lession> workjidian(Matcher score){//计算绩点List<lession> les=new ArrayList<lession>();while(score.find()){doublexf=0.0;if(score.group(6).equals("优"))//五级计分xf=Double.valueOf(score.group(5)).doubleValue()*4.5; else if(score.group(6).equals("良"))xf=Double.valueOf(score.group(5)).doubleValue()*3.5; else if(score.group(6).equals("中"))xf=Double.valueOf(score.group(5)).doubleValue()*2.5; else if(score.group(6).equals("及格"))xf=Double.valueOf(score.group(5)).doubleValue()*1.5; else 
if(score.group(6).equals("缓考")||score.group(6).equals("不及格"))continue;else if(Double.valueOf(score.group(6)).doubleValue()>=90)//百分计分xf=((Double.valueOf(score.group(6)).doubleValue()-90)/10+4.0)*Double.valueOf(score.group(5)).doubleValue();else if(Double.valueOf(score.group(6)).doubleValue()>=80&&Double.valueOf(score.group(6)).doubleValue()<=89)xf=((Double.valueOf(score.group(6)).doubleValue()-80)/10+3.0)*Double.valueOf(score.group(5)).doubleValue();else if(Double.valueOf(score.group(6)).doubleValue()>=70&&Double.valueOf(score.group(6)).doubleValue()<=79)xf=((Double.valueOf(score.group(6)).doubleValue()-70)/10+2.0)*Double.valueOf(score.group(5)).doubleValue();else if(Double.valueOf(score.group(6)).doubleValue()>=60&&Double.valueOf(score.group(6)).doubleValue()<=69)xf=((Double.valueOf(score.group(6)).doubleValue()-60)/10+1.0)*Double.valueOf(score.group(5)).doubleValue();else if(Double.valueOf(score.group(6)).doubleValue()<60)continue;les.add(new lession(score.group(1),score.group(2),score.group(3),score.group(4),score.group(5),score.group(6),score.group(7),score.group(8),score.group(9),score.group(10),score.group(11),score.group(12),xf));//System.out.println(score.group(1)+"\t\t\t\t\t\t"+score.group(2)+"\t"//+score.group(5)+"\t"+score.group(6)+"学分"+Double.toString(xf));}return les;}}



1 0
原创粉丝点击