httpclient+jsoup抓取数据

来源:互联网 发布:javascript 矩阵运算 编辑:程序博客网 时间:2024/05/03 05:39
post方式:
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;

import org.apache.http.HttpEntity;
import org.apache.http.HttpResponse;
import org.apache.http.NameValuePair;
import org.apache.http.client.HttpClient;
import org.apache.http.client.entity.UrlEncodedFormEntity;
import org.apache.http.client.methods.CloseableHttpResponse;
import org.apache.http.client.methods.HttpPost;
import org.apache.http.impl.client.CloseableHttpClient;
import org.apache.http.impl.client.HttpClients;
import org.apache.http.message.BasicNameValuePair;
import org.apache.http.util.EntityUtils;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.select.Elements;

/**
* Created by chl on 2017/7/28.
*/
public classCatchDataUtils {

public staticInteger catchData(String url){
intnoConsumeNum =0;
try{

//创建client实例
HttpClient client = HttpClients.createDefault();
//创建httpget实例
HttpPost httpPost =newHttpPost(url);
List<NameValuePair> list =newArrayList<NameValuePair>();
list.add(newBasicNameValuePair("groupName","memberGroup"));
UrlEncodedFormEntity entity =newUrlEncodedFormEntity(list,"UTF-8");
httpPost.setEntity(entity);
//执行get请求
HttpResponse response = client.execute(httpPost);
String result ="";
if(response !=null) {
HttpEntity resEntity = response.getEntity();
if(resEntity != null){
result = EntityUtils.toString(resEntity,"UTF-8");
}
Document doc = Jsoup.parse(result);
Elements ps=doc.select("p");//选择器,选取特征信息
String data = ps.get(1).toString();
noConsumeNum = Integer.valueOf(data.substring(data.indexOf("=")+1,data.lastIndexOf(":")));
}

}catch(IOException e) {
e.printStackTrace();
}
returnnoConsumeNum;
}
}

get方式:
  1. public class StockUtils {  
  2.     //第一次获取网页源码  
  3.     public static String getHtmlByUrl(String url) throws IOException{    
  4.         String html = null;    
  5.         CloseableHttpClient httpClient = HttpClients.createDefault();//创建httpClient对象     
  6.         HttpGet httpget = new HttpGet(url);  
  7.         try {    
  8.             HttpResponse responce = httpClient.execute(httpget);  
  9.             int resStatu = responce.getStatusLine().getStatusCode();  
  10.             if (resStatu == HttpStatus.SC_OK) {  
  11.                     
  12.                 HttpEntity entity = responce.getEntity();    
  13.                 if (entity != null) {    
  14.                     html = EntityUtils.toString(entity);//获得html源代码  
  15.                 }    
  16.             }    
  17.         } catch (Exception e) {  
  18.             System.out.println("访问【"+url+"】出现异常!");    
  19.             e.printStackTrace();    
  20.         } finally {  
  21.             //释放连接  
  22.             httpClient.close();    
  23.         }    
  24.         return html;    
  25.     }    
  26. }  
Maven依赖:

<dependency>
    <groupId>org.apache.httpcomponents</groupId>
    <artifactId>httpclient</artifactId>
    <version>4.5.3</version>
</dependency>

<dependency>
    <groupId>org.jsoup</groupId>
    <artifactId>jsoup</artifactId>
    <version>1.7.2</version>
</dependency>
原创粉丝点击