java爬虫之基于httpclient的简单Demo(二)
来源:互联网 发布:种子文件下载软件 编辑:程序博客网 时间:2024/05/22 15:41
延续demo1的 java爬虫的2种爬取方式(HTTP||Socket)简单Demo(一),demo2出炉啦,大家想学爬虫都可以从这个网盘学习哦:https://pan.baidu.com/s/1pJJrcqJ#list/path=%2F
免费课程,非常不错。其实主要还是学习 httpclient 的用法;httpclient 的资料全是英文文档,看得我心累啊。
package com.simple.crawImpl;

import com.simple.Icrawl.ICrawl;
import com.simple.pojos.CrawlResultPojo;
import com.simple.pojos.UrlPojo;

import org.apache.http.HttpEntity;
import org.apache.http.ParseException;
import org.apache.http.client.ClientProtocolException;
import org.apache.http.client.methods.CloseableHttpResponse;
import org.apache.http.client.methods.HttpGet;
import org.apache.http.client.methods.HttpUriRequest;
import org.apache.http.client.methods.RequestBuilder;
import org.apache.http.impl.client.CloseableHttpClient;
import org.apache.http.impl.client.HttpClients;

import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStreamReader;
import java.net.URI;
import java.net.URISyntaxException;
import java.nio.charset.StandardCharsets;
import java.util.HashMap;
import java.util.Map;
import java.util.Map.Entry;

/**
 * Simple crawler implementation backed by Apache HttpClient.
 *
 * <p>Supports plain GET fetches ({@link #crawl(UrlPojo)}) and form-parameter
 * POST requests ({@link #crawl4Post(UrlPojo)}). Responses are decoded as UTF-8
 * and returned line-by-line joined with {@code '\n'} inside a
 * {@link CrawlResultPojo}.
 *
 * Created by lewis on 2016/10/16.
 */
public class HttpClientCrawlerImpl implements ICrawl {

    // Shared client; CloseableHttpClient instances are safe to reuse across requests.
    public CloseableHttpClient httpClient = HttpClients.custom().build();

    /**
     * Fetches the page at {@code urlpojo.getUrl()} with an HTTP GET.
     *
     * @param urlpojo wrapper holding the target URL; may be {@code null}
     * @return a result with the page content and {@code success == true}, a
     *         result with {@code success == false} on I/O failure, or
     *         {@code null} when {@code urlpojo} is {@code null}
     */
    @Override
    public CrawlResultPojo crawl(UrlPojo urlpojo) {
        if (urlpojo == null) {
            return null;
        }
        CrawlResultPojo crawlResultPojo = new CrawlResultPojo();
        HttpGet httpGet = new HttpGet(urlpojo.getUrl());
        // try-with-resources guarantees both the response and the reader are
        // closed on every path (the original closed them in a finally block).
        try (CloseableHttpResponse response = httpClient.execute(httpGet);
             BufferedReader br = new BufferedReader(
                     new InputStreamReader(response.getEntity().getContent(),
                             StandardCharsets.UTF_8))) {
            crawlResultPojo.setPageContent(readAll(br));
            crawlResultPojo.setSuccess(true);
        } catch (IOException e) {
            e.printStackTrace();
            crawlResultPojo.setSuccess(false);
        }
        return crawlResultPojo;
    }

    /**
     * Sends an HTTP POST to {@code urlPojo.getUrl()}, adding every entry of
     * {@code urlPojo.getParasMap()} (if non-null) as a form parameter.
     *
     * @param urlPojo wrapper holding the target URL and optional parameter map
     * @return a result with the page content and {@code success == true}, a
     *         result with {@code success == false} on failure, or {@code null}
     *         when {@code urlPojo} or its URL is {@code null}
     */
    public CrawlResultPojo crawl4Post(UrlPojo urlPojo) {
        if (urlPojo == null || urlPojo.getUrl() == null) {
            return null;
        }
        CrawlResultPojo crawlResultPojo = new CrawlResultPojo();
        try {
            RequestBuilder rb = RequestBuilder.post().setUri(new URI(urlPojo.getUrl()));
            Map<String, Object> parasMap = urlPojo.getParasMap();
            if (parasMap != null) {
                for (Entry<String, Object> entry : parasMap.entrySet()) {
                    rb.addParameter(entry.getKey(), entry.getValue().toString());
                }
            }
            HttpUriRequest httpUriRequest = rb.build();
            // BUG FIX: the original never closed this response (it called
            // execute(...).getEntity() directly), leaking the connection.
            try (CloseableHttpResponse response = httpClient.execute(httpUriRequest);
                 BufferedReader br = new BufferedReader(
                         new InputStreamReader(response.getEntity().getContent(),
                                 StandardCharsets.UTF_8))) {
                crawlResultPojo.setPageContent(readAll(br));
                crawlResultPojo.setSuccess(true);
                return crawlResultPojo;
            }
        } catch (URISyntaxException | IOException e) {
            // ClientProtocolException is an IOException subclass, so the
            // original's three catch blocks collapse into this multi-catch.
            e.printStackTrace();
        }
        crawlResultPojo.setSuccess(false);
        return crawlResultPojo;
    }

    /**
     * Reads the remainder of {@code br}, joining lines with {@code '\n'}
     * (same content shape the original inline loops produced).
     */
    private static String readAll(BufferedReader br) throws IOException {
        StringBuilder content = new StringBuilder();
        String line;
        while ((line = br.readLine()) != null) {
            content.append(line).append('\n');
        }
        return content.toString();
    }

    /** Demo entry point: POSTs then GETs the same URL and prints both results. */
    public static void main(String[] args) {
        HttpClientCrawlerImpl httpClientCrawlerImpl = new HttpClientCrawlerImpl();
        String url = "http://www.wangdaizhijia.com/front_select-plat";
        UrlPojo urlPojo = new UrlPojo(url);
        Map<String, Object> parasMap = new HashMap<String, Object>();
        parasMap.put("currPage", 30);
        parasMap.put("params", "");
        parasMap.put("sort", 0);
        urlPojo.setParasMap(parasMap);
        CrawlResultPojo resultPojo = httpClientCrawlerImpl.crawl4Post(urlPojo);
        print(resultPojo);
        resultPojo = httpClientCrawlerImpl.crawl(urlPojo);
        print(resultPojo);
    }

    public static void print(Object s) {
        System.out.println(s);
    }
}
0 0
- java爬虫之基于httpclient的简单Demo(二)
- HttpClient的Demo(二)
- 基于selenium的简单Java爬虫
- webservice的简单demo(基于Java)
- Java爬虫学习:利用HttpClient和Jsoup库实现简单的Java爬虫程序
- Python爬虫简单的demo
- 关于使用Java实现的简单网络爬虫Demo
- 关于使用Java实现的简单网络爬虫Demo
- 关于使用Java实现的简单网络爬虫Demo
- HttpClient实现简单的网络爬虫功能
- Scala结合HttpClient实现简单的爬虫
- JAVA爬虫初识之httpclient与jsoup
- JAVA爬虫初识之httpclient与jsoup
- 安卓HttpClient请求的简单Demo
- HttpClient简单爬虫
- 网络爬虫之httpclient的使用
- 基于scrapy的简单爬虫
- java HttpClient 的get使用的Demo
- 浙大数据结构基础4-1 Level-order Traversal (10分)
- 更改网络ip后如何重新配置Oracle数据库监听
- Codeforces Round #377(Div. 2)D.Exams(二分+思维)
- USB设备多个配置描述符的获取过程
- python自学笔记15之实例之绘图、dataframe操作、读写csv,excle
- java爬虫之基于httpclient的简单Demo(二)
- 通过md5值检查文件信息是否相同
- Python3基础(六) 深入list列表
- 数据结构基础:线性表的应用(1)
- Python中函数的参数传递与可变长参数
- 初识caffe
- 小程序WXML之模板
- Flask用reqparse实现post方法时,parser.parse_args()的值为None的解决办法
- 死锁以及避免死锁