Java网络爬虫crawler4j学习笔记<24> PageFetchResult类

来源:互联网 发布:java 线程局部变量 编辑:程序博客网 时间:2024/05/21 05:07

源代码

package edu.uci.ics.crawler4j.fetcher;

import java.io.EOFException;
import java.io.IOException;

import org.apache.http.Header;
import org.apache.http.HttpEntity;
import org.apache.http.util.EntityUtils;

import edu.uci.ics.crawler4j.crawler.Page;

import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
 * Holds the result stored after a page has been fetched with the HttpClient library:
 * the status code, the response entity, the response headers and the URLs involved.
 *
 * @author Yasser Ganjisaffar [lastname at gmail dot com]
 */
public class PageFetchResult {

  protected static final Logger logger = LoggerFactory.getLogger(PageFetchResult.class);

  /** Status code of the fetch. */
  protected int statusCode;

  /** The HttpEntity object of the response; {@code null} until set. */
  protected HttpEntity entity;

  /** Response headers; {@code null} until set. */
  protected Header[] responseHeaders;

  /** The URL link that was fetched; {@code null} until set. */
  protected String fetchedUrl;

  /** NOTE(review): presumably the redirect target of the fetch - confirm against callers. */
  protected String movedToUrl;

  /**
   * Loads the fetched entity into the given page and attaches the response headers to it.
   *
   * <p>Any exception raised while doing so is logged at info level (message only) and
   * reported through the return value instead of being propagated.
   *
   * @param page the page object to populate from the fetch result
   * @return {@code true} if the page was populated, {@code false} if an exception occurred
   */
  public boolean fetchContent(Page page) {
    try {
      // Convert the fetched result into the page object.
      page.load(entity);
      page.setFetchResponseHeaders(responseHeaders);
      return true;
    } catch (Exception ex) {
      String url = page.getWebURL().getURL();
      logger.info("Exception while fetching content for: {} [{}]", url, ex.getMessage());
      return false;
    }
  }

  /**
   * Discards the response content without processing it, by consuming the entity if one is set.
   *
   * <p>An {@link IOException} raised while consuming is ignored; any other exception is
   * logged at warn level.
   */
  public void discardContentIfNotConsumed() {
    if (entity == null) {
      return;
    }
    try {
      EntityUtils.consume(entity);
    } catch (IOException ignored) {
      // Deliberately ignored. An EOFException (which extends IOException) can happen on
      // compressed streams which are not repeatable, and an IOException can happen if the
      // stream is closed.
    } catch (Exception ex) {
      logger.warn("Unexpected error occurred while trying to discard content", ex);
    }
  }

  /** @return the status code of the fetch */
  public int getStatusCode() {
    return this.statusCode;
  }

  /** @param statusCode the status code of the fetch */
  public void setStatusCode(int statusCode) {
    this.statusCode = statusCode;
  }

  /** @return the response entity, or {@code null} if none has been set */
  public HttpEntity getEntity() {
    return this.entity;
  }

  /** @param entity the response entity */
  public void setEntity(HttpEntity entity) {
    this.entity = entity;
  }

  /** @return the response headers, or {@code null} if none have been set */
  public Header[] getResponseHeaders() {
    return this.responseHeaders;
  }

  /** @param responseHeaders the response headers */
  public void setResponseHeaders(Header[] responseHeaders) {
    this.responseHeaders = responseHeaders;
  }

  /** @return the fetched URL, or {@code null} if none has been set */
  public String getFetchedUrl() {
    return this.fetchedUrl;
  }

  /** @param fetchedUrl the fetched URL */
  public void setFetchedUrl(String fetchedUrl) {
    this.fetchedUrl = fetchedUrl;
  }

  /** @return the moved-to URL, or {@code null} if none has been set */
  public String getMovedToUrl() {
    return this.movedToUrl;
  }

  /** @param movedToUrl the moved-to URL */
  public void setMovedToUrl(String movedToUrl) {
    this.movedToUrl = movedToUrl;
  }
}
0 0