flume组件HTTPSource解析

来源:互联网 发布:aster数据 编辑:程序博客网 时间:2024/05/22 15:27

HTTPSource是Flume中专门监听HTTP请求的source组件,主要负责在机器上打开某个端口,接收日志请求,并将日志发送到channel中。
HTTPSource的源码如下

package org.apache.flume.source.http;import com.google.common.base.Preconditions;import com.google.common.base.Throwables;import org.apache.flume.ChannelException;import org.apache.flume.Context;import org.apache.flume.Event;import org.apache.flume.EventDrivenSource;import org.apache.flume.conf.Configurable;import org.apache.flume.instrumentation.SourceCounter;import org.apache.flume.source.AbstractSource;import org.apache.flume.tools.HTTPServerConstraintUtil;import org.mortbay.jetty.Connector;import org.mortbay.jetty.Server;import org.mortbay.jetty.nio.SelectChannelConnector;import org.mortbay.jetty.security.SslSocketConnector;import org.mortbay.jetty.servlet.ServletHolder;import org.slf4j.Logger;import org.slf4j.LoggerFactory;import javax.net.ssl.SSLServerSocket;import javax.servlet.http.HttpServlet;import javax.servlet.http.HttpServletRequest;import javax.servlet.http.HttpServletResponse;import java.io.IOException;import java.net.ServerSocket;import java.util.*;/** * A source which accepts Flume Events by HTTP POST and GET. GET should be used * for experimentation only. HTTP requests are converted into flume events by a * pluggable "handler" which must implement the * {@linkplain HTTPSourceHandler} interface. This handler takes a * {@linkplain HttpServletRequest} and returns a list of flume events. * * The source accepts the following parameters: <p> <tt>port</tt>: port to which * the server should bind. Mandatory <p> <tt>handler</tt>: the class that * deserializes a HttpServletRequest into a list of flume events. This class * must implement HTTPSourceHandler. Default: * {@linkplain JSONHandler}. <p> <tt>handler.*</tt> Any configuration * to be passed to the handler. <p> * * All events deserialized from one Http request are committed to the channel in * one transaction, thus allowing for increased efficiency on channels like the * file channel. If the handler throws an exception this source will return * a HTTP status of 400. 
If the channel is full, or the source is unable to * append events to the channel, the source will return a HTTP 503 - Temporarily * unavailable status. * * A JSON handler which converts JSON objects to Flume events is provided. * */public class HTTPSource extends AbstractSource implements        EventDrivenSource, Configurable {  /*   * There are 2 ways of doing this:   * a. Have a static server instance and use connectors in each source   *    which binds to the port defined for that source.   * b. Each source starts its own server instance, which binds to the source's   *    port.   *   * b is more efficient than a because Jetty does not allow binding a   * servlet to a connector. So each request will need to go through each   * each of the handlers/servlet till the correct one is found.   *   */  private static final Logger LOG = LoggerFactory.getLogger(HTTPSource.class);  /**  *监听的端口  */  private volatile Integer port;  /**  *监听器的名字  */  private volatile Server srv;  /**  *监听的地址  */  private volatile String host;  /**  *http的解析器  */  private HTTPSourceHandler handler;  /**  *全局对象  */  private SourceCounter sourceCounter;  // SSL configuration variable  /**  *keyStore文件保存的地址  */  private volatile String keyStorePath;  /**  *keyStore打开的密码  */  private volatile String keyStorePassword;  /**  *是否使用ssl加密  */  private volatile Boolean sslEnabled;  private final List<String> excludedProtocols = new LinkedList<String>();/***初始化配置*/  @Override  public void configure(Context context) {    try {      // SSL related config      //是否使用ssl加密连接      sslEnabled = context.getBoolean(HTTPSourceConfigurationConstants.SSL_ENABLED, false);      //监听的端口      port = context.getInteger(HTTPSourceConfigurationConstants.CONFIG_PORT);      //监听的地址      host = context.getString(HTTPSourceConfigurationConstants.CONFIG_BIND,        HTTPSourceConfigurationConstants.DEFAULT_BIND);     //执行检查      Preconditions.checkState(host != null && !host.isEmpty(),                "HTTPSource hostname 
specified is empty");      Preconditions.checkNotNull(port, "HTTPSource requires a port number to be"        + " specified");      //获得http的解析类,如果没有使用默认的解析类      String handlerClassName = context.getString(              HTTPSourceConfigurationConstants.CONFIG_HANDLER,              HTTPSourceConfigurationConstants.DEFAULT_HANDLER).trim();      //是否使用ssl加密,如果使用读取keyStore的地址      if(sslEnabled) {        LOG.debug("SSL configuration enabled");        keyStorePath = context.getString(HTTPSourceConfigurationConstants.SSL_KEYSTORE);        Preconditions.checkArgument(keyStorePath != null && !keyStorePath.isEmpty(),                                        "Keystore is required for SSL Conifguration" );        keyStorePassword = context.getString(HTTPSourceConfigurationConstants.SSL_KEYSTORE_PASSWORD);        Preconditions.checkArgument(keyStorePassword != null,          "Keystore password is required for SSL Configuration");        String excludeProtocolsStr = context.getString(HTTPSourceConfigurationConstants          .EXCLUDE_PROTOCOLS);        if (excludeProtocolsStr == null) {          excludedProtocols.add("SSLv3");        } else {          excludedProtocols.addAll(Arrays.asList(excludeProtocolsStr.split(" ")));          if (!excludedProtocols.contains("SSLv3")) {            excludedProtocols.add("SSLv3");          }        }      }      @SuppressWarnings("unchecked")      /**      *实例化http解析类      */      Class<? extends HTTPSourceHandler> clazz =              (Class<? 
extends HTTPSourceHandler>)              Class.forName(handlerClassName);      handler = clazz.getDeclaredConstructor().newInstance();      //ref: http://docs.codehaus.org/display/JETTY/Embedding+Jetty      //ref: http://jetty.codehaus.org/jetty/jetty-6/apidocs/org/mortbay/jetty/servlet/Context.html      Map<String, String> subProps =              context.getSubProperties(              HTTPSourceConfigurationConstants.CONFIG_HANDLER_PREFIX);      handler.configure(new Context(subProps));    } catch (ClassNotFoundException ex) {      LOG.error("Error while configuring HTTPSource. Exception follows.", ex);      Throwables.propagate(ex);    } catch (ClassCastException ex) {      LOG.error("Deserializer is not an instance of HTTPSourceHandler."              + "Deserializer must implement HTTPSourceHandler.");      Throwables.propagate(ex);    } catch (Exception ex) {      LOG.error("Error configuring HTTPSource!", ex);      Throwables.propagate(ex);    }    if (sourceCounter == null) {      sourceCounter = new SourceCounter(getName());    }  }  private void checkHostAndPort() {    Preconditions.checkState(host != null && !host.isEmpty(),      "HTTPSource hostname specified is empty");    Preconditions.checkNotNull(port, "HTTPSource requires a port number to be"      + " specified");  }/***开始监听*/  @Override  public void start() {    Preconditions.checkState(srv == null,            "Running HTTP Server found in source: " + getName()            + " before I started one."            
+ "Will not attempt to start.");    /**    *实例化一个jetty服务器对象    */    srv = new Server();    // Connector Array    //实例化一个监听请求地址    Connector[] connectors = new Connector[1];    //是否打开ssl连接    if (sslEnabled) {      SslSocketConnector sslSocketConnector = new HTTPSourceSocketConnector(excludedProtocols);      sslSocketConnector.setKeystore(keyStorePath);      sslSocketConnector.setKeyPassword(keyStorePassword);      sslSocketConnector.setReuseAddress(true);      connectors[0] = sslSocketConnector;    } else {      SelectChannelConnector connector = new SelectChannelConnector();      connector.setReuseAddress(true);      connectors[0] = connector;    }    //设置监听的域名    connectors[0].setHost(host);    //设置监听的端口    connectors[0].setPort(port);    srv.setConnectors(connectors);    try {      org.mortbay.jetty.servlet.Context root =        new org.mortbay.jetty.servlet.Context(          srv, "/", org.mortbay.jetty.servlet.Context.SESSIONS);          //设置servlet对象      root.addServlet(new ServletHolder(new FlumeHTTPServlet()), "/");      HTTPServerConstraintUtil.enforceConstraints(root);      srv.start();      Preconditions.checkArgument(srv.getHandler().equals(root));    } catch (Exception ex) {      LOG.error("Error while starting HTTPSource. Exception follows.", ex);      Throwables.propagate(ex);    }    Preconditions.checkArgument(srv.isRunning());    sourceCounter.start();    super.start();  }//停止监听  @Override  public void stop() {    try {      srv.stop();      srv.join();      srv = null;    } catch (Exception ex) {      LOG.error("Error while stopping HTTPSource. Exception follows.", ex);    }    sourceCounter.stop();    LOG.info("Http source {} stopped. 
Metrics: {}", getName(), sourceCounter);  }//实际监听处理类  private class FlumeHTTPServlet extends HttpServlet {    private static final long serialVersionUID = 4891924863218790344L;    @Override    public void doPost(HttpServletRequest request, HttpServletResponse response)            throws IOException {      List<Event> events = Collections.emptyList(); //create empty list      try {      //处理监听取得的对象        events = handler.getEvents(request);      } catch (HTTPBadRequestException ex) {        LOG.warn("Received bad request from client. ", ex);        response.sendError(HttpServletResponse.SC_BAD_REQUEST,                "Bad request from client. "                + ex.getMessage());        return;      } catch (Exception ex) {        LOG.warn("Deserializer threw unexpected exception. ", ex);        response.sendError(HttpServletResponse.SC_INTERNAL_SERVER_ERROR,                "Deserializer threw unexpected exception. "                + ex.getMessage());        return;      }      sourceCounter.incrementAppendBatchReceivedCount();      sourceCounter.addToEventReceivedCount(events.size());      try {      //将取得的处理结果放入chanel中,交给下一个节点        getChannelProcessor().processEventBatch(events);      } catch (ChannelException ex) {        LOG.warn("Error appending event to channel. "                + "Channel might be full. Consider increasing the channel "                + "capacity or make sure the sinks perform faster.", ex);        response.sendError(HttpServletResponse.SC_SERVICE_UNAVAILABLE,                "Error appending event to channel. Channel might be full."                + ex.getMessage());        return;      } catch (Exception ex) {        LOG.warn("Unexpected error appending event to channel. ", ex);        response.sendError(HttpServletResponse.SC_INTERNAL_SERVER_ERROR,                "Unexpected error while appending event to channel. 
"                + ex.getMessage());        return;      }      //设置返回给客户端的消息      response.setCharacterEncoding(request.getCharacterEncoding());      response.setStatus(HttpServletResponse.SC_OK);      response.flushBuffer();      sourceCounter.incrementAppendBatchAcceptedCount();      sourceCounter.addToEventAcceptedCount(events.size());    }    @Override    public void doGet(HttpServletRequest request, HttpServletResponse response)            throws IOException {      doPost(request, response);    }  }  private static class HTTPSourceSocketConnector extends SslSocketConnector {    private final List<String> excludedProtocols;    HTTPSourceSocketConnector(List<String> excludedProtocols) {      this.excludedProtocols = excludedProtocols;    }    @Override    public ServerSocket newServerSocket(String host, int port,      int backlog) throws IOException {      SSLServerSocket socket = (SSLServerSocket)super.newServerSocket(host,        port, backlog);      String[] protocols = socket.getEnabledProtocols();      List<String> newProtocols = new ArrayList<String>(protocols.length);      for(String protocol: protocols) {        if (!excludedProtocols.contains(protocol)) {          newProtocols.add(protocol);        }      }      socket.setEnabledProtocols(        newProtocols.toArray(new String[newProtocols.size()]));      return socket;    }  }}

HTTPSource默认配置保存在HTTPSourceConfigurationConstants

package org.apache.flume.source.http;

/**
 * Names of the configuration keys understood by {@code HTTPSource}, together
 * with the default values used when a key is absent.
 */
public class HTTPSourceConfigurationConstants {

  // ---- Listening address ----

  /** Key of the port to listen on. */
  public static final String CONFIG_PORT = "port";

  /** Key of the host name / address to bind to. */
  public static final String CONFIG_BIND = "bind";

  /** Bind address used when {@link #CONFIG_BIND} is not set. */
  public static final String DEFAULT_BIND = "0.0.0.0";

  // ---- Request handler ----

  /** Key of the handler class name. */
  public static final String CONFIG_HANDLER = "handler";

  /** Prefix of the keys whose sub-properties are passed on to the handler. */
  public static final String CONFIG_HANDLER_PREFIX = CONFIG_HANDLER + ".";

  /** Handler class used when {@link #CONFIG_HANDLER} is not set. */
  public static final String DEFAULT_HANDLER =
      "org.apache.flume.source.http.JSONHandler";

  // ---- SSL ----

  /** Key of the flag that switches SSL on. */
  public static final String SSL_ENABLED = "enableSSL";

  /** Key of the keystore location. */
  public static final String SSL_KEYSTORE = "keystore";

  /** Key of the keystore password. */
  public static final String SSL_KEYSTORE_PASSWORD = "keystorePassword";

  /** Key of the space separated list of protocols to exclude. */
  public static final String EXCLUDE_PROTOCOLS = "excludeProtocols";
}

HTTPSourceHandler是HTTPSource用来解析请求的接口,自定义解析器只需要实现getEvents(HttpServletRequest request)方法(以及继承自Configurable的configure方法)即可

package org.apache.flume.source.http;

import java.util.List;
import javax.servlet.http.HttpServletRequest;
import org.apache.flume.Event;
import org.apache.flume.conf.Configurable;

/**
 * Contract for the pluggable component that converts an HTTP request into
 * Flume events. Implementations are also {@link Configurable}.
 */
public interface HTTPSourceHandler extends Configurable {

  /**
   * Converts an {@linkplain HttpServletRequest} into a list of Flume events.
   * An exception is thrown when the request cannot be parsed into events in
   * the expected format, or when some other error occurs. <p>
   *
   * @param request The request to be parsed into Flume events.
   * @return List of Flume events generated from the request.
   * @throws HTTPBadRequestException If the request was not parsed correctly
   * into an event because the request was not in the expected format.
   * @throws Exception If there was an unexpected error.
   */
  List<Event> getEvents(HttpServletRequest request)
      throws HTTPBadRequestException, Exception;
}

HTTPSourceHandler 的默认实现类是JSONHandler

package org.apache.flume.source.http;import com.google.gson.Gson;import com.google.gson.GsonBuilder;import com.google.gson.JsonSyntaxException;import com.google.gson.reflect.TypeToken;import java.io.BufferedReader;import java.lang.reflect.Type;import java.nio.charset.UnsupportedCharsetException;import java.util.ArrayList;import java.util.List;import javax.servlet.http.HttpServletRequest;import org.apache.flume.Context;import org.apache.flume.Event;import org.apache.flume.event.EventBuilder;import org.apache.flume.event.JSONEvent;import org.slf4j.Logger;import org.slf4j.LoggerFactory;/** * * JSONHandler for HTTPSource that accepts an array of events. * * This handler throws exception if the deserialization fails because of bad * format or any other reason. * * * Each event must be encoded as a map with two key-value pairs. <p> 1. headers * - the key for this key-value pair is "headers". The value for this key is * another map, which represent the event headers. These headers are inserted * into the Flume event as is. <p> 2. body - The body is a string which * represents the body of the event. The key for this key-value pair is "body". * All key-value pairs are considered to be headers. An example: <p> [{"headers" * : {"a":"b", "c":"d"},"body": "random_body"}, {"headers" : {"e": "f"},"body": * "random_body2"}] <p> would be interpreted as the following two flume events: * <p> * Event with body: "random_body" (in UTF-8/UTF-16/UTF-32 encoded bytes) * and headers : (a:b, c:d) <p> * * Event with body: "random_body2" (in UTF-8/UTF-16/UTF-32 encoded bytes) and * headers : (e:f) <p> * * The charset of the body is read from the request and used. If no charset is * set in the request, then the charset is assumed to be JSON's default - UTF-8. * The JSON handler supports UTF-8, UTF-16 and UTF-32. * * To set the charset, the request must have content type specified as * "application/json; charset=UTF-8" (replace UTF-8 with UTF-16 or UTF-32 as * required). 
* * One way to create an event in the format expected by this handler, is to * use {@linkplain JSONEvent} and use {@linkplain Gson} to create the JSON * string using the * {@linkplain Gson#toJson(java.lang.Object, java.lang.reflect.Type) } * method. The type token to pass as the 2nd argument of this method * for list of events can be created by: <p> * * Type type = new TypeToken<List<JSONEvent>>() {}.getType(); <p> * */public class JSONHandler implements HTTPSourceHandler {  private static final Logger LOG = LoggerFactory.getLogger(JSONHandler.class);  private final Type listType =          new TypeToken<List<JSONEvent>>() {          }.getType();  private final Gson gson;  public JSONHandler() {    gson = new GsonBuilder().disableHtmlEscaping().create();  }  /**   * {@inheritDoc}   */  @Override  public List<Event> getEvents(HttpServletRequest request) throws Exception {    BufferedReader reader = request.getReader();    String charset = request.getCharacterEncoding();    //UTF-8 is default for JSON. If no charset is specified, UTF-8 is to    //be assumed.    if (charset == null) {      LOG.debug("Charset is null, default charset of UTF-8 will be used.");      charset = "UTF-8";    } else if (!(charset.equalsIgnoreCase("utf-8")            || charset.equalsIgnoreCase("utf-16")            || charset.equalsIgnoreCase("utf-32"))) {      LOG.error("Unsupported character set in request {}. "              + "JSON handler supports UTF-8, "              + "UTF-16 and UTF-32 only.", charset);      throw new UnsupportedCharsetException("JSON handler supports UTF-8, "              + "UTF-16 and UTF-32 only.");    }    /*     * Gson throws Exception if the data is not parseable to JSON.     * Need not catch it since the source will catch it and return error.     
*/    List<Event> eventList = new ArrayList<Event>(0);    try {      eventList = gson.fromJson(reader, listType);    } catch (JsonSyntaxException ex) {      throw new HTTPBadRequestException("Request has invalid JSON Syntax.", ex);    }    for (Event e : eventList) {      ((JSONEvent) e).setCharset(charset);    }    return getSimpleEvents(eventList);  }  @Override  public void configure(Context context) {  }  private List<Event> getSimpleEvents(List<Event> events) {    List<Event> newEvents = new ArrayList<Event>(events.size());    for(Event e:events) {      newEvents.add(EventBuilder.withBody(e.getBody(), e.getHeaders()));    }    return newEvents;  }}

source和channel交换数据的默认类型是Event接口

package org.apache.flume;

import java.util.Map;

/**
 * Basic representation of a data object in Flume.
 * Provides access to data as it flows through the system.
 */
public interface Event {

  /**
   * Returns a map of name-value pairs describing the data stored in the body.
   *
   * @return the event headers.
   */
  Map<String, String> getHeaders();

  /**
   * Set the event headers.
   *
   * @param headers Map of headers to replace the current headers.
   */
  void setHeaders(Map<String, String> headers);

  /**
   * Returns the raw byte array of the data contained in this event.
   *
   * @return the event body.
   */
  byte[] getBody();

  /**
   * Sets the raw byte array of the data contained in this event.
   *
   * @param body The data.
   */
  void setBody(byte[] body);
}

HTTPSource默认的Event实现类是JSONEvent

package org.apache.flume.event;

import java.io.UnsupportedEncodingException;
import java.util.Map;
import org.apache.flume.Event;
import org.apache.flume.FlumeException;

/**
 * {@link Event} implementation that keeps its body as a {@code String} and
 * converts it to and from bytes using a configurable charset (UTF-8 unless
 * changed through {@link #setCharset(String)}).
 */
public class JSONEvent implements Event {

  private Map<String, String> headers;
  private String body;
  // NOTE(review): transient, presumably so that it is not part of the
  // serialized JSON form of the event - confirm against the serializer in use.
  private transient String charset = "UTF-8";

  @Override
  public Map<String, String> getHeaders() {
    return headers;
  }

  @Override
  public void setHeaders(Map<String, String> headers) {
    this.headers = headers;
  }

  /**
   * Returns the body encoded with the current charset.
   *
   * @return the encoded body, or an empty array when no body is set.
   * @throws FlumeException if the current charset is not supported.
   */
  @Override
  public byte[] getBody() {
    if (body != null) {
      try {
        return body.getBytes(charset);
      } catch (UnsupportedEncodingException ex) {
        throw new FlumeException(String.format("%s encoding not supported", charset), ex);
      }
    } else {
      return new byte[0];
    }
  }

  /**
   * Stores the given bytes as the body, decoding them with the current
   * charset so that {@link #getBody()} returns the same bytes again.
   *
   * @param body the raw body; {@code null} is stored as an empty string.
   * @throws FlumeException if the current charset is not supported.
   */
  @Override
  public void setBody(byte[] body) {
    if (body != null) {
      try {
        // Decode with the event's charset, not the platform default, to stay
        // symmetric with the encoding done in getBody().
        this.body = new String(body, charset);
      } catch (UnsupportedEncodingException ex) {
        throw new FlumeException(String.format("%s encoding not supported", charset), ex);
      }
    } else {
      this.body = "";
    }
  }

  /**
   * Sets the charset used to convert the body to and from bytes.
   *
   * @param charset name of the charset.
   */
  public void setCharset(String charset) {
    this.charset = charset;
  }
}

接口Source是所有source必须实现的接口,它继承自接口LifecycleAware和NamedComponent

package org.apache.flume;import org.apache.flume.annotations.InterfaceAudience;import org.apache.flume.annotations.InterfaceStability;import org.apache.flume.channel.ChannelProcessor;import org.apache.flume.lifecycle.LifecycleAware;/** * <p> * A source generates {@plainlink Event events} and calls methods on the * configured {@link ChannelProcessor} to persist those events into the * configured {@linkplain Channel channels}. * </p> * * <p> * Sources are associated with unique {@linkplain NamedComponent names} that can * be used for separating configuration and working namespaces. * </p> * * <p> * No guarantees are given regarding thread safe access. * </p> * * @see org.apache.flume.Channel * @see org.apache.flume.Sink */@InterfaceAudience.Public@InterfaceStability.Stablepublic interface Source extends LifecycleAware, NamedComponent {  /**   * Specifies which channel processor will handle this source's events.   *   * @param channelProcessor   */  public void setChannelProcessor(ChannelProcessor channelProcessor);  /**   * Returns the channel processor that will handle this source's events.   */  public ChannelProcessor getChannelProcessor();}

使用httpsource可以很方便的满足客户监听日志的需求,只需要实现接口HTTPSourceHandler即可,所以如果想快速开发就可以使用httpsource来完成日常的开发!

0 0
原创粉丝点击