flume组件HTTPSource解析
来源:互联网 发布:aster数据 编辑:程序博客网 时间:2024/05/22 15:27
HTTPSource是Flume中专门监听HTTP请求的组件,主要负责在机器上打开某个端口,接收日志请求,并将日志发送到channel中。
HTTPSource的源码如下
package org.apache.flume.source.http;

import com.google.common.base.Preconditions;
import com.google.common.base.Throwables;
import org.apache.flume.ChannelException;
import org.apache.flume.Context;
import org.apache.flume.Event;
import org.apache.flume.EventDrivenSource;
import org.apache.flume.conf.Configurable;
import org.apache.flume.instrumentation.SourceCounter;
import org.apache.flume.source.AbstractSource;
import org.apache.flume.tools.HTTPServerConstraintUtil;
import org.mortbay.jetty.Connector;
import org.mortbay.jetty.Server;
import org.mortbay.jetty.nio.SelectChannelConnector;
import org.mortbay.jetty.security.SslSocketConnector;
import org.mortbay.jetty.servlet.ServletHolder;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import javax.net.ssl.SSLServerSocket;
import javax.servlet.http.HttpServlet;
import javax.servlet.http.HttpServletRequest;
import javax.servlet.http.HttpServletResponse;
import java.io.IOException;
import java.net.ServerSocket;
import java.util.*;

/**
 * A source which accepts Flume Events by HTTP POST and GET. GET should be used
 * for experimentation only. HTTP requests are converted into flume events by a
 * pluggable "handler" which must implement the
 * {@linkplain HTTPSourceHandler} interface. This handler takes a
 * {@linkplain HttpServletRequest} and returns a list of flume events.
 *
 * The source accepts the following parameters: <p> <tt>port</tt>: port to which
 * the server should bind. Mandatory <p> <tt>handler</tt>: the class that
 * deserializes a HttpServletRequest into a list of flume events. This class
 * must implement HTTPSourceHandler. Default:
 * {@linkplain JSONHandler}. <p> <tt>handler.*</tt> Any configuration
 * to be passed to the handler. <p>
 *
 * All events deserialized from one Http request are committed to the channel in
 * one transaction, thus allowing for increased efficiency on channels like the
 * file channel. If the handler throws an exception this source will return
 * a HTTP status of 400. If the channel is full, or the source is unable to
 * append events to the channel, the source will return a HTTP 503 - Temporarily
 * unavailable status.
 *
 * A JSON handler which converts JSON objects to Flume events is provided.
 *
 */
public class HTTPSource extends AbstractSource implements
        EventDrivenSource, Configurable {
  /*
   * There are 2 ways of doing this:
   * a. Have a static server instance and use connectors in each source
   *    which binds to the port defined for that source.
   * b. Each source starts its own server instance, which binds to the source's
   *    port.
   *
   * b is more efficient than a because Jetty does not allow binding a
   * servlet to a connector. So each request will need to go through each
   * each of the handlers/servlet till the correct one is found.
   *
   */

  private static final Logger LOG = LoggerFactory.getLogger(HTTPSource.class);

  /** Port the embedded server listens on. */
  private volatile Integer port;

  /** Embedded Jetty server; {@code null} until {@link #start()} creates it. */
  private volatile Server srv;

  /** Host name / address the embedded server binds to. */
  private volatile String host;

  /** Handler that turns an HTTP request into Flume events. */
  private HTTPSourceHandler handler;

  /** Counter used to record received / accepted batches and events. */
  private SourceCounter sourceCounter;

  // SSL configuration variables

  /** Location of the keystore file. */
  private volatile String keyStorePath;

  /** Password used to open the keystore. */
  private volatile String keyStorePassword;

  /** Whether the connector should use SSL. */
  private volatile Boolean sslEnabled;

  /** Protocol names removed from the enabled protocols of the SSL server socket. */
  private final List<String> excludedProtocols = new LinkedList<String>();

  /**
   * Reads the source configuration and instantiates the request handler.
   *
   * <p>Reads the SSL flag, port and bind address, validates host and port,
   * and, when SSL is enabled, the keystore settings and the protocols to
   * exclude ("SSLv3" is always excluded). The handler class is loaded by name,
   * instantiated through its no-argument constructor and configured with the
   * sub-properties under the handler prefix.
   *
   * <p>Any failure is logged and rethrown via {@link Throwables#propagate}.
   *
   * @param context the configuration of this source
   */
  @Override
  public void configure(Context context) {
    try {
      // The null guard on sourceCounter at the end of this method suggests
      // configure() can run more than once; start from an empty list so that
      // exclusions from a previous configuration neither persist nor duplicate.
      excludedProtocols.clear();

      // SSL related config
      sslEnabled = context.getBoolean(HTTPSourceConfigurationConstants.SSL_ENABLED, false);

      port = context.getInteger(HTTPSourceConfigurationConstants.CONFIG_PORT);
      host = context.getString(HTTPSourceConfigurationConstants.CONFIG_BIND,
          HTTPSourceConfigurationConstants.DEFAULT_BIND);

      // Same checks (and messages) that used to be duplicated inline here.
      checkHostAndPort();

      // Handler class name; falls back to the default handler when not configured.
      String handlerClassName = context.getString(
          HTTPSourceConfigurationConstants.CONFIG_HANDLER,
          HTTPSourceConfigurationConstants.DEFAULT_HANDLER).trim();

      if (sslEnabled) {
        LOG.debug("SSL configuration enabled");
        keyStorePath = context.getString(HTTPSourceConfigurationConstants.SSL_KEYSTORE);
        Preconditions.checkArgument(keyStorePath != null && !keyStorePath.isEmpty(),
            "Keystore is required for SSL Conifguration");
        keyStorePassword =
            context.getString(HTTPSourceConfigurationConstants.SSL_KEYSTORE_PASSWORD);
        Preconditions.checkArgument(keyStorePassword != null,
            "Keystore password is required for SSL Configuration");
        String excludeProtocolsStr =
            context.getString(HTTPSourceConfigurationConstants.EXCLUDE_PROTOCOLS);
        if (excludeProtocolsStr == null) {
          excludedProtocols.add("SSLv3");
        } else {
          // The configured value is a space separated list of protocol names.
          excludedProtocols.addAll(Arrays.asList(excludeProtocolsStr.split(" ")));
          if (!excludedProtocols.contains("SSLv3")) {
            excludedProtocols.add("SSLv3");
          }
        }
      }

      // Instantiate the configured handler class.
      @SuppressWarnings("unchecked")
      Class<? extends HTTPSourceHandler> clazz =
          (Class<? extends HTTPSourceHandler>) Class.forName(handlerClassName);
      handler = clazz.getDeclaredConstructor().newInstance();

      //ref: http://docs.codehaus.org/display/JETTY/Embedding+Jetty
      //ref: http://jetty.codehaus.org/jetty/jetty-6/apidocs/org/mortbay/jetty/servlet/Context.html
      Map<String, String> subProps = context.getSubProperties(
          HTTPSourceConfigurationConstants.CONFIG_HANDLER_PREFIX);
      handler.configure(new Context(subProps));
    } catch (ClassNotFoundException ex) {
      LOG.error("Error while configuring HTTPSource. Exception follows.", ex);
      Throwables.propagate(ex);
    } catch (ClassCastException ex) {
      // Pass the exception to the logger so the stack trace is not lost.
      LOG.error("Deserializer is not an instance of HTTPSourceHandler."
          + "Deserializer must implement HTTPSourceHandler.", ex);
      Throwables.propagate(ex);
    } catch (Exception ex) {
      LOG.error("Error configuring HTTPSource!", ex);
      Throwables.propagate(ex);
    }
    if (sourceCounter == null) {
      sourceCounter = new SourceCounter(getName());
    }
  }

  /**
   * Verifies that a non-empty host and a port have been configured.
   *
   * @throws IllegalStateException if the host is null or empty
   * @throws NullPointerException if the port is null
   */
  private void checkHostAndPort() {
    Preconditions.checkState(host != null && !host.isEmpty(),
        "HTTPSource hostname specified is empty");
    Preconditions.checkNotNull(port, "HTTPSource requires a port number to be"
        + " specified");
  }

  /**
   * Creates the embedded Jetty server, binds it to the configured host and
   * port (with an SSL connector when SSL is enabled), registers the servlet
   * that receives events and starts the server, the counter and the source.
   */
  @Override
  public void start() {
    Preconditions.checkState(srv == null,
        "Running HTTP Server found in source: " + getName()
        + " before I started one."
        + "Will not attempt to start.");
    srv = new Server();

    // Connector Array: exactly one connector, SSL or plain.
    Connector[] connectors = new Connector[1];

    if (sslEnabled) {
      SslSocketConnector sslSocketConnector = new HTTPSourceSocketConnector(excludedProtocols);
      sslSocketConnector.setKeystore(keyStorePath);
      sslSocketConnector.setKeyPassword(keyStorePassword);
      sslSocketConnector.setReuseAddress(true);
      connectors[0] = sslSocketConnector;
    } else {
      SelectChannelConnector connector = new SelectChannelConnector();
      connector.setReuseAddress(true);
      connectors[0] = connector;
    }

    connectors[0].setHost(host);
    connectors[0].setPort(port);
    srv.setConnectors(connectors);
    try {
      org.mortbay.jetty.servlet.Context root =
          new org.mortbay.jetty.servlet.Context(
              srv, "/", org.mortbay.jetty.servlet.Context.SESSIONS);
      // All requests are served by the Flume servlet.
      root.addServlet(new ServletHolder(new FlumeHTTPServlet()), "/");
      HTTPServerConstraintUtil.enforceConstraints(root);
      srv.start();
      Preconditions.checkArgument(srv.getHandler().equals(root));
    } catch (Exception ex) {
      LOG.error("Error while starting HTTPSource. Exception follows.", ex);
      Throwables.propagate(ex);
    }
    Preconditions.checkArgument(srv.isRunning());
    sourceCounter.start();
    super.start();
  }

  /**
   * Stops the embedded server, waits for it to finish and stops the counter.
   * A failure while stopping the server is logged and not rethrown.
   */
  @Override
  public void stop() {
    try {
      srv.stop();
      srv.join();
      srv = null;
    } catch (Exception ex) {
      LOG.error("Error while stopping HTTPSource. Exception follows.", ex);
    }
    sourceCounter.stop();
    LOG.info("Http source {} stopped. Metrics: {}", getName(), sourceCounter);
  }

  /** Servlet that converts each request into events and hands them to the channel processor. */
  private class FlumeHTTPServlet extends HttpServlet {

    private static final long serialVersionUID = 4891924863218790344L;

    /**
     * Deserializes the request with the configured handler and passes the
     * resulting events to the channel processor as one batch.
     *
     * <p>Responds 400 when the handler reports a bad request, 500 on any
     * other handler or channel error, 503 on a {@link ChannelException} and
     * 200 otherwise.
     */
    @Override
    public void doPost(HttpServletRequest request, HttpServletResponse response)
            throws IOException {
      List<Event> events = Collections.emptyList(); //create empty list
      try {
        events = handler.getEvents(request);
      } catch (HTTPBadRequestException ex) {
        LOG.warn("Received bad request from client. ", ex);
        response.sendError(HttpServletResponse.SC_BAD_REQUEST,
                "Bad request from client. "
                + ex.getMessage());
        return;
      } catch (Exception ex) {
        LOG.warn("Deserializer threw unexpected exception. ", ex);
        response.sendError(HttpServletResponse.SC_INTERNAL_SERVER_ERROR,
                "Deserializer threw unexpected exception. "
                + ex.getMessage());
        return;
      }
      sourceCounter.incrementAppendBatchReceivedCount();
      sourceCounter.addToEventReceivedCount(events.size());
      try {
        // Hand the batch to the channel processor.
        getChannelProcessor().processEventBatch(events);
      } catch (ChannelException ex) {
        LOG.warn("Error appending event to channel. "
                + "Channel might be full. Consider increasing the channel "
                + "capacity or make sure the sinks perform faster.", ex);
        response.sendError(HttpServletResponse.SC_SERVICE_UNAVAILABLE,
                "Error appending event to channel. Channel might be full."
                + ex.getMessage());
        return;
      } catch (Exception ex) {
        LOG.warn("Unexpected error appending event to channel. ", ex);
        response.sendError(HttpServletResponse.SC_INTERNAL_SERVER_ERROR,
                "Unexpected error while appending event to channel. "
                + ex.getMessage());
        return;
      }

      // The batch has been handed over successfully at this point. Record it
      // before touching the response: flushBuffer() can throw IOException
      // (declared by this method), which previously skipped these counters.
      sourceCounter.incrementAppendBatchAcceptedCount();
      sourceCounter.addToEventAcceptedCount(events.size());

      response.setCharacterEncoding(request.getCharacterEncoding());
      response.setStatus(HttpServletResponse.SC_OK);
      response.flushBuffer();
    }

    /** GET requests are handled exactly like POST requests. */
    @Override
    public void doGet(HttpServletRequest request, HttpServletResponse response)
            throws IOException {
      doPost(request, response);
    }
  }

  /** SSL connector whose server sockets have the excluded protocols disabled. */
  private static class HTTPSourceSocketConnector extends SslSocketConnector {

    private final List<String> excludedProtocols;

    HTTPSourceSocketConnector(List<String> excludedProtocols) {
      this.excludedProtocols = excludedProtocols;
    }

    /**
     * Creates the server socket via the superclass and re-enables only those
     * protocols that are not in the exclusion list.
     */
    @Override
    public ServerSocket newServerSocket(String host, int port,
      int backlog) throws IOException {
      SSLServerSocket socket = (SSLServerSocket)super.newServerSocket(host,
        port, backlog);
      String[] protocols = socket.getEnabledProtocols();
      List<String> newProtocols = new ArrayList<String>(protocols.length);
      for(String protocol: protocols) {
        if (!excludedProtocols.contains(protocol)) {
          newProtocols.add(protocol);
        }
      }
      socket.setEnabledProtocols(
        newProtocols.toArray(new String[newProtocols.size()]));
      return socket;
    }
  }
}
HTTPSource默认配置保存在HTTPSourceConfigurationConstants
package org.apache.flume.source.http;

/**
 * Configuration keys and default values for the HTTP source.
 */
public class HTTPSourceConfigurationConstants {

  // ---- Binding -----------------------------------------------------------

  /** Key of the port setting. */
  public static final String CONFIG_PORT = "port";

  /** Key of the bind address setting. */
  public static final String CONFIG_BIND = "bind";

  /** Bind address used when none is configured. */
  public static final String DEFAULT_BIND = "0.0.0.0";

  // ---- Handler -----------------------------------------------------------

  /** Key of the handler class name setting. */
  public static final String CONFIG_HANDLER = "handler";

  /** Prefix of the settings that belong to the handler: the handler key followed by a dot. */
  public static final String CONFIG_HANDLER_PREFIX = CONFIG_HANDLER + ".";

  /** Fully qualified class name of the handler used when none is configured. */
  public static final String DEFAULT_HANDLER =
      "org.apache.flume.source.http.JSONHandler";

  // ---- SSL ---------------------------------------------------------------

  /** Key of the flag that switches SSL on. */
  public static final String SSL_ENABLED = "enableSSL";

  /** Key of the keystore location setting. */
  public static final String SSL_KEYSTORE = "keystore";

  /** Key of the keystore password setting. */
  public static final String SSL_KEYSTORE_PASSWORD = "keystorePassword";

  /** Key of the setting that lists protocols to exclude. */
  public static final String EXCLUDE_PROTOCOLS = "excludeProtocols";
}
HTTPSourceHandler是HTTPSource的请求处理接口,自定义处理器只需要实现getEvents(HttpServletRequest request)方法(以及继承自Configurable的configure方法)即可
package org.apache.flume.source.http;

import java.util.List;
import javax.servlet.http.HttpServletRequest;
import org.apache.flume.Event;
import org.apache.flume.conf.Configurable;

/**
 * Contract for the pluggable component that converts an HTTP request into
 * Flume events. Implementations are also {@link Configurable}.
 */
public interface HTTPSourceHandler extends Configurable {

  /**
   * Converts an {@linkplain HttpServletRequest} into a list of Flume events.
   *
   * <p>An exception is thrown when the request cannot be parsed into events
   * in the expected format, and may also be thrown for any other kind of
   * error.
   *
   * @param request the request to be parsed into Flume events
   * @return the Flume events generated from the request
   * @throws HTTPBadRequestException if the request was not parsed correctly
   *     into events because it was not in the expected format
   * @throws Exception if there was an unexpected error
   */
  List<Event> getEvents(HttpServletRequest request)
      throws HTTPBadRequestException, Exception;
}
HTTPSourceHandler 的默认实现类是JSONHandler
package org.apache.flume.source.http;

import com.google.gson.Gson;
import com.google.gson.GsonBuilder;
import com.google.gson.JsonSyntaxException;
import com.google.gson.reflect.TypeToken;
import java.io.BufferedReader;
import java.lang.reflect.Type;
import java.nio.charset.UnsupportedCharsetException;
import java.util.ArrayList;
import java.util.List;
import javax.servlet.http.HttpServletRequest;
import org.apache.flume.Context;
import org.apache.flume.Event;
import org.apache.flume.event.EventBuilder;
import org.apache.flume.event.JSONEvent;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
 *
 * JSONHandler for HTTPSource that accepts an array of events.
 *
 * This handler throws exception if the deserialization fails because of bad
 * format or any other reason.
 *
 *
 * Each event must be encoded as a map with two key-value pairs. <p> 1. headers
 * - the key for this key-value pair is "headers". The value for this key is
 * another map, which represent the event headers. These headers are inserted
 * into the Flume event as is. <p> 2. body - The body is a string which
 * represents the body of the event. The key for this key-value pair is "body".
 * All key-value pairs are considered to be headers. An example: <p> [{"headers"
 * : {"a":"b", "c":"d"},"body": "random_body"}, {"headers" : {"e": "f"},"body":
 * "random_body2"}] <p> would be interpreted as the following two flume events:
 * <p> * Event with body: "random_body" (in UTF-8/UTF-16/UTF-32 encoded bytes)
 * and headers : (a:b, c:d) <p> *
 * Event with body: "random_body2" (in UTF-8/UTF-16/UTF-32 encoded bytes) and
 * headers : (e:f) <p>
 *
 * The charset of the body is read from the request and used. If no charset is
 * set in the request, then the charset is assumed to be JSON's default - UTF-8.
 * The JSON handler supports UTF-8, UTF-16 and UTF-32.
 *
 * To set the charset, the request must have content type specified as
 * "application/json; charset=UTF-8" (replace UTF-8 with UTF-16 or UTF-32 as
 * required).
 *
 * One way to create an event in the format expected by this handler, is to
 * use {@linkplain JSONEvent} and use {@linkplain Gson} to create the JSON
 * string using the
 * {@linkplain Gson#toJson(java.lang.Object, java.lang.reflect.Type) }
 * method. The type token to pass as the 2nd argument of this method
 * for list of events can be created by: <p>
 *
 * {@code Type type = new TypeToken<List<JSONEvent>>() {}.getType(); } <p>
 *
 */
public class JSONHandler implements HTTPSourceHandler {

  private static final Logger LOG = LoggerFactory.getLogger(JSONHandler.class);

  /** Target type for deserialization: a list of {@link JSONEvent}. */
  private final Type listType =
          new TypeToken<List<JSONEvent>>() {
          }.getType();
  private final Gson gson;

  public JSONHandler() {
    gson = new GsonBuilder().disableHtmlEscaping().create();
  }

  /**
   * Parses the request body as a JSON array of events.
   *
   * @param request the request whose body holds the JSON array
   * @return one event per array element, built from its body and headers
   * @throws HTTPBadRequestException if the body is empty, is not valid JSON,
   *     or contains a null array element
   * @throws UnsupportedCharsetException if the request declares a charset
   *     other than UTF-8, UTF-16 or UTF-32
   * @throws Exception if any other error occurs while reading the request
   */
  @Override
  public List<Event> getEvents(HttpServletRequest request) throws Exception {
    BufferedReader reader = request.getReader();
    String charset = request.getCharacterEncoding();
    //UTF-8 is default for JSON. If no charset is specified, UTF-8 is to
    //be assumed.
    if (charset == null) {
      LOG.debug("Charset is null, default charset of UTF-8 will be used.");
      charset = "UTF-8";
    } else if (!(charset.equalsIgnoreCase("utf-8")
            || charset.equalsIgnoreCase("utf-16")
            || charset.equalsIgnoreCase("utf-32"))) {
      LOG.error("Unsupported character set in request {}. "
              + "JSON handler supports UTF-8, "
              + "UTF-16 and UTF-32 only.", charset);
      throw new UnsupportedCharsetException("JSON handler supports UTF-8, "
              + "UTF-16 and UTF-32 only.");
    }

    List<Event> eventList;
    try {
      eventList = gson.fromJson(reader, listType);
    } catch (JsonSyntaxException ex) {
      throw new HTTPBadRequestException("Request has invalid JSON Syntax.", ex);
    }

    // Gson returns null (it does not throw) when the reader is already at
    // end of input, i.e. the request body is empty. Report that as a client
    // error instead of failing below with a NullPointerException.
    if (eventList == null) {
      throw new HTTPBadRequestException(
              "Request body is empty. A JSON array of events is expected.");
    }

    for (Event e : eventList) {
      // A JSON null inside the array deserializes to a null element.
      if (e == null) {
        throw new HTTPBadRequestException(
                "Request contains a null event. Each event must be a JSON object.");
      }
      ((JSONEvent) e).setCharset(charset);
    }
    return getSimpleEvents(eventList);
  }

  /** This handler has no configuration of its own. */
  @Override
  public void configure(Context context) {
  }

  /**
   * Copies body and headers of each given event into a new event created by
   * {@link EventBuilder}.
   */
  private List<Event> getSimpleEvents(List<Event> events) {
    List<Event> newEvents = new ArrayList<Event>(events.size());
    for(Event e:events) {
      newEvents.add(EventBuilder.withBody(e.getBody(), e.getHeaders()));
    }
    return newEvents;
  }
}
source和channel交换数据的默认类型是Event接口
package org.apache.flume;

import java.util.Map;

/**
 * The basic unit of data in Flume: a raw byte-array body plus a map of
 * string headers. Gives access to the data as it flows through the system.
 */
public interface Event {

  /**
   * Gets the headers of this event.
   *
   * @return a map of name-value pairs describing the data stored in the body
   */
  Map<String, String> getHeaders();

  /**
   * Replaces the headers of this event.
   *
   * @param headers the map of headers that replaces the current headers
   */
  void setHeaders(Map<String, String> headers);

  /**
   * Gets the body of this event.
   *
   * @return the raw byte array of the data contained in this event
   */
  byte[] getBody();

  /**
   * Sets the body of this event.
   *
   * @param body the raw byte array of the data
   */
  void setBody(byte[] body);
}
HTTPSource默认的Event实现类是JSONEvent
package org.apache.flume.event;

import java.io.UnsupportedEncodingException;
import java.util.Map;
import org.apache.flume.Event;
import org.apache.flume.FlumeException;

/**
 * An {@link Event} whose body is held as a string and converted to and from
 * bytes with a configurable charset (UTF-8 unless changed through
 * {@link #setCharset(String)}).
 */
public class JSONEvent implements Event{
  private Map<String, String> headers;
  private String body;
  /** Name of the charset used to convert the body; not part of the serialized form. */
  private transient String charset = "UTF-8";

  @Override
  public Map<String, String> getHeaders() {
    return headers;
  }

  @Override
  public void setHeaders(Map<String, String> headers) {
    this.headers = headers;
  }

  /**
   * Returns the body encoded with the current charset.
   *
   * @return the encoded body, or an empty array when no body is set
   * @throws FlumeException if the current charset is not supported
   */
  @Override
  public byte[] getBody() {
    if(body != null) {
      try {
        return body.getBytes(charset);
      } catch (UnsupportedEncodingException ex) {
        throw new FlumeException(String.format("%s encoding not supported", charset), ex);
      }
    } else {
      return new byte[0];
    }

  }

  /**
   * Stores the given bytes as the body, decoding them with the current
   * charset.
   *
   * <p>The same charset is used here and in {@link #getBody()}; decoding
   * with the platform default charset instead (as {@code new String(byte[])}
   * does) could corrupt the body whenever the two differ.
   *
   * @param body the bytes of the body; {@code null} results in an empty body
   * @throws FlumeException if the current charset is not supported
   */
  @Override
  public void setBody(byte[] body) {
    if(body != null) {
      try {
        this.body = new String(body, charset);
      } catch (UnsupportedEncodingException ex) {
        throw new FlumeException(String.format("%s encoding not supported", charset), ex);
      }
    } else {
      this.body = "";
    }
  }

  /**
   * Sets the name of the charset used to convert the body to and from bytes.
   *
   * @param charset the charset name
   */
  public void setCharset(String charset) {
    this.charset = charset;
  }

}
接口Source是所有source必须实现的接口,它继承自接口LifecycleAware和NamedComponent
package org.apache.flume;import org.apache.flume.annotations.InterfaceAudience;import org.apache.flume.annotations.InterfaceStability;import org.apache.flume.channel.ChannelProcessor;import org.apache.flume.lifecycle.LifecycleAware;/** * <p> * A source generates {@plainlink Event events} and calls methods on the * configured {@link ChannelProcessor} to persist those events into the * configured {@linkplain Channel channels}. * </p> * * <p> * Sources are associated with unique {@linkplain NamedComponent names} that can * be used for separating configuration and working namespaces. * </p> * * <p> * No guarantees are given regarding thread safe access. * </p> * * @see org.apache.flume.Channel * @see org.apache.flume.Sink */@InterfaceAudience.Public@InterfaceStability.Stablepublic interface Source extends LifecycleAware, NamedComponent { /** * Specifies which channel processor will handle this source's events. * * @param channelProcessor */ public void setChannelProcessor(ChannelProcessor channelProcessor); /** * Returns the channel processor that will handle this source's events. */ public ChannelProcessor getChannelProcessor();}
使用httpsource可以很方便的满足客户监听日志的需求,只需要实现接口HTTPSourceHandler即可,所以如果想快速开发就可以使用httpsource来完成日常的开发!
0 0
- flume组件HTTPSource解析
- flume使用之httpSource
- Flume 小记(二) HttpSource 存入Hive
- Flume 小记(三) HttpSource 存入本地
- Flume 源码解析:组件生命周期
- Flume-ng源码解析之Channel组件
- Flume-ng源码解析之Source组件
- Flume-ng源码解析之Channel组件
- Flume-ng源码解析之Source组件
- Flume-ng源码解析之Channel组件
- Flume-ng源码解析之Channel组件
- Flume-ng源码解析之Channel组件
- Flume-ng源码解析之Source组件
- Flume-ng源码解析之Channel组件
- Flume-ng源码解析之Source组件
- Flume-ng源码解析之Channel组件
- Flume-ng源码解析之Channel组件
- Flume-ng源码解析之Source组件
- 找7
- HDU-2844-Coins(多重背包 二进制优化)
- public int data { get; set; } Console.WriteLine("{0}",obj.data);
- 小程序开发参考连接
- Java IO流
- flume组件HTTPSource解析
- HDU ACM 11 2034 人见人恨的A-B(此题题目有问题,不要纠结代码为何可以AC)
- SeetaFace开源人脸识别引擎介绍
- cocos2d-js 获取网络图片cc.loader.loadImg
- C/C++求职宝典21个重点笔记(常考笔试面试点)
- oracle数据库使用过程中问题汇总
- Ambari2.1安装HDP2.3
- 如何理解Hibernate中的HibernateSessionFactory类
- Unity定时器Timer类