flume文件名interceptor
来源:互联网 发布:水泥胶砂强度试验数据 编辑:程序博客网 时间:2024/06/05 04:39
从文件名提取日期、小时信息,决定数据发送到hdfs哪天哪小时的分区目录。
需要自定义一个拦截器
package interceptor;import java.util.List; import java.util.Map; import java.util.regex.Matcher; import java.util.regex.Pattern; import org.apache.commons.lang.StringUtils; import org.apache.flume.Context; import org.apache.flume.Event;import org.apache.flume.interceptor.Interceptor;import org.apache.flume.interceptor.RegexExtractorInterceptorPassThroughSerializer; import org.apache.flume.interceptor.RegexExtractorInterceptorSerializer; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import com.google.common.base.Charsets; import com.google.common.base.Preconditions; import com.google.common.base.Throwables; import com.google.common.collect.Lists; /** * Interceptor that extracts matches using a specified regular expression and * appends the matches to the event headers using the specified serializers</p> * Note that all regular expression matching occurs through Java's built in * java.util.regex package</p>. Properties: * <p> * regex: The regex to use * <p> * serializers: Specifies the group the serializer will be applied to, and the * name of the header that will be added. 
If no serializer is specified for a * group the default {@link RegexExtractorInterceptorPassThroughSerializer} will * be used * <p> * Sample config: * <p> * agent.sources.r1.channels = c1 * <p> * agent.sources.r1.type = SEQ * <p> * agent.sources.r1.interceptors = i1 * <p> * agent.sources.r1.interceptors.i1.type = REGEX_EXTRACTOR * <p> * agent.sources.r1.interceptors.i1.regex = (WARNING)|(ERROR)|(FATAL) * <p> * agent.sources.r1.interceptors.i1.serializers = s1 s2 * agent.sources.r1.interceptors.i1.serializers.s1.type = com.blah.SomeSerializer * agent.sources.r1.interceptors.i1.serializers.s1.name = warning * agent.sources.r1.interceptors.i1.serializers.s2.type = org.apache.flume.interceptor.RegexExtractorInterceptorTimestampSerializer * agent.sources.r1.interceptors.i1.serializers.s2.name = error * agent.sources.r1.interceptors.i1.serializers.s2.dateFormat = yyyy-MM-dd * </code> * </p> * <pre> * Example 1: * </p> * EventBody: 1:2:3.4foobar5</p> Configuration: * agent.sources.r1.interceptors.i1.regex = (\\d):(\\d):(\\d) * </p> * agent.sources.r1.interceptors.i1.serializers = s1 s2 s3 * agent.sources.r1.interceptors.i1.serializers.s1.name = one * agent.sources.r1.interceptors.i1.serializers.s2.name = two * agent.sources.r1.interceptors.i1.serializers.s3.name = three * </p> * results in an event with the the following * * body: 1:2:3.4foobar5 headers: one=>1, two=>2, three=3 * * Example 2: * * EventBody: 1:2:3.4foobar5 * * Configuration: agent.sources.r1.interceptors.i1.regex = (\\d):(\\d):(\\d) * <p> * agent.sources.r1.interceptors.i1.serializers = s1 s2 * agent.sources.r1.interceptors.i1.serializers.s1.name = one * agent.sources.r1.interceptors.i1.serializers.s2.name = two * <p> * * results in an event with the the following * * body: 1:2:3.4foobar5 headers: one=>1, two=>2 * </pre> */public class RegexExtractorExtInterceptor implements Interceptor { static final String REGEX = "regex"; static final String SERIALIZERS = "serializers"; // 增加代码开始 static final String 
EXTRACTOR_HEADER = "extractorHeader"; static final boolean DEFAULT_EXTRACTOR_HEADER = false; static final String EXTRACTOR_HEADER_KEY = "extractorHeaderKey"; // 增加代码结束 private static final Logger logger = LoggerFactory .getLogger(RegexExtractorExtInterceptor.class); private final Pattern regex; private final List<NameAndSerializer> serializers; // 增加代码开始 private final boolean extractorHeader; private final String extractorHeaderKey; // 增加代码结束 private RegexExtractorExtInterceptor(Pattern regex, List<NameAndSerializer> serializers, boolean extractorHeader, String extractorHeaderKey) { this.regex = regex; this.serializers = serializers; this.extractorHeader = extractorHeader; this.extractorHeaderKey = extractorHeaderKey; } @Override public void initialize() { // NO-OP... } @Override public void close() { // NO-OP... } @Override public Event intercept(Event event) { String tmpStr; if(extractorHeader) { tmpStr = event.getHeaders().get(extractorHeaderKey); } else { tmpStr=new String(event.getBody(), Charsets.UTF_8); } Matcher matcher = regex.matcher(tmpStr); Map<String, String> headers = event.getHeaders(); if (matcher.find()) { for (int group = 0, count = matcher.groupCount(); group < count; group++) { int groupIndex = group + 1; if (groupIndex > serializers.size()) { if (logger.isDebugEnabled()) { logger.debug( "Skipping group {} to {} due to missing serializer", group, count); } break; } NameAndSerializer serializer = serializers.get(group); if (logger.isDebugEnabled()) { logger.debug("Serializing {} using {}", serializer.headerName, serializer.serializer); } headers.put(serializer.headerName, serializer.serializer .serialize(matcher.group(groupIndex))); } } return event; } @Override public List<Event> intercept(List<Event> events) { List<Event> intercepted = Lists.newArrayListWithCapacity(events.size()); for (Event event : events) { Event interceptedEvent = intercept(event); if (interceptedEvent != null) { intercepted.add(interceptedEvent); } } return intercepted; } 
public static class Builder implements Interceptor.Builder { private Pattern regex; private List<NameAndSerializer> serializerList; // 增加代码开始 private boolean extractorHeader; private String extractorHeaderKey; // 增加代码结束 private final RegexExtractorInterceptorSerializer defaultSerializer = new RegexExtractorInterceptorPassThroughSerializer(); @Override public void configure(Context context) { String regexString = context.getString(REGEX); Preconditions.checkArgument(!StringUtils.isEmpty(regexString), "Must supply a valid regex string"); regex = Pattern.compile(regexString); regex.pattern(); regex.matcher("").groupCount(); configureSerializers(context); // 增加代码开始 extractorHeader = context.getBoolean(EXTRACTOR_HEADER, DEFAULT_EXTRACTOR_HEADER); if (extractorHeader) { extractorHeaderKey = context.getString(EXTRACTOR_HEADER_KEY); Preconditions.checkArgument( !StringUtils.isEmpty(extractorHeaderKey), "必须指定要抽取内容的header key"); } // 增加代码结束 } private void configureSerializers(Context context) { String serializerListStr = context.getString(SERIALIZERS); Preconditions.checkArgument( !StringUtils.isEmpty(serializerListStr), "Must supply at least one name and serializer"); String[] serializerNames = serializerListStr.split("\\s+"); Context serializerContexts = new Context( context.getSubProperties(SERIALIZERS + ".")); serializerList = Lists .newArrayListWithCapacity(serializerNames.length); for (String serializerName : serializerNames) { Context serializerContext = new Context( serializerContexts.getSubProperties(serializerName + ".")); String type = serializerContext.getString("type", "DEFAULT"); String name = serializerContext.getString("name"); Preconditions.checkArgument(!StringUtils.isEmpty(name), "Supplied name cannot be empty."); if ("DEFAULT".equals(type)) { serializerList.add(new NameAndSerializer(name, defaultSerializer)); } else { serializerList.add(new NameAndSerializer(name, getCustomSerializer(type, serializerContext))); } } } private 
RegexExtractorInterceptorSerializer getCustomSerializer( String clazzName, Context context) { try { RegexExtractorInterceptorSerializer serializer = (RegexExtractorInterceptorSerializer) Class .forName(clazzName).newInstance(); serializer.configure(context); return serializer; } catch (Exception e) { logger.error("Could not instantiate event serializer.", e); Throwables.propagate(e); } return defaultSerializer; } @Override public Interceptor build() { Preconditions.checkArgument(regex != null, "Regex pattern was misconfigured"); Preconditions.checkArgument(serializerList.size() > 0, "Must supply a valid group match id list"); return new RegexExtractorExtInterceptor(regex, serializerList, extractorHeader, extractorHeaderKey); } } static class NameAndSerializer { private final String headerName; private final RegexExtractorInterceptorSerializer serializer; public NameAndSerializer(String headerName, RegexExtractorInterceptorSerializer serializer) { this.headerName = headerName; this.serializer = serializer; } } }项目的pom.xml文件
<?xml version="1.0" encoding="UTF-8"?><project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd"> <modelVersion>4.0.0</modelVersion> <groupId>com.flume-dev</groupId> <artifactId>com.flume-dev</artifactId> <name>com.flume-dev</name> <version>1.0.0</version> <build> <plugins> <plugin> <groupId>org.apache.maven.plugins</groupId> <artifactId>maven-jar-plugin</artifactId> </plugin> </plugins> </build> <dependencies> <dependency> <groupId>org.apache.flume</groupId> <artifactId>flume-ng-sdk</artifactId> <version>1.5.0</version> </dependency> <dependency> <groupId>org.apache.flume</groupId> <artifactId>flume-ng-core</artifactId> <version>1.5.0</version> </dependency> <dependency> <groupId>org.apache.flume</groupId> <artifactId>flume-ng-configuration</artifactId> <version>1.5.0</version> </dependency> <dependency> <groupId>org.slf4j</groupId> <artifactId>slf4j-api</artifactId> <version>1.6.1</version> </dependency> <dependency> <groupId>junit</groupId> <artifactId>junit</artifactId> <version>4.10</version> <scope>test</scope> </dependency> </dependencies></project>
创建flume的插件存放目录
cd $FLUME_HOME
mkdir plugins.d
mkdir plugins.d/RegexExtractorExtInterceptor
cd plugins.d/RegexExtractorExtInterceptor
mkdir lib libext native
然后将自己的jar包扔进lib目录,写flume数据流配置文件
fileheader.properties
agent-1.channels.ch-1.type = file
agent-1.channels.ch-1.checkpointDir= /root/temp/fileheader/checkpoint
agent-1.channels.ch-1.dataDirs= /root/temp/fileheader/data
agent-1.sources.src-1.type = spooldir
agent-1.sources.src-1.channels = ch-1
agent-1.sources.src-1.spoolDir = /root/test
agent-1.sources.src-1.deletePolicy= never
agent-1.sources.src-1.fileHeader = true
agent-1.sources.src-1.basenameHeader = true
agent-1.sources.src-1.interceptors =i1 hosti
agent-1.sources.src-1.interceptors.i1.type = interceptor.RegexExtractorExtInterceptor$Builder
agent-1.sources.src-1.interceptors.i1.regex=(.*)\\.(.*)\\.(.*)
agent-1.sources.src-1.interceptors.i1.extractorHeader=true
agent-1.sources.src-1.interceptors.i1.extractorHeaderKey=basename
agent-1.sources.src-1.interceptors.i1.serializers=s1 s2 s3
agent-1.sources.src-1.interceptors.i1.serializers.s1.name=one
agent-1.sources.src-1.interceptors.i1.serializers.s2.name=two
agent-1.sources.src-1.interceptors.i1.serializers.s3.name=three
agent-1.sources.src-1.interceptors.hosti.type = host
agent-1.sources.src-1.interceptors.hosti.useIP=false
agent-1.sinks.sink_hdfs.channel = ch-1
agent-1.sinks.sink_hdfs.type = hdfs
agent-1.sinks.sink_hdfs.hdfs.path = hdfs://xxx:port/tmp/events110/fileheader/%{three}
agent-1.sinks.sink_hdfs.hdfs.filePrefix = logs.%{host}
agent-1.sinks.sink_hdfs.hdfs.inUsePrefix = .
agent-1.sinks.sink_hdfs.hdfs.rollInterval = 30
agent-1.sinks.sink_hdfs.hdfs.rollSize = 0
agent-1.sinks.sink_hdfs.hdfs.rollCount = 0
agent-1.sinks.sink_hdfs.hdfs.batchSize = 100
agent-1.sinks.sink_hdfs.hdfs.writeFormat = text
agent-1.sinks.sink_hdfs.hdfs.fileType = DataStream
#agent-1.sinks.sink_hdfs.hdfs.fileType = CompressedStream
#agent-1.sinks.sink_hdfs.hdfs.codeC = lzop
agent-1.channels = ch-1
agent-1.sources = src-1
agent-1.sinks = sink_hdfs
如文件名为data.log.20151111 ,则写入分区20151111
最后执行bin/flume-ng agent -c /usr/local/flume/conf -f /usr/local/flume/conf/fileheader.properties -n agent-1 -Dflume.root.logger=INFO,console
参考 http://blog.csdn.net/xiao_jun_0820/article/details/38333171
0 0
- flume文件名interceptor
- flume 拦截器(interceptor)
- Flume NG之Interceptor简介
- Flume拦截器(Interceptor)
- flume学习(十):使用Morphline Interceptor
- flume学习(十):使用Morphline Interceptor
- flume自定义interceptor和hbase sink
- flume学习(十):使用Morphline Interceptor
- flume学习(九):使用Morphline Interceptor
- flume开发-自定义拦截器(Interceptor)
- Flume-NG源码阅读之Interceptor
- flume开发-自定义拦截器(Interceptor)
- Flume-NG源码阅读之Interceptor
- flume开发-自定义拦截器(Interceptor)
- 自定义flume 拦截器(interceptor)
- Flume中的拦截器(Interceptor)介绍与使用
- Flume自定义Source、Sink和Interceptor(简单功能实现)
- interceptor
- android自带图片资源图标一览,android.R.drawable
- dns 服务不稳定,解析有问题时,解决办法
- echarts 2.2.7部署
- 查看当前系统的glibc版本
- linux下的expr命令(shell的算数运算问题)
- flume文件名interceptor
- 小数在计算机中的存储形式
- 关于 多进程epoll 与 “惊群”问题
- Java RandomAccessFile用法
- symfony安装使用
- CocoaPods 安装和应用
- PHPcms v9分栏目搜索功能记录
- Qt Creator的下载、安装及试用
- 冷门实用的定律(一):登门槛效应