flume-ng配置文件详解(三)

来源:互联网 发布:网络推广代理平台 编辑:程序博客网 时间:2024/06/06 04:28

8 Replicating Channel Selector


flume中agent1的一个source将同一个event同时发送到多个channel,称为数据的复制

关键配置:agent1.sources.r1.selector.type = replicating


在192.168.100.200节点上

[root@localhost apache-flume-1.7.0-bin]# vi conf/replicate.conf

agent1.sources = r1
agent1.sinks = k1 k2
agent1.channels = c1 c2
   
agent1.sources.r1.type = syslogtcp
agent1.sources.r1.port = 50000
agent1.sources.r1.host = 192.168.100.200
agent1.sources.r1.selector.type = replicating
agent1.sources.r1.channels = c1 c2
   
agent1.sinks.k1.type = avro
agent1.sinks.k1.channel = c1
agent1.sinks.k1.hostname = 192.168.100.201
agent1.sinks.k1.port = 50000
   
agent1.sinks.k2.type = avro
agent1.sinks.k2.channel = c2
agent1.sinks.k2.hostname = 192.168.100.202
agent1.sinks.k2.port = 50000

agent1.channels.c1.type = memory
agent1.channels.c1.capacity = 1000
agent1.channels.c1.transactionCapacity = 100
   
agent1.channels.c2.type = memory
agent1.channels.c2.capacity = 1000
agent1.channels.c2.transactionCapacity = 100


在192.168.100.201节点上

[root@localhost apache-flume-1.7.0-bin]# vi conf/replicate.conf

agent2.sources = r1
agent2.sinks = k1
agent2.channels = c1
   
agent2.sources.r1.type = avro
agent2.sources.r1.channels = c1
agent2.sources.r1.bind = 192.168.100.201
agent2.sources.r1.port = 50000
   
agent2.sinks.k1.type = logger
agent2.sinks.k1.channel = c1
   
agent2.channels.c1.type = memory
agent2.channels.c1.capacity = 1000
agent2.channels.c1.transactionCapacity = 100


在192.168.100.202节点上

[root@localhost apache-flume-1.7.0-bin]# vi conf/replicate.conf

agent3.sources = r1
agent3.sinks = k1
agent3.channels = c1
   
agent3.sources.r1.type = avro
agent3.sources.r1.channels = c1
agent3.sources.r1.bind = 192.168.100.202
agent3.sources.r1.port = 50000
   
agent3.sinks.k1.type = logger
agent3.sinks.k1.channel = c1
   
agent3.channels.c1.type = memory
agent3.channels.c1.capacity = 1000
agent3.channels.c1.transactionCapacity = 100


启动

先启动201和202上的agent,最后启动200上的agent1

[root@localhost apache-flume-1.7.0-bin]# bin/flume-ng agent --conf conf --conf-file conf/replicate.conf --name agent2 -Dfilume.root.loggger=INFO,console

[root@localhost apache-flume-1.7.0-bin]# bin/flume-ng agent --conf conf --conf-file conf/replicate.conf --name agent3 -Dfilume.root.loggger=INFO,console

[root@localhost apache-flume-1.7.0-bin]#  bin/flume-ng agent --conf conf --conf-file conf/replicate.conf --name agent1 -Dfilume.root.loggger=INFO,console


在200节点上发送消息

[root@localhost ~]# echo "hello flume"| nc 192.168.100.200 50000

在201和202上查看日志文件

[root@localhost apache-flume-1.7.0-bin]# vi logs/flume.log

22 Nov 2016 02:20:45,549 INFO  [SinkRunner-PollingRunner-DefaultSinkProcessor] (org.apache.flume.sink.LoggerSink.process:95)  - Event: { headers:{Severity=0, flume.syslog.status=Invalid, Facility=0} body: 68 65 6C 6C 6F 20 66 6C 75 6D 65                hello flume }


9 Multiplexing Channel Selector


flume根据event头部(header)中的属性值进行筛选,将event分发到不同的channel

关键配置:agent1.sources.r1.selector.type = multiplexing 


在192.168.100.200节点上

[root@localhost apache-flume-1.7.0-bin]# vi conf/multi.conf

agent1.sources= r1
agent1.sinks= k1 k2
agent1.channels= c1 c2
   
agent1.sources.r1.type= org.apache.flume.source.http.HTTPSource
agent1.sources.r1.port= 50000
agent1.sources.r1.host= 192.168.100.200
agent1.sources.r1.selector.type= multiplexing
agent1.sources.r1.channels= c1 c2
   
agent1.sources.r1.selector.header= state
agent1.sources.r1.selector.mapping.ZH= c1
agent1.sources.r1.selector.mapping.US= c2
agent1.sources.r1.selector.default= c1
   
agent1.sinks.k1.type= avro
agent1.sinks.k1.channel= c1
agent1.sinks.k1.hostname= 192.168.100.201
agent1.sinks.k1.port= 50000
   
agent1.sinks.k2.type= avro
agent1.sinks.k2.channel= c2
agent1.sinks.k2.hostname= 192.168.100.202
agent1.sinks.k2.port= 50000

agent1.channels.c1.type= memory
agent1.channels.c1.capacity= 1000
agent1.channels.c1.transactionCapacity= 100
   
agent1.channels.c2.type= memory
agent1.channels.c2.capacity= 1000
agent1.channels.c2.transactionCapacity= 100


这里配置了2个channel和2个sink:当http请求中event头部的state属性值为ZH时,event进入c1,由sink k1发送;当state属性值为US时,event进入c2,由sink k2发送;其它情况都进入默认的c1,由sink k1发送


201和202节点上沿用上个案例配置

启动

先启动201和202上的agent,最后启动200上的agent1

[root@localhost apache-flume-1.7.0-bin]# bin/flume-ng agent --conf conf --conf-file conf/replicate.conf --name agent2 -Dfilume.root.loggger=INFO,console

[root@localhost apache-flume-1.7.0-bin]# bin/flume-ng agent --conf conf --conf-file conf/replicate.conf --name agent3 -Dfilume.root.loggger=INFO,console

[root@localhost apache-flume-1.7.0-bin]#  bin/flume-ng agent --conf conf --conf-file conf/multi.conf --name agent1 -Dfilume.root.loggger=INFO,console


在200节点上发送消息

[root@localhost ~]# curl -X POST -d '[{"headers" :{"state" : "ZH"},"body" :"CHINA"}]' http://192.168.100.200:50000

[root@localhost ~]# curl -X POST -d '[{"headers" :{"state" : "US"},"body" :"US"}]' http://192.168.100.200:50000

[root@localhost ~]# curl -X POST -d '[{"headers" :{"state" : "EU"},"body" :"EU"}]' http://192.168.100.200:50000


在201上查看日志文件

[root@localhost apache-flume-1.7.0-bin]# vi logs/flume.log

22 Nov 2016 02:46:16,049 INFO  [SinkRunner-PollingRunner-DefaultSinkProcessor] (org.apache.flume.sink.LoggerSink.process:95)  - Event: { headers:{state=ZH} body: 43 48 49 4E 41                                  CHINA }

22 Nov 2016 02:47:12,319 INFO  [SinkRunner-PollingRunner-DefaultSinkProcessor] (org.apache.flume.sink.LoggerSink.process:95)  - Event: { headers:{state=EU} body: 45 55                                           EU }


在202上查看日志文件

[root@localhost apache-flume-1.7.0-bin]# vi logs/flume.log 

22 Nov 2016 02:46:58,000 INFO  [SinkRunner-PollingRunner-DefaultSinkProcessor] (org.apache.flume.sink.LoggerSink.process:95)  - Event: { headers:{state=US} body: 55 53                                           US }


10 Flume Sink Failover


failover是当一个sink不可用的时候,自动发送到下一个sink


在192.168.100.200节点上

[root@localhost apache-flume-1.7.0-bin]# vi conf/failover.conf

agent1.sources = r1
agent1.sinks = k1 k2
agent1.channels = c1 c2


agent1.sinkgroups = g1
agent1.sinkgroups.g1.sinks = k1 k2

agent1.sinkgroups.g1.processor.type = failover

agent1.sinkgroups.g1.processor.priority.k1 = 5
agent1.sinkgroups.g1.processor.priority.k2 = 10
agent1.sinkgroups.g1.processor.maxpenalty = 10000
 
agent1.sources.r1.type = syslogtcp
agent1.sources.r1.host = 192.168.100.200
agent1.sources.r1.port = 5000
agent1.sources.r1.channels = c1 c2
agent1.sources.r1.selector.type = replicating
 
agent1.sinks.k1.type = avro
agent1.sinks.k1.channel = c1
agent1.sinks.k1.hostname = 192.168.100.201
agent1.sinks.k1.port = 5000
 
agent1.sinks.k2.type = avro
agent1.sinks.k2.channel = c2
agent1.sinks.k2.hostname = 192.168.100.202
agent1.sinks.k2.port = 5000
 
agent1.channels.c1.type = memory
agent1.channels.c1.capacity = 1000
agent1.channels.c1.transactionCapacity = 100
 
agent1.channels.c2.type = memory
agent1.channels.c2.capacity = 1000
agent1.channels.c2.transactionCapacity = 100


配置了一个sink group,并且将sink group中的每一个sink安排优先级


在192.168.100.201节点上

[root@localhost apache-flume-1.7.0-bin]# vi conf/avro.conf

agent2.sources = r1
agent2.sinks = k1
agent2.channels = c1
 
agent2.sources.r1.type = avro
agent2.sources.r1.channels = c1
agent2.sources.r1.bind = 192.168.100.201
agent2.sources.r1.port = 5000
 
agent2.sinks.k1.type = logger
 
agent2.channels.c1.type = memory
agent2.channels.c1.capacity = 1000
agent2.channels.c1.transactionCapacity = 100
 
agent2.sources.r1.channels = c1
agent2.sinks.k1.channel = c1


在192.168.100.202节点上

[root@localhost apache-flume-1.7.0-bin]# vi conf/avro.conf

agent3.sources = r1
agent3.sinks = k1
agent3.channels = c1
 
agent3.sources.r1.type = avro
agent3.sources.r1.channels = c1
agent3.sources.r1.bind = 192.168.100.202
agent3.sources.r1.port = 5000
 
agent3.sinks.k1.type = logger
 
agent3.channels.c1.type = memory
agent3.channels.c1.capacity = 1000
agent3.channels.c1.transactionCapacity = 100
 
agent3.sources.r1.channels = c1
agent3.sinks.k1.channel = c1


启动

[root@localhost apache-flume-1.7.0-bin]# bin/flume-ng agent --conf conf --conf-file conf/avro.conf --name agent2 -Dfilume.root.loggger=INFO,console

[root@localhost apache-flume-1.7.0-bin]# bin/flume-ng agent --conf conf --conf-file conf/avro.conf --name agent3 -Dfilume.root.loggger=INFO,console

[root@localhost apache-flume-1.7.0-bin]#  bin/flume-ng agent --conf conf --conf-file conf/failover.conf --name agent1 -Dfilume.root.loggger=INFO,console


[root@localhost ~]# echo "flume failover" | nc 192.168.100.200 5000

在192.168.100.202上查看日志文件(k2的优先级为10,高于k1的5,所以event优先发送到202)

[root@localhost apache-flume-1.7.0-bin]# vi logs/flume.log

22 Nov 2016 03:09:53,693 INFO  [SinkRunner-PollingRunner-DefaultSinkProcessor] (org.apache.flume.sink.LoggerSink.process:95)  - Event: { headers:{Severity=0, flume.syslog.status=Invalid, Facility=0} body: 66 6C 75 6D 65 20 66 61 69 6C 6F 76 65 72       flume failover }

将202关闭

[root@localhost ~]# echo "flume failover1" | nc 192.168.100.200 5000

在192.168.100.201上查看日志文件(202关闭后,event自动切换到201)

[root@localhost apache-flume-1.7.0-bin]# vi logs/flume.log

22 Nov 2016 03:13:49,251 INFO  [SinkRunner-PollingRunner-DefaultSinkProcessor] (org.apache.flume.sink.LoggerSink.process:95)  - Event: { headers:{Severity=0, flume.syslog.status=Invalid, Facility=0} body: 66 6C 75 6D 65 20 66 61 69 6C 6F 76 65 72 31    flume failover1 }


11 Load balancing Sink


load balance和failover不同的是:它不按优先级固定选择某一个sink,而是按选择策略把event分摊到sink group中的各个sink上;有两种选择策略:轮询(round_robin)和随机(random),当选择的sink不可用的时候,自动选用下一个sink

在192.168.100.200上

[root@localhost apache-flume-1.7.0-bin]# vi conf/loadbalance.conf

agent1.sources = r1
agent1.sinks = k1 k2
agent1.channels = c1
 
#这个是配置Load balancing的关键,需要有一个sink group
agent1.sinkgroups = g1
agent1.sinkgroups.g1.sinks = k1 k2
agent1.sinkgroups.g1.processor.type = load_balance
agent1.sinkgroups.g1.processor.backoff = true
agent1.sinkgroups.g1.processor.selector = round_robin
 
agent1.sources.r1.type = syslogtcp
agent1.sources.r1.host = 192.168.100.200
agent1.sources.r1.port = 5000
agent1.sources.r1.channels = c1
 
 
agent1.sinks.k1.type = avro
agent1.sinks.k1.channel = c1
agent1.sinks.k1.hostname = 192.168.100.201
agent1.sinks.k1.port = 5000
 
agent1.sinks.k2.type = avro
agent1.sinks.k2.channel = c1
agent1.sinks.k2.hostname = 192.168.100.202
agent1.sinks.k2.port = 5000
 
agent1.channels.c1.type = memory
agent1.channels.c1.capacity = 1000
agent1.channels.c1.transactionCapacity = 100


201和202上沿用上文avro.conf

启动

[root@localhost apache-flume-1.7.0-bin]# bin/flume-ng agent --conf conf --conf-file conf/avro.conf --name agent2 -Dfilume.root.loggger=INFO,console

[root@localhost apache-flume-1.7.0-bin]# bin/flume-ng agent --conf conf --conf-file conf/avro.conf --name agent3 -Dfilume.root.loggger=INFO,console

[root@localhost apache-flume-1.7.0-bin]#  bin/flume-ng agent --conf conf --conf-file conf/loadbalance.conf --name agent1 -Dfilume.root.loggger=INFO,console


[root@localhost ~]# echo "flume test1" | nc 192.168.100.200 5000

[root@localhost ~]# echo "flume test2" | nc 192.168.100.200 5000

[root@localhost ~]# echo "flume test3" | nc 192.168.100.200 5000

[root@localhost ~]# echo "flume test4" | nc 192.168.100.200 5000


在201和202上查看日志文件中的数据

各有2条数据

0 0