Integrating Flume-NG with HDFS and Kafka


Flume version: apache-flume-1.7.0-bin.tar
Hadoop version: hadoop-2.7.3
Kafka version: kafka_2.11-0.10.2.1
ZooKeeper version: zookeeper-3.4.6
I have recently been installing and setting up Flume and Kafka. There are plenty of introductions to each of them online; here I integrate Flume-NG with both Kafka and HDFS. Flume acts as the collection and transport layer: it lands the data in HDFS as a backup, while Kafka serves as the message broker that feeds Spark Streaming. For example, when collecting a website's logs, Flume can monitor a log file or a directory; whenever new log entries appear, Flume persists them to HDFS and also delivers them to Kafka, where the messages can be distributed, processed, and consumed for real-time computation.
For this setup I prepared five servers. To make the topology easier to follow I drew a diagram; it is not pretty, but it gets the idea across.
[Architecture diagram of the five-node deployment]
Configuration files:
flume-kafka-hdfs-client.properties

# set agent name
agent.sources = r1
agent.channels = c_kafka c_hdfs
agent.sinks = s_kafka_k1 s_kafka_k2 s_kafka_k3 s_hdfs_k1 s_hdfs_k2

# set groups
agent.sinkgroups = g_kafka g_hdfs

# set sources
agent.sources.r1.channels = c_kafka c_hdfs
agent.sources.r1.type = exec
agent.sources.r1.command = tail -F /root/logs/a.txt
agent.sources.r1.inputCharset = UTF-8

# set kafka channel
agent.channels.c_kafka.type = memory
agent.channels.c_kafka.capacity = 1000
agent.channels.c_kafka.transactionCapacity = 100

# set hdfs channel
agent.channels.c_hdfs.type = memory
agent.channels.c_hdfs.capacity = 1000
agent.channels.c_hdfs.transactionCapacity = 100

# set kafka sink1
agent.sinks.s_kafka_k1.channel = c_kafka
agent.sinks.s_kafka_k1.type = avro
agent.sinks.s_kafka_k1.hostname = 192.168.183.103
agent.sinks.s_kafka_k1.port = 52021

# set kafka sink2
agent.sinks.s_kafka_k2.channel = c_kafka
agent.sinks.s_kafka_k2.type = avro
agent.sinks.s_kafka_k2.hostname = 192.168.183.104
agent.sinks.s_kafka_k2.port = 52021

# set kafka sink3
agent.sinks.s_kafka_k3.channel = c_kafka
agent.sinks.s_kafka_k3.type = avro
agent.sinks.s_kafka_k3.hostname = 192.168.183.105
agent.sinks.s_kafka_k3.port = 52021

# set hdfs sink1
agent.sinks.s_hdfs_k1.channel = c_hdfs
agent.sinks.s_hdfs_k1.type = avro
agent.sinks.s_hdfs_k1.hostname = 192.168.183.102
agent.sinks.s_hdfs_k1.port = 52020

# set hdfs sink2
agent.sinks.s_hdfs_k2.channel = c_hdfs
agent.sinks.s_hdfs_k2.type = avro
agent.sinks.s_hdfs_k2.hostname = 192.168.183.103
agent.sinks.s_hdfs_k2.port = 52020

# set sink groups
agent.sinkgroups.g_kafka.sinks = s_kafka_k1 s_kafka_k2 s_kafka_k3
agent.sinkgroups.g_hdfs.sinks = s_hdfs_k1 s_hdfs_k2

# set failover for the kafka sink group
agent.sinkgroups.g_kafka.processor.type = failover
agent.sinkgroups.g_kafka.processor.priority.s_kafka_k1 = 1
agent.sinkgroups.g_kafka.processor.priority.s_kafka_k2 = 10
agent.sinkgroups.g_kafka.processor.priority.s_kafka_k3 = 100
agent.sinkgroups.g_kafka.processor.maxpenalty = 10000

# set failover for the hdfs sink group
agent.sinkgroups.g_hdfs.processor.type = failover
agent.sinkgroups.g_hdfs.processor.priority.s_hdfs_k1 = 1
agent.sinkgroups.g_hdfs.processor.priority.s_hdfs_k2 = 10
agent.sinkgroups.g_hdfs.processor.maxpenalty = 10000
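Note that the exec source simply runs tail -F /root/logs/a.txt, so it only picks up data once that file exists on the client nodes. Creating it up front avoids tail warnings; a minimal preparation sketch (the path is taken from the config above):

# run on node1 and node2 before starting the client agents
mkdir -p /root/logs
touch /root/logs/a.txt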

flume-hdfs-server1.properties

#set Agent name
hdfs1.sources = r1
hdfs1.channels = c1
hdfs1.sinks = k1

#set channel
hdfs1.channels.c1.type = memory
hdfs1.channels.c1.capacity = 1000
hdfs1.channels.c1.transactionCapacity = 100

# set sources
hdfs1.sources.r1.type = avro
hdfs1.sources.r1.bind = 192.168.183.102
hdfs1.sources.r1.port = 52020
hdfs1.sources.r1.channels = c1

#set sink to hdfs
hdfs1.sinks.k1.type = hdfs
hdfs1.sinks.k1.hdfs.path = hdfs://192.168.183.101:9000/flume/logs/%Y/%m/%d
hdfs1.sinks.k1.hdfs.fileType = DataStream
hdfs1.sinks.k1.hdfs.writeFormat = TEXT
hdfs1.sinks.k1.custom.encoding = UTF-8
hdfs1.sinks.k1.channel = c1
hdfs1.sinks.k1.hdfs.filePrefix = %Y-%m-%d
hdfs1.sinks.k1.hdfs.fileSuffix = .txt
hdfs1.sinks.k1.hdfs.rollInterval = 60
hdfs1.sinks.k1.hdfs.rollSize = 1024
hdfs1.sinks.k1.hdfs.rollCount = 0
hdfs1.sinks.k1.hdfs.idleTimeout = 60
hdfs1.sinks.k1.hdfs.useLocalTimeStamp = true
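The HDFS sink on both server agents rolls files into date-based directories under /flume/logs and creates the date subdirectories itself. If you want to pre-create the parent path and inspect the output later, something along these lines works (run on any node with an HDFS client; the path comes from the config above):

hdfs dfs -mkdir -p /flume/logs
hdfs dfs -ls -R /flume/logs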

flume-hdfs-server2.properties

#set Agent name
hdfs2.sources = r1
hdfs2.channels = c1
hdfs2.sinks = k1

#set channel
hdfs2.channels.c1.type = memory
hdfs2.channels.c1.capacity = 1000
hdfs2.channels.c1.transactionCapacity = 100

# set sources
hdfs2.sources.r1.type = avro
hdfs2.sources.r1.bind = 192.168.183.103
hdfs2.sources.r1.port = 52020
hdfs2.sources.r1.channels = c1

#set sink to hdfs
hdfs2.sinks.k1.type = hdfs
hdfs2.sinks.k1.hdfs.path = hdfs://192.168.183.101:9000/flume/logs/%Y/%m/%d
hdfs2.sinks.k1.hdfs.fileType = DataStream
hdfs2.sinks.k1.hdfs.writeFormat = TEXT
hdfs2.sinks.k1.custom.encoding = UTF-8
hdfs2.sinks.k1.channel = c1
hdfs2.sinks.k1.hdfs.filePrefix = %Y-%m-%d
hdfs2.sinks.k1.hdfs.fileSuffix = .txt
hdfs2.sinks.k1.hdfs.rollInterval = 60
hdfs2.sinks.k1.hdfs.rollSize = 1024
hdfs2.sinks.k1.hdfs.rollCount = 0
hdfs2.sinks.k1.hdfs.idleTimeout = 60
hdfs2.sinks.k1.hdfs.useLocalTimeStamp = true

flume-kafka-server1.properties

#set kafka1 name
kafka1.sources = r1
kafka1.channels = c1
kafka1.sinks = k1

#set channel
kafka1.channels.c1.type = memory
kafka1.channels.c1.capacity = 10000
kafka1.channels.c1.transactionCapacity = 1000

# set sources
kafka1.sources.r1.type = avro
kafka1.sources.r1.bind = 192.168.183.103
kafka1.sources.r1.port = 52021
kafka1.sources.r1.channels = c1

# set sink to kafka
kafka1.sinks.k1.type = org.apache.flume.sink.kafka.KafkaSink
kafka1.sinks.k1.metadata.broker.list = node3:9092,node4:9092,node5:9092
kafka1.sinks.k1.kafka.bootstrap.servers = node3:9092,node4:9092,node5:9092
kafka1.sinks.k1.partition.key = 0
kafka1.sinks.k1.partitioner.class = org.apache.flume.plugins.SinglePartition
kafka1.sinks.k1.serializer.class = kafka.serializer.StringEncoder
kafka1.sinks.k1.request.required.acks = 0
kafka1.sinks.k1.max.message.size = 1000000
kafka1.sinks.k1.producer.type = sync
kafka1.sinks.k1.custom.encoding = UTF-8
#kafka1.sinks.k1.custom.topic.name = test
kafka1.sinks.k1.kafka.topic = test
kafka1.sinks.k1.channel = c1
kafka1.sinks.k1.zkconnect = node1:2181,node2:2181,node3:2181,node4:2181,node5:2181
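The Kafka sink publishes to the topic test on brokers node3, node4, and node5. Kafka can auto-create topics, but it is usually cleaner to create it explicitly before starting the agents; a sketch for this Kafka version (the partition and replication counts here are illustrative, not part of the original setup):

kafka-topics.sh --create --zookeeper node1:2181,node2:2181,node3:2181 --topic test --partitions 3 --replication-factor 2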

flume-kafka-server2.properties

#set kafka2 name
kafka2.sources = r1
kafka2.channels = c1
kafka2.sinks = k1

#set channel
kafka2.channels.c1.type = memory
kafka2.channels.c1.capacity = 10000
kafka2.channels.c1.transactionCapacity = 1000

# set sources
kafka2.sources.r1.type = avro
kafka2.sources.r1.bind = 192.168.183.104
kafka2.sources.r1.port = 52021
kafka2.sources.r1.channels = c1

# set sink to kafka
kafka2.sinks.k1.type = org.apache.flume.sink.kafka.KafkaSink
kafka2.sinks.k1.metadata.broker.list = node3:9092,node4:9092,node5:9092
kafka2.sinks.k1.kafka.bootstrap.servers = node3:9092,node4:9092,node5:9092
kafka2.sinks.k1.partition.key = 0
kafka2.sinks.k1.partitioner.class = org.apache.flume.plugins.SinglePartition
kafka2.sinks.k1.serializer.class = kafka.serializer.StringEncoder
kafka2.sinks.k1.request.required.acks = 0
kafka2.sinks.k1.max.message.size = 1000000
kafka2.sinks.k1.producer.type = sync
kafka2.sinks.k1.custom.encoding = UTF-8
#kafka2.sinks.k1.custom.topic.name = test
kafka2.sinks.k1.kafka.topic = test
kafka2.sinks.k1.channel = c1
kafka2.sinks.k1.zkconnect = node1:2181,node2:2181,node3:2181,node4:2181,node5:2181

flume-kafka-server3.properties

#set kafka3 name
kafka3.sources = r1
kafka3.channels = c1
kafka3.sinks = k1

#set channel
kafka3.channels.c1.type = memory
kafka3.channels.c1.capacity = 10000
kafka3.channels.c1.transactionCapacity = 1000

# set sources
kafka3.sources.r1.type = avro
kafka3.sources.r1.bind = 192.168.183.105
kafka3.sources.r1.port = 52021
kafka3.sources.r1.channels = c1

# set sink to kafka
kafka3.sinks.k1.type = org.apache.flume.sink.kafka.KafkaSink
kafka3.sinks.k1.metadata.broker.list = node3:9092,node4:9092,node5:9092
kafka3.sinks.k1.kafka.bootstrap.servers = node3:9092,node4:9092,node5:9092
kafka3.sinks.k1.partition.key = 0
kafka3.sinks.k1.partitioner.class = org.apache.flume.plugins.SinglePartition
kafka3.sinks.k1.serializer.class = kafka.serializer.StringEncoder
kafka3.sinks.k1.request.required.acks = 0
kafka3.sinks.k1.max.message.size = 1000000
kafka3.sinks.k1.producer.type = sync
kafka3.sinks.k1.custom.encoding = UTF-8
#kafka3.sinks.k1.custom.topic.name = test
kafka3.sinks.k1.kafka.topic = test
kafka3.sinks.k1.channel = c1
kafka3.sinks.k1.zkconnect = node1:2181,node2:2181,node3:2181,node4:2181,node5:2181

Startup order:
1. Start ZooKeeper
2. Start HDFS
3. Start Kafka
4. Start the Flume servers first

Startup commands:
1. On node2:
flume-ng agent --conf conf --conf-file /root/myInstall/flume-1.7.0-bin/properties/flume-hdfs-server1.properties --name hdfs1 -Dflume.root.logger=INFO,console > /root/myInstall/flume-1.7.0-bin/logs/flume-hdfs-server1.log 2>&1 &
2. On node3:
flume-ng agent --conf conf --conf-file /root/myInstall/flume-1.7.0-bin/properties/flume-hdfs-server2.properties --name hdfs2 -Dflume.root.logger=INFO,console > /root/myInstall/flume-1.7.0-bin/logs/flume-hdfs-server2.log 2>&1 &
flume-ng agent --conf conf --conf-file /root/myInstall/flume-1.7.0-bin/properties/flume-kafka-server1.properties --name kafka1 -Dflume.root.logger=INFO,console > /root/myInstall/flume-1.7.0-bin/logs/flume-kafka-server1.log 2>&1 &
3. On node4:
flume-ng agent --conf conf --conf-file /root/myInstall/flume-1.7.0-bin/properties/flume-kafka-server2.properties --name kafka2 -Dflume.root.logger=INFO,console > /root/myInstall/flume-1.7.0-bin/logs/flume-kafka-server2.log 2>&1 &
4. On node5:
flume-ng agent --conf conf --conf-file /root/myInstall/flume-1.7.0-bin/properties/flume-kafka-server3.properties --name kafka3 -Dflume.root.logger=INFO,console > /root/myInstall/flume-1.7.0-bin/logs/flume-kafka-server3.log 2>&1 &
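Before starting the clients it is worth confirming that each server agent is up and its avro source is listening on the expected port; a quick sanity check (assuming netstat is available on the nodes):

# the hdfs agents on node2/node3 bind 52020, the kafka agents on node3/node4/node5 bind 52021
netstat -tlnp | grep -E '5202[01]'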

5. Then start the Flume clients

1. On node1:
flume-ng agent --conf conf --conf-file /root/myInstall/flume-1.7.0-bin/properties/flume-kafka-hdfs-client.properties --name agent -Dflume.root.logger=INFO,console > /root/myInstall/flume-1.7.0-bin/logs/flume-kafka-hdfs-client.log 2>&1 &
2. On node2:
flume-ng agent --conf conf --conf-file /root/myInstall/flume-1.7.0-bin/properties/flume-kafka-hdfs-client.properties --name agent -Dflume.root.logger=INFO,console > /root/myInstall/flume-1.7.0-bin/logs/flume-kafka-hdfs-client.log 2>&1 &
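Once the clients are running you can verify the whole pipeline end to end; a sketch using the file, topic, brokers, and HDFS path from the configurations above:

# append a test line to the file tailed by the exec source (on node1 or node2)
echo "hello flume $(date)" >> /root/logs/a.txt

# the line should show up in the Kafka topic
kafka-console-consumer.sh --bootstrap-server node3:9092 --topic test --from-beginning

# and, after the sink rolls, as a .txt file in HDFS
hdfs dfs -ls /flume/logs/$(date +%Y/%m/%d)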

That's it: the Flume integration with HDFS and Kafka is now in place!
