Spark Streaming: stream processing with Flume in push mode

import org.apache.spark.SparkConf
import org.apache.spark.streaming.flume.FlumeUtils
import org.apache.spark.streaming.{Seconds, StreamingContext}

/**
  * Created by ZX on 2015/6/22.
  */
object FlumePushWordCount {

  def main(args: Array[String]) {
    val host = args(0)
    val port = args(1).toInt
    LoggerLevels.setStreamingLogLevels()
    val conf = new SparkConf().setAppName("FlumeWordCount") //.setMaster("local[2]")
    val ssc = new StreamingContext(conf, Seconds(5))
    // Push mode: Flume pushes data to the Spark receiver
    val flumeStream = FlumeUtils.createStream(ssc, host, port)
    // The actual payload of a Flume event is obtained via event.getBody()
    val words = flumeStream.flatMap(x => new String(x.event.getBody().array()).split(" ")).map((_, 1))
    val results = words.reduceByKey(_ + _)
    results.print()
    ssc.start()
    ssc.awaitTermination()
  }
}
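The code above calls a helper, LoggerLevels.setStreamingLogLevels(), whose source is not shown in the post. A minimal sketch of what such a helper typically does; this body is an assumption, not the original author's code:

import org.apache.log4j.{Level, Logger}

object LoggerLevels {
  // Assumed implementation: raise the log4j root log level to WARN
  // so Spark's INFO output does not drown out results.print().
  def setStreamingLogLevels(): Unit = {
    Logger.getRootLogger.setLevel(Level.WARN)
  }
}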


<dependency>
    <groupId>org.apache.spark</groupId>
    <artifactId>spark-streaming-flume_2.10</artifactId>
    <version>${spark.version}</version>
</dependency>
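For push mode to work, the Flume agent must be configured with an avro sink pointing at the host and port where the Spark receiver listens, i.e. the same host and port passed to FlumeUtils.createStream. A minimal agent configuration sketch; the agent name a1, the netcat source, and the port numbers are illustrative assumptions, not from the original post:

# Illustrative Flume agent for push mode
a1.sources = r1
a1.channels = c1
a1.sinks = k1

# Source: read lines from a netcat socket (example source only)
a1.sources.r1.type = netcat
a1.sources.r1.bind = localhost
a1.sources.r1.port = 44444

# Channel: buffer events in memory
a1.channels.c1.type = memory

# Sink: push events to the Spark receiver over avro
a1.sinks.k1.type = avro
a1.sinks.k1.hostname = <spark-receiver-host>
a1.sinks.k1.port = 8888

# Wiring
a1.sources.r1.channels = c1
a1.sinks.k1.channel = c1

Note that in push mode the Spark application must be running first, so the receiver's port is already listening when the avro sink tries to connect.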

Drawback: in push mode, Flume's avro sink delivers all events to a single receiver, so only one host/port receives the data.
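If that is a concern, the same library also offers a pull-based alternative, FlumeUtils.createPollingStream, where Spark polls one or more Flume agents running the special Spark sink (this requires the spark-streaming-flume-sink jar on the Flume side). A sketch of the multi-agent variant; the agent hostnames and port are placeholders:

import java.net.InetSocketAddress

import org.apache.spark.storage.StorageLevel
import org.apache.spark.streaming.flume.FlumeUtils

// Pull mode: Spark polls several Flume agents, so events can arrive
// through more than one host/port (hostnames and port are placeholders).
val addresses = Seq(
  new InetSocketAddress("flume-agent-1", 8888),
  new InetSocketAddress("flume-agent-2", 8888)
)
val pollingStream = FlumeUtils.createPollingStream(ssc, addresses, StorageLevel.MEMORY_AND_DISK_SER_2)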
