Spark Streaming Basics [Part 2]: Accumulating Counts Across Batches

This installment extends the basic streaming word count so that counts accumulate across batches, using updateStateByKey. The full example:

package day05.d

import org.apache.spark.{HashPartitioner, SparkConf, SparkContext}
import org.apache.spark.streaming.{Seconds, StreamingContext}

object StateFulWordCount {

  // The update function receives data already grouped by key: for each word,
  // the counts from the current batch (Seq[Int]) and the accumulated count
  // from earlier batches (Option[Int], None on first appearance).
  val updateFunc = (iter: Iterator[(String, Seq[Int], Option[Int])]) => {
    // Equivalent alternatives:
    //iter.flatMap(it => Some(it._2.sum + it._3.getOrElse(0)).map(x => (it._1, x)))
    //iter.map(t => (t._1, t._2.sum + t._3.getOrElse(0)))
    iter.map { case (word, currentCounts, historyCount) =>
      (word, currentCounts.sum + historyCount.getOrElse(0))
    }
  }

  def main(args: Array[String]): Unit = {
    // Helper from an earlier installment of this series that quiets Spark's logging.
    LoggerLevels.setStreamingLogLevels()

    // Build the StreamingContext with a 5-second batch interval.
    val conf = new SparkConf().setAppName("StreamingWordCount").setMaster("local[2]")
    val sc = new SparkContext(conf)
    // updateStateByKey needs a checkpoint directory to persist state between batches.
    sc.setCheckpointDir("f://ss")
    val ssc = new StreamingContext(sc, Seconds(5))

    // Receive lines of text over a socket.
    val ds = ssc.socketTextStream("192.168.123.151", 8888)

    // A DStream is a continuous sequence of RDDs, one per batch interval.
    val result = ds.flatMap(_.split(" "))
      .map((_, 1))
      .updateStateByKey(updateFunc, new HashPartitioner(sc.defaultParallelism), true)
    result.print()

    ssc.start()
    // Block until the streaming computation is stopped.
    ssc.awaitTermination()
  }
}
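The update function can be exercised on its own, without a cluster. Below is a minimal sketch (the sample words, counts, and stored totals are made up for illustration): for each key, the counts from the current batch are summed and added to whatever running total was saved so far.

// Hypothetical sample: "spark" appeared twice in this batch with a stored total of 3;
// "hadoop" appeared once and has no prior state.
val sample = Iterator(
  ("spark",  Seq(1, 1), Some(3)),  // 1 + 1 + 3 = 5
  ("hadoop", Seq(1),    None)      // 1 + 0     = 1
)
println(updateFunc(sample).toList) // List((spark,5), (hadoop,1))

To feed the running job, start a socket server on the configured host, for example with nc -lk 8888 (assuming netcat is available), and type words into it. Because updateStateByKey carries state forward, result.print() shows totals that keep growing across batches rather than resetting every 5 seconds.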
