Spark Streaming 实时单词统计小例子

来源:互联网 发布:恒腾网络 百度百科 编辑:程序博客网 时间:2024/05/22 03:16
package com.yc.spark


import org.apache.spark.SparkConf
import org.apache.spark.SparkContext
import org.apache.spark.sql.SQLContext
import org.apache.spark.sql.Row
import org.apache.spark.sql.types.{ StructType, StructField, StringType, IntegerType }
import java.util.Properties
import org.apache.spark.sql.hive.HiveContext
import org.apache.spark.sql.SparkSession
import org.apache.spark.streaming.StreamingContext
import org.apache.spark.SparkConf
import org.apache.spark.streaming.Seconds
import org.apache.spark.graphx.Graph


/**
 * Real-time word count over a TCP text stream using Spark Streaming.
 *
 * Lines are read from a socket, split into words, and the number of occurrences of
 * each word within every micro-batch is printed to stdout.
 *
 * Usage: `SparkStreaming1 [host] [port]` — both arguments are optional and default to
 * `master` and `7777`.
 */
object SparkStreaming1 {

  /** Host to connect to when no host argument is supplied. */
  private val DefaultHost = "master"

  /** Port to connect to when no port argument is supplied. */
  private val DefaultPort = 7777

  /** Length of each micro-batch, in seconds. */
  private val BatchSeconds = 5L

  /** Parses a port argument, failing with a readable message instead of a bare NumberFormatException. */
  private def parsePort(raw: String): Int =
    scala.util.Try(raw.toInt).getOrElse(
      throw new IllegalArgumentException(s"Invalid port: '$raw'"))

  /**
   * Starts the streaming job and blocks until it is terminated.
   *
   * @param args optional `host` (args(0)) and `port` (args(1)) of the socket text source
   * @throws IllegalArgumentException if the port argument is not an integer
   */
  def main(args: Array[String]): Unit = {
    val host = args.headOption.getOrElse(DefaultHost)
    val port = args.lift(1).fold(DefaultPort)(parsePort)

    // At least two local threads: the socket receiver permanently occupies one,
    // leaving the other(s) to process the received batches.
    val conf = new SparkConf().setMaster("local[2]").setAppName("Stream")
    // Entry point for Spark's real-time stream processing.
    val ssc = new StreamingContext(conf, Seconds(BatchSeconds))
    ssc.sparkContext.setLogLevel("error")

    // A DStream is Spark Streaming's stream data abstraction.
    // Alternative source that watches a directory for new files:
    //   val lines = ssc.textFileStream("/data/stream")
    val lines = ssc.socketTextStream(host, port)

    // Split on any whitespace run and drop empty tokens so that blank lines and
    // repeated spaces are not counted as a "" word.
    val wordCounts = lines
      .flatMap(_.split("\\s+"))
      .filter(_.nonEmpty)
      .map(word => (word, 1))
      .reduceByKey(_ + _)
    wordCounts.print()

    ssc.start()
    ssc.awaitTermination()
  }
}

原创粉丝点击