用sparkstreaming按天计算地区销售额简单模版
来源:互联网 发布:c语言 time.h库函数 编辑:程序博客网 时间:2024/06/06 05:57
producer端:
import java.util.HashMap

import org.apache.kafka.clients.producer.{KafkaProducer, ProducerConfig, ProducerRecord}
import org.apache.spark.SparkConf
import org.apache.spark.streaming._
import org.apache.spark.streaming.kafka._

import scala.util.Random

/**
 * Created by zengxiaosen on 16/9/26.
 *
 * Test-data generator: publishes ten synthetic order records per second to
 * the Kafka topic `orderTopic`, forever.
 *
 * Each record is one comma-separated line:
 * `areaId,orderId,orderAmount,orderTime`.
 *
 * To look at the produced data, run on the command line:
 * {{{
 * kafka-console-consumer.sh --zookeeper slave1:2181 --topic orderTopic
 * }}}
 */
object OrderProductor {

  /**
   * Configures a String/String Kafka producer and sends orders in an endless
   * loop (ten records, then a one-second pause).
   *
   * @param args unused
   */
  def main(args: Array[String]): Unit = {
    val topic = "orderTopic"
    val brokers = "master:9092,slave1:9092"

    val props = new HashMap[String, Object]()
    props.put(ProducerConfig.BOOTSTRAP_SERVERS_CONFIG, brokers)
    props.put(ProducerConfig.VALUE_SERIALIZER_CLASS_CONFIG,
      "org.apache.kafka.common.serialization.StringSerializer")
    props.put(ProducerConfig.KEY_SERIALIZER_CLASS_CONFIG,
      "org.apache.kafka.common.serialization.StringSerializer")

    val producer = new KafkaProducer[String, String](props)

    // Produce 10 orders per round; the loop only ends when the process is killed.
    while (true) {
      (1 to 10).foreach { messageNum =>
        // Field order: area id, order id, order amount, order time.
        val orderId = Random.nextInt(10)
        val amount = Math.round(Random.nextDouble() * 100)
        // The order time must start with a yyyy-MM-dd date: the AreaAmt consumer
        // builds its per-day key from the first ten characters of this field.
        // DateUtils.getCurrentDateTime formats with "K:mm aa" (time only), which
        // is too short for that substring, so the full-timestamp helper is used.
        val orderTime = DateUtils01.getCurrentTime()
        val str = s"$messageNum,$orderId,$amount,$orderTime"

        // Value-only record: no key is set, same as passing a null key.
        val message = new ProducerRecord[String, String](topic, str)
        producer.send(message)
      }
      Thread.sleep(1000)
    }
  }
}

// Consumer side:
import org.apache.spark.SparkConf
import org.apache.spark.streaming.kafka.KafkaUtils
import org.apache.spark.streaming.{Seconds, StreamingContext}

import scala.util.Try

/**
 * Created by zengxiaosen on 16/9/26.
 *
 * Keeps a running total of the sales amount per day and per area, reading
 * comma-separated order records (`areaId,orderId,orderAmount,orderTime`)
 * from Kafka and printing the accumulated totals after every batch.
 */
object AreaAmt {

  /**
   * Turns one raw order line into a `(day_area, amount)` pair.
   *
   * The key is the first ten characters of the order time (the `yyyy-MM-dd`
   * part of e.g. `2016-09-04 15:19:09`) joined to the area id with `_`,
   * giving keys such as `2016-09-04_3`. To break the totals down further
   * (e.g. by city) only this key has to change.
   *
   * @param row one comma-separated record as read from Kafka
   * @return the pair, or `None` when the row has fewer than four fields, an
   *         order time shorter than ten characters, or a non-integer amount
   */
  private def parseOrder(row: String): Option[(String, Int)] =
    row.split(",") match {
      case Array(areaId, _, amount, orderTime, _*) if orderTime.length >= 10 =>
        Try(amount.toInt).toOption.map(amt => (s"${orderTime.substring(0, 10)}_$areaId", amt))
      case _ =>
        None
    }

  /**
   * Builds and starts the streaming job; blocks until it is terminated.
   *
   * @param args unused
   */
  def main(args: Array[String]): Unit = {
    /*
      What the Kafka group id does when several jobs consume the same topic:
        1. every job receives the complete data and they compute independently, or
        2. every job receives part of the data, which amounts to load balancing.
      Jobs sharing one group id fall under case 2, otherwise case 1.
     */
    val zkQuorum = "slave1:2181"
    val group = "g1"
    // Must be the topic OrderProductor publishes to.
    val topics = "orderTopic"
    val numThreads = 2

    // Give the local master at least 2 cores: with only 1 there are not enough
    // resources to compute, since one core is needed for upkeep of the stream
    // and one for the computation itself.
    val sparkConf = new SparkConf().setAppName("AreaAmt").setMaster("local[2]")
    val ssc = new StreamingContext(sparkConf, Seconds(2)) // one batch every two seconds
    // Checkpoint directory, required by the stateful updateStateByKey below.
    ssc.checkpoint("hdfs://192.168.75.130:8020/user/root/checkpoint/AreaAmt")

    val topicMap = topics.split(",").map((_, numThreads)).toMap
    //val topicMap2 = Map(topics->2)

    // Each element is one line that was written to Kafka (the message value),
    // delivered batch by batch.
    val lines = KafkaUtils.createStream(ssc, zkQuorum, group, topicMap).map(_._2)

    // Build the (day_area, amount) pairs; rows that cannot be parsed are
    // skipped instead of failing the whole batch.
    val linerdd = lines.flatMap(row => parseOrder(row).toList)

    val addFunc: (Seq[Int], Option[Int]) => Option[Int] = (currValues, preValueState) => {
      // Spark groups the current batch by key and passes this key's values as
      // a Seq; sum them to get the batch total for the key.
      val currentCount = currValues.sum
      // Total accumulated by earlier batches (0 for a key seen for the first time).
      val previousCount = preValueState.getOrElse(0)
      // The new state for the key.
      Some(currentCount + previousCount)
    }

    linerdd.updateStateByKey[Int](addFunc).print()

    ssc.start()
    ssc.awaitTermination()
  }
}

// About DateUtils:
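DateUtils 可以按自己的需要来写,这里给出两个模版。注意:consumer 端用 arr(3).substring(0,10) 取日期作为 key,所以订单时间必须以 yyyy-MM-dd 开头(即第二个模版 DateUtils01 的 "yyyy-MM-dd HH:mm:ss" 格式);第一个模版的 "K:mm aa" 只有时间、没有日期: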
import java.util.Calendar
import java.text.SimpleDateFormat

/**
 * Created by zengxiaosen on 16/9/26.
 *
 * Helpers that render the current moment as text using fixed
 * `SimpleDateFormat` patterns.
 */
object DateUtils {

  /** Current time formatted with the pattern `K:mm aa` (hour, minutes, AM/PM marker; no date part). */
  def getCurrentDateTime: String = getCurrentDateTime("K:mm aa")

  /** Current date formatted with the pattern `EEEE, MMMM d` (weekday name, month name, day of month). */
  def getCurrentDate: String = getCurrentDateTime("EEEE, MMMM d")

  /**
   * Formats "now" with the given pattern.
   *
   * @param dateTimeFormat a `SimpleDateFormat` pattern string
   * @return the current instant rendered with that pattern
   */
  private def getCurrentDateTime(dateTimeFormat: String): String = {
    val now = Calendar.getInstance().getTime()
    new SimpleDateFormat(dateTimeFormat).format(now)
  }
}

// Alternative template:
import java.text.SimpleDateFormat
import java.util.Calendar

/**
 * Created by zengxiaosen on 16/9/26.
 *
 * Timestamp helper producing `yyyy-MM-dd HH:mm:ss` strings.
 */
object DateUtils01 {

  /**
   * @return the current date and time formatted as `yyyy-MM-dd HH:mm:ss`
   */
  def getCurrentTime(): String = {
    val now = Calendar.getInstance().getTime
    new SimpleDateFormat("yyyy-MM-dd HH:mm:ss").format(now)
  }

  /**
   * Small demo: prints the date part (first ten characters) of a sample
   * timestamp.
   *
   * @param args unused
   */
  def main(args: Array[String]): Unit = {
    val sample = "2016-09-04 15:19:09"
    println(sample.substring(0, 10))
  }
}
- 顶
- 0
- 踩
0 0
- 用sparkstreaming按天计算地区销售额简单模版
- 用sparkstreaming按天计算地区销售额简单模版
- SparkStreaming计算WordCount简单示例
- SparkStreaming计算WordCount简单示例
- 销售额计算(Calculating Sales)
- Tiles模版简单用
- kafka通过sparkstreaming连oracle模版
- 实战SparkStream+Kafka+Redis实时计算商品销售额
- kafka+sparkstreaming实现每批次的wordcount统计模版
- sparkstreaming中的mapWithState和updateStateBykey代码模版对比
- sparkStreaming
- sparkStreaming
- sparkstreaming
- SparkStreaming
- sparkstreaming之实时数据流计算实例
- 使用MVC模式开发一简单的销售额查询系统
- 【模版】计算几何
- 计算几何模版
- tcpdump
- 深入理解Java类型信息(Class对象)与反射机制
- SpringMVC return forward redirect疑问
- 深入理解linux系统的目录结构
- 机器学习sklearn指东
- 用sparkstreaming按天计算地区销售额简单模版
- 部署Servlet(二)——Servlet3.0
- linux(centos7)安装jdk
- 文章标题
- UVA 12108 特别困的学生
- javascript入门(一)
- Boost智能指针——shared_ptr
- (区间dp)南阳理工 acm 737 石子合并(一)
- Minimal Ratio Tree