Spark Streaming: consuming data from two different Kafka clusters at the same time
As the title says, things are never perfect: the data we need to process sits in two different Kafka clusters. Life goes on and the problem still has to be solved, so we create two DStreams, each connected to a different topic on a different Kafka cluster, union them, and process the result as a single stream. The code is as follows:
package com.kingnet

import java.util

import org.apache.spark.SparkConf
import org.apache.spark.streaming.kafka.KafkaUtils
import org.apache.spark.streaming.{Seconds, StreamingContext}
import org.joda.time.DateTime
import org.joda.time.format.DateTimeFormat

import scala.collection.JavaConversions._

object IOSChannelNewActiveDids {

  def createContext(params: KafkaStreamingParams) = {
    // Example argument:
    // {"batchTime":5,"sources":[{"zookeeper":"name85:2181,name86:2181,name87:2181","group":"group1","topics":"test1","numThreads":"1"},
    //                           {"zookeeper":"name85:2181,name86:2181,name87:2181","group":"group1","topics":"test2","numThreads":"1"}]}
    val sparkConf = new SparkConf().setAppName("IOSChannelNewActiveDids")
    val ssc = new StreamingContext(sparkConf, Seconds(params.getBatchTime.toInt))
    // ssc.checkpoint(checkpointDirectory)

    // One receiver-based DStream per configured Kafka source (zookeeper list / group / topics).
    val rawdata = params.getSources.map(p => {
      val topicMap = p.getTopics.split(",").map((_, p.getNumThreads.toInt)).toMap
      KafkaUtils.createStream(ssc, p.getZookeeper, p.getGroup, topicMap).map(_._2)
    }).toSeq

    // Union the DStreams so they are processed together as a single stream.
    val union_rawdata = ssc.union(rawdata)
    union_rawdata.print()
    ssc
  }

  def main(args: Array[String]) {
    if (args.length < 1) {
      System.err.println("Usage: com.kingnet.IOSChannelNewActiveDids {\"batchTime\":5,\"sources\":[{\"zookeeper\":\"name85:2181,name86:2181,name87:2181\",\"group\":\"group1\",\"topics\":\"test1\",\"numThreads\":1},{\"zookeeper\":\"name85:2181,name86:2181,name87:2181\",\"group\":\"group1\",\"topics\":\"test2\",\"numThreads\":1}]}")
      System.exit(1)
    }
    val params = GsonObject.getInstance().fromJson(args(0), classOf[KafkaStreamingParams])
    params.getSources.foreach(p => {
      println(p.getTopics)
    })
    val ssc = createContext(params)
    ssc.start()
    ssc.awaitTermination()
  }
}
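One thing the union throws away is which cluster a record came from. If downstream logic needs that, a small variation works: tag each message with its source before the union. The following is only a sketch of mine (the helper name unionWithSourceTag is made up), meant to be dropped into the object above; it uses the same receiver-based createStream API:

import org.apache.spark.streaming.StreamingContext
import org.apache.spark.streaming.dstream.DStream
import org.apache.spark.streaming.kafka.KafkaUtils

import scala.collection.JavaConversions._

// Build one stream per source, but keep a tag saying where each message came from,
// so the distinction survives the union.
def unionWithSourceTag(ssc: StreamingContext, params: KafkaStreamingParams): DStream[(String, String)] = {
  val tagged = params.getSources.map { p =>
    val tag = p.getTopics // capture a plain String in the closure, not the config object itself
    val topicMap = p.getTopics.split(",").map((_, p.getNumThreads.toInt)).toMap
    KafkaUtils.createStream(ssc, p.getZookeeper, p.getGroup, topicMap)
      .map { case (_, msg) => (tag, msg) } // (source topics, message body)
  }.toSeq
  ssc.union(tagged)
}

The tag is copied into a local val first so the map closure only captures a String rather than the KafkaParams object, which is not serializable.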
We pass a single JSON string to args as the program argument. It contains a sources list with two connection entries (this is a test setup, so both entries use the same zookeeper list). The JSON is then parsed into a Java object:
package com.kingnet;

import java.util.List;

/**
 * Created by xiaoj on 2016/7/13.
 */
public class KafkaStreamingParams {
    private String batchTime;
    private List<KafkaParams> sources;

    public String getBatchTime() { return batchTime; }

    public void setBatchTime(String batchTime) { this.batchTime = batchTime; }

    public List<KafkaParams> getSources() { return sources; }

    public void setSources(List<KafkaParams> sources) { this.sources = sources; }

    @Override
    public String toString() {
        return "KafkaStreamingParams{" +
                "batchTime='" + batchTime + '\'' +
                ", sources=" + sources +
                '}';
    }

    // Declared static so Gson can instantiate it cleanly during deserialization.
    static class KafkaParams {
        private String zookeeper;
        private String group;
        private String topics;
        private String numThreads;

        public String getZookeeper() { return zookeeper; }

        public void setZookeeper(String zookeeper) { this.zookeeper = zookeeper; }

        public String getGroup() { return group; }

        public void setGroup(String group) { this.group = group; }

        public String getTopics() { return topics; }

        public void setTopics(String topics) { this.topics = topics; }

        public String getNumThreads() { return numThreads; }

        public void setNumThreads(String numThreads) { this.numThreads = numThreads; }

        @Override
        public String toString() {
            return "KafkaParams{" +
                    "zookeeper='" + zookeeper + '\'' +
                    ", group='" + group + '\'' +
                    ", topics='" + topics + '\'' +
                    ", numThreads='" + numThreads + '\'' +
                    '}';
        }
    }
}
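Before wiring this into the streaming job, the Gson mapping can be checked on its own. A minimal sketch, assuming the class above is on the classpath (run it from a REPL or a small test):

import com.google.gson.Gson

import scala.collection.JavaConversions._

// Hypothetical sanity check: parse the same JSON shape the job receives as its argument.
val json =
  """{"batchTime":"10","sources":[
    |{"zookeeper":"name85:2181,name86:2181,name87:2181","group":"group1","topics":"test1","numThreads":"1"},
    |{"zookeeper":"name85:2181,name86:2181,name87:2181","group":"group1","topics":"test2","numThreads":"1"}]}""".stripMargin
val params = new Gson().fromJson(json, classOf[KafkaStreamingParams])
println(params.getBatchTime)                           // 10
params.getSources.foreach(s => println(s.getTopics))   // test1, then test2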
Yes, I do this a lot: defining Java classes inside a Scala project, which works smoothly thanks to the powerful IDEA IDE.
package com.kingnet

import java.util

import com.google.gson.{Gson, GsonBuilder}

/**
 * Created by xiaoj on 2016/5/5.
 */
object GsonObject {
  // Lazily created, shared Gson instance (double-checked locking).
  @volatile private var instance: Gson = null

  def getInstance(): Gson = {
    if (instance == null) {
      synchronized {
        if (instance == null) {
          instance = new GsonBuilder().create()
        }
      }
    }
    instance
  }

  // Parse a JSON object into a java.util.HashMap; returns None if parsing fails.
  def fromJson(s: String): Option[util.HashMap[String, Any]] = {
    try {
      Some(getInstance().fromJson(s, classOf[util.HashMap[String, Any]]))
    } catch {
      case e: Exception =>
        e.printStackTrace()
        None
    }
  }

  def toJson(src: Any) = {
    getInstance().toJson(src)
  }
}
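For completeness, a minimal usage sketch of GsonObject (the payloads below are made up); fromJson returns None instead of throwing when the input is not valid JSON:

import java.util

val parsed: Option[util.HashMap[String, Any]] = GsonObject.fromJson("""{"topic":"test1","count":1}""")
parsed.foreach(m => println(m.get("topic")))         // test1

println(GsonObject.fromJson("{ broken").isDefined)   // false (the exception is logged)

val map = new util.HashMap[String, Any]()
map.put("topic", "test2")
println(GsonObject.toJson(map))                      // {"topic":"test2"}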
Run the program and pass it the JSON argument (on the command line the double quotes have to be escaped, as in the usage message above):

{"batchTime":"10","sources":[{"zookeeper":"name85:2181,name86:2181,name87:2181","group":"group1","topics":"test1","numThreads":"1"},{"zookeeper":"name85:2181,name86:2181,name87:2181","group":"group1","topics":"test2","numThreads":"1"}]}
Open two Kafka console producers and write messages into the test1 and test2 topics respectively; the streaming application's console then prints the messages it receives from both sources.