Spark directStream保存/读取kafka offset
来源:互联网 发布:淘宝商城cf装备 编辑:程序博客网 时间:2024/06/01 08:24
RT。代码备忘。
1.Constant.java
package com.sparktest.util;

/**
 * Shared configuration constants for the Spark streaming test application
 * (Spark master, Kafka topic/brokers, consumer group, batch duration).
 *
 * All values are compile-time constants; the class is not instantiable.
 */
public final class Constant {

    /** Spark master URL; yarn-client runs the driver locally against a YARN cluster. */
    public static final String master = "yarn-client";

    /** Kafka topic consumed by the direct stream. */
    public static final String topic = "pj";

    /** Spark application name shown in the UI. */
    public static final String appName = "sparktest";

    /** Streaming batch duration in milliseconds. */
    public static final long duration = 10000;

    /** ZooKeeper quorum (host:port, comma separated). */
    public static final String zookeeper = "10.67.2.20:2181,10.67.2.21:2181";

    /** Kafka broker list (host:port, comma separated) for metadata.broker.list. */
    public static final String brokerlist = "10.67.2.20:9092,10.67.2.21:9092";

    /** Kafka consumer group id under which offsets are committed. */
    public static final String groupId = "com.sparktest";

    /** Expected partition count of the topic. */
    public static final int partitions = 10;

    private Constant() {
        // utility holder — no instances
    }
}
2.App.java
package com.sparktest.app;import java.io.Serializable;import java.util.HashMap;import java.util.HashSet;import java.util.Map;import java.util.Set;import java.util.concurrent.atomic.AtomicReference;import kafka.common.TopicAndPartition;import kafka.message.MessageAndMetadata;import kafka.serializer.DefaultDecoder;import kafka.serializer.StringDecoder;import org.apache.spark.api.java.JavaRDD;import org.apache.spark.api.java.JavaSparkContext;import org.apache.spark.api.java.function.Function;import org.apache.spark.streaming.Duration;import org.apache.spark.streaming.api.java.JavaInputDStream;import org.apache.spark.streaming.api.java.JavaStreamingContext;import org.apache.spark.streaming.kafka.HasOffsetRanges;import org.apache.spark.streaming.kafka.KafkaCluster;import org.apache.spark.streaming.kafka.KafkaUtils;import org.apache.spark.streaming.kafka.OffsetRange;import scala.Predef;import scala.Tuple2;import scala.collection.JavaConversions;import com.sparktest.util.Constant;public class App implements Serializable{private KafkaCluster kafkaCluster = null;private Map<String, String> kafkaParams = new HashMap<String, String>();private Set<String> topics = new HashSet<String>();private Duration duration = new Duration(Constant.duration);private java.util.Map<kafka.common.TopicAndPartition, Long> fromOffsets = new java.util.HashMap<kafka.common.TopicAndPartition, Long>();private static final AtomicReference<OffsetRange[]> offsetRanges = new AtomicReference<OffsetRange[]>();public App() {kafkaParams.put("metadata.broker.list", Constant.brokerlist);kafkaParams.put("group.id", Constant.groupId);scala.collection.mutable.Map<String, String> mutableKafkaParam = JavaConversions.mapAsScalaMap(kafkaParams);scala.collection.immutable.Map<String, String> immutableKafkaParam = mutableKafkaParam.toMap(new Predef.$less$colon$less<Tuple2<String, String>, Tuple2<String, String>>() {public Tuple2<String, String> apply(Tuple2<String, String> v1) {return v1;}});this.kafkaCluster = new 
KafkaCluster(immutableKafkaParam);this.topics.add(Constant.topic);}public void startApp() {JavaSparkContext ctx = new JavaSparkContext(Constant.master,Constant.appName);JavaStreamingContext jsctx = new JavaStreamingContext(ctx, duration);scala.collection.mutable.Set<String> mutableTopics = JavaConversions.asScalaSet(this.topics);scala.collection.immutable.Set<String> immutableTopics = mutableTopics.toSet();scala.collection.immutable.Set<TopicAndPartition> scalaTopicAndPartitionSet = kafkaCluster.getPartitions(immutableTopics).right().get();// 首次消费,默认设置为0if (kafkaCluster.getConsumerOffsets(kafkaParams.get("group.id"),scalaTopicAndPartitionSet).isLeft()) {Set<TopicAndPartition> javaTopicAndPartitionSet = JavaConversions.setAsJavaSet(scalaTopicAndPartitionSet);for (TopicAndPartition topicAndPartition : javaTopicAndPartitionSet) {this.fromOffsets.put(topicAndPartition, 0L);}} else {scala.collection.immutable.Map<TopicAndPartition, Object> consumerOffsetsTemp = kafkaCluster.getConsumerOffsets(kafkaParams.get("group.id"),scalaTopicAndPartitionSet).right().get();Map<TopicAndPartition, Object> consumerOffsets = JavaConversions.mapAsJavaMap(consumerOffsetsTemp);Set<TopicAndPartition> javaTopicAndPartitionSet = JavaConversions.setAsJavaSet(scalaTopicAndPartitionSet);for (TopicAndPartition topicAndPartition : javaTopicAndPartitionSet) {Long offset = (Long) consumerOffsets.get(topicAndPartition);this.fromOffsets.put(topicAndPartition, offset);}}JavaInputDStream<byte[]> stream = KafkaUtils.createDirectStream(jsctx,String.class, byte[].class, StringDecoder.class,DefaultDecoder.class, byte[].class, kafkaParams,this.fromOffsets,new Function<MessageAndMetadata<String, byte[]>, byte[]>() {public byte[] call(MessageAndMetadata<String, byte[]> v1)throws Exception {return v1.message();}});stream.foreachRDD(new Function<JavaRDD<byte[]>, Void>() {public Void call(JavaRDD<byte[]> arg0) throws Exception {OffsetRange[] offsets = ((HasOffsetRanges) arg0.rdd()).offsetRanges();for(OffsetRange 
o: offsets){// 封装topic.partition 与 offset对应关系 java Map TopicAndPartition topicAndPartition = new TopicAndPartition(o.topic(), o.partition()); Map<TopicAndPartition, Object> topicAndPartitionObjectMap = new HashMap<TopicAndPartition, Object>(); topicAndPartitionObjectMap.put(topicAndPartition, o.untilOffset()); // 转换java map to scala immutable.map scala.collection.mutable.Map<TopicAndPartition, Object> map = JavaConversions.mapAsScalaMap(topicAndPartitionObjectMap); scala.collection.immutable.Map<TopicAndPartition, Object> scalatopicAndPartitionObjectMap = map.toMap(new Predef.$less$colon$less<Tuple2<TopicAndPartition, Object>, Tuple2<TopicAndPartition, Object>>() { public Tuple2<TopicAndPartition, Object> apply(Tuple2<TopicAndPartition, Object> v1) { return v1; } }); // 更新offset到kafkaCluster kafkaCluster.setConsumerOffsets(Constant.groupId, scalatopicAndPartitionObjectMap);}System.out.println("==========================" + arg0.count()+ "==================================");return null;}});jsctx.start();jsctx.awaitTermination();}public static void main(String[] args) {App app = new App();app.startApp();}}
3.pom.xml
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd"> <modelVersion>4.0.0</modelVersion> <groupId>sparktest</groupId> <artifactId>sparktest</artifactId> <version>0.0.1-SNAPSHOT</version> <packaging>jar</packaging> <name>sparktest</name> <url>http://maven.apache.org</url> <properties> <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding> </properties> <build> <plugins> <plugin> <artifactId>maven-assembly-plugin</artifactId> <configuration> <archive> <manifest> <mainClass>com.allen.capturewebdata.Main</mainClass> </manifest> </archive> <descriptorRefs> <descriptorRef>jar-with-dependencies</descriptorRef> </descriptorRefs> </configuration> </plugin> </plugins> </build> <dependencies> <dependency> <groupId>jdk.tools</groupId> <artifactId>jdk.tools</artifactId> <version>1.7</version> <scope>system</scope> <systemPath>${JAVA_HOME}/lib/tools.jar</systemPath></dependency> <dependency> <groupId>org.apache.spark</groupId> <artifactId>spark-streaming_2.10</artifactId> <version>1.3.0</version></dependency><dependency> <groupId>org.apache.spark</groupId> <artifactId>spark-streaming-kafka_2.10</artifactId> <version>1.3.0</version></dependency><dependency><groupId>org.apache.spark</groupId><artifactId>spark-yarn_2.10</artifactId><version>1.3.0</version></dependency> <dependency> <groupId>junit</groupId> <artifactId>junit</artifactId> <version>3.8.1</version> <scope>test</scope> </dependency> </dependencies></project>
0 0
- Spark directStream保存/读取kafka offset
- spark createDirectStream保存kafka offset(JAVA实现)
- spark createDirectStream保存kafka offset(JAVA实现)
- spark createDirectStream保存kafka offset(JAVA实现)
- spark createDirectStream保存kafka offset(JAVA实现)
- spark createDirectStream保存kafka offset(JAVA实现)
- spark streaming 读取kafka的offset
- spark streaming读取kafka数据,记录offset
- 将 Spark Streaming + Kafka direct 的 offset 保存进入Zookeeper
- Spark Streaming createDirectStream保存kafka offset(JAVA实现)
- spark streaming 自定义kafka读取topic的offset(python)
- Spark Streaming 中使用kafka低级api+zookeeper 保存 offset 并重用 以及 相关代码整合
- Spark Streaming 中使用kafka低级api+zookeeper 保存 offset 并重用 以及 相关代码整合
- 将 Spark Streaming + Kafka direct 的 offset 保存进入Zookeeper(二)
- Spark Streaming +Kafka 使用底层API直接读取Kafka的Partition数据,手动更新Offset到Zookeeper集群
- Spark Kafka(createDirectStream)自己管理offset
- sparkstreaming保存的kafka数据offset
- Kafka 如何读取offset topic内容 (__consumer_offsets)
- JavaScript基础——引用类型
- 首次配置eclipse+tomcat+axis2
- java动态绑定
- QT中实现图片淡出淡入的效果
- WPF Combox selectedItem”不能绑定”的问题
- Spark directStream保存/读取kafka offset
- Android Studio常用插件——codota
- PLSQL developer 连接64位Oracle 的解决方法
- Owen的天使追寻之路
- 第十一周 项目1-4哈夫曼树
- android BitmapFacty.Options的用法
- TCP协议疑难杂症全景解析
- Android4.0之后添加虚拟按键方法
- leetcode DFS