createDirectStream 实现offset管理
来源:互联网 发布:申请淘宝店铺号步骤 编辑:程序博客网 时间:2024/06/07 19:13
public class MainBak2_ {
private static Logger logger = LoggerFactory.getLogger(MainBak2_.class);
private static KafkaCluster kafkaCluster = null;
public static void main(String[] args) {
String zookeeper = ConfigMgr.getZookeeper();
final String groupId = ConfigMgr.getKakfaGroupId();
String topicStr = ConfigMgr.getKafkaTopic();
String sparkAppName = ConfigMgr.getSparkAppName();
String master = ConfigMgr.getSparkMaster();
int duration = ConfigMgr.getSparkDuration();
String metadataBrokerList = ConfigMgr.getProByKey("metadata.broker.list");
String autoOffsetReset = ConfigMgr.getProByKey("auto.offset.reset");
logger.info("zk===========================" + zookeeper);
logger.info("groupId======================" + groupId);
logger.info("topic========================" + topicStr);
logger.info("sparkAppName=================" + sparkAppName);
logger.info("master=======================" + master);
logger.info("duration=====================" + duration);
logger.info("metadataBrokerList===========" + metadataBrokerList);
SparkConf sparkConf = new SparkConf().setAppName(sparkAppName).setMaster(master);
JavaStreamingContext jssc = new JavaStreamingContext(sparkConf, Durations.seconds(duration));
Set<String> topicSet = new HashSet<String>();
for (String topic : topicStr.split(",")) {
topicSet.add(topic);
}
Map<String, String> kafkaParams = new HashMap<String, String>();
kafkaParams.put("group.id", groupId);
kafkaParams.put("metadata.broker.list", metadataBrokerList);
kafkaParams.put("auto.offset.reset", autoOffsetReset);
scala.collection.mutable.Map<String, String> tmpScalaKafkaMap = JavaConversions.mapAsScalaMap(kafkaParams);
scala.collection.immutable.Map<String, String> scalaKafkaParams = tmpScalaKafkaMap
.toMap(new Predef.$less$colon$less<Tuple2<String, String>, Tuple2<String, String>>() {
private static final long serialVersionUID = 1L;
public Tuple2<String, String> apply(Tuple2<String, String> v1) {
return v1;
}
});
kafkaCluster = new KafkaCluster(scalaKafkaParams);
scala.collection.mutable.Set<String> tmpMutableTopicsSet = JavaConversions.asScalaSet(topicSet);
scala.collection.immutable.Set<String> tmpImmutableTopicsSet = tmpMutableTopicsSet.toSet();
scala.collection.immutable.Set<TopicAndPartition> tpScalaSet = kafkaCluster.getPartitions(tmpImmutableTopicsSet)
.right().get();
Map<TopicAndPartition, Long> offsetMap = new HashMap<TopicAndPartition, Long>();
if (kafkaCluster.getConsumerOffsets(groupId, tpScalaSet).isLeft()) {
System.out.println(" left : " + kafkaCluster.getConsumerOffsets(groupId, tpScalaSet).left().get());
Set<TopicAndPartition> tmpSet = JavaConversions.setAsJavaSet(tpScalaSet);
for (TopicAndPartition topicAndPartition : tmpSet) {
offsetMap.put(topicAndPartition, 0L);
}
} else {
scala.collection.immutable.Map<TopicAndPartition, Object> scalaOffsetMap = kafkaCluster
.getConsumerOffsets(groupId, tpScalaSet).right().get();
Map<TopicAndPartition, Object> consumerOffsets = JavaConversions.mapAsJavaMap(scalaOffsetMap);
Set<TopicAndPartition> tmpSet = JavaConversions.setAsJavaSet(tpScalaSet);
for (TopicAndPartition topicAndPartition : tmpSet) {
Long offset = (Long) consumerOffsets.get(topicAndPartition);
offsetMap.put(topicAndPartition, offset);
System.out.println("first getOffset:" + topicAndPartition.partition() + ":" + offset);
}
}
JavaInputDStream<String> jid = KafkaUtils.createDirectStream(jssc, String.class, String.class,
StringDecoder.class, StringDecoder.class, String.class, kafkaParams, offsetMap,
new Function<kafka.message.MessageAndMetadata<String, String>, String>() {
private static final long serialVersionUID = -6590667828252772663L;
@Override
public String call(MessageAndMetadata<String, String> arg0) throws Exception {
return arg0.message();
}
});
final VoidFunction<String> func0 = new VoidFunction<String>() {
private static final long serialVersionUID = -2520206838533422786L;
@Override
public void call(String arg0) throws Exception {
System.out.println(arg0);
}
};
VoidFunction<JavaRDD<String>> func = new VoidFunction<JavaRDD<String>>() {
private static final long serialVersionUID = 7679681553001908774L;
@Override
public void call(JavaRDD<String> arg0) throws Exception {
OffsetRange[] offsets = ((HasOffsetRanges) arg0.rdd()).offsetRanges();
for (OffsetRange o : offsets) {
TopicAndPartition tp = new TopicAndPartition(o.topic(), o.partition());
Map<TopicAndPartition, Object> tpMap = new HashMap<TopicAndPartition, Object>();
tpMap.put(tp, o.untilOffset());
System.out.println("write getOffset:" + tp.partition() + ":" + o.untilOffset());
scala.collection.mutable.Map<TopicAndPartition, Object> tpScalaMap = JavaConversions
.mapAsScalaMap(tpMap);
scala.collection.immutable.Map<TopicAndPartition, Object> scalaOffsetMap = tpScalaMap.toMap(
new Predef.$less$colon$less<Tuple2<TopicAndPartition, Object>, Tuple2<TopicAndPartition, Object>>() {
private static final long serialVersionUID = -3520206838533422786L;
public Tuple2<TopicAndPartition, Object> apply(Tuple2<TopicAndPartition, Object> v1) {
return v1;
}
});
kafkaCluster.setConsumerOffsets(groupId, scalaOffsetMap);
}
arg0.foreach(func0);
}
};
jid.foreachRDD(func);
jssc.start();
jssc.awaitTermination();
}
}
// NOTE(review): everything from here to the final closing brace is a byte-for-byte
// DUPLICATE of the class body above — a copy/paste (web-scrape) artifact. It is not
// valid Java at top level (fields and a method outside any class) and the trailing
// brace closes a class that was never opened. This entire region should be DELETED;
// it is only annotated here because the review format does not permit removal.
private static Logger logger = LoggerFactory.getLogger(MainBak2_.class);
private static KafkaCluster kafkaCluster = null;
public static void main(String[] args) {
// --- Read configuration (see the identical, documented copy above). ---
String zookeeper = ConfigMgr.getZookeeper();
final String groupId = ConfigMgr.getKakfaGroupId();
String topicStr = ConfigMgr.getKafkaTopic();
String sparkAppName = ConfigMgr.getSparkAppName();
String master = ConfigMgr.getSparkMaster();
int duration = ConfigMgr.getSparkDuration();
String metadataBrokerList = ConfigMgr.getProByKey("metadata.broker.list");
String autoOffsetReset = ConfigMgr.getProByKey("auto.offset.reset");
logger.info("zk===========================" + zookeeper);
logger.info("groupId======================" + groupId);
logger.info("topic========================" + topicStr);
logger.info("sparkAppName=================" + sparkAppName);
logger.info("master=======================" + master);
logger.info("duration=====================" + duration);
logger.info("metadataBrokerList===========" + metadataBrokerList);
// --- Build streaming context and Kafka parameters. ---
SparkConf sparkConf = new SparkConf().setAppName(sparkAppName).setMaster(master);
JavaStreamingContext jssc = new JavaStreamingContext(sparkConf, Durations.seconds(duration));
Set<String> topicSet = new HashSet<String>();
for (String topic : topicStr.split(",")) {
topicSet.add(topic);
}
Map<String, String> kafkaParams = new HashMap<String, String>();
kafkaParams.put("group.id", groupId);
kafkaParams.put("metadata.broker.list", metadataBrokerList);
kafkaParams.put("auto.offset.reset", autoOffsetReset);
// Java map -> Scala immutable map; the $less$colon$less anonymous class is the
// identity evidence Scala's toMap requires when called from Java.
scala.collection.mutable.Map<String, String> tmpScalaKafkaMap = JavaConversions.mapAsScalaMap(kafkaParams);
scala.collection.immutable.Map<String, String> scalaKafkaParams = tmpScalaKafkaMap
.toMap(new Predef.$less$colon$less<Tuple2<String, String>, Tuple2<String, String>>() {
private static final long serialVersionUID = 1L;
public Tuple2<String, String> apply(Tuple2<String, String> v1) {
return v1;
}
});
kafkaCluster = new KafkaCluster(scalaKafkaParams);
scala.collection.mutable.Set<String> tmpMutableTopicsSet = JavaConversions.asScalaSet(topicSet);
scala.collection.immutable.Set<String> tmpImmutableTopicsSet = tmpMutableTopicsSet.toSet();
scala.collection.immutable.Set<TopicAndPartition> tpScalaSet = kafkaCluster.getPartitions(tmpImmutableTopicsSet)
.right().get();
// --- Seed starting offsets: committed offsets if present, else 0.
// NOTE(review): getConsumerOffsets is called up to three times here; one call,
// stored in a local, would suffice (each call is a broker round trip).
Map<TopicAndPartition, Long> offsetMap = new HashMap<TopicAndPartition, Long>();
if (kafkaCluster.getConsumerOffsets(groupId, tpScalaSet).isLeft()) {
System.out.println(" left : " + kafkaCluster.getConsumerOffsets(groupId, tpScalaSet).left().get());
Set<TopicAndPartition> tmpSet = JavaConversions.setAsJavaSet(tpScalaSet);
for (TopicAndPartition topicAndPartition : tmpSet) {
// Starting at 0 ignores auto.offset.reset and may hit OffsetOutOfRange — TODO confirm.
offsetMap.put(topicAndPartition, 0L);
}
} else {
scala.collection.immutable.Map<TopicAndPartition, Object> scalaOffsetMap = kafkaCluster
.getConsumerOffsets(groupId, tpScalaSet).right().get();
Map<TopicAndPartition, Object> consumerOffsets = JavaConversions.mapAsJavaMap(scalaOffsetMap);
Set<TopicAndPartition> tmpSet = JavaConversions.setAsJavaSet(tpScalaSet);
for (TopicAndPartition topicAndPartition : tmpSet) {
Long offset = (Long) consumerOffsets.get(topicAndPartition);
offsetMap.put(topicAndPartition, offset);
System.out.println("first getOffset:" + topicAndPartition.partition() + ":" + offset);
}
}
// --- Direct stream from the seeded offsets; message handler keeps only the payload.
JavaInputDStream<String> jid = KafkaUtils.createDirectStream(jssc, String.class, String.class,
StringDecoder.class, StringDecoder.class, String.class, kafkaParams, offsetMap,
new Function<kafka.message.MessageAndMetadata<String, String>, String>() {
private static final long serialVersionUID = -6590667828252772663L;
@Override
public String call(MessageAndMetadata<String, String> arg0) throws Exception {
return arg0.message();
}
});
// Executor-side: print each record.
final VoidFunction<String> func0 = new VoidFunction<String>() {
private static final long serialVersionUID = -2520206838533422786L;
@Override
public void call(String arg0) throws Exception {
System.out.println(arg0);
}
};
// Driver-side, once per batch: commit each partition's untilOffset, then print records.
VoidFunction<JavaRDD<String>> func = new VoidFunction<JavaRDD<String>>() {
private static final long serialVersionUID = 7679681553001908774L;
@Override
public void call(JavaRDD<String> arg0) throws Exception {
OffsetRange[] offsets = ((HasOffsetRanges) arg0.rdd()).offsetRanges();
// NOTE(review): one setConsumerOffsets call per partition; batching all
// partitions into a single map/call would be cheaper.
for (OffsetRange o : offsets) {
TopicAndPartition tp = new TopicAndPartition(o.topic(), o.partition());
Map<TopicAndPartition, Object> tpMap = new HashMap<TopicAndPartition, Object>();
tpMap.put(tp, o.untilOffset());
System.out.println("write getOffset:" + tp.partition() + ":" + o.untilOffset());
scala.collection.mutable.Map<TopicAndPartition, Object> tpScalaMap = JavaConversions
.mapAsScalaMap(tpMap);
scala.collection.immutable.Map<TopicAndPartition, Object> scalaOffsetMap = tpScalaMap.toMap(
new Predef.$less$colon$less<Tuple2<TopicAndPartition, Object>, Tuple2<TopicAndPartition, Object>>() {
private static final long serialVersionUID = -3520206838533422786L;
public Tuple2<TopicAndPartition, Object> apply(Tuple2<TopicAndPartition, Object> v1) {
return v1;
}
});
kafkaCluster.setConsumerOffsets(groupId, scalaOffsetMap);
}
arg0.foreach(func0);
}
};
jid.foreachRDD(func);
jssc.start();
jssc.awaitTermination();
}
}
0 0
- createDirectStream 实现offset管理
- Spark Kafka(createDirectStream)自己管理offset
- spark createDirectStream保存kafka offset(JAVA实现)
- spark createDirectStream保存kafka offset(JAVA实现)
- spark createDirectStream保存kafka offset(JAVA实现)
- spark createDirectStream保存kafka offset(JAVA实现)
- spark createDirectStream保存kafka offset(JAVA实现)
- Spark Streaming createDirectStream保存kafka offset(JAVA实现)
- Kafka AdminClient 管理Kafka Offset代码实现
- sparkstreaming直连kafka Java实现 自己管理offset
- spark streaming 实现kafka的createDirectStream方式!!不坑
- KafkaUtils.createDirectStream
- offset
- offset
- offset
- Kafka 0.8.2 新的offset管理
- scala版本kafka createDirectStream
- java版本kafka createDirectStream
- linux zookeeper3.4.9集群搭建图文详解
- postgis初探
- 数据库<7>
- 服务器上安装tmux
- RequireJs初步了解和使用
- createDirectStream 实现offset管理
- 数据结构实验——基于数组表的实验
- Android 关于七牛的上传图片遇到的问题
- 87.ajax提交 后台返回中文乱码问题
- 欢迎使用CSDN-markdown编辑器
- POJ 3984 迷宫问题(记录路径的搜索)
- error:crosses initialization of ...的解决办法
- Python基础入门(十六)-迭代
- 6年java工程师心路分享-欢迎交流