Implementing Kafka offset management with createDirectStream

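The listing below pairs Spark Streaming's direct Kafka API (the spark-streaming-kafka 0.8 integration) with its KafkaCluster helper to manage offsets by hand: on startup it reads the consumer group's committed offsets to seed createDirectStream, and after every batch it writes the processed offset ranges back, so the job picks up where it left off after a restart.
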
import java.util.HashMap;
import java.util.HashSet;
import java.util.Map;
import java.util.Set;

import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.function.Function;
import org.apache.spark.api.java.function.VoidFunction;
import org.apache.spark.streaming.Durations;
import org.apache.spark.streaming.api.java.JavaInputDStream;
import org.apache.spark.streaming.api.java.JavaStreamingContext;
import org.apache.spark.streaming.kafka.HasOffsetRanges;
import org.apache.spark.streaming.kafka.KafkaCluster;
import org.apache.spark.streaming.kafka.KafkaUtils;
import org.apache.spark.streaming.kafka.OffsetRange;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import kafka.common.TopicAndPartition;
import kafka.message.MessageAndMetadata;
import kafka.serializer.StringDecoder;
import scala.Predef;
import scala.Tuple2;
import scala.collection.JavaConversions;

public class MainBak2_ {

    private static Logger logger = LoggerFactory.getLogger(MainBak2_.class);

    // KafkaCluster wraps the low-level broker metadata/offset API that Spark's
    // direct stream uses internally.
    private static KafkaCluster kafkaCluster = null;

    public static void main(String[] args) {

        // ConfigMgr is the author's own configuration helper (assumed on the classpath).
        String zookeeper = ConfigMgr.getZookeeper();
        final String groupId = ConfigMgr.getKakfaGroupId();
        String topicStr = ConfigMgr.getKafkaTopic();

        String sparkAppName = ConfigMgr.getSparkAppName();
        String master = ConfigMgr.getSparkMaster();
        int duration = ConfigMgr.getSparkDuration();
        String metadataBrokerList = ConfigMgr.getProByKey("metadata.broker.list");
        String autoOffsetReset = ConfigMgr.getProByKey("auto.offset.reset");

        logger.info("zk===========================" + zookeeper);
        logger.info("groupId======================" + groupId);
        logger.info("topic========================" + topicStr);
        logger.info("sparkAppName=================" + sparkAppName);
        logger.info("master=======================" + master);
        logger.info("duration=====================" + duration);
        logger.info("metadataBrokerList===========" + metadataBrokerList);

        SparkConf sparkConf = new SparkConf().setAppName(sparkAppName).setMaster(master);
        JavaStreamingContext jssc = new JavaStreamingContext(sparkConf, Durations.seconds(duration));

        // Topics arrive as a comma-separated list in the config.
        Set<String> topicSet = new HashSet<String>();
        for (String topic : topicStr.split(",")) {
            topicSet.add(topic);
        }

        Map<String, String> kafkaParams = new HashMap<String, String>();
        kafkaParams.put("group.id", groupId);
        kafkaParams.put("metadata.broker.list", metadataBrokerList);
        kafkaParams.put("auto.offset.reset", autoOffsetReset);

        // KafkaCluster is a Scala class, so the Java map must be converted to an
        // immutable scala.collection.immutable.Map. The Predef.$less$colon$less
        // (Scala's <:<) anonymous class is the identity evidence that toMap() requires.
        scala.collection.mutable.Map<String, String> tmpScalaKafkaMap = JavaConversions.mapAsScalaMap(kafkaParams);
        scala.collection.immutable.Map<String, String> scalaKafkaParams = tmpScalaKafkaMap
                .toMap(new Predef.$less$colon$less<Tuple2<String, String>, Tuple2<String, String>>() {
                    private static final long serialVersionUID = 1L;

                    public Tuple2<String, String> apply(Tuple2<String, String> v1) {
                        return v1;
                    }
                });

        kafkaCluster = new KafkaCluster(scalaKafkaParams);

        // Look up every partition of the requested topics from broker metadata.
        scala.collection.mutable.Set<String> tmpMutableTopicsSet = JavaConversions.asScalaSet(topicSet);
        scala.collection.immutable.Set<String> tmpImmutableTopicsSet = tmpMutableTopicsSet.toSet();
        scala.collection.immutable.Set<TopicAndPartition> tpScalaSet = kafkaCluster.getPartitions(tmpImmutableTopicsSet)
                .right().get();
        Map<TopicAndPartition, Long> offsetMap = new HashMap<TopicAndPartition, Long>();

        // Left means the group has never committed offsets for these partitions;
        // fall back to offset 0. Right returns the offsets stored in ZooKeeper.
        if (kafkaCluster.getConsumerOffsets(groupId, tpScalaSet).isLeft()) {
            System.out.println(" left : " + kafkaCluster.getConsumerOffsets(groupId, tpScalaSet).left().get());
            Set<TopicAndPartition> tmpSet = JavaConversions.setAsJavaSet(tpScalaSet);
            for (TopicAndPartition topicAndPartition : tmpSet) {
                offsetMap.put(topicAndPartition, 0L);
            }
        } else {
            scala.collection.immutable.Map<TopicAndPartition, Object> scalaOffsetMap = kafkaCluster
                    .getConsumerOffsets(groupId, tpScalaSet).right().get();
            Map<TopicAndPartition, Object> consumerOffsets = JavaConversions.mapAsJavaMap(scalaOffsetMap);
            Set<TopicAndPartition> tmpSet = JavaConversions.setAsJavaSet(tpScalaSet);
            for (TopicAndPartition topicAndPartition : tmpSet) {
                Long offset = (Long) consumerOffsets.get(topicAndPartition);
                offsetMap.put(topicAndPartition, offset);
                System.out.println("first getOffset:" + topicAndPartition.partition() + ":" + offset);
            }
        }

        // Build the direct stream from the explicit per-partition start offsets;
        // the message handler reduces each record to its value.
        JavaInputDStream<String> jid = KafkaUtils.createDirectStream(jssc, String.class, String.class,
                StringDecoder.class, StringDecoder.class, String.class, kafkaParams, offsetMap,
                new Function<MessageAndMetadata<String, String>, String>() {

                    private static final long serialVersionUID = -6590667828252772663L;

                    @Override
                    public String call(MessageAndMetadata<String, String> arg0) throws Exception {
                        return arg0.message();
                    }
                });

        final VoidFunction<String> func0 = new VoidFunction<String>() {

            private static final long serialVersionUID = -2520206838533422786L;

            @Override
            public void call(String arg0) throws Exception {
                System.out.println(arg0);
            }
        };

        VoidFunction<JavaRDD<String>> func = new VoidFunction<JavaRDD<String>>() {
            private static final long serialVersionUID = 7679681553001908774L;

            @Override
            public void call(JavaRDD<String> arg0) throws Exception {
                // Every RDD produced by the direct stream carries the offset
                // ranges it was built from.
                OffsetRange[] offsets = ((HasOffsetRanges) arg0.rdd()).offsetRanges();
                for (OffsetRange o : offsets) {
                    TopicAndPartition tp = new TopicAndPartition(o.topic(), o.partition());
                    Map<TopicAndPartition, Object> tpMap = new HashMap<TopicAndPartition, Object>();
                    tpMap.put(tp, o.untilOffset());
                    System.out.println("write getOffset:" + tp.partition() + ":" + o.untilOffset());
                    scala.collection.mutable.Map<TopicAndPartition, Object> tpScalaMap = JavaConversions
                            .mapAsScalaMap(tpMap);
                    scala.collection.immutable.Map<TopicAndPartition, Object> scalaOffsetMap = tpScalaMap.toMap(
                            new Predef.$less$colon$less<Tuple2<TopicAndPartition, Object>, Tuple2<TopicAndPartition, Object>>() {

                                private static final long serialVersionUID = -3520206838533422786L;

                                public Tuple2<TopicAndPartition, Object> apply(Tuple2<TopicAndPartition, Object> v1) {
                                    return v1;
                                }
                            });

                    // Write untilOffset back to ZooKeeper so a restart resumes
                    // after the last batch handed to this function.
                    kafkaCluster.setConsumerOffsets(groupId, scalaOffsetMap);
                }

                arg0.foreach(func0);
            }
        };

        jid.foreachRDD(func);

        jssc.start();
        jssc.awaitTermination();
    }
}
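One caveat: because the listing always passes explicit fromOffsets, the auto.offset.reset value it puts into kafkaParams is never consulted, and a partition with no committed offset starts from the hard-coded 0L, which fails with an offset-out-of-range error once Kafka's retention has deleted the head of the log. A minimal sketch of an alternative for the no-committed-offsets branch, assuming the same jssc, kafkaParams, and topicSet variables as above (JavaPairInputDStream comes from org.apache.spark.streaming.api.java):

// Sketch, not from the original post: when getConsumerOffsets returns Left
// (nothing committed yet), build the stream without explicit offsets. This
// overload starts each partition according to auto.offset.reset
// ("smallest" or "largest") instead of a hard-coded 0L.
JavaPairInputDStream<String, String> fallback = KafkaUtils.createDirectStream(
        jssc, String.class, String.class, StringDecoder.class, StringDecoder.class,
        kafkaParams, topicSet);

Records arrive as (key, value) pairs in this variant, so the downstream foreachRDD would read tuple._2() rather than the value that the messageHandler extracts in the listing.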
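Also note the commit ordering: setConsumerOffsets runs inside the offset loop, before arg0.foreach(func0) actually processes the batch, so a crash between the two steps skips those records (at-most-once delivery). Committing only after the foreach succeeds trades that for possible reprocessing on failure (at-least-once); exactly-once additionally requires writing offsets and results together in one atomic operation.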