Kafka + Spark Streaming: createStream vs. createDirectStream

createStream: a single executor hosts the receiver, which spins up a (configurable) number of consumer threads to pull data from Kafka; the received blocks are then distributed to the other executors for processing. This was confirmed with a thread dump. (If that one receiver becomes the bottleneck, several receiver streams can be unioned; see the sketch after the createStream example below.)
createDirectStream: every executor pulls data from Kafka itself, one Kafka partition per executor. This can be observed by running iftop -n -i em1 on a single Kafka node; no receiver threads show up in a thread dump.


createStream==========================================================================================
// Shared imports for the three snippets below (Spark 1.x Streaming with the
// Kafka 0.8 integration; JSONObject is Alibaba fastjson).
import java.util.*;

import org.apache.spark.SparkConf;
import org.apache.spark.api.java.*;
import org.apache.spark.api.java.function.*;
import org.apache.spark.streaming.Durations;
import org.apache.spark.streaming.api.java.*;
import org.apache.spark.streaming.kafka.*;
import org.slf4j.*;

import com.alibaba.fastjson.JSONObject;
import kafka.common.TopicAndPartition;
import kafka.message.MessageAndMetadata;
import kafka.serializer.StringDecoder;
import scala.Tuple2;

SparkConf sparkConf = new SparkConf().setAppName(sparkAppName).setMaster(master);
JavaStreamingContext jssc = new JavaStreamingContext(sparkConf, Durations.seconds(duration));

// The receiver-based overload used here consumes via ZooKeeper, so it only
// needs the quorum, the group id, and the topic map; no kafkaParams with
// metadata.broker.list is involved.
Map<String, Integer> topicMap = new HashMap<String, Integer>();
for (String topic : topicStr.split(",")) {
    // The value is the number of consumer threads inside the receiver,
    // not the number of partitions Spark processes the stream with.
    topicMap.put(topic, ConfigMgr.getIntByKey("spark.thread.num"));
}

JavaPairReceiverInputDStream<String, String> lines =
        KafkaUtils.createStream(jssc, zookeeper, groupId, topicMap);


// Per-batch handler: iterate the RDD and hand each message value to the
// business logic, logging and skipping records that fail.
VoidFunction<JavaPairRDD<String, String>> func = new VoidFunction<JavaPairRDD<String, String>>() {
    private static final long serialVersionUID = -7821297251721419326L;

    private Logger logger = LoggerFactory.getLogger(VoidFunction.class);

    @Override
    public void call(JavaPairRDD<String, String> rdd) throws Exception {
        try {
            rdd.foreach(new VoidFunction<Tuple2<String, String>>() {
                private static final long serialVersionUID = -8745159565584246451L;

                @Override
                public void call(Tuple2<String, String> record) throws Exception {
                    try {
                        execute(record._2); // business logic on the message value
                    } catch (Exception e) {
                        logger.error(e.getMessage(), e);
                    }
                }
            });
        } catch (Exception e) {
            logger.error(e.getMessage(), e);
        }
    }
};

lines.foreachRDD(func);

jssc.start();
jssc.awaitTermination();
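
If the single receiver cannot keep up, the usual remedy is to create several receiver streams and union them, so that receivers run on several executors. A minimal sketch, assuming the jssc, zookeeper, groupId, topicMap, and func defined above (numReceivers is an illustrative value):

int numReceivers = 3; // illustrative; bounded by available executor cores
List<JavaPairDStream<String, String>> streams =
        new ArrayList<JavaPairDStream<String, String>>();
for (int i = 0; i < numReceivers; i++) {
    // Each call creates its own receiver; with the same group id the
    // receivers share the topic's partitions between them.
    streams.add(KafkaUtils.createStream(jssc, zookeeper, groupId, topicMap));
}
// Union into one DStream so the downstream processing stays unchanged.
JavaPairDStream<String, String> unioned =
        jssc.union(streams.get(0), streams.subList(1, streams.size()));
unioned.foreachRDD(func);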




createDirectStream==========================================================================================
SparkConf sparkConf = new SparkConf().setAppName(sparkAppName).setMaster(master);
JavaStreamingContext jssc = new JavaStreamingContext(sparkConf, Durations.seconds(duration));

Set<String> topicSet = new HashSet<String>(Arrays.asList(topicStr.split(",")));

// The direct stream talks to the brokers directly, so it takes kafkaParams
// with metadata.broker.list instead of a ZooKeeper quorum.
Map<String, String> kafkaParams = new HashMap<String, String>();
kafkaParams.put("group.id", groupId);
kafkaParams.put("metadata.broker.list", metadataBrokerList);
kafkaParams.put("auto.offset.reset", autoOffsetReset);

JavaPairInputDStream<String, String> pairInput = KafkaUtils.createDirectStream(jssc,
        String.class, String.class, StringDecoder.class, StringDecoder.class,
        kafkaParams, topicSet);


VoidFunction<JavaPairRDD<String, String>> func = new VoidFunction<JavaPairRDD<String, String>>() {
    private static final long serialVersionUID = -7821297251721419326L;

    private Logger logger = LoggerFactory.getLogger(VoidFunction.class);

    @Override
    public void call(JavaPairRDD<String, String> rdd) throws Exception {
        try {
            rdd.foreach(new VoidFunction<Tuple2<String, String>>() {
                private static final long serialVersionUID = -8745159565584246451L;

                @Override
                public void call(Tuple2<String, String> record) throws Exception {
                    try {
                        System.out.println(record._2);
                    } catch (Exception e) {
                        logger.error(e.getMessage(), e);
                    }
                }
            });
        } catch (Exception e) {
            logger.error(e.getMessage(), e);
        }
    }
};

pairInput.foreachRDD(func);

jssc.start();
jssc.awaitTermination();
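
Unlike the receiver-based approach, the direct stream does not commit consumed offsets anywhere on its own. When they are needed (monitoring, manual checkpointing), each batch's offset ranges can be read from the RDD on the driver. A minimal sketch against the pairInput stream above, registered before jssc.start() (HasOffsetRanges and OffsetRange come from the same spark-streaming-kafka package as KafkaUtils):

pairInput.foreachRDD(new VoidFunction<JavaPairRDD<String, String>>() {
    private static final long serialVersionUID = 1L;

    @Override
    public void call(JavaPairRDD<String, String> rdd) throws Exception {
        // The direct stream's RDDs know which Kafka offset ranges they cover.
        OffsetRange[] ranges = ((HasOffsetRanges) rdd.rdd()).offsetRanges();
        for (OffsetRange r : ranges) {
            System.out.println(r.topic() + "[" + r.partition() + "]: "
                    + r.fromOffset() + " -> " + r.untilOffset());
        }
    }
});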

createDirectStream + custom offset==========================================================================================
SparkConf sparkConf = new SparkConf().setAppName(sparkAppName).setMaster(master);
JavaStreamingContext jssc = new JavaStreamingContext(sparkConf, Durations.seconds(duration));

Map<String, String> kafkaParams = new HashMap<String, String>();
kafkaParams.put("group.id", groupId);
kafkaParams.put("metadata.broker.list", metadataBrokerList);
// With explicit fromOffsets below, auto.offset.reset has no effect here.
kafkaParams.put("auto.offset.reset", autoOffsetReset);

// Hard-coded starting offsets for a topic with 10 partitions; in practice
// these would be loaded from wherever the previous run persisted them.
long[] off = new long[] { 3316538, 2767422, 3332371, 3330540, 3863203, 3315774,
        3867953, 3328188, 3325543, 3892565 };

// fromOffsets fully determines the topics and partitions to consume, so no
// separate topic set is needed in this variant.
Map<TopicAndPartition, Long> fromOffsets = new HashMap<TopicAndPartition, Long>();
for (int i = 0; i < off.length; i++) {
    fromOffsets.put(new TopicAndPartition(topicStr, i), off[i]);
}


// The message handler maps each MessageAndMetadata to the declared record
// type (here: just the raw message value), giving a JavaInputDStream<String>.
JavaInputDStream<String> jid = KafkaUtils.createDirectStream(jssc, String.class,
        String.class, StringDecoder.class, StringDecoder.class, String.class,
        kafkaParams, fromOffsets,
        new Function<MessageAndMetadata<String, String>, String>() {
            private static final long serialVersionUID = -6590667828252772663L;

            @Override
            public String call(MessageAndMetadata<String, String> mam) throws Exception {
                return mam.message();
            }
        });

// Business handler: parse the JSON payload and hand it to the processor.
final VoidFunction<String> func0 = new VoidFunction<String>() {
    private static final long serialVersionUID = -2520206838533422786L;

    @Override
    public void call(String json) throws Exception {
        tugBoat.execute(JSONObject.parseObject(json));
    }
};

VoidFunction<JavaRDD<String>> func = new VoidFunction<JavaRDD<String>>() {
    private static final long serialVersionUID = 7679681553001908774L;

    @Override
    public void call(JavaRDD<String> rdd) throws Exception {
        rdd.foreach(func0);
    }
};

jid.foreachRDD(func);

jssc.start();
jssc.awaitTermination();
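
To close the loop, the hard-coded off[] array above can be replaced by offsets captured after each batch via HasOffsetRanges. A minimal sketch, assuming the jid stream and func0 from above, registered in place of jid.foreachRDD(func) before jssc.start(); the in-memory map only stands in for a durable store (ZooKeeper, a database, etc.):

final Map<TopicAndPartition, Long> offsetStore =
        Collections.synchronizedMap(new HashMap<TopicAndPartition, Long>());

jid.foreachRDD(new VoidFunction<JavaRDD<String>>() {
    private static final long serialVersionUID = 1L;

    @Override
    public void call(JavaRDD<String> rdd) throws Exception {
        // Capture the batch's offset ranges on the driver before processing.
        OffsetRange[] ranges = ((HasOffsetRanges) rdd.rdd()).offsetRanges();
        rdd.foreach(func0); // process the batch first ...
        for (OffsetRange r : ranges) { // ... then record how far we got (at-least-once)
            offsetStore.put(new TopicAndPartition(r.topic(), r.partition()), r.untilOffset());
        }
    }
});

On the next start, fromOffsets would be rebuilt from the durable copy of this map instead of the hard-coded array.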