kafka+ss create stream
来源:互联网 发布:java鱼雷公路车 编辑:程序博客网 时间:2024/05/17 09:05
createStream 只在一个 executor 上启动多个(数量可配置)receiver 从 Kafka 拉数据,然后分发给其他 executor 执行。这一点通过 thread dump 得到验证。
createDirectStream 每个 executor 都会直接从 Kafka 拉数据,每个 executor 对应 Kafka 的一个分区。这一点可以通过在 Kafka 单个节点上执行 iftop -n -i em1 观察到;thread dump 中没有发现 receiver 线程。
createStream==========================================================================================
// --- Receiver-based createStream example ---
// A single executor hosts the configured number of receiver threads that pull
// from Kafka via ZooKeeper; records are then distributed to other executors.
SparkConf sparkConf = new SparkConf().setAppName(sparkAppName).setMaster(master);
JavaStreamingContext jssc = new JavaStreamingContext(sparkConf, Durations.seconds(duration));
// NOTE(review): the original also built a kafkaParams map (group.id,
// metadata.broker.list, auto.offset.reset) here, but the receiver-based
// createStream connects through ZooKeeper and never read it; the unused
// local has been removed. Those parameters belong to the direct API below.
// topic -> number of consumer threads for the receiver.
Map<String, Integer> topicMap = new HashMap<String, Integer>();
for (String topic : topicStr.split(",")) {
    topicMap.put(topic, ConfigMgr.getIntByKey("spark.thread.num"));
}
// NOTE(review): group id comes from ConfigMgr here while the direct-stream
// examples use the local groupId variable — confirm they are meant to differ.
JavaPairReceiverInputDStream<String, String> lines = KafkaUtils.createStream(jssc, zookeeper,
        ConfigMgr.getKakfaGroupId(), topicMap);
// Per-batch handler; executed on the driver for each RDD of the stream.
VoidFunction<JavaPairRDD<String, String>> func = new VoidFunction<JavaPairRDD<String, String>>() {
    private static final long serialVersionUID = -7821297251721419326L;

    @Override
    public void call(JavaPairRDD<String, String> rdd) throws Exception {
        // Fetch the logger locally: slf4j Logger is not Serializable, so the
        // original instance field (captured by the inner function through the
        // enclosing instance) would throw NotSerializableException when Spark
        // ships this closure to executors. getLogger() is cached by slf4j.
        final Logger driverLog = LoggerFactory.getLogger(VoidFunction.class);
        try {
            rdd.foreach(new VoidFunction<Tuple2<String, String>>() {
                private static final long serialVersionUID = -8745159565584246451L;

                @Override
                public void call(Tuple2<String, String> record) throws Exception {
                    // Executor-side logger, resolved after deserialization.
                    Logger log = LoggerFactory.getLogger(VoidFunction.class);
                    try {
                        // record._2 is the Kafka message value.
                        execute(record._2);
                    } catch (Exception e) {
                        log.error(e.getMessage(), e);
                    }
                }
            });
        } catch (Exception e) {
            driverLog.error(e.getMessage(), e);
        }
    }
};
lines.foreachRDD(func);
jssc.start();
jssc.awaitTermination();
createDirectStream==========================================================================================
// --- Receiver-less createDirectStream example ---
// Every executor pulls directly from its assigned Kafka partition(s).
SparkConf sparkConf = new SparkConf().setAppName(sparkAppName).setMaster(master);
JavaStreamingContext jssc = new JavaStreamingContext(sparkConf, Durations.seconds(duration));
// Topics to subscribe to (comma-separated input).
Set<String> topicSet = new HashSet<String>();
for (String topic : topicStr.split(",")) {
    topicSet.add(topic);
}
// Direct-API consumer parameters: brokers, group and offset-reset policy.
Map<String, String> kafkaParams = new HashMap<String, String>();
kafkaParams.put("group.id", groupId);
kafkaParams.put("metadata.broker.list", metadataBrokerList);
kafkaParams.put("auto.offset.reset", autoOffsetReset);
JavaPairInputDStream<String, String> pairInput = KafkaUtils.createDirectStream(jssc, String.class, String.class,
        StringDecoder.class, StringDecoder.class, kafkaParams, topicSet);
// Per-batch handler; executed on the driver for each RDD of the stream.
VoidFunction<JavaPairRDD<String, String>> func = new VoidFunction<JavaPairRDD<String, String>>() {
    private static final long serialVersionUID = -7821297251721419326L;

    @Override
    public void call(JavaPairRDD<String, String> rdd) throws Exception {
        // Fetch the logger locally: slf4j Logger is not Serializable, so the
        // original instance field (captured by the inner function through the
        // enclosing instance) would throw NotSerializableException when Spark
        // ships this closure to executors. getLogger() is cached by slf4j.
        final Logger driverLog = LoggerFactory.getLogger(VoidFunction.class);
        try {
            rdd.foreach(new VoidFunction<Tuple2<String, String>>() {
                private static final long serialVersionUID = -8745159565584246451L;

                @Override
                public void call(Tuple2<String, String> record) throws Exception {
                    Logger log = LoggerFactory.getLogger(VoidFunction.class);
                    try {
                        // Demo sink: print the Kafka message value.
                        System.out.println(record._2);
                    } catch (Exception e) {
                        log.error(e.getMessage(), e);
                    }
                }
            });
        } catch (Exception e) {
            driverLog.error(e.getMessage(), e);
        }
    }
};
pairInput.foreachRDD(func);
jssc.start();
jssc.awaitTermination();
createDirectStream + custom offset==========================================================================================
// --- createDirectStream starting from explicit per-partition offsets ---
SparkConf sparkConf = new SparkConf().setAppName(sparkAppName).setMaster(master);
JavaStreamingContext jssc = new JavaStreamingContext(sparkConf, Durations.seconds(duration));
// NOTE(review): the original also built a topicSet here, but the
// fromOffsets overload of createDirectStream derives the topics from the
// TopicAndPartition keys and never read it; the unused local was removed.
// Direct-API consumer parameters: brokers, group and offset-reset policy.
Map<String, String> kafkaParams = new HashMap<String, String>();
kafkaParams.put("group.id", groupId);
kafkaParams.put("metadata.broker.list", metadataBrokerList);
kafkaParams.put("auto.offset.reset", autoOffsetReset);
// Hard-coded starting offsets, one entry per partition of the topic.
long[] off = new long[] { 3316538, 2767422, 3332371, 3330540, 3863203, 3315774, 3867953, 3328188, 3325543,
        3892565 };
Map<TopicAndPartition, Long> fromOffsets = new HashMap<TopicAndPartition, Long>();
// Bound by off.length (was a hard-coded 10) so the map stays in sync with
// the offset array if the partition count changes.
for (int i = 0; i < off.length; i++) {
    fromOffsets.put(new TopicAndPartition(topicStr, i), off[i]);
}
// messageHandler projects each Kafka record to its message value.
JavaInputDStream<String> jid = KafkaUtils.createDirectStream(jssc, String.class, String.class,
        StringDecoder.class, StringDecoder.class, String.class, kafkaParams, fromOffsets,
        new Function<kafka.message.MessageAndMetadata<String, String>, String>() {
            private static final long serialVersionUID = -6590667828252772663L;

            @Override
            public String call(MessageAndMetadata<String, String> mm) throws Exception {
                return mm.message();
            }
        });
// Per-record sink: parse JSON and hand off to the processing component.
final VoidFunction<String> func0 = new VoidFunction<String>() {
    private static final long serialVersionUID = -2520206838533422786L;

    @Override
    public void call(String json) throws Exception {
        tugBoat.execute(JSONObject.parseObject(json));
    }
};
// Per-batch handler applying func0 to every record of the RDD.
VoidFunction<JavaRDD<String>> func = new VoidFunction<JavaRDD<String>>() {
    private static final long serialVersionUID = 7679681553001908774L;

    @Override
    public void call(JavaRDD<String> rdd) throws Exception {
        rdd.foreach(func0);
    }
};
jid.foreachRDD(func);
jssc.start();
jssc.awaitTermination();
createDirectStream 每个 executor 都会直接从 Kafka 拉数据,每个 executor 对应 Kafka 的一个分区。这一点可以通过在 Kafka 单个节点上执行 iftop -n -i em1 观察到;thread dump 中没有发现 receiver 线程。
createStream==========================================================================================
// --- Receiver-based createStream example ---
// A single executor hosts the configured number of receiver threads that pull
// from Kafka via ZooKeeper; records are then distributed to other executors.
SparkConf sparkConf = new SparkConf().setAppName(sparkAppName).setMaster(master);
JavaStreamingContext jssc = new JavaStreamingContext(sparkConf, Durations.seconds(duration));
// NOTE(review): the original also built a kafkaParams map (group.id,
// metadata.broker.list, auto.offset.reset) here, but the receiver-based
// createStream connects through ZooKeeper and never read it; the unused
// local has been removed. Those parameters belong to the direct API below.
// topic -> number of consumer threads for the receiver.
Map<String, Integer> topicMap = new HashMap<String, Integer>();
for (String topic : topicStr.split(",")) {
    topicMap.put(topic, ConfigMgr.getIntByKey("spark.thread.num"));
}
// NOTE(review): group id comes from ConfigMgr here while the direct-stream
// examples use the local groupId variable — confirm they are meant to differ.
JavaPairReceiverInputDStream<String, String> lines = KafkaUtils.createStream(jssc, zookeeper,
        ConfigMgr.getKakfaGroupId(), topicMap);
// Per-batch handler; executed on the driver for each RDD of the stream.
VoidFunction<JavaPairRDD<String, String>> func = new VoidFunction<JavaPairRDD<String, String>>() {
    private static final long serialVersionUID = -7821297251721419326L;

    @Override
    public void call(JavaPairRDD<String, String> rdd) throws Exception {
        // Fetch the logger locally: slf4j Logger is not Serializable, so the
        // original instance field (captured by the inner function through the
        // enclosing instance) would throw NotSerializableException when Spark
        // ships this closure to executors. getLogger() is cached by slf4j.
        final Logger driverLog = LoggerFactory.getLogger(VoidFunction.class);
        try {
            rdd.foreach(new VoidFunction<Tuple2<String, String>>() {
                private static final long serialVersionUID = -8745159565584246451L;

                @Override
                public void call(Tuple2<String, String> record) throws Exception {
                    // Executor-side logger, resolved after deserialization.
                    Logger log = LoggerFactory.getLogger(VoidFunction.class);
                    try {
                        // record._2 is the Kafka message value.
                        execute(record._2);
                    } catch (Exception e) {
                        log.error(e.getMessage(), e);
                    }
                }
            });
        } catch (Exception e) {
            driverLog.error(e.getMessage(), e);
        }
    }
};
lines.foreachRDD(func);
jssc.start();
jssc.awaitTermination();
createDirectStream==========================================================================================
// --- Receiver-less createDirectStream example ---
// Every executor pulls directly from its assigned Kafka partition(s).
SparkConf sparkConf = new SparkConf().setAppName(sparkAppName).setMaster(master);
JavaStreamingContext jssc = new JavaStreamingContext(sparkConf, Durations.seconds(duration));
// Topics to subscribe to (comma-separated input).
Set<String> topicSet = new HashSet<String>();
for (String topic : topicStr.split(",")) {
    topicSet.add(topic);
}
// Direct-API consumer parameters: brokers, group and offset-reset policy.
Map<String, String> kafkaParams = new HashMap<String, String>();
kafkaParams.put("group.id", groupId);
kafkaParams.put("metadata.broker.list", metadataBrokerList);
kafkaParams.put("auto.offset.reset", autoOffsetReset);
JavaPairInputDStream<String, String> pairInput = KafkaUtils.createDirectStream(jssc, String.class, String.class,
        StringDecoder.class, StringDecoder.class, kafkaParams, topicSet);
// Per-batch handler; executed on the driver for each RDD of the stream.
VoidFunction<JavaPairRDD<String, String>> func = new VoidFunction<JavaPairRDD<String, String>>() {
    private static final long serialVersionUID = -7821297251721419326L;

    @Override
    public void call(JavaPairRDD<String, String> rdd) throws Exception {
        // Fetch the logger locally: slf4j Logger is not Serializable, so the
        // original instance field (captured by the inner function through the
        // enclosing instance) would throw NotSerializableException when Spark
        // ships this closure to executors. getLogger() is cached by slf4j.
        final Logger driverLog = LoggerFactory.getLogger(VoidFunction.class);
        try {
            rdd.foreach(new VoidFunction<Tuple2<String, String>>() {
                private static final long serialVersionUID = -8745159565584246451L;

                @Override
                public void call(Tuple2<String, String> record) throws Exception {
                    Logger log = LoggerFactory.getLogger(VoidFunction.class);
                    try {
                        // Demo sink: print the Kafka message value.
                        System.out.println(record._2);
                    } catch (Exception e) {
                        log.error(e.getMessage(), e);
                    }
                }
            });
        } catch (Exception e) {
            driverLog.error(e.getMessage(), e);
        }
    }
};
pairInput.foreachRDD(func);
jssc.start();
jssc.awaitTermination();
createDirectStream + custom offset==========================================================================================
// --- createDirectStream starting from explicit per-partition offsets ---
SparkConf sparkConf = new SparkConf().setAppName(sparkAppName).setMaster(master);
JavaStreamingContext jssc = new JavaStreamingContext(sparkConf, Durations.seconds(duration));
// NOTE(review): the original also built a topicSet here, but the
// fromOffsets overload of createDirectStream derives the topics from the
// TopicAndPartition keys and never read it; the unused local was removed.
// Direct-API consumer parameters: brokers, group and offset-reset policy.
Map<String, String> kafkaParams = new HashMap<String, String>();
kafkaParams.put("group.id", groupId);
kafkaParams.put("metadata.broker.list", metadataBrokerList);
kafkaParams.put("auto.offset.reset", autoOffsetReset);
// Hard-coded starting offsets, one entry per partition of the topic.
long[] off = new long[] { 3316538, 2767422, 3332371, 3330540, 3863203, 3315774, 3867953, 3328188, 3325543,
        3892565 };
Map<TopicAndPartition, Long> fromOffsets = new HashMap<TopicAndPartition, Long>();
// Bound by off.length (was a hard-coded 10) so the map stays in sync with
// the offset array if the partition count changes.
for (int i = 0; i < off.length; i++) {
    fromOffsets.put(new TopicAndPartition(topicStr, i), off[i]);
}
// messageHandler projects each Kafka record to its message value.
JavaInputDStream<String> jid = KafkaUtils.createDirectStream(jssc, String.class, String.class,
        StringDecoder.class, StringDecoder.class, String.class, kafkaParams, fromOffsets,
        new Function<kafka.message.MessageAndMetadata<String, String>, String>() {
            private static final long serialVersionUID = -6590667828252772663L;

            @Override
            public String call(MessageAndMetadata<String, String> mm) throws Exception {
                return mm.message();
            }
        });
// Per-record sink: parse JSON and hand off to the processing component.
final VoidFunction<String> func0 = new VoidFunction<String>() {
    private static final long serialVersionUID = -2520206838533422786L;

    @Override
    public void call(String json) throws Exception {
        tugBoat.execute(JSONObject.parseObject(json));
    }
};
// Per-batch handler applying func0 to every record of the RDD.
VoidFunction<JavaRDD<String>> func = new VoidFunction<JavaRDD<String>>() {
    private static final long serialVersionUID = 7679681553001908774L;

    @Override
    public void call(JavaRDD<String> rdd) throws Exception {
        rdd.foreach(func0);
    }
};
jid.foreachRDD(func);
jssc.start();
jssc.awaitTermination();
0 0
- kafka+ss create stream
- Kafka Stream
- kafka stream
- ss+kafka 小坑
- Java8-Stream Create Example
- Kafka Stream介绍
- Kafka Stream介绍(—)
- kafka Stream概念
- kafka Stream的Wordcount
- Spring Cloud Stream + Kafka
- Kafka设计解析(七)- Kafka Stream
- kafka stream 简易示例代码
- kafka stream实现wordcount计数
- Kafka and Samza: Real-time stream processing
- Introducing Kafka Streams: Stream Processing Made Simple
- Kafka + spark stream +redis (createStream + createDirectStream)
- Spring Cloud Stream Binder Kafka Monitor
- ss
- C++ STL中容器的使用全面总结
- CSS3 background-size图片自适应
- Phoenix 配置
- Linux常用命令
- TextView跑马灯与EditText共存失效
- kafka+ss create stream
- spring国际化
- html文件中img的路径
- 第三天:c++中的cplex设计(初级)
- React 15.5.0更新
- 《挑战程序设计竞赛》2.3 记录结果再利用的动态规划(待续)
- VS中调试DLL工程的正确方法[转]
- Yii2.0 使用验证码
- Codeforces Round #408 (Div. 2) D. Police Stations