SparkStreaming整合Kafka2
来源:互联网 发布:电脑摇号软件 编辑:程序博客网 时间:2024/06/06 05:19
package com.uplooking.bigdata.streaming.p2;
import kafka.serializer.StringDecoder;
import org.apache.spark.SparkConf;
import org.apache.spark.streaming.Durations;
import org.apache.spark.streaming.api.java.JavaDStream;
import org.apache.spark.streaming.api.java.JavaPairDStream;
import org.apache.spark.streaming.api.java.JavaPairInputDStream;
import org.apache.spark.streaming.api.java.JavaStreamingContext;
import org.apache.spark.streaming.kafka.KafkaUtils;
import scala.Tuple2;
import java.util.*;
/**
* Created by thinkpad on 2017/4/21.
*/
public class JavaSparkStreamingDirectOps {

    /**
     * Streaming word count over a Kafka topic using the direct
     * (receiver-less) approach: Spark Streaming pulls data from the Kafka
     * brokers itself and tracks consumed offsets internally, giving
     * exactly-once consumption without ZooKeeper offset bookkeeping. To
     * recover offsets after a driver restart, a StreamingContext checkpoint
     * would need to be enabled (see the commented-out line below).
     */
    public static void main(String[] args) {
        // Two local threads; micro-batch interval of two seconds.
        SparkConf conf = new SparkConf()
                .setMaster("local[2]")
                .setAppName(JavaSparkStreamingDirectOps.class.getSimpleName());
        JavaStreamingContext streamingContext =
                new JavaStreamingContext(conf, Durations.seconds(2));
        // streamingContext.checkpoint("E:/test/spark/streaming/ck1");

        // Broker list (NOT ZooKeeper), host1:port1,host2:port2 form.
        Map<String, String> kafkaParams = new HashMap<>();
        kafkaParams.put("bootstrap.servers", "master:9092,slave01:9092,slave02:9092");

        Set<String> topics = new HashSet<>();
        topics.add("spark-kafka");

        /*
         * NOTE(review): a ClassCastException
         * "kafka.cluster.BrokerEndPoint cannot be cast to kafka.cluster.Broker"
         * indicates a Kafka version mismatch — Spark Streaming 1.6.2 is built
         * against Kafka 0.8.2.1.
         */
        JavaPairInputDStream<String, String> messages =
                KafkaUtils.createDirectStream(
                        streamingContext,
                        String.class,
                        String.class,
                        StringDecoder.class,
                        StringDecoder.class,
                        kafkaParams,
                        topics
                );

        // Split each record's value into space-separated words,
        // pair every word with a count of one, then sum per word per batch.
        JavaDStream<String> words =
                messages.flatMap(record -> Arrays.asList(record._2().split(" ")));
        JavaPairDStream<String, Integer> ones =
                words.mapToPair(word -> new Tuple2<String, Integer>(word, 1));
        JavaPairDStream<String, Integer> counts = ones.reduceByKey(Integer::sum);

        counts.print();
        streamingContext.start();
        streamingContext.awaitTermination();
    }
}
import kafka.serializer.StringDecoder;
import org.apache.spark.SparkConf;
import org.apache.spark.streaming.Durations;
import org.apache.spark.streaming.api.java.JavaDStream;
import org.apache.spark.streaming.api.java.JavaPairDStream;
import org.apache.spark.streaming.api.java.JavaPairInputDStream;
import org.apache.spark.streaming.api.java.JavaStreamingContext;
import org.apache.spark.streaming.kafka.KafkaUtils;
import scala.Tuple2;
import java.util.*;
/**
 * NOTE(review): this is a duplicated paste of JavaSparkStreamingDirectOps —
 * the identical top-level class is defined earlier in this file. Java forbids
 * two top-level classes with the same name in one compilation unit, so this
 * copy makes the file uncompilable and should be deleted.
 *
 * Created by thinkpad on 2017/4/21.
 */
public class JavaSparkStreamingDirectOps {
// Entry point: wires a direct Kafka stream into a streaming word count.
public static void main(String[] args) {
SparkConf conf = new SparkConf();
// Two local threads: at least one for receiving, one for processing.
conf.setMaster("local[2]");
conf.setAppName(JavaSparkStreamingDirectOps.class.getSimpleName());
// Two-second micro-batch interval.
JavaStreamingContext jsc = new JavaStreamingContext(conf, Durations.seconds(2));
// jsc.checkpoint("E:/test/spark/streaming/ck1");
/**
 * Direct approach: Spark Streaming pulls data from Kafka itself and tracks
 * the consumed offsets internally, giving exactly-once consumption without
 * ZooKeeper offset bookkeeping. To recover the correct offsets after a
 * driver restart, a StreamingContext checkpoint must be enabled (see the
 * commented-out line above).
 *
 * createDirectStream expects (per the Spark 1.6 API docs): the streaming
 * context, key/value classes, key/value decoder classes, kafkaParams
 * (requires "metadata.broker.list" or "bootstrap.servers" pointing at
 * Kafka brokers — NOT ZooKeeper — in host1:port1,host2:port2 form;
 * "auto.offset.reset" may be "largest" (default) or "smallest"), and the
 * set of topic names. It returns a DStream of (message key, message value).
 */
Map<String, String> kafkaParams = new HashMap<>();
kafkaParams.put("bootstrap.servers", "master:9092,slave01:9092,slave02:9092");
Set<String> topics = new HashSet<>();
topics.add("spark-kafka");
/**
 * NOTE: a ClassCastException
 * "kafka.cluster.BrokerEndPoint cannot be cast to kafka.cluster.Broker"
 * indicates a Kafka version mismatch — Spark Streaming 1.6.2 is built
 * against Kafka 0.8.2.1, so other broker versions may not be compatible.
 */
JavaPairInputDStream<String, String> kafkaDStream =
KafkaUtils.createDirectStream(
jsc,
String.class,
String.class,
StringDecoder.class,
StringDecoder.class,
kafkaParams,
topics
);
// Split each record's value (t._2()) into space-separated words.
JavaDStream<String> wordsDStream = kafkaDStream.flatMap(t -> {
return Arrays.asList(t._2().split(" "));
});
// Pair each word with a count of one.
JavaPairDStream<String, Integer> pairDStream = wordsDStream.mapToPair(word -> {
return new Tuple2<String, Integer>(word, 1);
});
// Sum the counts per word within each micro-batch.
JavaPairDStream<String, Integer> retDStream = pairDStream.reduceByKey((v1, v2) -> {
return v1 + v2;
});
retDStream.print();
jsc.start();
jsc.awaitTermination();
}
}
0 0
- SparkStreaming整合Kafka2
- kafka2:性能优化
- kafka2:性能优化
- centos6.5 安装kafka2.11
- Kafka2.10<使用JavaAPI 批量发送生产数据>
- 整合
- 整合
- 整合
- 整合
- 整合
- 整合
- kafka2.9.2的伪分布式集群安装和demo(java api)测试
- kafka2.9.2的分布式集群安装和demo(java api)测试
- kafka2.9.2的分布式集群安装和demo(java api)测试
- kafka2.9.2的分布式集群安装和demo(java api)测试
- Kafka安装配置及简单通道传输操作(kafka2.9.2)
- kafka2.9.2的伪分布式集群安装和demo(java api)测试
- kafka2.9.2的分布式集群安装和demo(java api)测试
- Adaboost算法
- Maven 各种报错解决方案
- leetcode148 sort list
- Linux中rz和sz命令用法详解
- 过滤器,拦截器,监听器的定义及区别
- SparkStreaming整合Kafka2
- 【每日一个Linux命令】netstat
- js中获得当前时间是年份和月份,形如:201208
- rem,移动端适配心得2【转载】
- ECharts3使用入门
- java 垃圾回收机制详解
- 第十二周:( LeetCode576) Out of Boundary Paths(c++)
- 2004年-2016年软考系统分析师考试论文真题:
- usb-Transfer/Transaction/Packet