Linux搭建Kafka+Spark实时处理系统
来源:互联网 发布:ubuntu mate 15.04 编辑:程序博客网 时间:2024/06/08 19:51
服务器要求:jdk-8u121-linux-x64.tar.gz、kafka_2.12-0.10.2.1.tgz、spark-1.3.1-bin-hadoop2-without-hive.tgz
1、生产者—SpringMVC+Kafka
1.1、准备工作
所需资源:kafka_2.10-0.8.2.2.jar、kafka-clients-0.10.0.0.jar,把这两个jar包导入到项目中
1.2、配置
关于生产者Kafka服务器的配置,如下:
bootstrap.servers=172.17.0.2:9092
acks=all
retries=3
batch.size=16384
linger.ms=1
buffer.memory=33554432
key.serializer=org.apache.kafka.common.serialization.StringSerializer
value.serializer=org.apache.kafka.common.serialization.StringSerializer
1.3、编写代码
1.3.1、线程池工厂
import java.util.concurrent.ExecutorService;import java.util.concurrent.Executors;import java.util.concurrent.ThreadFactory;public class ExecutorServiceFactory {private ExecutorService executors;private ExecutorServiceFactory() {}public static final ExecutorServiceFactory getInstance() { return KafkaProducerPoolHolder.instance; }private static class KafkaProducerPoolHolder {private static final ExecutorServiceFactory instance = new ExecutorServiceFactory();}public ExecutorService createScheduledThreadPool() {// CPU个数 int availableProcessors = Runtime.getRuntime().availableProcessors(); // 创建 executors = Executors.newScheduledThreadPool(availableProcessors * 10, new KafkaProducterThreadFactory()); return executors;}public ExecutorService createSingleThreadExecutor() { // 创建 executors = Executors.newSingleThreadExecutor(new KafkaProducterThreadFactory()); return executors; }public ExecutorService createCachedThreadPool() { // 创建 executors = Executors.newCachedThreadPool(new KafkaProducterThreadFactory()); return executors; }public ExecutorService createFixedThreadPool(int count) { // 创建 executors = Executors.newFixedThreadPool(count, new KafkaProducterThreadFactory()); return executors; }private class KafkaProducterThreadFactory implements ThreadFactory {@Overridepublic Thread newThread(Runnable runnable) {SecurityManager s = System.getSecurityManager();ThreadGroup group = (s != null) ? s.getThreadGroup() : Thread.currentThread().getThreadGroup(); Thread t = new Thread(group, runnable); return t;}}}1.3.2 、生产者线程池
import java.util.concurrent.Callable;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Future;

/**
 * Singleton wrapper around a fixed pool of two threads obtained from
 * {@link ExecutorServiceFactory}; tasks are delegated to that pool.
 */
public class KafkaProducerPool {

    private final ExecutorService executor;

    private KafkaProducerPool() {
        this.executor = ExecutorServiceFactory.getInstance().createFixedThreadPool(2);
    }

    /**
     * Returns the single shared pool instance.
     *
     * @return the singleton pool
     */
    public static final KafkaProducerPool getInstance() {
        return Holder.INSTANCE;
    }

    /** Holder class: the pool is created when the holder is first initialized. */
    private static class Holder {
        private static final KafkaProducerPool INSTANCE = new KafkaProducerPool();
    }

    /**
     * Shuts the pool down. Tasks that are already queued are still executed
     * before the pool exits.
     *
     * @author SHANHY
     */
    public void shutdown() {
        executor.shutdown();
    }

    /**
     * Submits a task to the pool.
     *
     * @param task the task to run
     * @return the future returned by the underlying executor
     * @author SHANHY
     */
    public Future<?> submit(Runnable task) {
        return executor.submit(task);
    }

    /**
     * Submits a value-returning task to the pool.
     *
     * @param task the task to run
     * @return the future returned by the underlying executor, through which
     *         the task's result can be obtained
     * @author SHANHY
     */
    public Future<?> submit(Callable<?> task) {
        return executor.submit(task);
    }

    /**
     * Hands a task to the pool without returning anything to the caller.
     *
     * @param task the task to run
     * @author SHANHY
     */
    public void execute(Runnable task) {
        executor.execute(task);
    }
}
1.3.3、生产者
import java.io.FileNotFoundException;import java.io.IOException;import java.io.InputStream;import java.util.ArrayList;import java.util.HashMap;import java.util.List;import java.util.Map;import java.util.Properties;import org.apache.kafka.clients.producer.Callback;import org.apache.kafka.clients.producer.KafkaProducer;import org.apache.kafka.clients.producer.Producer;import org.apache.kafka.clients.producer.ProducerRecord;import org.apache.kafka.clients.producer.RecordMetadata;import com.cda91.common.HttpJResopnse;public class KafkaProducerT implements Runnable {private static Producer<String, String> producer; private String topic = "kafka_producer_title";private static Map<String, List<Object>> messages;private static List<String> keys;private String key = null;private Object value = null;private static Properties props;private static final String productPro = "/kafka.properties";private KafkaProducterListener listener;private KafkaProducerT() {System.out.println("KafkaProducerT构造函数被调用");props = new Properties();messages = new HashMap<String, List<Object>>();keys = new ArrayList<String>();try {InputStream is = KafkaProducerT.class.getResourceAsStream(productPro);props.load(is);is.close();is = null;} catch (FileNotFoundException e) {e.printStackTrace();} catch (IOException e) {e.printStackTrace();}producer = new KafkaProducer<String, String>(props);}public static final KafkaProducerT getInstance() {return KafkaProducerTHolder.instance;}private static class KafkaProducerTHolder{ private static final KafkaProducerT instance = new KafkaProducerT(); }public void setTopic(String topic) {this.topic = topic;}public void push(String key,Object value){if(!keys.contains(key)) {keys.add(key);}if(!messages.containsKey(key)) {messages.put(key, new ArrayList<Object>());}messages.get(key).add(value);}public void push(String key,Object value,KafkaProducterListener listener) {if(!keys.contains(key)) {keys.add(key);}if(!messages.containsKey(key)) {messages.put(key, new 
ArrayList<Object>());}messages.get(key).add(value);this.listener = listener;}@Overridepublic void run() {while (true) {if(keys.size() == 0 || messages.size()==0) {producer.flush();continue;}key = keys.get(0);value = messages.get(key).get(0);messages.get(key).remove(value);if(messages.get(key).size() == 0) {messages.remove(key);}if(!messages.containsKey(key)) {keys.remove(key);}producer.send(new ProducerRecord<String, String>(topic,key+"=>"+value.toString()),new Callback() {public void onCompletion(RecordMetadata metadata, Exception exception) {System.out.println("topic=>"+topic+",key="+key+",value="+value);if(exception == null) {if(listener != null) {listener.onComplete(new HttpJResopnse());}}else {HttpJResopnse resopnse = new HttpJResopnse();resopnse.setStatus(HttpJResopnse.ERROR);resopnse.setMessage(exception.getMessage());listener.onComplete(resopnse);}}});}}public static void main(String[] args) {KafkaProducerT kafkaProducerT = KafkaProducerT.getInstance();KafkaProducerPool kafkaProducerPool = KafkaProducerPool.getInstance();kafkaProducerPool.execute(kafkaProducerT);kafkaProducerT.setTopic("test");kafkaProducerT.push("name", "wangyui");}public interface KafkaProducterListener {public void onComplete(HttpJResopnse resopnse);}}
2、消费者—Kafka+Maven
2.1、pom.xml相关
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
  <modelVersion>4.0.0</modelVersion>
  <groupId>com.cda</groupId>
  <artifactId>sparkcda</artifactId>
  <version>0.0.1</version>
  <packaging>jar</packaging>
  <name>sparkcda</name>
  <url>http://maven.apache.org</url>
  <properties>
    <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
  </properties>
  <dependencies>
    <dependency>
      <groupId>junit</groupId>
      <artifactId>junit</artifactId>
      <version>3.8.1</version>
      <scope>test</scope>
    </dependency>
    <dependency>
      <!-- Spark dependency -->
      <groupId>org.apache.spark</groupId>
      <artifactId>spark-core_2.10</artifactId>
      <version>1.3.1</version>
      <scope>provided</scope>
    </dependency>
    <dependency>
      <groupId>org.apache.spark</groupId>
      <artifactId>spark-streaming_2.10</artifactId>
      <version>1.3.1</version>
      <scope>provided</scope>
    </dependency>
    <dependency>
      <groupId>org.apache.kafka</groupId>
      <artifactId>kafka_2.10</artifactId>
      <version>0.8.2.1</version>
    </dependency>
    <dependency>
      <groupId>org.apache.spark</groupId>
      <artifactId>spark-streaming-kafka_2.10</artifactId>
      <version>1.3.1</version>
    </dependency>
    <dependency>
      <groupId>com.101tec</groupId>
      <artifactId>zkclient</artifactId>
      <version>0.10</version>
    </dependency>
    <dependency>
      <groupId>com.yammer.metrics</groupId>
      <artifactId>metrics-core</artifactId>
      <version>2.2.0</version>
    </dependency>
  </dependencies>
</project>
2.2 代码编写
import java.util.Arrays;import java.util.HashMap;import java.util.HashSet;import java.util.Map;import java.util.Set;import java.util.regex.Pattern;import org.apache.spark.SparkConf;import org.apache.spark.api.java.function.FlatMapFunction;import org.apache.spark.api.java.function.Function;import org.apache.spark.api.java.function.Function2;import org.apache.spark.api.java.function.PairFunction;import org.apache.spark.streaming.Durations;import org.apache.spark.streaming.api.java.JavaDStream;import org.apache.spark.streaming.api.java.JavaPairDStream;import org.apache.spark.streaming.api.java.JavaPairInputDStream;import org.apache.spark.streaming.api.java.JavaStreamingContext;import org.apache.spark.streaming.kafka.KafkaUtils;import kafka.serializer.StringDecoder;import scala.Tuple2;public class AccessStream {private static final Pattern SPACE = Pattern.compile(" ");public static void main(String[] args) {consurmer();}public static void consurmer() {String brokers = "localhost:9092"; String topics = "cda_pv"; // Create context with a 2 seconds batch interval SparkConf sparkConf = new SparkConf().setAppName("AccessStream"); sparkConf.setMaster("local[*]"); JavaStreamingContext jssc = new JavaStreamingContext(sparkConf, Durations.seconds(20)); Set<String> topicsSet = new HashSet<String>(Arrays.asList(topics.split(","))); Map<String, String> kafkaParams = new HashMap<String, String>(); kafkaParams.put("metadata.broker.list", brokers); // Create direct kafka stream with brokers and topics JavaPairInputDStream<String, String> messages = KafkaUtils.createDirectStream( jssc, String.class, String.class, StringDecoder.class, StringDecoder.class, kafkaParams, topicsSet ); // Get the lines, split them into words, count the words and print JavaDStream<String> lines = messages.map(new Function<Tuple2<String, String>, String>() { /** * */private static final long serialVersionUID = 1L;@Override public String call(Tuple2<String, String> tuple2) { return tuple2._2(); } }); 
JavaDStream<String> words = lines.flatMap(new FlatMapFunction<String, String>() { /** * */private static final long serialVersionUID = 1L;@Override public Iterable<String> call(String x) { return Arrays.asList(SPACE.split(x)); } }); JavaPairDStream<String, Integer> wordCounts = words.mapToPair( new PairFunction<String, String, Integer>() { /** * */private static final long serialVersionUID = 1L;@Override public Tuple2<String, Integer> call(String s) { return new Tuple2<String, Integer>(s, 1); } }).reduceByKey( new Function2<Integer, Integer, Integer>() { /** * */private static final long serialVersionUID = 1L;@Override public Integer call(Integer i1, Integer i2) { return i1 + i2; } }); wordCounts.print(); // Start the computation jssc.start(); jssc.awaitTermination();}}
3、服务器搭建
3.1、软件安装
解压jdk-8u121-linux-x64.tar.gz、kafka_2.12-0.10.2.1.tgz、spark-1.3.1-bin-hadoop2-without-hive.tgz 到 /usr/local,并加入到PATH中(这些都懂得,不必赘述)
3.2、相关配置
echo -e "export JAVA_HOME=$JAVA_HOME" >> $SPARK_HOME/sbin/spark-config.sh
echo -e "advertised.listeners=PLAINTEXT://你的服务器IP:9092" >> $KAFKA_HOME/config/server.properties
3.3、测试
spark-submit --class com.cda.stream.AccessStream --jars /usr/local/spark-1.3.1-bin-hadoop2-without-hive/lib/kafka_2.10-0.8.2.1.jar,/usr/local/spark-1.3.1-bin-hadoop2-without-hive/lib/spark-streaming-kafka_2.10-1.3.1.jar,/usr/local/kafka_2.12-0.10.2.1/libs/metrics-core-2.2.0.jar /opt/sparkcda-0.0.1.jar
阅读全文
0 0
- Linux搭建Kafka+Spark实时处理系统
- linux 系统下spark环境的搭建
- Windows系统搭建kafka
- linux 搭建 kafka集群
- flume+kafka+spark streaming日志流式处理系统搭建实验
- Spark Streaming实时处理本地数据流
- zookeeper在linux下搭建+kafka+Spark Streaing(sparkstreaming就下篇介绍了)
- Kafka+Spark Streaming+Redis实时系统实践
- Kafka+Spark Streaming+Redis实时系统实践
- Kafka+Spark Streaming+Redis实时系统实践
- Kafka+Spark Streaming+Redis实时系统实践
- Kafka+Spark Streaming+Redis实时系统实践
- Kafka+Spark Streaming+Redis实时系统实践
- Kafka+Spark Streaming+Redis实时系统实践
- Kafka+Spark Streaming+Redis实时系统实践
- Kafka+Spark Streaming+Redis实时系统实践
- 自己标注(不注意坑不少)-Spark+Kafka构建实时分析Dashboard案例——步骤三:Spark Streaming实时处理数据
- Apache Kafka:大数据的实时处理时代
- 通讯录管理系统终章
- System.getProperty
- 【C/C++】解决“不是有效的win32应用程序”思路
- Android构造函数有没有返回值?
- C#.NET:高级编程之匿名类、匿名方法与扩展方法
- Linux搭建Kafka+Spark实时处理系统
- springMvc实现处理多个input的表单对象封装name到对象
- python2.7 安装numpy no module name zlib
- Spring ModelAttribute注解
- hive 常用sql
- 使用 Node.js 对文本内容分词和关键词抽取
- ThinkSNS+如何计算字符显示长度?【社交系统研发日记五】
- Computed 、 Methods、Watchers
- MVC与三层架构