kafka 自定义分区实例
来源:互联网 发布:淘宝美工怎么自学 编辑:程序博客网 时间:2024/06/07 18:43
第一步:使用 bin/kafka-topics.sh 命令创建 topic,并通过 --partitions 指定分区数
bin/kafka-topics.sh --create --zookeeper 192.168.31.130:2181 --replication-factor 2 --partitions 3 --topic Topic-test
第二步:实现 org.apache.kafka.clients.producer.Partitioner 分区接口,以实现自定义的消息分区
package com.east.spark.kafka;

import java.util.List;
import java.util.Map;

import org.apache.kafka.clients.producer.Partitioner;
import org.apache.kafka.common.Cluster;
import org.apache.kafka.common.PartitionInfo;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
 * Custom partitioner that derives the target partition from the record key.
 *
 * <p>A key that is a {@code String} holding a decimal integer is routed by that
 * integer; any other non-null key is routed by its {@code hashCode()}. The
 * resulting number is reduced modulo the topic's partition count.
 */
public class MyPartition implements Partitioner {

    private static final Logger LOG = LoggerFactory.getLogger(MyPartition.class);

    public MyPartition() {
        // No state to initialise.
    }

    @Override
    public void configure(Map<String, ?> configs) {
        // This partitioner reads no configuration.
    }

    /**
     * Chooses the partition for a record.
     *
     * @param topic      topic the record is being sent to
     * @param key        record key; may be {@code null}
     * @param keyBytes   serialized key (unused)
     * @param value      record value (unused)
     * @param valueBytes serialized value (unused)
     * @param cluster    cluster metadata used to look up the topic's partitions
     * @return a partition index in {@code [0, numPartitions)}
     */
    @Override
    public int partition(String topic, Object key, byte[] keyBytes, Object value, byte[] valueBytes, Cluster cluster) {
        List<PartitionInfo> partitions = cluster.partitionsForTopic(topic);
        int numPartitions = partitions.size();

        int partitionNum = keyToNumber(key);

        // Note: the value logged here is the number derived from the key *before*
        // the modulo, not the final partition index.
        LOG.info("the message sendTo topic:{} and the partitionNum:{}", topic, partitionNum);

        // (x % n) lies in (-n, n), so Math.abs cannot overflow here.
        return Math.abs(partitionNum % numPartitions);
    }

    /**
     * Maps a key to the integer used for partition selection.
     *
     * @param key record key; may be {@code null}
     * @return the parsed integer for numeric string keys, {@code 0} for a
     *         {@code null} key, otherwise {@code key.hashCode()}
     */
    private static int keyToNumber(Object key) {
        if (key == null) {
            // Previously a null key caused a NullPointerException; route it to a
            // fixed bucket instead.
            return 0;
        }
        if (key instanceof String) {
            try {
                return Integer.parseInt((String) key);
            } catch (NumberFormatException notNumeric) {
                // Non-numeric string: fall through to the hash-based mapping.
            }
        }
        return key.hashCode();
    }

    @Override
    public void close() {
        // Nothing to release.
    }
}
第三步:编写 producer
package com.east.spark.kafka;

import java.util.Properties;

import org.apache.kafka.clients.producer.KafkaProducer;
import org.apache.kafka.clients.producer.ProducerRecord;

/**
 * Demo producer that sends 10000 records to {@code Topic-test}, one every three
 * seconds, using {@code com.east.spark.kafka.MyPartition} as the partitioner.
 */
public class Producer {

    public static void main(String[] args) {
        Properties props = new Properties();
        // A subset of the cluster's brokers, used to discover the rest of the cluster.
        props.put("bootstrap.servers", "192.168.31.130:9092,192.168.31.131:9092,192.168.31.132:9092");
        // Wait for the record to be fully committed: slowest, but most durable.
        props.put("acks", "all");
        // Number of retries when a request fails.
        props.put("retries", 3);
        // Batch size.
        props.put("batch.size", 16384);
        // By default a request is sent immediately even if the buffer has room;
        // lingering (in milliseconds) lets the buffer fill further, so the producer
        // delays sends by 1 ms to reduce the number of requests to the brokers.
        props.put("linger.ms", 1);
        // Total memory available to the producer for buffering.
        props.put("buffer.memory", 33554432);
        // Serializers for key and value.
        props.put("key.serializer", "org.apache.kafka.common.serialization.StringSerializer");
        props.put("value.serializer", "org.apache.kafka.common.serialization.StringSerializer");
        // Custom partitioner class.
        props.put("partitioner.class", "com.east.spark.kafka.MyPartition");

        boolean interrupted = false;
        // try-with-resources guarantees the producer is closed even if send() throws.
        try (KafkaProducer<String, String> producer = new KafkaProducer<>(props)) {
            for (int i = 0; i < 10000; i++) {
                // Arguments are topic, key, value. send() is asynchronous: it adds the
                // record to the buffer and returns immediately.
                producer.send(new ProducerRecord<String, String>("Topic-test", Integer.toString(i), Integer.toString(i)));
                try {
                    Thread.sleep(3000);
                } catch (InterruptedException e) {
                    // Stop sending; the interrupt flag is restored only after the
                    // producer has been closed so that close() is not disturbed.
                    interrupted = true;
                    break;
                }
            }
        }
        if (interrupted) {
            Thread.currentThread().interrupt();
        }
    }
}
第四步:编写 Consumer
package com.east.spark.kafka;

import java.util.Arrays;
import java.util.Properties;

import org.apache.kafka.clients.consumer.ConsumerRecord;
import org.apache.kafka.clients.consumer.ConsumerRecords;
import org.apache.kafka.clients.consumer.KafkaConsumer;

/**
 * Demo consumer that subscribes to {@code Topic-test} and prints the partition,
 * offset, key and value of every record it receives.
 */
public class Consumer {

    public static void main(String[] args) {
        Properties props = new Properties();
        // A subset of the cluster's brokers, used to discover the rest of the cluster.
        props.put("bootstrap.servers", "192.168.31.130:9092,192.168.31.131:9092,192.168.31.132:9092");
        // Consumer group id.
        props.put("group.id", "test");
        // Commit offsets automatically.
        props.put("enable.auto.commit", "true");
        // Auto-commit offsets every second.
        props.put("auto.commit.interval.ms", "1000");
        // The consumer sends heartbeats to the cluster; after this timeout it is
        // considered dead and its partitions are reassigned to other members.
        props.put("session.timeout.ms", "30000");
        // Deserializers for key and value.
        props.put("key.deserializer", "org.apache.kafka.common.serialization.StringDeserializer");
        props.put("value.deserializer", "org.apache.kafka.common.serialization.StringDeserializer");

        // try-with-resources closes the consumer if poll() or record handling throws.
        try (KafkaConsumer<String, String> consumer = new KafkaConsumer<>(props)) {
            // Topics to subscribe to; more than one may be listed.
            consumer.subscribe(Arrays.asList("Topic-test"));
            while (true) {
                ConsumerRecords<String, String> records = consumer.poll(100);
                for (ConsumerRecord<String, String> record : records) {
                    System.out.printf("partition = %d , offset = %d, key = %s, value = %s", record.partition(),
                            record.offset(), record.key(), record.value());
                    System.out.println();
                }
            }
        }
    }
}
消费端的输出日志
partition = 0 , offset = 49, key = 21, value = 21
partition = 0 , offset = 50, key = 0, value = 0
partition = 2 , offset = 40, key = 20, value = 20
partition = 1 , offset = 48, key = 1, value = 1
partition = 2 , offset = 41, key = 2, value = 2
partition = 0 , offset = 51, key = 3, value = 3
partition = 1 , offset = 49, key = 4, value = 4
partition = 2 , offset = 42, key = 5, value = 5
partition = 0 , offset = 52, key = 6, value = 6
partition = 1 , offset = 50, key = 7, value = 7
partition = 2 , offset = 43, key = 8, value = 8
partition = 0 , offset = 53, key = 9, value = 9
partition = 1 , offset = 51, key = 10, value = 10
partition = 2 , offset = 44, key = 11, value = 11
partition = 0 , offset = 54, key = 12, value = 12
partition = 1 , offset = 52, key = 13, value = 13
partition = 2 , offset = 45, key = 14, value = 14
partition = 0 , offset = 55, key = 15, value = 15
partition = 1 , offset = 53, key = 16, value = 16
partition = 2 , offset = 46, key = 17, value = 17
partition = 0 , offset = 56, key = 18, value = 18
partition = 1 , offset = 54, key = 19, value = 19
partition = 2 , offset = 47, key = 20, value = 20
partition = 0 , offset = 57, key = 21, value = 21
partition = 1 , offset = 55, key = 22, value = 22
partition = 2 , offset = 48, key = 23, value = 23
partition = 0 , offset = 58, key = 24, value = 24
partition = 1 , offset = 56, key = 25, value = 25
生产者端的输出日志
INFO | the message sendTo topic:Topic-test and the partitionNum:42
INFO | the message sendTo topic:Topic-test and the partitionNum:43
INFO | the message sendTo topic:Topic-test and the partitionNum:44
INFO | the message sendTo topic:Topic-test and the partitionNum:45
INFO | the message sendTo topic:Topic-test and the partitionNum:46
INFO | the message sendTo topic:Topic-test and the partitionNum:47
INFO | the message sendTo topic:Topic-test and the partitionNum:48
INFO | the message sendTo topic:Topic-test and the partitionNum:49
INFO | the message sendTo topic:Topic-test and the partitionNum:50
INFO | the message sendTo topic:Topic-test and the partitionNum:51
INFO | the message sendTo topic:Topic-test and the partitionNum:52
INFO | the message sendTo topic:Topic-test and the partitionNum:53
INFO | the message sendTo topic:Topic-test and the partitionNum:54
INFO | the message sendTo topic:Topic-test and the partitionNum:55
INFO | the message sendTo topic:Topic-test and the partitionNum:56
INFO | the message sendTo topic:Topic-test and the partitionNum:57
INFO | the message sendTo topic:Topic-test and the partitionNum:58
INFO | the message sendTo topic:Topic-test and the partitionNum:59
参考链接:https://my.oschina.net/u/1024107/blog/750146
0 0
- kafka 自定义分区实例
- 自定义Kafka分区器
- Kafka自定义分区规则
- Kafka分区策略及自定义
- MapReduce编程实例之自定义分区
- KAFKA调整分区
- kafka创建分区
- kafka分区原理图
- Kafka分区介绍
- Kafka分区与group
- kafka修改分区数量
- Kafka分区策略浅谈
- kafka 实例
- Kafka 实例
- 利用Flume拦截器(interceptors)实现Kafka Sink的自定义规则多分区写入
- 利用Flume拦截器(interceptors)实现Kafka Sink的自定义规则多分区写入
- Kafka分区分配计算(分区器Partitions)
- kafka减少分区的方法
- jQuery同步Ajax带来的UI线程阻塞问题及解决办法
- filter与servlet的比较
- C++第六次实验-项目2
- codeforces 807A Is it rated?
- Kafka相比于HDFS的优势
- kafka 自定义分区实例
- day1的笔记(python)
- RenderScript使用教程(一)
- Redis与Memcached的区别
- CRM系统的两大核心问题
- <opencv学习>之 error LNK2038 RuntimeLibrary 不匹配的解决办法
- Java解决URL中文乱码问题的文章
- JNI学习积累之一 ---- 常用函数大全
- 为 GridLayout 的 RecyclerView 设置 item 间距,实现所有 Item 靠边对齐,中间留白的效果