文章标题

来源：互联网 | 发布：2017网络与新媒体专业 | 编辑：程序博客网 | 时间：2024/06/02 02:50

public class KafkaConsumer implements Runnable {

private static final Logger LOGGER = LoggerFactory.getLogger(KafkaConsumer.class);/** * Kafka数据消费对象 */private ConsumerConnector consumer ;/** * Kafka Topic */private String topic ;/** * 线程数量，一般就是Topic的分区数量 */private int numThreads ;/** * 线程池 */private ExecutorService executorPool ;/** * 构造函数 * @param topic      Kafka消息Topic主题 * @param numThreads 处理数据的线程数/可以理解为Topic的分区数 * @param zookeeper  Kafka的Zookeeper连接字符串 * @param groupId    该消费者所属group ID的值 */public KafkaConsumer(String topic,int numThreads,String zookeeper,String groupId){    // 1. 创建Kafka连接器    this.consumer = Consumer.createJavaConsumerConnector(createConsumerConfig(zookeeper,groupId));    // 2. 数据赋值    this.topic = topic ;    this.numThreads = numThreads ;}public void run() {    // 1. 指定Topic    Map<String, Integer> topicCountMap = new HashMap<String, Integer>();    topicCountMap.put(this.topic, this.numThreads);    // 2. 指定数据的解码器    StringDecoder keyDecoder = new StringDecoder(new VerifiableProperties());    StringDecoder valueDecoder = new StringDecoder(new VerifiableProperties());    // 3. 获取连接数据的迭代器对象集合    /**     * Key: Topic主题     * Value: 对应Topic的数据流读取器，大小是topicCountMap中指定的topic大小     */    Map<String, List<KafkaStream<String, String>>> consumerMap = this.consumer.createMessageStreams(topicCountMap, keyDecoder, valueDecoder);    // 4. 从返回结果中获取对应topic的数据流处理器    List<KafkaStream<String, String>> streams = consumerMap.get(this.topic);    // 5. 创建线程池    this.executorPool = Executors.newFixedThreadPool(this.numThreads);    // 6. 构建数据输出对象    int threadNumber = 0;    for (final KafkaStream<String, String> stream : streams) {        this.executorPool.submit(new ConsumerKafkaStreamProcesser(stream, threadNumber));        threadNumber++;    }}public void  shutDown(){    // 1. 关闭和Kafka的连接，这样会导致stream.hashNext返回false    if (this.consumer != null) {        this.consumer.shutdown();    }    // 2. 
关闭线程池，会等待线程的执行完成    if (this.executorPool != null) {        // 2.1 关闭线程池        this.executorPool.shutdown();        // 2.2. 等待关闭完成, 等待五秒        try {            if (!this.executorPool.awaitTermination(5, TimeUnit.SECONDS)) {                System.out.println("Timed out waiting for consumer threads to shut down, exiting uncleanly!!");            }        } catch (InterruptedException e) {            System.out.println("Interrupted during shutdown, exiting uncleanly!!");        }    }}/** * 根据传入的zk的连接信息和groupID的值创建对应的ConsumerConfig对象 *  zk的连接信息，类似于：<br/>  hadoop-senior01.ibeifeng.com:2181,hadoop-senior02.ibeifeng.com:2181/kafka *   该kafka consumer所属的group id的值， group id值一样的kafka consumer会进行负载均衡 * @return Kafka连接信息 */private ConsumerConfig createConsumerConfig(String zookeeper ,String groupId){    // 1.构建属性对象    Properties prop = new Properties();    // 2.添加相关属性    prop.put("group.id", groupId); // 指定分组id    prop.put("zookeeper.connect", zookeeper); // 指定zk的连接url    prop.put("zookeeper.session.timeout.ms", "400"); //    prop.put("zookeeper.sync.time.ms", "200");    prop.put("auto.commit.interval.ms", "1000"); /*   prop.put("group.id",ConsumerPropertiesFactory.groupId); //指定分组ID    prop.put("zookeeper.connect",ConsumerPropertiesFactory.zookeeperConnect);//指定zk的连接url    prop.put("metadata.broker.list",ConsumerPropertiesFactory.metadataBrokerList);    prop.put("auto.offset.reset", ConsumerPropertiesFactory.autoOffsetReset);    prop.put("zookeeper.session.timeout.ms", "400"); //    prop.put("zookeeper.sync.time.ms", "200");    prop.put("auto.commit.interval.ms", "1000");*/    // 3.构建ConsumerConfig    return new ConsumerConfig(prop);}

}
以上代码通过多线程的方式从 Kafka 拉取数据。实现的关键点在于：消费线程数一般与 Topic 的分区数保持一致。
Kafka 生产者在底层会在写入达到一定数据量或超时之后切换 partition 继续生产数据；在同一个消费组内，每个 partition 同一时刻只能被一个线程消费。
/**
 * Worker that drains one Kafka message stream and writes every message to the log
 * and to standard output.
 */
public class ConsumerKafkaStreamProcesser implements Runnable {

    private static final Logger LOGGER = LoggerFactory.getLogger(ConsumerKafkaStreamProcesser.class);

    /** Kafka message stream read by this worker. */
    private final KafkaStream<String, String> stream;

    /** Number identifying this worker in the output. */
    private final int threadNumber;

    /**
     * Creates a processor for one stream.
     *
     * @param stream       Kafka message stream to read
     * @param threadNumber number used to tag this worker's output
     */
    public ConsumerKafkaStreamProcesser(KafkaStream<String, String> stream, int threadNumber) {
        this.stream = stream;
        this.threadNumber = threadNumber;
    }

    /**
     * Reads messages until the stream's iterator reports no more data, printing the
     * offset, key and message (plus the partition on standard output) of each one,
     * then reports that the worker is finished.
     */
    @Override
    public void run() {
        // 1. Obtain the stream iterator.
        ConsumerIterator<String, String> iter = this.stream.iterator();

        // 2. Output every message.
        while (iter.hasNext()) {
            // 2.1 Fetch the next message; typed so key() and message() are Strings
            //     rather than raw Objects.
            MessageAndMetadata<String, String> value = iter.next();

            // 2.2 Output it.
            LOGGER.info(this.threadNumber + "::" + value.offset() + value.key() + ":" + value.message());
            System.out.println(this.threadNumber + "::" + value.offset() + value.key() + ":" + value.message()
                    + "partition ..." + value.partition());
        }

        // 3. The loop has ended, so this worker is done.
        LOGGER.info("Shutdown Thread:" + this.threadNumber);
        System.out.println("Shutdown Thread:" + this.threadNumber);
    }

}

这里是对 topic 数据的输出处理，同样运行在线程中；打印出的日志里有详细的说明。

public class App
{
public static void main( String[] args )
{
String zookeeper = “192.168.18.128:2181” ;
String groupId = “group1”;
String topic1 = “testTopic4”;
String topic2 = “testTopic3”;
int threads = 10 ;

    KafkaConsumer instance = new KafkaConsumer(topic1,threads,zookeeper,groupId);    KafkaConsumer instance1 = new KafkaConsumer(topic2,threads,zookeeper,groupId);        new Thread(instance).start();        new Thread(instance1).start();   // int sleepMillis = 30000 ;   /* try {        Thread.sleep(sleepMillis);    } catch (InterruptedException e) {        e.printStackTrace();    }*/}

}
最后这里是测试类，简单提一下。实际生产中，Kafka 的连接配置都是写在 resources 目录下的配置文件中的。
拿到数据后如果要对接数据库做保存处理，数据库的连接配置同样写在 resources 中，以静态方式加载，速度较快。
如果没有必要，可以不使用多线程的方式，因为线程切换本身也有性能开销。

阅读全文

0 0