Advancing with Kafka (Part 4): Consumer Simple API Development


Notes

Kafka version: kafka_2.10-0.8.2.1 (the Kafka 0.9.x releases provide a new consumer API)
IDE environment: IntelliJ IDEA 14 + Maven 3.3
Language: Java

Low-level consumer API development

Where the low-level API fits

The biggest difference between the low-level API and the high-level API is that you control, per partition of a topic, both what is consumed and which offset is used. It fits cases where: 1) you want to read the same message more than once; 2) you only want to consume some of a topic's partitions; 3) you want tighter control over partition offsets; and so on.
Of course, the extra control also means extra work: 1) you must track offsets in your own code; 2) you must find, and handle failover of, the leader broker of each partition yourself. A minimal sketch of this direct control is shown below; the rest of the article builds the same calls into a full consumer.
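The sketch below shows the bare 0.8.2.1 SimpleConsumer calls with no error handling: you pick the broker, the partition, and the starting offset yourself, and you advance the offset yourself. Host, port, topic, partition, and offset here are placeholder assumptions, not values from a real deployment.

package kafka.simple;

import java.nio.ByteBuffer;
import kafka.api.FetchRequest;
import kafka.api.FetchRequestBuilder;
import kafka.javaapi.FetchResponse;
import kafka.javaapi.consumer.SimpleConsumer;
import kafka.message.MessageAndOffset;

public class FetchOnceSketch {
    public static void main(String[] args) throws Exception {
        // connect directly to the partition's leader broker (assumed to be localhost:9092 here)
        SimpleConsumer consumer = new SimpleConsumer("localhost", 9092, 100000, 64 * 1024, "sketchClient");
        long offset = 0L;  // you choose the starting offset yourself
        FetchRequest request = new FetchRequestBuilder().clientId("sketchClient")
                .addFetch("myTopic", 0, offset, 100000)   // topic, partition, offset, max bytes
                .build();
        FetchResponse response = consumer.fetch(request);
        for (MessageAndOffset messageAndOffset : response.messageSet("myTopic", 0)) {
            ByteBuffer payload = messageAndOffset.message().payload();
            byte[] bytes = new byte[payload.limit()];
            payload.get(bytes);
            System.out.println(messageAndOffset.offset() + ": " + new String(bytes, "UTF-8"));
            offset = messageAndOffset.nextOffset();       // you track the next offset yourself
        }
        consumer.close();
    }
}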

Example program

This program implements a low-level-API Kafka consumer that persists each partition's offset to a local file; on the next start-up it reads the offset back from that file and resumes from there.
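With the sample configuration below (checkpoint=./checkpoint, partitionNum=8), the on-disk checkpoint directory ends up looking roughly like this: one subdirectory per partition, each holding a single offset file.

./checkpoint/
    partition0/offset
    partition1/offset
    ...
    partition7/offset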

Maven dependency

<dependency>
    <groupId>org.apache.kafka</groupId>
    <artifactId>kafka_2.10</artifactId>
    <version>0.8.2.1</version>
</dependency>

Package layout

(Figure: package layout. The kafka.simple package contains KafkaConfig.java, KafkaUtil.java, PartitionMsgTask.java, KafkaSimpleConsumer.java and Main.java, plus the consumer.properties configuration file.)

Configuration file: consumer.properties

brokerList=xxxx,xxxx,xxxx,xxxx
port=9092
topic=myTopic
partitionNum=8
# offset file path
checkpoint=./checkpoint
# once subscribe size
patchSize=10
# latest or earliest
subscribeStartPoint=earliest

KafkaConfig.java

package kafka.simple;

import java.util.List;

public class KafkaConfig {
    public String topic = null;                 // topic name
    public int partitionNum = 0;                // number of partitions
    public int port = 0;                        // kafka broker port
    public List<String> replicaBrokers = null;  // list of kafka broker hosts
    public String checkpoint;                   // checkpoint directory, i.e. where partition offsets are saved
    public int patchSize = 10;                  // max number of messages to read per fetch
    public String subscribeStartPoint = null;   // default start point when no checkpoint exists: latest or earliest

    public KafkaConfig() {
    }

    @Override
    public String toString() {
        return "[brokers:" + replicaBrokers.toString()
                + "] [port:" + port
                + "] [topic:" + topic
                + "] [partition num:" + partitionNum
                + "] [patch size:" + patchSize
                + "] [start point:" + subscribeStartPoint
                + "]";
    }
}

KafkaUtil.java

package kafka.simple;

import kafka.api.PartitionOffsetRequestInfo;
import kafka.common.TopicAndPartition;
import kafka.javaapi.OffsetResponse;
import kafka.javaapi.PartitionMetadata;
import kafka.javaapi.TopicMetadata;
import kafka.javaapi.TopicMetadataRequest;
import kafka.javaapi.consumer.SimpleConsumer;
import org.apache.log4j.LogManager;
import org.apache.log4j.Logger;

import java.util.Collections;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

public class KafkaUtil {
    private static final Logger logger = LogManager.getLogger(KafkaUtil.class);

    /**
     * Find the leader broker of a partition.
     * @param seedBrokers configured broker list
     * @param port broker port
     * @param topic
     * @param partition
     * @return
     */
    public static PartitionMetadata findLeader(List<String> seedBrokers, int port, String topic, int partition) {
        PartitionMetadata returnMetaData = null;
        logger.info("find leader begin. brokers:[" + seedBrokers.toString() + "]");
        loop:
        for (String seed : seedBrokers) {
            SimpleConsumer consumer = null;
            try {
                consumer = new SimpleConsumer(seed, port, 100000, 64 * 1024, "leaderLookup");
                List<String> topics = Collections.singletonList(topic);
                TopicMetadataRequest req = new TopicMetadataRequest(topics);
                kafka.javaapi.TopicMetadataResponse res = consumer.send(req);

                List<TopicMetadata> metadatas = res.topicsMetadata();
                for (TopicMetadata item : metadatas) {
                    for (PartitionMetadata part : item.partitionsMetadata()) {
                        if (part.partitionId() == partition) {
                            returnMetaData = part;
                            break loop;
                        }
                    }
                }
            } catch (Exception e) {
                logger.error("error communicating with broker [" + seed + "] to find Leader for [" + topic
                        + ", " + partition + "] Reason: " + e);
            } finally {
                if (consumer != null) {
                    consumer.close();
                }
            }
        }
        if (returnMetaData != null) {
            // TODO
            // seedBrokers.clear();
            // for (kafka.cluster.Broker seed : returnMetaData.replicas()) {
            //     seedBrokers.add(seed.host());
            // }
        }
        return returnMetaData;
    }

    /**
     * Find a new leader broker after the old one fails.
     * @param oldLeader
     * @param seedBrokers
     * @param port
     * @param topic
     * @param partition
     * @return
     * @throws Exception
     */
    public static String findNewLeader(String oldLeader,
                                       List<String> seedBrokers, int port,
                                       String topic, int partition) throws Exception {
        for (int i = 0; i < 3; ++i) {
            boolean sleep = false;
            PartitionMetadata metadata = findLeader(seedBrokers, port, topic, partition);
            if (metadata == null) {
                sleep = true;
            } else if (metadata.leader() == null) {
                sleep = true;
            } else if (oldLeader.equalsIgnoreCase(metadata.leader().host()) && i == 0) {
                sleep = true;
            } else {
                return metadata.leader().host();
            }
            if (sleep) {
                try {
                    Thread.sleep(1000);
                } catch (InterruptedException e) {
                    // do nothing
                }
            }
        }
        logger.warn("Unable to find new leader after Broker failure. Exiting");
        throw new Exception("Unable to find new leader after Broker failure. Exiting");
    }

    /**
     * Get an offset for the given topic and partition.
     * @param consumer
     * @param topic
     * @param partition
     * @param whichTime
     * @param clientId
     * @return
     */
    public static long getSpecificOffset(SimpleConsumer consumer,
                                         String topic, int partition,
                                         long whichTime, String clientId) {
        TopicAndPartition topicAndPartition = new TopicAndPartition(topic, partition);
        Map<TopicAndPartition, PartitionOffsetRequestInfo> requestInfo = new HashMap<>();
        requestInfo.put(topicAndPartition, new PartitionOffsetRequestInfo(whichTime, 1));
        kafka.javaapi.OffsetRequest request =
                new kafka.javaapi.OffsetRequest(requestInfo, kafka.api.OffsetRequest.CurrentVersion(), clientId);
        OffsetResponse response = consumer.getOffsetsBefore(request);

        if (response.hasError()) {
            logger.warn("Error fetching data Offset Data the Broker. Reason: " + response.errorCode(topic, partition));
            return -1;
        }
        long[] offsets = response.offsets(topic, partition);
        return offsets[0];
    }
}

PartitionMsgTask.java

package kafka.simple;

import kafka.javaapi.consumer.SimpleConsumer;
import org.apache.log4j.LogManager;
import org.apache.log4j.Logger;
import kafka.api.FetchRequest;
import kafka.api.FetchRequestBuilder;
import kafka.common.ErrorMapping;
import kafka.javaapi.FetchResponse;
import kafka.javaapi.PartitionMetadata;
import kafka.message.MessageAndOffset;

import java.io.BufferedReader;
import java.io.File;
import java.io.FileNotFoundException;
import java.io.FileReader;
import java.io.FileWriter;
import java.io.IOException;
import java.nio.ByteBuffer;
import java.util.ArrayList;
import java.util.List;

public class PartitionMsgTask implements Runnable {
    private static final Logger logger = LogManager.getLogger(PartitionMsgTask.class);

    private KafkaConfig kafkaConfig = null;
    private String filePath = null;
    private int partitionIndex = 0;
    private SimpleConsumer consumer = null;
    private String leadBroker = null;
    private long readOffset = 0;
    private String clientName = null;

    public PartitionMsgTask(KafkaConfig config, int index) {
        partitionIndex = index;
        this.kafkaConfig = config;
        filePath = kafkaConfig.checkpoint + "/partition" + partitionIndex;
    }

    public void shutdown() {
        if (consumer != null) {
            consumer.close();
        }
    }

    public void run() {
        logger.info("partition:" + partitionIndex + " config:" + kafkaConfig.toString());
        PartitionMetadata metadata = KafkaUtil
                .findLeader(kafkaConfig.replicaBrokers, kafkaConfig.port, kafkaConfig.topic, partitionIndex);
        if (metadata == null) {
            logger.error("Can't find metadata for Topic:" + kafkaConfig.topic
                    + " and Partition:" + partitionIndex + ". Exiting");
            return;
        }
        if (metadata.leader() == null) {
            logger.error("Can't find Leader for Topic:" + kafkaConfig.topic
                    + " and Partition:" + partitionIndex + ". Exiting");
            return;
        }
        leadBroker = metadata.leader().host();
        clientName = "Client_" + kafkaConfig.topic + "_" + partitionIndex;
        logger.info("leadBroker:" + leadBroker + " client:" + clientName);
        consumer = new SimpleConsumer(leadBroker, kafkaConfig.port, 100000, 64 * 1024, clientName);

        // first time to get offset
        readOffset = getOffset();
        logger.info("first time get offset :" + readOffset);
        if (readOffset == -1) {
            logger.error("get offset failed");
            return;
        }
        logger.info("partition" + partitionIndex + " thread run success.");

        while (true) {
            try {
                int ret;
                List<String> messageList = new ArrayList<String>();
                // fetch a batch of messages
                long offset = subscribe(kafkaConfig.patchSize, messageList);
                if (offset < 0) {
                    logger.error("subscribe message failed. will continue");
                    continue;
                }
                // todo: process messageList
                // todo: if processing fails you can retry or move on; decide yourself whether to save the offset
                // save offset
                ret = saveOffset(offset);
                if (ret != 0) {
                    if (saveOffset(offset) != 0) {
                        continue;
                    }
                }
                readOffset = offset;
            } catch (Exception e) {
                logger.error("exception :" + e.getMessage());
            }
        }
    }

    /**
     * Get the offset to start reading from.
     * @return
     */
    public long getOffset() {
        long offset = -1;
        // try to read the offset from the checkpoint file first
        String offsetFile = filePath + "/" + "offset";
        BufferedReader reader = null;
        try {
            File file = new File(offsetFile);
            reader = new BufferedReader(new FileReader(file));
            String tempStr = reader.readLine();
            offset = Long.parseLong(tempStr);
            reader.close();
            return offset;
        } catch (FileNotFoundException e) {
            logger.info("offset file:" + offsetFile + " not found. will get the "
                    + kafkaConfig.subscribeStartPoint + " offset.");
        } catch (IOException e) {
            logger.error("get offset from file exception");
            return -1;
        }

        if (kafkaConfig.subscribeStartPoint.equals("earliest")) {
            // get earliest offset
            offset = KafkaUtil.getSpecificOffset(consumer,
                    kafkaConfig.topic, partitionIndex,
                    kafka.api.OffsetRequest.EarliestTime(), clientName);
        } else if (kafkaConfig.subscribeStartPoint.equals("latest")) {
            // get latest offset
            offset = KafkaUtil.getSpecificOffset(consumer,
                    kafkaConfig.topic, partitionIndex,
                    kafka.api.OffsetRequest.LatestTime(), clientName);
        } else {
            logger.error("kafka config start point error");
        }
        return offset;
    }

    /**
     * Save the offset to the checkpoint file.
     * @param offset
     * @return
     */
    public int saveOffset(long offset) {
        String offsetFile = filePath + "/" + "offset";
        try {
            File file = new File(offsetFile);
            if (!file.exists()) {
                file.createNewFile();
            }
            FileWriter fileWriter = new FileWriter(file);
            fileWriter.write(String.valueOf(offset));
            fileWriter.close();
        } catch (IOException e) {
            logger.error("save offset failed");
            return -1;
        }
        return 0;
    }

    /**
     * Fetch a batch of messages.
     * @param maxReads
     * @param messageList
     * @return
     * @throws Exception
     */
    public long subscribe(long maxReads, List<String> messageList) throws Exception {
        if (messageList == null) {
            logger.warn("messageList is null");
            return -1;
        }
        int numErrors = 0;
        long offset = readOffset;
        while (maxReads > 0) {
            if (consumer == null) {
                consumer = new SimpleConsumer(leadBroker, kafkaConfig.port, 100000, 64 * 1024, clientName);
            }
            FetchRequest request = new FetchRequestBuilder().clientId(clientName)
                    .addFetch(kafkaConfig.topic, partitionIndex, offset, 100000).build();
            FetchResponse fetchResponse = consumer.fetch(request);

            if (fetchResponse.hasError()) {
                logger.warn("fetch response has error");
                numErrors++;
                short code = fetchResponse.errorCode(kafkaConfig.topic, partitionIndex);
                logger.warn("Error fetching data from the Broker:" + leadBroker + " error code: " + code);
                if (numErrors > 3) {
                    return -1;
                }
                if (code == ErrorMapping.OffsetOutOfRangeCode()) {
                    // We asked for an invalid offset. For simple case ask for the last element to reset
                    // offset = KafkaUtil.getLastOffset(consumer, kafkaConfig.topic, partitionIndex,
                    //         kafka.api.OffsetRequest.LatestTime(), clientName);
                    logger.warn("offset out of range. will get a new offset");
                    offset = getOffset();
                    continue;
                }
                consumer.close();
                consumer = null;
                leadBroker = KafkaUtil.findNewLeader(leadBroker,
                        kafkaConfig.replicaBrokers,
                        kafkaConfig.port,
                        kafkaConfig.topic,
                        partitionIndex);
                continue;
            }
            numErrors = 0;

            long numRead = 0;
            for (MessageAndOffset messageAndOffset : fetchResponse.messageSet(kafkaConfig.topic, partitionIndex)) {
                long currentOffset = messageAndOffset.offset();
                if (currentOffset < offset) {
                    logger.warn("Found an old offset: " + currentOffset + " Expecting: " + offset);
                    continue;
                }
                offset = messageAndOffset.nextOffset();
                ByteBuffer payload = messageAndOffset.message().payload();

                byte[] bytes = new byte[payload.limit()];
                payload.get(bytes);
                String message = new String(bytes, "UTF-8");
                messageList.add(message);
                numRead++;
                maxReads--;
            }
            if (numRead == 0) {
                try {
                    Thread.sleep(100);
                } catch (InterruptedException e) {
                    // do nothing
                }
            }
        }
        return offset;
    }
}
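The run() loop above leaves message processing as a TODO. As a purely hypothetical illustration of what could replace that marker inside the while loop, the fragment below processes the batch before the offset is saved; processOne(...) is an assumed application-specific method, not part of the article's code.

// hypothetical loop-body fragment for the "todo: process messageList" marker
boolean processedOk = true;
for (String message : messageList) {
    try {
        processOne(message);          // e.g. parse the message and write it to storage
    } catch (Exception e) {
        logger.error("process message failed: " + e.getMessage());
        processedOk = false;
        break;
    }
}
if (!processedOk) {
    continue;                         // skip saveOffset so the same batch is re-fetched next round
}

Only saving the offset after the whole batch is processed is what gives at-least-once behavior here: a crash between processing and saveOffset means the batch is read again on restart.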

KafkaSimpleConsumer.java

package kafka.simple;

import org.apache.log4j.LogManager;
import org.apache.log4j.Logger;

import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Properties;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;

public class KafkaSimpleConsumer {
    private static final Logger logger = LogManager.getLogger(KafkaSimpleConsumer.class);

    private KafkaConfig kafkaConfig = null;
    private ExecutorService executor = null;
    private boolean inited = false;

    public void run() {
        if (!inited) {
            logger.error("uninit, init first!");
            return;
        }
        File file = new File(kafkaConfig.checkpoint);
        if (!file.exists()) {
            file.mkdir();
        }
        executor = Executors.newFixedThreadPool(kafkaConfig.partitionNum);

        int threadNum = 0;
        for (; threadNum < kafkaConfig.partitionNum; ++threadNum) {
            file = new File(kafkaConfig.checkpoint + "/partition" + threadNum);
            if (!file.exists() && !file.isDirectory()) {
                file.mkdir();
            }
            logger.info("begin submit partition msg task thread");
            executor.submit(new PartitionMsgTask(kafkaConfig, threadNum));
        }
    }

    public int init(String confFile) {
        Properties props = new Properties();
        kafkaConfig = new KafkaConfig();
        try {
            FileInputStream in = new FileInputStream(confFile);
            props.load(in);
        } catch (FileNotFoundException e) {
            logger.error("kafka config file not found. file name:" + confFile);
            return -1;
        } catch (IOException e) {
            logger.error("properties load file failed");
            return -1;
        }
        kafkaConfig.topic = props.getProperty("topic");
        kafkaConfig.port = Integer.parseInt(props.getProperty("port"));
        kafkaConfig.partitionNum = Integer.parseInt(props.getProperty("partitionNum"));
        kafkaConfig.checkpoint = props.getProperty("checkpoint");
        kafkaConfig.patchSize = Integer.parseInt(props.getProperty("patchSize"));

        String startPoint = props.getProperty("subscribeStartPoint");
        if (!startPoint.equals("latest") && !startPoint.equals("earliest")) {
            logger.error("config file startPoint error. startPoint must be latest or earliest");
            return -1;
        }
        kafkaConfig.subscribeStartPoint = startPoint;

        String brokerList = props.getProperty("brokerList");
        String[] brokers = brokerList.split(",");
        kafkaConfig.replicaBrokers = new ArrayList<String>();
        for (String str : brokers) {
            kafkaConfig.replicaBrokers.add(str);
        }
        inited = true;
        logger.info("init success. kafkaConfig:" + kafkaConfig.toString());
        return 0;
    }
}

Main.java

package kafka.simple;

public class Main {
    public static void main(String[] args) {
        KafkaSimpleConsumer consumer = new KafkaSimpleConsumer();
        consumer.init("./consumer.properties");
        consumer.run();
    }
}
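To try it out, one possible way (assuming a standard Maven project layout and that consumer.properties sits in the working directory) is roughly:

mvn compile exec:java -Dexec.mainClass=kafka.simple.Main

Note that Main passes a relative path to init(), so the program must be started from the directory that contains consumer.properties, and the ./checkpoint directory will be created next to it.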