An Introduction to KafkaProducer


1 How the producer sends messages to the broker


1.1 ProducerInterceptor intercepts the message

1.2 Serializer serializes the key and value

1.3 Partitioner chooses an appropriate partition for the message

1.4 RecordAccumulator collects messages so they can be sent in batches

1.5 Sender fetches messages from the RecordAccumulator

1.6 A ClientRequest is constructed

1.7 The ClientRequest is handed to the NetworkClient, ready to be sent

1.8 The NetworkClient places the request into the KafkaChannel's send buffer

1.9 The request is sent

1.10 When the response arrives, the ClientRequest's callback is invoked

1.11 The RecordBatch's callback is invoked, which ultimately invokes the callback registered on each individual message

Two threads are mainly involved here:

The main (user) thread wraps each message into a ProducerRecord object and then calls send(), which places the message into the RecordAccumulator for temporary buffering.

The Sender thread takes the buffered messages out of the RecordAccumulator, packs them into requests, and sends them to the broker in batches. A minimal usage sketch of this asynchronous model follows.
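
To make the two-thread model concrete, here is a minimal, hedged usage sketch. The broker address localhost:9092 and the topic name demo-topic are placeholders, not part of the original article. Note that send() returns as soon as the record has been buffered in the RecordAccumulator; the callback fires later, once the Sender thread has transmitted the batch and the broker has responded.

import java.util.Properties;
import org.apache.kafka.clients.producer.KafkaProducer;
import org.apache.kafka.clients.producer.ProducerRecord;

public class ProducerQuickStart {
    public static void main(String[] args) {
        Properties props = new Properties();
        props.put("bootstrap.servers", "localhost:9092"); // placeholder broker address
        props.put("key.serializer", "org.apache.kafka.common.serialization.StringSerializer");
        props.put("value.serializer", "org.apache.kafka.common.serialization.StringSerializer");

        KafkaProducer<String, String> producer = new KafkaProducer<>(props);
        // send() only appends the record to the RecordAccumulator and returns;
        // the Sender thread transmits it later and then invokes the callback.
        producer.send(new ProducerRecord<>("demo-topic", "key", "value"),
                (metadata, exception) -> {
                    if (exception != null)
                        exception.printStackTrace();
                    else
                        System.out.printf("sent to %s-%d@%d%n",
                                metadata.topic(), metadata.partition(), metadata.offset());
                });
        producer.close(); // flushes buffered records before shutting down
    }
}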

 

2 Core fields

String clientId: the unique identifier of this producer

AtomicInteger PRODUCER_CLIENT_ID_SEQUENCE: generator used to create clientId values when none is configured

Partitioner: the partition selector; routes each message to an appropriate partition according to a configurable strategy

int maxRequestSize: the maximum size of a request; also used to cap the size of a single message (max.request.size)

long totalMemorySize: the total size of the buffer available for messages waiting to be sent (buffer.memory)

Metadata: the metadata of the whole Kafka cluster

RecordAccumulator accumulator: collects and buffers messages until the Sender thread fetches them

Sender: the Sender task that sends the messages

Thread ioThread: the thread that runs the Sender task

CompressionType: the compression algorithm, applied across the batched messages in the RecordAccumulator; the more messages in a batch, the better the compression ratio

Serializer<K> keySerializer: serializer for keys

Serializer<V> valueSerializer: serializer for values

long maxBlockTimeMs: the maximum time to block while waiting for a Kafka cluster metadata update (max.block.ms)

int requestTimeoutMs: the timeout for a request (request.timeout.ms)

ProducerInterceptors<K, V> interceptors: intercept each record so it can be further processed before being sent to the server

Most of these fields are populated directly from the producer configuration; a sketch of the mapping follows.
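
A minimal sketch of that mapping, using the standard ProducerConfig keys. The values shown are the documented defaults (1 MB max.request.size, 32 MB buffer.memory, and so on), listed only for illustration, not as recommendations.

import java.util.Properties;
import org.apache.kafka.clients.producer.ProducerConfig;

public class CoreFieldConfigs {
    public static Properties build() {
        Properties props = new Properties();
        props.put(ProducerConfig.CLIENT_ID_CONFIG, "demo-producer");  // -> clientId
        props.put(ProducerConfig.MAX_REQUEST_SIZE_CONFIG, 1048576);   // -> maxRequestSize (1 MB default)
        props.put(ProducerConfig.BUFFER_MEMORY_CONFIG, 33554432L);    // -> totalMemorySize (32 MB default)
        props.put(ProducerConfig.COMPRESSION_TYPE_CONFIG, "none");    // -> compressionType
        props.put(ProducerConfig.MAX_BLOCK_MS_CONFIG, 60000L);        // -> maxBlockTimeMs
        props.put(ProducerConfig.REQUEST_TIMEOUT_MS_CONFIG, 30000);   // -> requestTimeoutMs
        props.put(ProducerConfig.KEY_SERIALIZER_CLASS_CONFIG,
                "org.apache.kafka.common.serialization.StringSerializer");   // -> keySerializer
        props.put(ProducerConfig.VALUE_SERIALIZER_CLASS_CONFIG,
                "org.apache.kafka.common.serialization.StringSerializer");   // -> valueSerializer
        return props;
    }
}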

 

3 Important methods

3.1 The send method



1. Call ProducerInterceptors.onSend to intercept the message.

2. Call doSend, which first calls waitOnMetadata to obtain the Kafka cluster metadata; under the hood this wakes up the Sender thread to refresh the metadata held in Metadata.

3. Call Serializer.serialize to serialize the message key and value.

4. Call partition to choose an appropriate partition for the message.

5. Call RecordAccumulator.append to append the message to the RecordAccumulator.

6. Wake up the Sender thread, which sends the messages buffered in the RecordAccumulator.

public Future<RecordMetadata> send(ProducerRecord<K, V> record, Callback callback) {
    // Intercept the record before sending; interceptors may modify it
    ProducerRecord<K, V> interceptedRecord = this.interceptors == null ? record : this.interceptors.onSend(record);
    return doSend(interceptedRecord, callback);
}
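
Since interception is the first thing send() does, a sketch of a custom ProducerInterceptor may help. The ProducerInterceptor interface and its methods are the real Kafka client API; the class name, prefix, and String types below are hypothetical. Such an interceptor would be registered through the interceptor.classes producer config.

import java.util.Map;
import org.apache.kafka.clients.producer.ProducerInterceptor;
import org.apache.kafka.clients.producer.ProducerRecord;
import org.apache.kafka.clients.producer.RecordMetadata;

// Hypothetical interceptor that prefixes every value before it is serialized
public class PrefixInterceptor implements ProducerInterceptor<String, String> {
    @Override
    public ProducerRecord<String, String> onSend(ProducerRecord<String, String> record) {
        // Runs on the main thread inside KafkaProducer.send(), before serialization and partitioning
        return new ProducerRecord<>(record.topic(), record.partition(), record.timestamp(),
                record.key(), "audited-" + record.value());
    }

    @Override
    public void onAcknowledgement(RecordMetadata metadata, Exception exception) {
        // Runs when the broker acknowledges the record, or when the send fails
    }

    @Override
    public void close() { }

    @Override
    public void configure(Map<String, ?> configs) { }
}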

 

/** Asynchronously send a record to a topic */
private Future<RecordMetadata> doSend(ProducerRecord<K, V> record, Callback callback) {
    TopicPartition tp = null;
    try {
        // First obtain the cluster metadata, recording how long the wait took
        ClusterAndWaitTime clusterAndWaitTime = waitOnMetadata(record.topic(), record.partition(), maxBlockTimeMs);
        // The remaining wait budget (total budget minus the metadata wait) can still be
        // spent blocking while appending the record to the accumulator
        long remainingWaitMs = Math.max(0, maxBlockTimeMs - clusterAndWaitTime.waitedOnMetadataMs);
        Cluster cluster = clusterAndWaitTime.cluster;
        // Serialize the key and value
        byte[] serializedKey;
        try {
            serializedKey = keySerializer.serialize(record.topic(), record.key());
        } catch (ClassCastException cce) {
            throw new SerializationException("Can't convert key of class " + record.key().getClass().getName() +
                    " to class " + producerConfig.getClass(ProducerConfig.KEY_SERIALIZER_CLASS_CONFIG).getName() +
                    " specified in key.serializer");
        }
        byte[] serializedValue;
        try {
            serializedValue = valueSerializer.serialize(record.topic(), record.value());
        } catch (ClassCastException cce) {
            throw new SerializationException("Can't convert value of class " + record.value().getClass().getName() +
                    " to class " + producerConfig.getClass(ProducerConfig.VALUE_SERIALIZER_CLASS_CONFIG).getName() +
                    " specified in value.serializer");
        }
        // Determine the partition for the record (an explicit partition set on the record takes precedence)
        int partition = partition(record, serializedKey, serializedValue, cluster);
        // Total size of the record:
        // size(4) + offset(8) + crc(4) + magic(1) + attributes(1) + timestamp(8) + keyLength(4) + keySize + valueLength(4) + valueSize
        int serializedSize = Records.LOG_OVERHEAD + Record.recordSize(serializedKey, serializedValue);
        // The record's total size must not exceed maxRequestSize or totalMemorySize
        ensureValidRecordSize(serializedSize);
        tp = new TopicPartition(record.topic(), partition);
        long timestamp = record.timestamp() == null ? time.milliseconds() : record.timestamp();
        log.trace("Sending record {} with callback {} to topic {} partition {}", record, callback, record.topic(), partition);
        Callback interceptCallback = this.interceptors == null ?
                callback : new InterceptorCallback<>(callback, this.interceptors, tp);
        // Append the record to the RecordAccumulator (effectively a message buffer),
        // from which the Sender thread reads data and sends it to the broker
        RecordAccumulator.RecordAppendResult result = accumulator.append(tp, timestamp, serializedKey, serializedValue, interceptCallback, remainingWaitMs);
        // If the batch is full, or a new batch was created, wake up the Sender thread immediately
        if (result.batchIsFull || result.newBatchCreated) {
            log.trace("Waking up the sender since topic {} partition {} is either full or getting a new batch", record.topic(), partition);
            this.sender.wakeup();
        }
        // Return the result
        return result.future;
        // handling exceptions and record the errors;
        // for API exceptions return them in the future,
        // for other exceptions throw directly
    } catch (ApiException e) {
        log.debug("Exception occurred during message send:", e);
        if (callback != null)
            callback.onCompletion(null, e);
        this.errors.record();
        if (this.interceptors != null)
            this.interceptors.onSendError(record, tp, e);
        return new FutureFailure(e);
    } catch (InterruptedException e) {
        this.errors.record();
        if (this.interceptors != null)
            this.interceptors.onSendError(record, tp, e);
        throw new InterruptException(e);
    } catch (BufferExhaustedException e) {
        this.errors.record();
        this.metrics.sensor("buffer-exhausted-records").record();
        if (this.interceptors != null)
            this.interceptors.onSendError(record, tp, e);
        throw e;
    } catch (KafkaException e) {
        this.errors.record();
        if (this.interceptors != null)
            this.interceptors.onSendError(record, tp, e);
        throw e;
    } catch (Exception e) {
        // we notify interceptor about all exceptions, since onSend is called before anything else in this method
        if (this.interceptors != null)
            this.interceptors.onSendError(record, tp, e);
        throw e;
    }
}
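
doSend() delegates partition selection to the configured Partitioner. As an illustration, here is a minimal custom Partitioner sketch; the Partitioner interface is the real one from org.apache.kafka.clients.producer, while the class name and routing rule are hypothetical.

import java.util.Map;
import org.apache.kafka.clients.producer.Partitioner;
import org.apache.kafka.common.Cluster;
import org.apache.kafka.common.utils.Utils;

// Hypothetical partitioner: hash the key bytes, or use partition 0 for keyless records
public class SimpleHashPartitioner implements Partitioner {
    @Override
    public int partition(String topic, Object key, byte[] keyBytes, Object value, byte[] valueBytes, Cluster cluster) {
        int numPartitions = cluster.partitionsForTopic(topic).size();
        if (keyBytes == null)
            return 0; // illustrative choice; the built-in default partitioner round-robins instead
        return Utils.toPositive(Utils.murmur2(keyBytes)) % numPartitions;
    }

    @Override
    public void close() { }

    @Override
    public void configure(Map<String, ?> configs) { }
}

It would be enabled via the partitioner.class producer config.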

 

private ClusterAndWaitTime waitOnMetadata(String topic, Integer partition, long maxWaitMs) throws InterruptedException {
    // add topic to metadata topic list if it is not there already and reset expiry
    metadata.add(topic);
    // Fetch the current cluster view from the metadata
    Cluster cluster = metadata.fetch();
    // Get the number of partitions for the given topic
    Integer partitionsCount = cluster.partitionCountForTopic(topic);
    // Return cached metadata if we have it, and if the record's partition is either undefined
    // or within the known partition range
    if (partitionsCount != null && (partition == null || partition < partitionsCount))
        return new ClusterAndWaitTime(cluster, 0);

    long begin = time.milliseconds();
    // Maximum remaining wait time
    long remainingWaitMs = maxWaitMs;
    long elapsed;
    // Issue metadata requests until we have metadata for the topic or maxWaitTimeMs is exceeded.
    // In case we already have cached metadata for the topic, but the requested partition is greater
    // than expected, issue an update request only once. This is necessary in case the metadata
    // is stale and the number of partitions for this topic has increased in the meantime.
    do {
        log.trace("Requesting metadata update for topic {}.", topic);
        // Request an update of the current cluster metadata, returning the current version beforehand
        int version = metadata.requestUpdate();
        // Wake up the Sender thread so it performs the update
        sender.wakeup();
        try {
            // Wait for the metadata update, i.e. until the version advances past the one recorded above
            metadata.awaitUpdate(version, remainingWaitMs);
        } catch (TimeoutException ex) {
            // Rethrow with original maxWaitMs to prevent logging exception with remainingWaitMs
            throw new TimeoutException("Failed to update metadata after " + maxWaitMs + " ms.");
        }
        // Fetch the cluster view again now that the metadata has been updated
        cluster = metadata.fetch();
        elapsed = time.milliseconds() - begin;
        // If we have exceeded the maximum wait time, fail with a metadata update timeout
        if (elapsed >= maxWaitMs)
            throw new TimeoutException("Failed to update metadata after " + maxWaitMs + " ms.");
        // If the cluster's set of unauthorized topics contains this topic, fail as well
        if (cluster.unauthorizedTopics().contains(topic))
            throw new TopicAuthorizationException(topic);
        remainingWaitMs = maxWaitMs - elapsed;
        // Fetch the partition count for this topic once more
        partitionsCount = cluster.partitionCountForTopic(topic);
    } while (partitionsCount == null); // loop until the partition count is known

    if (partition != null && partition >= partitionsCount) {
        throw new KafkaException(
                String.format("Invalid partition given with record: %d is not in the range [0...%d).", partition, partitionsCount));
    }
    // Return the cluster along with the time spent waiting
    return new ClusterAndWaitTime(cluster, elapsed);
}
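
Because waitOnMetadata is bounded by maxBlockTimeMs, a send() against an unreachable cluster fails after max.block.ms. A hedged sketch of observing this: the broker address is deliberately unreachable and purely illustrative, and, depending on the client version, the TimeoutException may surface from send() itself or through the returned future.

import java.util.Properties;
import org.apache.kafka.clients.producer.KafkaProducer;
import org.apache.kafka.clients.producer.ProducerConfig;
import org.apache.kafka.clients.producer.ProducerRecord;

public class MetadataTimeoutDemo {
    public static void main(String[] args) {
        Properties props = new Properties();
        props.put(ProducerConfig.BOOTSTRAP_SERVERS_CONFIG, "unreachable-host:9092"); // illustrative, unreachable
        props.put(ProducerConfig.MAX_BLOCK_MS_CONFIG, 3000L); // bound waitOnMetadata to 3 s
        props.put(ProducerConfig.KEY_SERIALIZER_CLASS_CONFIG,
                "org.apache.kafka.common.serialization.StringSerializer");
        props.put(ProducerConfig.VALUE_SERIALIZER_CLASS_CONFIG,
                "org.apache.kafka.common.serialization.StringSerializer");

        try (KafkaProducer<String, String> producer = new KafkaProducer<>(props)) {
            producer.send(new ProducerRecord<>("demo-topic", "k", "v")).get();
        } catch (Exception e) {
            // Expected: a TimeoutException ("Failed to update metadata after 3000 ms.")
            System.out.println("send failed: " + e);
        }
    }
}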
