Kafka Source Code Analysis (5)


5 Log Subsystem

1 Log

        Kafka materializes messages to disk through Log files. A Log is append-only: records can only be added at the tail and are never modified in place. It is made up of a sequence of LogSegments, each carrying a base offset that marks the offset of the first message in that segment. When appending to the last (active) LogSegment pushes it past a configurable time or size threshold, the log rolls and a new LogSegment is created.
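        Before diving into the code, here is a minimal sketch of that roll decision (an assumption about the shape of the check, not Kafka's exact logic; the real thresholds come from LogConfig):

  // Roll to a new segment when the active segment would exceed the
  // configured size after this append, or has been open too long.
  def shouldRoll(segmentBytes: Long, appendBytes: Long, segmentAgeMs: Long,
                 maxSegmentBytes: Long, maxSegmentMs: Long): Boolean =
    segmentBytes + appendBytes > maxSegmentBytes || segmentAgeMs > maxSegmentMs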

        Let's look at the code:

@threadsafe
class Log(val dir: File,
          @volatile var config: LogConfig,
          @volatile var recoveryPoint: Long = 0L,
          scheduler: Scheduler,
          time: Time = SystemTime) extends Logging with KafkaMetricsGroup {

  import kafka.log.Log._

  /* A lock that guards all modifications to the log */
  private val lock = new Object

  /* last time it was flushed */
  private val lastflushedTime = new AtomicLong(time.milliseconds)

  /* the actual segments of the log */
  private val segments: ConcurrentNavigableMap[java.lang.Long, LogSegment] = new ConcurrentSkipListMap[java.lang.Long, LogSegment]
  loadSegments()

  /* Calculate the offset of the next message */
  @volatile var nextOffsetMetadata = new LogOffsetMetadata(activeSegment.nextOffset(), activeSegment.baseOffset, activeSegment.size.toInt)

  val topicAndPartition: TopicAndPartition = Log.parseTopicPartitionName(name)

  ...

  /** The name of this log */
  def name = dir.getName()

  ...

  /**
   * The size of the log in bytes
   */
  def size: Long = logSegments.map(_.size).sum

  /**
   * The earliest message offset in the log
   */
  def logStartOffset: Long = logSegments.head.baseOffset

  /**
   * The offset metadata of the next message that will be appended to the log
   */
  def logEndOffsetMetadata: LogOffsetMetadata = nextOffsetMetadata

  /**
   *  The offset of the next message that will be appended to the log
   */
  def logEndOffset: Long = nextOffsetMetadata.messageOffset

  ...
}

        The code for loading this object from disk and for reading, appending, deleting, and flushing data is omitted here; only a few key fields and methods are shown. On initialization the Log builds the segments structure and loads the existing segments from the local disk files. From then on, all data management is offset-based; logEndOffset is the LEO (log end offset) we mentioned earlier, and it is updated directly by the append path.
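        Note that segments is a ConcurrentSkipListMap keyed by base offset, which turns "which segment holds this offset?" into a floor lookup. A simplified sketch of that idea (SegmentInfo is a hypothetical stand-in for kafka.log.LogSegment):

  import java.util.concurrent.ConcurrentSkipListMap

  case class SegmentInfo(baseOffset: Long)

  val segments = new ConcurrentSkipListMap[java.lang.Long, SegmentInfo]
  Seq(0L, 100L, 250L).foreach(b => segments.put(b, SegmentInfo(b)))

  // floorEntry returns the entry with the greatest base offset <= offset,
  // i.e. the segment that must contain the requested offset.
  def segmentFor(offset: Long): Option[SegmentInfo] =
    Option(segments.floorEntry(offset)).map(_.getValue)

  // segmentFor(170) == Some(SegmentInfo(100)): offset 170 lives in the
  // segment whose files are named 00000000000000000100.log/.index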


2 LogSegment

        A segment is the unit a Log is composed of: a set of files stored in the log's directory. Each segment has a base_offset identifying its starting offset, and is stored in two parts: a log file, [base_offset].log, holding the actual message data, and an index file, [base_offset].index, mapping logical offsets to physical positions within the log file. Let's look at some of this class's methods; their header comments already explain clearly how a LogSegment appends data and manages its index:

  /**
   * Append the given messages starting with the given offset. Add
   * an entry to the index if needed.
   *
   * It is assumed this method is being called from within a lock.
   *
   * @param offset The first offset in the message set.
   * @param messages The messages to append.
   */
  @nonthreadsafe
  def append(offset: Long, messages: ByteBufferMessageSet) {
    if (messages.sizeInBytes > 0) {
      trace("Inserting %d bytes at offset %d at position %d".format(messages.sizeInBytes, offset, log.sizeInBytes()))
      // append an entry to the index (if needed)
      if(bytesSinceLastIndexEntry > indexIntervalBytes) {
        index.append(offset, log.sizeInBytes())
        this.bytesSinceLastIndexEntry = 0
      }
      // append the messages
      log.append(messages)
      this.bytesSinceLastIndexEntry += messages.sizeInBytes
    }
  }

  /**
   * Find the physical file position for the first message with offset >= the requested offset.
   *
   * The lowerBound argument is an optimization that can be used if we already know a valid starting position
   * in the file higher than the greatest-lower-bound from the index.
   *
   * @param offset The offset we want to translate
   * @param startingFilePosition A lower bound on the file position from which to begin the search. This is purely an optimization and
   * when omitted, the search will begin at the position in the offset index.
   *
   * @return The position in the log storing the message with the least offset >= the requested offset or null if no message meets this criteria.
   */
  @threadsafe
  private[log] def translateOffset(offset: Long, startingFilePosition: Int = 0): OffsetPosition = {
    val mapping = index.lookup(offset)
    log.searchFor(offset, max(mapping.position, startingFilePosition))
  }

  /**
   * Read a message set from this segment beginning with the first offset >= startOffset. The message set will include
   * no more than maxSize bytes and will end before maxOffset if a maxOffset is specified.
   *
   * @param startOffset A lower bound on the first offset to include in the message set we read
   * @param maxSize The maximum number of bytes to include in the message set we read
   * @param maxOffset An optional maximum offset for the message set we read
   *
   * @return The fetched data and the offset metadata of the first message whose offset is >= startOffset,
   *         or null if the startOffset is larger than the largest offset in this log
   */
  @threadsafe
  def read(startOffset: Long, maxOffset: Option[Long], maxSize: Int): FetchDataInfo = {
    if(maxSize < 0)
      throw new IllegalArgumentException("Invalid max size for log read (%d)".format(maxSize))

    val logSize = log.sizeInBytes // this may change, need to save a consistent copy
    val startPosition = translateOffset(startOffset)

    // if the start position is already off the end of the log, return null
    if(startPosition == null)
      return null

    val offsetMetadata = new LogOffsetMetadata(startOffset, this.baseOffset, startPosition.position)

    // if the size is zero, still return a log segment but with zero size
    if(maxSize == 0)
      return FetchDataInfo(offsetMetadata, MessageSet.Empty)

    // calculate the length of the message set to read based on whether or not they gave us a maxOffset
    val length =
      maxOffset match {
        case None =>
          // no max offset, just use the max size they gave unmolested
          maxSize
        case Some(offset) => {
          // there is a max offset, translate it to a file position and use that to calculate the max read size
          if(offset < startOffset)
            throw new IllegalArgumentException("Attempt to read with a maximum offset (%d) less than the start offset (%d).".format(offset, startOffset))
          val mapping = translateOffset(offset, startPosition.position)
          val endPosition =
            if(mapping == null)
              logSize // the max offset is off the end of the log, use the end of the file
            else
              mapping.position
          min(endPosition - startPosition.position, maxSize)
        }
      }
    FetchDataInfo(offsetMetadata, log.read(startPosition.position, length))
  }
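        Note that the index is sparse: append only writes an index entry every indexIntervalBytes bytes, so translateOffset first asks the index for the greatest indexed offset <= the target (index.lookup), then scans the log forward from that file position (log.searchFor). A simplified in-memory model of the index half of that lookup (the real OffsetIndex binary-searches a memory-mapped file):

  case class IndexEntry(offset: Long, position: Int)

  // Binary search for the last entry with entry.offset <= target; if the
  // target precedes every indexed offset, fall back to the segment start.
  def lookup(index: IndexedSeq[IndexEntry], target: Long): IndexEntry = {
    var lo = 0
    var hi = index.length - 1
    var best = IndexEntry(0L, 0)
    while (lo <= hi) {
      val mid = (lo + hi) / 2
      if (index(mid).offset <= target) { best = index(mid); lo = mid + 1 }
      else hi = mid - 1
    }
    best
  }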

3 LogManager

        This class is responsible for creating, retrieving, and deleting Log objects; reading and writing the log data itself is handled by the Log object. LogManager manages the logs spread across multiple directories on local disk, where each segment of a log is a file in that log's directory. When a log exceeds its retention limits, its oldest segments are removed by a background thread. Let's look at the code:

@threadsafe
class LogManager(val logDirs: Array[File],
                 val topicConfigs: Map[String, LogConfig],
                 val defaultConfig: LogConfig,
                 val cleanerConfig: CleanerConfig,
                 ioThreads: Int,
                 val flushCheckMs: Long,
                 val flushCheckpointMs: Long,
                 val retentionCheckMs: Long,
                 scheduler: Scheduler,
                 val brokerState: BrokerState,
                 private val time: Time) extends Logging {

  val RecoveryPointCheckpointFile = "recovery-point-offset-checkpoint"
  val LockFile = ".lock"
  val InitialTaskDelayMs = 30*1000
  private val logCreationOrDeletionLock = new Object
  private val logs = new Pool[TopicAndPartition, Log]()

  createAndValidateLogDirs(logDirs)
  private val dirLocks = lockLogDirs(logDirs)
  private val recoveryPointCheckpoints = logDirs.map(dir => (dir, new OffsetCheckpoint(new File(dir, RecoveryPointCheckpointFile)))).toMap
  loadLogs()

  ...

  /**
   * Get the log if it exists, otherwise return None
   */
  def getLog(topicAndPartition: TopicAndPartition): Option[Log] = {
    val log = logs.get(topicAndPartition)
    if (log == null)
      None
    else
      Some(log)
  }

  /**
   * Create a log for the given topic and the given partition
   * If the log already exists, just return a copy of the existing log
   */
  def createLog(topicAndPartition: TopicAndPartition, config: LogConfig): Log = {
    logCreationOrDeletionLock synchronized {
      var log = logs.get(topicAndPartition)

      // check if the log has already been created in another thread
      if(log != null)
        return log

      // if not, create it
      val dataDir = nextLogDir()
      val dir = new File(dataDir, topicAndPartition.topic + "-" + topicAndPartition.partition)
      dir.mkdirs()
      log = new Log(dir,
                    config,
                    recoveryPoint = 0L,
                    scheduler,
                    time)
      logs.put(topicAndPartition, log)
      info("Created log for partition [%s,%d] in %s with properties {%s}."
           .format(topicAndPartition.topic,
                   topicAndPartition.partition,
                   dataDir.getAbsolutePath,
                   {import JavaConversions._; config.toProps.mkString(", ")}))
      log
    }
  }

  /**
   *  Delete a log.
   */
  def deleteLog(topicAndPartition: TopicAndPartition) {
    var removedLog: Log = null
    logCreationOrDeletionLock synchronized {
      removedLog = logs.remove(topicAndPartition)
    }
    if (removedLog != null) {
      // We need to wait until there is no more cleaning task on the log to be deleted before actually deleting it.
      if (cleaner != null) {
        cleaner.abortCleaning(topicAndPartition)
        cleaner.updateCheckpoints(removedLog.dir.getParentFile)
      }
      removedLog.delete()
      info("Deleted log for partition [%s,%d] in %s."
           .format(topicAndPartition.topic,
                   topicAndPartition.partition,
                   removedLog.dir.getAbsolutePath))
    }
  }

  ...
}
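        The background deletion mentioned above is driven by a periodic task scheduled at retentionCheckMs intervals (LogManager.cleanupLogs in this version). A minimal sketch of the time-based half of that retention pass, under the assumption that segments are ordered oldest first and the active segment is never deleted:

  import java.util.concurrent.{Executors, TimeUnit}

  case class SegmentStub(baseOffset: Long, lastModifiedMs: Long)

  // Every retentionCheckMs, delete segments whose data is older than the
  // retention window, oldest first, always keeping the active segment.
  def scheduleRetention(retentionCheckMs: Long, retentionMs: Long,
                        segments: () => Seq[SegmentStub], // ordered oldest first
                        deleteSegment: SegmentStub => Unit): Unit = {
    val scheduler = Executors.newSingleThreadScheduledExecutor()
    val task = new Runnable {
      def run(): Unit =
        segments().dropRight(1) // never delete the active (newest) segment
          .filter(s => System.currentTimeMillis() - s.lastModifiedMs > retentionMs)
          .foreach(deleteSegment)
    }
    scheduler.scheduleAtFixedRate(task, retentionCheckMs, retentionCheckMs,
                                  TimeUnit.MILLISECONDS)
  }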

