第11课:Spark Streaming源码解读之Driver中的ReceiverTracker架构设计以及具体实现彻底研究

来源:互联网 发布:淘宝能不能换身份证 编辑:程序博客网 时间:2024/05/17 15:04

本期内容:

1.ReceiverTracker的架构设计

2.消息循环系统

3.ReceiverTracker具体实现

 

启动Receiver的方式:

1.把每个Receiver都封装成为task,这个task是这个job中唯一的task。实质上讲,ReceiverTracker启动Receiver的方式就是把Receiver封装成一个一个的job,有多少个Receiver就会启动多少个job。每个task就一条数据,就是Receiver的数据。

2.ReceiverTracker在启动Receiver的时候有一个ReceiverSupervisor,ReceiverSupervisorImpl作为ReceiverSupervisor的实现。ReceiverSupervisor在启动的时候会启动Receiver,然后Receiver不断地接收数据,会通过BlockGenerator把自己接收的数据变成一个一个的block,背后自己有个定时器,这个定时器会不断地存储数据。存储方式有两种:一种是直接通过BlockManager存储(BlockManagerBasedBlockHandler),一种是先写日志WAL(WriteAheadLogBasedBlockHandler)。ReceiverSupervisorImpl会把存储的元数据汇报给ReceiverTracker(实际上是ReceiverTracker中的RPC通信消息实体),后面进行下一步的数据管理工作。

 

数据的大小一般从多少记录考虑,例如10亿级别的

源码

ReceivedBlockHandler

/**
 * Handler through which received blocks are written.
 *
 * When `WriteAheadLogUtils.enableReceiverLog(env.conf)` is true a
 * `WriteAheadLogBasedBlockHandler` is created, which needs the checkpoint directory from
 * `checkpointDirOption`; otherwise a `BlockManagerBasedBlockHandler` is created.
 *
 * Initialization throws a `SparkException` if the receiver write-ahead log is enabled
 * while `checkpointDirOption` is empty.
 */
private val receivedBlockHandler: ReceivedBlockHandler = {
  if (!WriteAheadLogUtils.enableReceiverLog(env.conf)) {
    new BlockManagerBasedBlockHandler(env.blockManager, receiver.storageLevel)
  } else {
    // Fail fast: the WAL-based handler cannot be built without a checkpoint directory.
    val checkpointDir = checkpointDirOption.getOrElse {
      throw new SparkException(
        "Cannot enable receiver write-ahead log without checkpoint directory set. " +
          "Please use streamingContext.checkpoint() to set the checkpoint directory. " +
          "See documentation for more details.")
    }
    new WriteAheadLogBasedBlockHandler(env.blockManager, receiver.streamId,
      receiver.storageLevel, env.conf, hadoopConf, checkpointDir)
  }
}

pushAndReportBlock

/**
 * Store a received block and report it to the driver.
 *
 * The block is stored through `receivedBlockHandler`; a `ReceivedBlockInfo` built from the
 * store result is then sent to `trackerEndpoint` wrapped in an `AddBlock` message.
 *
 * @param receivedBlock  the block to store
 * @param metadataOption optional metadata, copied unchanged into the reported block info
 * @param blockIdOption  id to store the block under; `nextBlockId` is used when it is empty
 */
def pushAndReportBlock(
    receivedBlock: ReceivedBlock,
    metadataOption: Option[Any],
    blockIdOption: Option[StreamBlockId]
  ): Unit = {
  val blockId = blockIdOption.getOrElse(nextBlockId)
  val time = System.currentTimeMillis
  val blockStoreResult = receivedBlockHandler.storeBlock(blockId, receivedBlock)
  logDebug(s"Pushed block $blockId in ${(System.currentTimeMillis - time)} ms")
  val numRecords = blockStoreResult.numRecords
  val blockInfo = ReceivedBlockInfo(streamId, numRecords, metadataOption, blockStoreResult)
  // NOTE(review): the Boolean reply is discarded, so "Reported block" is logged even when
  // the tracker answers false -- confirm whether a failed AddBlock should be surfaced.
  trackerEndpoint.askWithRetry[Boolean](AddBlock(blockInfo))
  logDebug(s"Reported block $blockId")
}

private[streaming]case class ReceivedBlockInfo(
   
streamId: Int,
    numRecords: Option[Long],
    metadataOption: Option[Any],
    blockStoreResult: ReceivedBlockStoreResult
  ) {

 

/**
 * Remote RpcEndpointRef for the ReceiverTracker.
 *
 * Obtained from `RpcUtils.makeDriverRef` using the endpoint name "ReceiverTracker" together
 * with this environment's configuration (`env.conf`) and RPC environment (`env.rpcEnv`).
 */
private val trackerEndpoint= RpcUtils.makeDriverRef("ReceiverTracker", env.conf, env.rpcEnv)

 

 

ReceiverTracker是整个block管理的中心

//RPC消息循环体接收来自receiver的消息
private class ReceiverTrackerEndpoint(override valrpcEnv: RpcEnv) extendsThreadSafeRpcEndpoint {

 

// `sealed` means every direct subtype of ReceiverTrackerMessage is declared in this file,
// so the full set of messages is visible here.
/** Common parent of the messages declared below. */
private[streaming] sealed trait ReceiverTrackerMessage

/** Carries the stream id, type, host, executor id and RPC endpoint of a receiver. */
private[streaming] case class RegisterReceiver(
    streamId: Int,
    typ: String,
    host: String,
    executorId: String,
    receiverEndpoint: RpcEndpointRef
  ) extends ReceiverTrackerMessage

/** Carries the `ReceivedBlockInfo` of a block that has been stored. */
private[streaming] case class AddBlock(receivedBlockInfo: ReceivedBlockInfo)
  extends ReceiverTrackerMessage

/**
 * Carries an error message and error string for a stream.
 * Now extends `ReceiverTrackerMessage` like its siblings, so the sealed hierarchy really
 * does contain every message declared here.
 */
private[streaming] case class ReportError(streamId: Int, message: String, error: String)
  extends ReceiverTrackerMessage

/** Carries the stream id of a receiver being deregistered, with a message and error string. */
private[streaming] case class DeregisterReceiver(streamId: Int, msg: String, error: String)
  extends ReceiverTrackerMessage

receiveAndReply

/**
 * Handle messages that expect a reply.
 *
 *  - `RegisterReceiver`: replies with the result of `registerReceiver`.
 *  - `AddBlock`: replies with the result of `addBlock`. When
 *    `WriteAheadLogUtils.isBatchingEnabled(ssc.conf, isDriver = true)` holds, the call and
 *    the reply are executed on `walBatchingThreadPool` instead of inline.
 *  - `DeregisterReceiver`: calls `deregisterReceiver` and replies `true`.
 *  - `AllReceiverIds`: replies with the ids in `receiverTrackingInfos` whose state is not
 *    `ReceiverState.INACTIVE`.
 *  - `StopAllReceivers`: asserts the tracker is stopping or stopped, stops the receivers
 *    and replies `true`.
 */
override def receiveAndReply(context: RpcCallContext): PartialFunction[Any, Unit] = {
  // Remote messages
  case RegisterReceiver(streamId, typ, host, executorId, receiverEndpoint) =>
    val successful =
      registerReceiver(streamId, typ, host, executorId, receiverEndpoint, context.senderAddress)
    context.reply(successful)
  case AddBlock(receivedBlockInfo) =>
    if (WriteAheadLogUtils.isBatchingEnabled(ssc.conf, isDriver = true)) {
      walBatchingThreadPool.execute(new Runnable {
        // NOTE(review): presumably tryLogNonFatalError logs non-fatal failures instead of
        // letting them escape the pool thread -- confirm against Utils.
        override def run(): Unit = Utils.tryLogNonFatalError {
          if (active) {
            context.reply(addBlock(receivedBlockInfo))
          } else {
            throw new IllegalStateException("ReceiverTracker RpcEndpoint shut down.")
          }
        }
      })
    } else {
      context.reply(addBlock(receivedBlockInfo))
    }
  case DeregisterReceiver(streamId, message, error) =>
    deregisterReceiver(streamId, message, error)
    context.reply(true)
  // Local messages
  case AllReceiverIds =>
    context.reply(receiverTrackingInfos.filter(_._2.state != ReceiverState.INACTIVE).keys.toSeq)
  case StopAllReceivers =>
    assert(isTrackerStopping || isTrackerStopped)
    stopReceivers()
    context.reply(true)
}

addBlock

/**
 * Add a new block for its stream by handing the block info to `receivedBlockTracker`.
 *
 * @return whatever `receivedBlockTracker.addBlock` returns for this block
 */
private def addBlock(receivedBlockInfo: ReceivedBlockInfo): Boolean =
  receivedBlockTracker.addBlock(receivedBlockInfo)

 

/**
 * Add received block. This event will get written to the write ahead log (if enabled).
 *
 * The block is queued for its stream only after `writeToLog` reports success.
 *
 * @return the result of `writeToLog`, or `false` if a non-fatal exception was thrown
 *         (the exception is logged, not propagated)
 */
def addBlock(receivedBlockInfo: ReceivedBlockInfo): Boolean = {
  try {
    val writeResult = writeToLog(BlockAdditionEvent(receivedBlockInfo))
    if (writeResult) {
      // Only the queue update itself is guarded by this object's monitor.
      synchronized {
        getReceivedBlockQueue(receivedBlockInfo.streamId) += receivedBlockInfo
      }
      logDebug(s"Stream ${receivedBlockInfo.streamId} received " +
        s"block ${receivedBlockInfo.blockStoreResult.blockId}")
    } else {
      logDebug(s"Failed to acknowledge stream ${receivedBlockInfo.streamId} receiving " +
        s"block ${receivedBlockInfo.blockStoreResult.blockId} in the Write Ahead Log.")
    }
    writeResult
  } catch {
    case NonFatal(e) =>
      logError(s"Error adding block $receivedBlockInfo", e)
      false
  }
}

/**
 * Write an update to the tracker to the write ahead log.
 *
 * @param record the event to serialize and append
 * @return `true` when the write ahead log is disabled or the write succeeded; `false` when
 *         a non-fatal exception was thrown while writing (it is logged as a warning)
 */
private def writeToLog(record: ReceivedBlockTrackerLogEvent): Boolean = {
  if (!isWriteAheadLogEnabled) {
    // Nothing to persist; report success so callers proceed.
    true
  } else {
    logTrace(s"Writing record: $record")
    try {
      val log = writeAheadLogOption.get
      val payload = ByteBuffer.wrap(Utils.serialize(record))
      log.write(payload, clock.getTimeMillis())
      true
    } catch {
      case NonFatal(e) =>
        logWarning(s"Exception thrown while writing record:$record to the WriteAheadLog.", e)
        false
    }
  }
}

/**
 * Get the queue of received blocks belonging to a particular stream.
 * A new empty `ReceivedBlockQueue` is registered for the stream if none exists yet.
 */
private def getReceivedBlockQueue(streamId: Int): ReceivedBlockQueue =
  streamIdToUnallocatedBlockQueues.getOrElseUpdate(streamId, new ReceivedBlockQueue)

// Per-stream queues of blocks that have only been recorded, not yet allocated to a batch.
private val streamIdToUnallocatedBlockQueues = new mutable.HashMap[Int, ReceivedBlockQueue]
 

/**
 * Allocate all unallocated blocks to the given batch.
 *
 * Delegates to `receivedBlockTracker`; nothing happens when `receiverInputStreams` is empty.
 */
def allocateBlocksToBatch(batchTime: Time): Unit =
  if (receiverInputStreams.nonEmpty) receivedBlockTracker.allocateBlocksToBatch(batchTime)

/**
 * Allocate every currently queued block to the batch at `batchTime`.
 *
 * Runs under this object's monitor. The allocation happens only when no batch has been
 * allocated yet or `batchTime` is newer than `lastAllocatedBatchTime`. In that case all
 * queued blocks of every stream in `streamIds` are dequeued, and the allocation is recorded
 * in `timeToAllocatedBlocks` only if `writeToLog` accepts the `BatchAllocationEvent`.
 */
def allocateBlocksToBatch(batchTime: Time): Unit = synchronized {
  if (lastAllocatedBatchTime == null || batchTime > lastAllocatedBatchTime) {
    val streamIdToBlocks = streamIds.map { streamId =>
      // The always-true predicate drains the whole queue for this stream.
      (streamId, getReceivedBlockQueue(streamId).dequeueAll(x => true))
    }.toMap
    val allocatedBlocks = AllocatedBlocks(streamIdToBlocks)
    if (writeToLog(BatchAllocationEvent(batchTime, allocatedBlocks))) {
      timeToAllocatedBlocks.put(batchTime, allocatedBlocks)
      lastAllocatedBatchTime = batchTime
    } else {
      logInfo(s"Possibly processed batch $batchTime need to be processed again in WAL recovery")
    }
  } else {
    // This situation occurs when:
    // 1. WAL is ended with BatchAllocationEvent, but without BatchCleanupEvent,
    // possibly processed batch job or half-processed batch job need to be processed again,
    // so the batchTime will be equal to lastAllocatedBatchTime.
    // 2. Slow checkpointing makes recovered batch time older than WAL recovered
    // lastAllocatedBatchTime.
    // This situation will only occurs in recovery time.
    logInfo(s"Possibly processed batch $batchTime need to be processed again in WAL recovery")
  }
}

ReceiverSupervisorImpl.scala

 

private valendpoint = env.rpcEnv.setupEndpoint(
 
"Receiver-" +streamId +"-" + System.currentTimeMillis(),new ThreadSafeRpcEndpoint {
   
override val rpcEnv: RpcEnv = env.rpcEnv

receive

/**
 * Handle one-way messages delivered to this receiver endpoint.
 *
 *  - `StopReceiver`: stops the enclosing `ReceiverSupervisorImpl` with the message
 *    "Stopped by driver" and no error.
 *  - `CleanupOldBlocks`: calls `cleanupOldBlocks` with the given threshold time.
 *  - `UpdateRateLimit`: passes the new rate to every registered block generator.
 */
override def receive: PartialFunction[Any, Unit] = {
  case StopReceiver =>
    logInfo("Received stop signal")
    ReceiverSupervisorImpl.this.stop("Stopped by driver", None)
  case CleanupOldBlocks(threshTime) =>
    logDebug("Received delete old batch signal")
    cleanupOldBlocks(threshTime)
  case UpdateRateLimit(eps) =>
    logInfo(s"Received a new rate limit:$eps.")
    registeredBlockGenerators.foreach { bg =>
      bg.updateRate(eps)
    }
}

/**
 * Apply a new rate to `rateLimiter`.
 *
 * Non-positive rates are ignored. When `maxRateLimit` is positive the applied rate is the
 * smaller of `newRate` and `maxRateLimit`; otherwise `newRate` is applied as given.
 *
 * @param newRate the requested rate; values <= 0 leave the limiter untouched
 */
private[receiver] def updateRate(newRate: Long): Unit = {
  if (newRate > 0) {
    // Limits how fast records are allowed to flow in.
    val effectiveRate = if (maxRateLimit > 0) newRate.min(maxRateLimit) else newRate
    rateLimiter.setRate(effectiveRate)
  }
}

public final voidsetRate(doublepermitsPerSecond) {
   
Preconditions.checkArgument(permitsPerSecond > 0.0D && !Double.isNaN(permitsPerSecond), "rate must be positive");
   
Object var3 = this.mutex;
   
synchronized(this.mutex) {
       
this.resync(this.readSafeMicros());
       
double stableIntervalMicros = (double)TimeUnit.SECONDS.toMicros(1L) / permitsPerSecond;
   
    this.stableIntervalMicros = stableIntervalMicros;
       
this.doSetRate(permitsPerSecond, stableIntervalMicros);
   
}
}

ReceiverSupervisor

/**
 * Mark the supervisor and the receiver for stopping.
 *
 * Records the error (or `null` when there is none) in `stoppingError`, stops the receiver,
 * invokes the `onStop` hook, shuts down `futureExecutionContext` and finally counts down
 * `stopLatch`.
 *
 * @param message reason for stopping, forwarded to `stopReceiver` and `onStop`
 * @param error   optional cause of the stop
 */
def stop(message: String, error: Option[Throwable]): Unit = {
  stoppingError = error.orNull
  stopReceiver(message, error)
  onStop(message, error)
  futureExecutionContext.shutdownNow()
  stopLatch.countDown()
}

 

/**
 * Stop receiver.
 *
 * Runs under this object's monitor. Only a receiver in the `Started` state is actually
 * stopped: its state becomes `Stopped`, `receiver.onStop()` is called and then the
 * `onReceiverStop` hook. In the `Initialized` and `Stopped` states only a warning is logged.
 * Non-fatal exceptions are logged together with the throwable and are not propagated.
 *
 * @param message reason for stopping, included in the log and passed to `onReceiverStop`
 * @param error   optional cause, included in the log and passed to `onReceiverStop`
 */
def stopReceiver(message: String, error: Option[Throwable]): Unit = synchronized {
  try {
    logInfo("Stopping receiver with message: " + message + ": " + error.getOrElse(""))
    receiverState match {
      case Initialized =>
        logWarning("Skip stopping receiver because it has not yet started")
      case Started =>
        // Flip the state before calling out so the receiver is seen as stopped from here on.
        receiverState = Stopped
        receiver.onStop()
        logInfo("Called receiver onStop")
        onReceiverStop(message, error)
      case Stopped =>
        logWarning("Receiver has been stopped")
    }
  } catch {
    case NonFatal(t) =>
      // Hand the throwable to the logger instead of gluing its stack trace onto the id.
      logError(s"Error stopping receiver $streamId", t)
  }
}

子类实现

/**
 * Hook invoked when the receiver is stopped: deregisters the receiver from the tracker.
 *
 * Sends a `DeregisterReceiver` message carrying the stream id, the stop message and the
 * stack trace of `error` (an empty string when there is no error) to `trackerEndpoint`.
 *
 * @param message reason the receiver was stopped
 * @param error   optional cause; its stack trace is sent as a string
 */
override protected def onReceiverStop(message: String, error: Option[Throwable]): Unit = {
  logInfo("Deregistering receiver " + streamId)
  val errorString = error.map(Throwables.getStackTraceAsString).getOrElse("")
  trackerEndpoint.askWithRetry[Boolean](DeregisterReceiver(streamId, message, errorString))
  logInfo("Stopped receiver " + streamId)
}

0 0
原创粉丝点击