Hadoop DataNode启动之offerService

来源:互联网 发布:java发送内嵌图片邮件 编辑:程序博客网 时间:2024/05/21 04:01
  offerService包含了DN主循环线程的核心功能,DN对其提供保护:如果该函数执行失败,则由DN线程sleep 5秒后重新执行,在DN的线程体中可以看到该逻辑。对于offerService中的部分功能,以前的文章里已经描述过,比如心跳、异步块报告,但都是单独描述;它们之间的协调(例如执行的先后顺序、执行间隔)可以通过offerService体现出来。

  public void offerService() throws Exception {         LOG.info("using BLOCKREPORT_INTERVAL of " + blockReportInterval + "msec" +        " Initial delay: " + initialBlockReportDelay + "msec");    //    // Now loop for a long time....    //    while (shouldRun) {      try {      //记录循环启动时间        long startTime = now();        //        // Every so often, send heartbeat or block-report        //        //如果时间超出了心跳间隔,则需发送心跳        if (startTime - lastHeartbeat > heartBeatInterval) {          //心跳的详细内容上篇已经讲过,这里不再赘述          lastHeartbeat = startTime;          DatanodeCommand[] cmds = namenode.sendHeartbeat(dnRegistration,                                                       data.getCapacity(),                                                       data.getDfsUsed(),                                                       data.getRemaining(),                                                       xmitsInProgress.get(),                                                       getXceiverCount());          myMetrics.addHeartBeat(now() - startTime);          //处理返回命令          if (!processCommand(cmds))            continue;        }                    // 计算自己收到的block        Block [] blockArray=null;        String [] delHintArray=null;        synchronized(receivedBlockList) {          synchronized(delHints) {            int numBlocks = receivedBlockList.size();            if (numBlocks > 0) {              if(numBlocks!=delHints.size()) {                LOG.warn("Panic: receiveBlockList and delHints are not of the same length" );              }              //              // Send newly-received blockids to namenode              //              blockArray = receivedBlockList.toArray(new Block[numBlocks]);              delHintArray = delHints.toArray(new String[numBlocks]);            }          }        }        if (blockArray != null) {          if(delHintArray == null || delHintArray.length != blockArray.length ) {            LOG.warn("Panic: block array & delHintArray are not 
the same" );          }          //向NN回报自己收到的块,服务端会根据参数信息修改blocks->datanode,更新节点容量等操作          namenode.blockReceived(dnRegistration, blockArray, delHintArray);          synchronized (receivedBlockList) {            synchronized (delHints) {            //向NN发送信息成功后,则更新本地信息              for(int i=0; i<blockArray.length; i++) {                receivedBlockList.remove(blockArray[i]);                delHints.remove(delHintArray[i]);              }            }          }        }        // 判断是否应该发送块报告        if (startTime - lastBlockReport > blockReportInterval) {          if (data.isAsyncBlockReportReady()) {            // 创建块报告            long brCreateStartTime = now();            Block[] bReport = data.retrieveAsyncBlockReport();                        // 发送块报告,NN会对比新旧块报告,并更新block-->datanode映射,无效块会删除,新增块会添加            long brSendStartTime = now();            DatanodeCommand cmd = namenode.blockReport(dnRegistration,                    BlockListAsLongs.convertToArrayLongs(bReport));                        // 日志记录:块报告生成时间、RPC调用时间,可根据该日志判断磁盘IO,网络传输的大致情况            long brSendCost = now() - brSendStartTime;            long brCreateCost = brSendStartTime - brCreateStartTime;            myMetrics.addBlockReport(brSendCost);            LOG.info("BlockReport of " + bReport.length                + " blocks took " + brCreateCost + " msec to generate and "                + brSendCost + " msecs for RPC and NN processing");            //更新最后块报告时间            if (resetBlockReportTime) {              lastBlockReport = startTime -                  R.nextInt((int)(blockReportInterval));              resetBlockReportTime = false;            } else {              /* say the last block report was at 8:20:14. The current report                * should have started around 9:20:14 (default 1 hour interval).                
* If current time is :               *   1) normal like 9:20:18, next report should be at 10:20:14               *   2) unexpected like 11:35:43, next report should be at               *      12:20:14               */              lastBlockReport += (now() - lastBlockReport) /                                  blockReportInterval * blockReportInterval;            }            //处理返回命令,正常为空            processCommand(cmd);          } else {          //未到发送时间,则请求异步块报告            data.requestAsyncBlockReport();            if (lastBlockReport > 0) { // this isn't the first report              long waitingFor =                  startTime - lastBlockReport - blockReportInterval;              String msg = "Block report is due, and been waiting for it for " +                  (waitingFor/1000) + " seconds...";              if (waitingFor > LATE_BLOCK_REPORT_WARN_THRESHOLD) {                LOG.warn(msg);              } else if (waitingFor > LATE_BLOCK_REPORT_INFO_THRESHOLD) {                LOG.info(msg);              } else if (LOG.isDebugEnabled()) {                LOG.debug(msg);              }            }          }        }        // 启动 block scanner,详见:http://blog.csdn.net/lihm0_1/article/details/12437099        if (blockScanner != null && blockScannerThread == null &&            upgradeManager.isUpgradeCompleted()) {          LOG.info("Starting Periodic block scanner.");          blockScannerThread = new Daemon(blockScanner);          blockScannerThread.start();        }                    //计算等待时间,注意如果空闲情况下才等待,如果DN繁忙,有新块未处理,是不等待的        long waitTime = heartBeatInterval - (System.currentTimeMillis() - lastHeartbeat);        synchronized(receivedBlockList) {          if (waitTime > 0 && receivedBlockList.size() == 0) {//注意判断条件            try {              receivedBlockList.wait(waitTime);//开始等待            } catch (InterruptedException ie) {            }            delayBeforeBlockReceived();          }        } // synchronized      } catch(RemoteException re) {      
  String reClass = re.getClassName();        if (UnregisteredDatanodeException.class.getName().equals(reClass) ||            DisallowedDatanodeException.class.getName().equals(reClass) ||            IncorrectVersionException.class.getName().equals(reClass)) {          LOG.warn("DataNode is shutting down: " +                    StringUtils.stringifyException(re));          shutdown();          return;        }        LOG.warn(StringUtils.stringifyException(re));      } catch (IOException e) {        LOG.warn(StringUtils.stringifyException(e));      }    } // while (shouldRun)  } // offerService


原创粉丝点击