Hadoop DataNode启动之DataBlockScanner
来源:互联网 发布:淘宝兼职客服一般多少 编辑:程序博客网 时间:2024/05/20 18:43
存储在HDFS中的数据块每隔一段时间就会进行校验,因为硬件故障、系统BUG、异常断电等问题都可能导致数据块损坏,这时就需要HDFS采用其他块副本来修复。Hadoop为了感知块的损坏,会启动一个后台线程周期性地对数据块进行检测。这个原理和客户端读取数据时的校验规则一样:以块为单位,每读取一个block,都要对其内的每一个chunk进行校验。默认情况下一个block大小为64M,每个chunk大小为512字节。下面我们从启动开始分析这个过程。
DataBlockScanner在DN启动时创建,在startDataNode函数中
void startDataNode(Configuration conf, AbstractList<File> dataDirs, SecureResources resources ) throws IOException {...... //initialize periodic block scanner String reason = null; //获得扫描周期,默认为三周,参数单位为小时 if (conf.getInt("dfs.datanode.scan.period.hours", 0) < 0) { reason = "verification is turned off by configuration"; } else if ( !(data instanceof FSDataset) ) { reason = "verifcation is supported only with FSDataset"; } if ( reason == null ) { //开始创建块扫描器,但并未启动 blockScanner = new DataBlockScanner(this, (FSDataset)data, conf); } else { LOG.info("Periodic Block Verification is disabled because " + reason + "."); } ...... }块扫描器的启动在DN主循环的offerService中
public void offerService() throws Exception { LOG.info("using BLOCKREPORT_INTERVAL of " + blockReportInterval + "msec" + " Initial delay: " + initialBlockReportDelay + "msec"); // // Now loop for a long time.... // while (shouldRun) { try { .... .... // start block scanner if (blockScanner != null && blockScannerThread == null && upgradeManager.isUpgradeCompleted()) { LOG.info("Starting Periodic block scanner."); //创建后台守护线程并启动 blockScannerThread = new Daemon(blockScanner); blockScannerThread.start(); } .... .... } // offerService块校验的具体流程包含在线程执行体中,大致分为三个部分 1、块集合初始化 2、更新block校验时间 3、循环校验符合规则的block。中间还有节流器控制
public void run() { try { //1、块集合初始化 init(); // 2、更新block校验时间 if (!assignInitialVerificationTimes()) { return; } //节流器配置 adjustThrottler(); //3、循环校验负责规则的block while (datanode.shouldRun && !Thread.interrupted()) { long now = System.currentTimeMillis(); synchronized (this) { if ( now >= (currentPeriodStart + scanPeriod)) { //开启新的校验周期 startNewPeriod(); } } if ( (now - getEarliestScanTime()) >= scanPeriod ) { //校验一个数据块 verifyFirstBlock(); } else { try { //循环间隔1秒 Thread.sleep(1000); } catch (InterruptedException ignored) {} } } } catch (RuntimeException e) { LOG.warn("RuntimeException during DataBlockScanner.run() : " + StringUtils.stringifyException(e)); throw e; } finally { shutdown(); LOG.info("Exiting DataBlockScanner thread."); } }先看初始化部分,他会获得当前块报告,构建扫描信息类,获得块校验日志文件
/**
 * Prepares the scanner's state: loads the block report, fills
 * {@code blockInfoSet} / {@code blockMap}, opens the verification log and
 * creates the throttler.
 */
private void init() {
  // Take the block report and shuffle it in place (the original author's
  // note: so that scanning does not always land on the same volume).
  Block[] reported = dataset.getBlockReport();
  Collections.shuffle(Arrays.asList(reported));

  // Containers holding the per-block scan information.
  blockInfoSet = new TreeSet<BlockScanInfo>();
  blockMap = new HashMap<Block, BlockScanInfo>();

  // Each block gets a distinct negative placeholder time: -1, -2, -3, ...
  // lastScanType is not touched here (original note: it stays NONE).
  for (int i = 0; i < reported.length; i++) {
    BlockScanInfo scanInfo = new BlockScanInfo(reported[i]);
    scanInfo.lastScanTime = -(i + 1L);
    addBlockInfo(scanInfo);
  }

  // Pick the directory for the verification log: the first volume that
  // already contains the log file, otherwise the first volume.
  FSDataset.FSVolume[] vols = dataset.volumes.volumes;
  File logDir = null;
  for (int v = 0; v < vols.length; v++) {
    if (LogFileHandler.isFilePresent(vols[v].getDir(), verificationLogFile)) {
      logDir = vols[v].getDir();
      break;
    }
  }
  if (logDir == null) {
    logDir = vols[0].getDir();
  }

  try {
    // Open (or create) the log handler for the chosen directory.
    verificationLog = new LogFileHandler(logDir, verificationLogFile, 100);
  } catch (IOException e) {
    LOG.warn("Could not open verfication log. " +
             "Verification times are not stored.");
  }

  synchronized (this) {
    // Create the throttler used while scanning.
    throttler = new BlockTransferThrottler(200, MAX_SCAN_RATE);
  }
}
下面看更新block校验时间的部分assignInitialVerificationTimes
private boolean assignInitialVerificationTimes() { int numBlocks = 1; synchronized (this) { //获得块数量 numBlocks = Math.max(blockMap.size(), 1); } //构建日志读取器 LogFileHandler.Reader logReader = null; try { if (verificationLog != null) { logReader = verificationLog.new Reader(false); } } catch (IOException e) { LOG.warn("Could not read previous verification times : " + StringUtils.stringifyException(e)); } //循环读取日志,并更新当前行数 if (verificationLog != null) { verificationLog.updateCurNumLines(); } try { // 根据校验日志更新块的校验时间 while (logReader != null && logReader.hasNext()) { if (!datanode.shouldRun || Thread.interrupted()) { return false; } //获取日志条目并更新 LogEntry entry = LogEntry.parseEntry(logReader.next()); if (entry != null) { updateBlockInfo(entry); } } } finally { //更新完毕后关闭日志流 IOUtils.closeStream(logReader); } //两个块之间的校验间隔,避免频繁块扫描 long verifyInterval = (long) (Math.min( scanPeriod/2.0/numBlocks, 10*60*1000 )); //最后校验时间 long lastScanTime = System.currentTimeMillis() - scanPeriod; /* Before this loop, entries in blockInfoSet that are not * updated above have lastScanTime of <= 0 . Loop until first entry has * lastModificationTime > 0. */ synchronized (this) { if (blockInfoSet.size() > 0 ) { BlockScanInfo info; //循环更新块校验时间和间隔 while ((info = blockInfoSet.first()).lastScanTime < 0) { delBlockInfo(info); info.lastScanTime = lastScanTime; lastScanTime += verifyInterval; addBlockInfo(info); } } } return true; }下面看块校验部分,这也是blockscanner的主体部分,包含了校验逻辑,其实这个逻辑和上一篇dataXceiverServer中块读取的逻辑一样,只不过这里块数据读取后并不发送到客户端,而是发送到一个空流里,我们先看如何开始一个新的扫描周期
/**
 * Starts a new scan period: logs the share of the previous period's bytes
 * that was left unscanned, then resets the byte counter and records the
 * start time of the new period.
 */
private synchronized void startNewPeriod() {
  // Percentage of the previous period's work that was not completed.
  double percentLeft = (bytesLeft * 100.0) / totalBytesToScan;
  String formatted = String.format("%.2f%%", percentLeft);
  LOG.info("Starting a new period : work left in prev period : " + formatted);

  // Reset the byte count: the whole total is pending again.
  bytesLeft = totalBytesToScan;
  // Remember when this period began.
  currentPeriodStart = System.currentTimeMillis();
}
块校验函数如下
private void verifyFirstBlock() { Block block = null; synchronized (this) { if ( blockInfoSet.size() > 0 ) { //获取一个block,所有blockinfo放在一个TreeSet中,所里内部是排序过的 block = blockInfoSet.first().block; } } if ( block != null ) { verifyBlock(block);//开始校验 } }
在blockSender.sendBlock函数中会走与上一篇dataXceiverServer中一样的块读取逻辑private void verifyBlock(Block block) { BlockSender blockSender = null; //安全起见,如果第一次校验失败则会再校验一次 for (int i=0; i<2; i++) { boolean second = (i > 0); try { adjustThrottler(); //构建BlockSender blockSender = new BlockSender(block, 0, -1, false, false, true, datanode);//构建空的数据输出流 DataOutputStream out = new DataOutputStream(new IOUtils.NullOutputStream()); //发送一个块 blockSender.sendBlock(out, null, throttler);//当块更新完毕后则记录日志 LOG.info((second ? "Second " : "") + "Verification succeeded for " + block); if ( second ) { totalTransientErrors++; } //更新扫描状态 updateScanStatus(block, ScanType.VERIFICATION_SCAN, true); return; } catch (IOException e) {........ } finally { IOUtils.closeStream(blockSender); datanode.getMetrics().incrBlocksVerified(); totalScans++; totalVerifications++; } } }一旦块扫描完毕,就会更新扫描状态,通过函数updateScanStatus实现,内部调用updateScanStatusInternal
private synchronized void updateScanStatusInternal(Block block, ScanType type, boolean scanOk, boolean updateOnly) { if (!isInitialized()) { return; } BlockScanInfo info = blockMap.get(block); if ( info != null ) { delBlockInfo(info);//删除旧的扫描信息 } else { if (updateOnly) { return; } // It might already be removed. Thats ok, it will be caught next time. info = new BlockScanInfo(block); } //构建新的扫描信息 long now = System.currentTimeMillis(); info.lastScanType = type; info.lastScanTime = now; info.lastScanOk = scanOk; addBlockInfo(info); if (type == ScanType.REMOTE_READ) { totalVerifications++; } // Don't update meta data too often in case of REMOTE_READ // of if the verification failed. long diff = now - info.lastLogTime; if (!scanOk || (type == ScanType.REMOTE_READ && diff < scanPeriod/3 && diff < ONE_DAY)) { return; } info.lastLogTime = now; LogFileHandler log = verificationLog; if (log != null) { //记录扫描日志 log.appendLine(LogEntry.newEnry(block, now)); } }校验日志位于current目录下,文件名为dncp_block_verification.log.curr,校验日志格式如下:
date="2013-09-23 16:49:22,824" 校验时间:日期时间格式,精确到毫秒
time="1379926162824" 与上面表示同一时间,此处为时间戳格式
genstamp="1002" 块标记
id="-8049298064173279059" 块ID
通过上面的信息就可以确定一个块精确到毫秒的校验时间,具体时间通过System.currentTimeMillis()来获得。
- Hadoop DataNode启动之DataBlockScanner
- hadoop之DataBlockScanner
- Hadoop DataNode启动之refreshUsed
- Hadoop DataNode启动之asyncBlockReport
- Hadoop DataNode启动之asyncDiskService
- Hadoop DataNode启动之dataXceiverServer
- Hadoop DataNode启动之register
- Hadoop DataNode启动之heartbeat
- Hadoop DataNode启动之offferService
- hadoop之datanode无法启动
- Hadoop DataNode启动之DiskChecker(一)
- Hadoop DataNode启动之DiskChecker(二)
- Hadoop DataNode启动之数据目录校验
- hadoop datanode 无法启动
- hadoop datanode无法启动
- Hadoop datanode无法启动
- Hadoop datanode无法启动
- hadoop datanode无法启动
- 自助Linux之问题诊断工具strace
- 初学者学Java(二十一)-------反射机制
- 总结使用Unity 3D优化游戏运行性能的经验
- 16个最棒的WordPress婚纱摄影网站主题
- OCP-1Z0-053-V12.02-374题
- Hadoop DataNode启动之DataBlockScanner
- 刘汝佳训练指南《几何问题》专题总结 BY 9974
- Eclipse统计代码行数
- 【热门主题:异形战铁血战士主题】
- pthread_mutex_xx and pthread_spin_lock
- hadoop 根据SecondaryNameNode恢复Namenode
- 腾讯的一道面试题
- 10进制转2,8,16进制
- hdu 4605 Magic Ball Game