hadoop DirectoryScanner
Default values: dfs.datanode.directoryscan.interval is 21600 seconds (6 hours) and dfs.datanode.directoryscan.threads is 1, so reportCompileThreadPool holds a single thread unless configured otherwise. The constructor reads both settings:
DirectoryScanner(DataNode datanode, FsDatasetSpi<?> dataset, Configuration conf) {
  this.datanode = datanode;
  this.dataset = dataset;
  int interval = conf.getInt(DFSConfigKeys.DFS_DATANODE_DIRECTORYSCAN_INTERVAL_KEY,
      DFSConfigKeys.DFS_DATANODE_DIRECTORYSCAN_INTERVAL_DEFAULT);
  scanPeriodMsecs = interval * 1000L; //msec

  int threads = conf.getInt(DFSConfigKeys.DFS_DATANODE_DIRECTORYSCAN_THREADS_KEY,
      DFSConfigKeys.DFS_DATANODE_DIRECTORYSCAN_THREADS_DEFAULT);

  reportCompileThreadPool = Executors.newFixedThreadPool(threads,
      new Daemon.DaemonFactory());
  masterThread = new ScheduledThreadPoolExecutor(1,
      new Daemon.DaemonFactory());
}
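Both knobs come from ordinary configuration keys, so they can be overridden in hdfs-site.xml or programmatically. A minimal sketch of the programmatic form, using the real DFSConfigKeys constants and simply restating the defaults:

Configuration conf = new HdfsConfiguration();
// dfs.datanode.directoryscan.interval: seconds between scans (default 21600 = 6 hours)
conf.setInt(DFSConfigKeys.DFS_DATANODE_DIRECTORYSCAN_INTERVAL_KEY, 21600);
// dfs.datanode.directoryscan.threads: size of reportCompileThreadPool (default 1)
conf.setInt(DFSConfigKeys.DFS_DATANODE_DIRECTORYSCAN_THREADS_KEY, 1);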
DirectoryScanner.start schedules the masterThread. The first run is delayed by a random offset within one scan period, which staggers the scans of DataNodes that were started at the same time.
void start() {
  shouldRun = true;
  long offset = DFSUtil.getRandom().nextInt(
      (int) (scanPeriodMsecs/1000L)) * 1000L; //msec
  long firstScanTime = Time.now() + offset;
  LOG.info("Periodic Directory Tree Verification scan starting at "
      + firstScanTime + " with interval " + scanPeriodMsecs);
  masterThread.scheduleAtFixedRate(this, offset, scanPeriodMsecs,
      TimeUnit.MILLISECONDS);
}
DirectoryScanner.run
The master thread calls the run method of DirectoryScanner, which runs reconcile() once per cycle.
/**
 * Main program loop for DirectoryScanner
 * Runs "reconcile()" periodically under the masterThread.
 */
@Override
public void run() {
  try {
    if (!shouldRun) {
      //shutdown has been activated
      LOG.warn("this cycle terminating immediately because 'shouldRun' has been deactivated");
      return;
    }

    //We're are okay to run - do it
    reconcile();
  } catch (Exception e) {
    //Log and continue - allows Executor to run again next cycle
    LOG.error("Exception during DirectoryScanner execution - will continue next cycle", e);
  } catch (Error er) {
    //Non-recoverable error - re-throw after logging the problem
    LOG.error("System Error during DirectoryScanner execution - permanently terminating periodic scanner", er);
    throw er;
  }
}
reconcile
Reconcile differences between disk and in-memory blocks.
It first calls scan(), which records the differences in diffs.
It then calls dataset.checkAndUpdate for each ScanInfo object, which fixes the in-memory block map: a replica whose block file has vanished from disk is removed, a block found on disk but absent from memory is added, and a missing or mismatched meta file is repaired (a simplified sketch of this decision follows the code below).
void reconcile() throws IOException {
  scan();
  for (Entry<String, LinkedList<ScanInfo>> entry : diffs.entrySet()) {
    String bpid = entry.getKey();
    LinkedList<ScanInfo> diff = entry.getValue();

    for (ScanInfo info : diff) {
      dataset.checkAndUpdate(bpid, info.getBlockId(), info.getBlockFile(),
          info.getMetaFile(), info.getVolume());
    }
  }
  if (!retainDiffs) clear();
}
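What checkAndUpdate does with each ScanInfo is out of scope here, but the decision it makes can be sketched. The method below is a hypothetical simplification, not the FsDatasetImpl code, which also repairs generation stamps and reports bad replicas to the NameNode:

/** Hypothetical simplification of the per-ScanInfo decision made by
 *  dataset.checkAndUpdate. */
static String checkAndUpdateSketch(boolean inMemory, boolean blockOnDisk,
    boolean metaOnDisk) {
  if (inMemory && !blockOnDisk)
    return "block file vanished: remove replica from the block map";
  if (!inMemory && blockOnDisk)
    return "block only on disk: add replica to the block map";
  if (blockOnDisk && !metaOnDisk)
    return "meta file missing: replica is suspect, repair or mark corrupt";
  return "disk and memory agree";
}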
DirectoryScanner.scan
Scan for the differences between disk and in-memory blocks.
Only the "finalized blocks" lists of both disk and memory are scanned; replicas that are still being written are not compared.
void scan() {
  clear();
  Map<String, ScanInfo[]> diskReport = getDiskReport();

  // Hold FSDataset lock to prevent further changes to the block map
  synchronized(dataset) {
    // compare and set diffs ...
  } //end synchronized
}
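The elided comparison walks the in-memory finalized-block list and the on-disk ScanInfo array for each block pool side by side, both sorted by blockId. Below is a hypothetical, much-simplified sketch of that merge-style diff; the real loop also checks meta files, generation stamps, and lengths, and records the results in diffs and statistics:

/** Hypothetical illustration of a merge-style diff over two
 *  blockId-sorted sequences; not the Hadoop code. */
public class SortedDiffSketch {
  static void diff(long[] memIds, long[] diskIds) {
    int m = 0, d = 0;
    while (m < memIds.length && d < diskIds.length) {
      if (memIds[m] == diskIds[d]) {
        m++; d++;                                           // on both sides: verify meta in the real code
      } else if (diskIds[d] < memIds[m]) {
        System.out.println("disk only: " + diskIds[d++]);   // candidate to add to the block map
      } else {
        System.out.println("memory only: " + memIds[m++]);  // file vanished from disk
      }
    }
    while (d < diskIds.length) System.out.println("disk only: " + diskIds[d++]);
    while (m < memIds.length) System.out.println("memory only: " + memIds[m++]);
  }

  public static void main(String[] args) {
    diff(new long[] {10, 20, 40}, new long[] {10, 30, 40});
    // prints: memory only: 20, then disk only: 30
  }
}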
getDiskReport
Get lists of blocks on the disk sorted by blockId, per blockpool
private Map<String, ScanInfo[]> getDiskReport() {
  // First get list of data directories
  final List<? extends FsVolumeSpi> volumes = dataset.getVolumes();

  // Use an array since the threads may return out of order and
  // compilersInProgress#keySet may return out of order as well.
  ScanInfoPerBlockPool[] dirReports = new ScanInfoPerBlockPool[volumes.size()];

  Map<Integer, Future<ScanInfoPerBlockPool>> compilersInProgress =
      new HashMap<Integer, Future<ScanInfoPerBlockPool>>();

  for (int i = 0; i < volumes.size(); i++) {
    if (isValid(dataset, volumes.get(i))) {
      ReportCompiler reportCompiler =
          new ReportCompiler(datanode, volumes.get(i));
      Future<ScanInfoPerBlockPool> result =
          reportCompileThreadPool.submit(reportCompiler);
      compilersInProgress.put(i, result);
    }
  }

  for (Entry<Integer, Future<ScanInfoPerBlockPool>> report :
      compilersInProgress.entrySet()) {
    try {
      dirReports[report.getKey()] = report.getValue().get();
    } catch (Exception ex) {
      LOG.error("Error compiling report", ex);
      // Propagate ex to DataBlockScanner to deal with
      throw new RuntimeException(ex);
    }
  }

  // Compile consolidated report for all the volumes
  ScanInfoPerBlockPool list = new ScanInfoPerBlockPool();
  for (int i = 0; i < volumes.size(); i++) {
    if (isValid(dataset, volumes.get(i))) {
      // volume is still valid
      list.addAll(dirReports[i]);
    }
  }

  return list.toSortedArrays();
}
ReportCompiler
There is one ReportCompiler task per volume, but reportCompileThreadPool contains only a single thread by default, so in practice the volumes are scanned one after another. Raising dfs.datanode.directoryscan.threads allows the per-volume reports to be compiled in parallel.
private static class ReportCompiler
    implements Callable<ScanInfoPerBlockPool> {
  private final FsVolumeSpi volume;
  private final DataNode datanode;

  public ReportCompiler(DataNode datanode, FsVolumeSpi volume) {
    this.datanode = datanode;
    this.volume = volume;
  }

  @Override
  public ScanInfoPerBlockPool call() throws Exception {
    String[] bpList = volume.getBlockPoolList();
    ScanInfoPerBlockPool result = new ScanInfoPerBlockPool(bpList.length);
    for (String bpid : bpList) {
      LinkedList<ScanInfo> report = new LinkedList<ScanInfo>();
      File bpFinalizedDir = volume.getFinalizedDir(bpid);
      result.put(bpid,
          compileReport(volume, bpFinalizedDir, bpFinalizedDir, report));
    }
    return result;
  }

  /** Compile list {@link ScanInfo} for the blocks in the directory <dir> */
  private LinkedList<ScanInfo> compileReport(FsVolumeSpi vol,
      File bpFinalizedDir, File dir, LinkedList<ScanInfo> report) {
    File[] files;
    try {
      files = FileUtil.listFiles(dir);
    } catch (IOException ioe) {
      LOG.warn("Exception occured while compiling report: ", ioe);
      // Initiate a check on disk failure.
      datanode.checkDiskErrorAsync();
      // Ignore this directory and proceed.
      return report;
    }
    Arrays.sort(files);
    /*
     * Assumption: In the sorted list of files block file appears immediately
     * before block metadata file. This is true for the current naming
     * convention for block file blk_<blockid> and meta file
     * blk_<blockid>_<genstamp>.meta
     */
    for (int i = 0; i < files.length; i++) {
      if (files[i].isDirectory()) {
        compileReport(vol, bpFinalizedDir, files[i], report);
        continue;
      }
      if (!Block.isBlockFilename(files[i])) {
        if (isBlockMetaFile(Block.BLOCK_FILE_PREFIX, files[i].getName())) {
          long blockId = Block.getBlockId(files[i].getName());
          verifyFileLocation(files[i].getParentFile(), bpFinalizedDir,
              blockId);
          report.add(new ScanInfo(blockId, null, files[i], vol));
        }
        continue;
      }
      File blockFile = files[i];
      long blockId = Block.filename2id(blockFile.getName());
      File metaFile = null;

      // Skip all the files that start with block name until
      // getting to the metafile for the block
      while (i + 1 < files.length && files[i + 1].isFile()
          && files[i + 1].getName().startsWith(blockFile.getName())) {
        i++;
        if (isBlockMetaFile(blockFile.getName(), files[i].getName())) {
          metaFile = files[i];
          break;
        }
      }
      verifyFileLocation(blockFile.getParentFile(), bpFinalizedDir, blockId);
      report.add(new ScanInfo(blockId, blockFile, metaFile, vol));
    }
    return report;
  }

  /**
   * Verify whether the actual directory location of block file has the
   * expected directory path computed using its block ID.
   */
  private void verifyFileLocation(File actualBlockDir,
      File bpFinalizedDir, long blockId) {
    File blockDir = DatanodeUtil.idToBlockDir(bpFinalizedDir, blockId);
    if (actualBlockDir.compareTo(blockDir) != 0) {
      LOG.warn("Block: " + blockId
          + " has to be upgraded to block ID-based layout");
    }
  }
}
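compileReport leans on the naming convention spelled out in the comment above: after Arrays.sort, blk_<blockid> appears immediately before blk_<blockid>_<genstamp>.meta. A hypothetical stand-alone sketch of just that pairing step (the file names and the printing are made up for illustration; the real code builds ScanInfo objects from File handles):

public class BlockMetaPairing {
  public static void main(String[] args) {
    // A sorted finalized-directory listing, as Arrays.sort would produce it.
    String[] files = {
        "blk_1073741825", "blk_1073741825_1001.meta",
        "blk_1073741826", "blk_1073741826_1002.meta",
        "blk_1073741827"                         // block with a missing meta file
    };
    for (int i = 0; i < files.length; i++) {
      String block = files[i];
      if (block.endsWith(".meta")) continue;     // orphan meta files handled separately
      String meta = null;
      // The meta file, if present, sorts immediately after its block file.
      if (i + 1 < files.length && files[i + 1].startsWith(block + "_")
          && files[i + 1].endsWith(".meta")) {
        meta = files[++i];
      }
      System.out.println(block + " -> " + (meta == null ? "NO META" : meta));
    }
  }
}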
ScanInfoPerBlockPool
static class ScanInfoPerBlockPool extends
    HashMap<String, LinkedList<ScanInfo>> {

  private static final long serialVersionUID = 1L;

  ScanInfoPerBlockPool() {super();}

  ScanInfoPerBlockPool(int sz) {super(sz);}

  /**
   * Merges {@code that} ScanInfoPerBlockPool into this one
   */
  public void addAll(ScanInfoPerBlockPool that) {
    if (that == null) return;

    for (Entry<String, LinkedList<ScanInfo>> entry : that.entrySet()) {
      String bpid = entry.getKey();
      LinkedList<ScanInfo> list = entry.getValue();

      if (this.containsKey(bpid)) {
        //merge that per-bpid linked list with this one
        this.get(bpid).addAll(list);
      } else {
        //add that new bpid and its linked list to this
        this.put(bpid, list);
      }
    }
  }

  /**
   * Convert all the LinkedList values in this ScanInfoPerBlockPool map
   * into sorted arrays, and return a new map of these arrays per blockpool
   * @return a map of ScanInfo arrays per blockpool
   */
  public Map<String, ScanInfo[]> toSortedArrays() {
    Map<String, ScanInfo[]> result =
        new HashMap<String, ScanInfo[]>(this.size());

    for (Entry<String, LinkedList<ScanInfo>> entry : this.entrySet()) {
      String bpid = entry.getKey();
      LinkedList<ScanInfo> list = entry.getValue();

      // convert list to array
      ScanInfo[] record = list.toArray(new ScanInfo[list.size()]);
      // Sort array based on blockId
      Arrays.sort(record);
      result.put(bpid, record);
    }
    return result;
  }
}
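The addAll/toSortedArrays pattern is easier to see with plain Long values standing in for ScanInfo. The sketch below is a hypothetical simplification; in the real class the values are LinkedList<ScanInfo> and Arrays.sort relies on ScanInfo's compareTo, which orders by blockId:

import java.util.*;

public class MergeThenSort {
  public static void main(String[] args) {
    // Two per-volume reports keyed by block pool id, as ReportCompiler produces them.
    Map<String, LinkedList<Long>> vol1 = new HashMap<>();
    vol1.put("BP-1", new LinkedList<>(Arrays.asList(30L, 10L)));
    Map<String, LinkedList<Long>> vol2 = new HashMap<>();
    vol2.put("BP-1", new LinkedList<>(Arrays.asList(20L)));

    // addAll: merge vol2 into vol1, per block pool.
    for (Map.Entry<String, LinkedList<Long>> e : vol2.entrySet()) {
      vol1.merge(e.getKey(), e.getValue(), (a, b) -> { a.addAll(b); return a; });
    }

    // toSortedArrays: list -> array, sorted by block id.
    Map<String, long[]> result = new HashMap<>();
    for (Map.Entry<String, LinkedList<Long>> e : vol1.entrySet()) {
      long[] arr = e.getValue().stream().mapToLong(Long::longValue).toArray();
      Arrays.sort(arr);
      result.put(e.getKey(), arr);
    }
    System.out.println(Arrays.toString(result.get("BP-1"))); // [10, 20, 30]
  }
}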