Hadoop DirectoryScanner

/**
 * Creates a directory scanner bound to the given datanode and dataset.
 *
 * Reads the scan interval and the report-compiler thread count from
 * {@code conf}, then creates the two executors used by the scanner: a fixed
 * pool that runs the report compilers and a single-threaded scheduled
 * executor (the "master thread"). Both use {@link Daemon.DaemonFactory}.
 * Nothing is scheduled here.
 *
 * @param datanode the datanode this scanner belongs to
 * @param dataset the dataset to reconcile against the on-disk state
 * @param conf configuration supplying the interval and thread count
 */
DirectoryScanner(DataNode datanode, FsDatasetSpi<?> dataset, Configuration conf) {
  this.datanode = datanode;
  this.dataset = dataset;

  // The configured interval is multiplied by 1000 to obtain the period in msec.
  final int intervalSecs = conf.getInt(
      DFSConfigKeys.DFS_DATANODE_DIRECTORYSCAN_INTERVAL_KEY,
      DFSConfigKeys.DFS_DATANODE_DIRECTORYSCAN_INTERVAL_DEFAULT);
  scanPeriodMsecs = 1000L * intervalSecs;

  // Worker pool that runs the report compilers.
  reportCompileThreadPool = Executors.newFixedThreadPool(
      conf.getInt(DFSConfigKeys.DFS_DATANODE_DIRECTORYSCAN_THREADS_KEY,
          DFSConfigKeys.DFS_DATANODE_DIRECTORYSCAN_THREADS_DEFAULT),
      new Daemon.DaemonFactory());

  // Single scheduler thread that drives the periodic scan.
  masterThread = new ScheduledThreadPoolExecutor(1, new Daemon.DaemonFactory());
}

DirectoryScanner.start() schedules the scanner to run periodically on the masterThread, after a random initial delay.

  void start() {    shouldRun = true;    long offset = DFSUtil.getRandom().nextInt((int) (scanPeriodMsecs/1000L)) * 1000L; //msec    long firstScanTime = Time.now() + offset;    LOG.info("Periodic Directory Tree Verification scan starting at "         + firstScanTime + " with interval " + scanPeriodMsecs);    masterThread.scheduleAtFixedRate(this, offset, scanPeriodMsecs,                                      TimeUnit.MILLISECONDS);  }


The master thread calls the run() method of DirectoryScanner.
run() invokes “reconcile()” periodically under the masterThread.

/**   * Main program loop for DirectoryScanner   * Runs "reconcile()" periodically under the masterThread.   */  @Override  public void run() {    try {      if (!shouldRun) {        //shutdown has been activated        LOG.warn("this cycle terminating immediately because 'shouldRun' has been deactivated");        return;      }      //We're are okay to run - do it      reconcile();          } catch (Exception e) {      //Log and continue - allows Executor to run again next cycle      LOG.error("Exception during DirectoryScanner execution - will continue next cycle", e);    } catch (Error er) {      //Non-recoverable error - re-throw after logging the problem      LOG.error("System Error during DirectoryScanner execution - permanently terminating periodic scanner", er);      throw er;    }  }


reconcile() reconciles the differences between on-disk and in-memory blocks.
First, it calls scan(), which puts the result in diffs.
Second, it calls dataset.checkAndUpdate() for each ScanInfo object in diffs.

  /**
   * Runs a scan and then asks the dataset to check and update every block
   * recorded in {@code diffs}, block pool by block pool.
   *
   * Afterwards {@code clear()} is called unless {@code retainDiffs} is set.
   *
   * @throws IOException declared by this method; propagated to the caller
   */
  void reconcile() throws IOException {
    scan();

    for (Entry<String, LinkedList<ScanInfo>> perPool : diffs.entrySet()) {
      final String bpid = perPool.getKey();
      for (ScanInfo mismatch : perPool.getValue()) {
        dataset.checkAndUpdate(
            bpid,
            mismatch.getBlockId(),
            mismatch.getBlockFile(),
            mismatch.getMetaFile(),
            mismatch.getVolume());
      }
    }

    if (!retainDiffs) {
      clear();
    }
  }


scan() looks for the differences between on-disk and in-memory blocks.
It scans only the “finalized blocks” lists of both disk and memory.

  /**
   * Scan for the differences between disk and in-memory blocks.
   *
   * Calls {@code clear()} first, then builds the on-disk report via
   * {@code getDiskReport()} before taking the lock, and finally performs
   * the comparison while synchronized on {@code dataset}. The comparison
   * code itself is elided ("...") in this excerpt.
   */
  void scan() {
    clear();
    Map<String, ScanInfo[]> diskReport = getDiskReport();

    // Hold FSDataset lock to prevent further changes to the block map
    synchronized(dataset) {
      // compare and set diffs
      ...
    } //end synchronized
  }


getDiskReport() gets the lists of blocks on disk, sorted by blockId, per block pool.

  private Map<String, ScanInfo[]> getDiskReport() {    // First get list of data directories    final List<? extends FsVolumeSpi> volumes = dataset.getVolumes();    // Use an array since the threads may return out of order and    // compilersInProgress#keySet may return out of order as well.    ScanInfoPerBlockPool[] dirReports = new ScanInfoPerBlockPool[volumes.size()];    Map<Integer, Future<ScanInfoPerBlockPool>> compilersInProgress =      new HashMap<Integer, Future<ScanInfoPerBlockPool>>();    for (int i = 0; i < volumes.size(); i++) {      if (isValid(dataset, volumes.get(i))) {        ReportCompiler reportCompiler =          new ReportCompiler(datanode,volumes.get(i));        Future<ScanInfoPerBlockPool> result =           reportCompileThreadPool.submit(reportCompiler);        compilersInProgress.put(i, result);      }    }    for (Entry<Integer, Future<ScanInfoPerBlockPool>> report :        compilersInProgress.entrySet()) {      try {        dirReports[report.getKey()] = report.getValue().get();      } catch (Exception ex) {        LOG.error("Error compiling report", ex);        // Propagate ex to DataBlockScanner to deal with        throw new RuntimeException(ex);      }    }    // Compile consolidated report for all the volumes    ScanInfoPerBlockPool list = new ScanInfoPerBlockPool();    for (int i = 0; i < volumes.size(); i++) {      if (isValid(dataset, volumes.get(i))) {        // volume is still valid        list.addAll(dirReports[i]);      }    }    return list.toSortedArrays();  }


Although there is one ReportCompiler object per volume, they are all executed by reportCompileThreadPool, whose size is set by DFS_DATANODE_DIRECTORYSCAN_THREADS_KEY; when the pool has a single thread, the scan is executed volume by volume.

private static class ReportCompiler   implements Callable<ScanInfoPerBlockPool> {    private final FsVolumeSpi volume;    private final DataNode datanode;    public ReportCompiler(DataNode datanode, FsVolumeSpi volume) {      this.datanode = datanode;      this.volume = volume;    }    @Override    public ScanInfoPerBlockPool call() throws Exception {      String[] bpList = volume.getBlockPoolList();      ScanInfoPerBlockPool result = new ScanInfoPerBlockPool(bpList.length);      for (String bpid : bpList) {        LinkedList<ScanInfo> report = new LinkedList<ScanInfo>();        File bpFinalizedDir = volume.getFinalizedDir(bpid);        result.put(bpid,            compileReport(volume, bpFinalizedDir, bpFinalizedDir, report));      }      return result;    }    /** Compile list {@link ScanInfo} for the blocks in the directory <dir> */    private LinkedList<ScanInfo> compileReport(FsVolumeSpi vol,        File bpFinalizedDir, File dir, LinkedList<ScanInfo> report) {      File[] files;      try {        files = FileUtil.listFiles(dir);      } catch (IOException ioe) {        LOG.warn("Exception occured while compiling report: ", ioe);        // Initiate a check on disk failure.        datanode.checkDiskErrorAsync();        // Ignore this directory and proceed.        return report;      }      Arrays.sort(files);      /*       * Assumption: In the sorted list of files block file appears immediately       * before block metadata file. 
This is true for the current naming       * convention for block file blk_<blockid> and meta file       * blk_<blockid>_<genstamp>.meta       */      for (int i = 0; i < files.length; i++) {        if (files[i].isDirectory()) {          compileReport(vol, bpFinalizedDir, files[i], report);          continue;        }        if (!Block.isBlockFilename(files[i])) {          if (isBlockMetaFile(Block.BLOCK_FILE_PREFIX, files[i].getName())) {            long blockId = Block.getBlockId(files[i].getName());            verifyFileLocation(files[i].getParentFile(), bpFinalizedDir,                blockId);            report.add(new ScanInfo(blockId, null, files[i], vol));          }          continue;        }        File blockFile = files[i];        long blockId = Block.filename2id(blockFile.getName());        File metaFile = null;        // Skip all the files that start with block name until        // getting to the metafile for the block        while (i + 1 < files.length && files[i + 1].isFile()            && files[i + 1].getName().startsWith(blockFile.getName())) {          i++;          if (isBlockMetaFile(blockFile.getName(), files[i].getName())) {            metaFile = files[i];            break;          }        }        verifyFileLocation(blockFile.getParentFile(), bpFinalizedDir,            blockId);        report.add(new ScanInfo(blockId, blockFile, metaFile, vol));      }      return report;    }    /**     * Verify whether the actual directory location of block file has the     * expected directory path computed using its block ID.     */    private void verifyFileLocation(File actualBlockDir,        File bpFinalizedDir, long blockId) {      File blockDir = DatanodeUtil.idToBlockDir(bpFinalizedDir, blockId);      if (actualBlockDir.compareTo(blockDir) != 0) {        LOG.warn("Block: " + blockId            + " has to be upgraded to block ID-based layout");      }    }  }


static class ScanInfoPerBlockPool extends                      HashMap<String, LinkedList<ScanInfo>> {    private static final long serialVersionUID = 1L;    ScanInfoPerBlockPool() {super();}    ScanInfoPerBlockPool(int sz) {super(sz);}    /**     * Merges {@code that} ScanInfoPerBlockPool into this one     */    public void addAll(ScanInfoPerBlockPool that) {      if (that == null) return;      for (Entry<String, LinkedList<ScanInfo>> entry : that.entrySet()) {        String bpid = entry.getKey();        LinkedList<ScanInfo> list = entry.getValue();        if (this.containsKey(bpid)) {          //merge that per-bpid linked list with this one          this.get(bpid).addAll(list);        } else {          //add that new bpid and its linked list to this          this.put(bpid, list);        }      }    }    /**     * Convert all the LinkedList values in this ScanInfoPerBlockPool map     * into sorted arrays, and return a new map of these arrays per blockpool     * @return a map of ScanInfo arrays per blockpool     */    public Map<String, ScanInfo[]> toSortedArrays() {      Map<String, ScanInfo[]> result =         new HashMap<String, ScanInfo[]>(this.size());      for (Entry<String, LinkedList<ScanInfo>> entry : this.entrySet()) {        String bpid = entry.getKey();        LinkedList<ScanInfo> list = entry.getValue();        // convert list to array        ScanInfo[] record = list.toArray(new ScanInfo[list.size()]);        // Sort array based on blockId        Arrays.sort(record);        result.put(bpid, record);                  }      return result;    }  }