Hadoop FileSystem学习

来源:互联网 发布:excel表格中重复数据 编辑:程序博客网 时间:2024/05/17 01:56
为了给不同的文件系统提供统一的接口,Hadoop提供了一个抽象的文件系统,而Hadoop分布式文件系统(Hadoop Distributed File System, HDFS)只是这个抽象文件系统的一个具体实现。Hadoop抽象文件系统的接口主要由抽象类org.apache.hadoop.fs.FileSystem提供。
默认成员变量:
//默认配置项 "fs.defaultFS";  public static final String FS_DEFAULT_NAME_KEY =                   CommonConfigurationKeys.FS_DEFAULT_NAME_KEY;//  FS_DEFAULT_NAME_DEFAULT = "file:///";  public static final String DEFAULT_FS =                   CommonConfigurationKeys.FS_DEFAULT_NAME_DEFAULT;  public static final Log LOG = LogFactory.getLog(FileSystem.class);  /**   * Priority of the FileSystem shutdown hook.   */  public static final int SHUTDOWN_HOOK_PRIORITY = 10;  /** FileSystem cache */  static final Cache CACHE = new Cache();  /** The key this instance is stored under in the cache. */  private Cache.Key key;  //记录每个文件系统的统计信息的映射  private static final Map<Class<? extends FileSystem>, Statistics>    statisticsTable =      new IdentityHashMap<Class<? extends FileSystem>, Statistics>();  //该文件的统计信息  protected Statistics statistics;  //当文件系统关闭或者jvm退出之后,需要把cache文件情况,该变量保存了与缓存中文件所对应的path 而且这些path都是排好序的  private Set<Path> deleteOnExit = new TreeSet<Path>();  boolean resolveSymlinks;

内部类Cache

private final Map<Key, FileSystem> map = new HashMap<Key, FileSystem>();
为了能够快速获取缓存中的文件系统对象实例,Hadoop将文件系统对象以key-value的方式保存在HashMap里,其中key的类型是Cache的静态内部类Key。
static class Cache {

    private final ClientFinalizer clientFinalizer = new ClientFinalizer();    private final Map<Key, FileSystem> map = new HashMap<Key, FileSystem>();    private final Set<Key> toAutoClose = new HashSet<Key>();    /** A variable that makes all objects in the cache unique */    private static AtomicLong unique = new AtomicLong(1);    FileSystem get(URI uri, Configuration conf) throws IOException{      Key key = new Key(uri, conf);      return getInternal(uri, conf, key);    }    /** The objects inserted into the cache using this method are all unique */    FileSystem getUnique(URI uri, Configuration conf) throws IOException{      Key key = new Key(uri, conf, unique.getAndIncrement());      return getInternal(uri, conf, key);    }    private FileSystem getInternal(URI uri, Configuration conf, Key key) throws IOException{      FileSystem fs;      synchronized (this) {        fs = map.get(key);      }      if (fs != null) {        return fs;      }      fs = createFileSystem(uri, conf);      synchronized (this) { // refetch the lock again        FileSystem oldfs = map.get(key);        if (oldfs != null) { // a file system is created while lock is releasing          fs.close(); // close the new file system          return oldfs;  // return the old file system        }                // now insert the new file system into the map        if (map.isEmpty()                && !ShutdownHookManager.get().isShutdownInProgress()) {          ShutdownHookManager.get().addShutdownHook(clientFinalizer, SHUTDOWN_HOOK_PRIORITY);        }        fs.key = key;        map.put(key, fs);        if (conf.getBoolean("fs.automatic.close", true)) {          toAutoClose.add(key);        }        return fs;      }    }    synchronized void remove(Key key, FileSystem fs) {      if (map.containsKey(key) && fs == map.get(key)) {        map.remove(key);        toAutoClose.remove(key);        }    }    synchronized void closeAll() throws IOException {      closeAll(false);    }    /**     * Close all 
FileSystem instances in the Cache.     * @param onlyAutomatic only close those that are marked for automatic closing     */    synchronized void closeAll(boolean onlyAutomatic) throws IOException {      List<IOException> exceptions = new ArrayList<IOException>();      // Make a copy of the keys in the map since we'll be modifying      // the map while iterating over it, which isn't safe.      List<Key> keys = new ArrayList<Key>();      keys.addAll(map.keySet());      for (Key key : keys) {        final FileSystem fs = map.get(key);        if (onlyAutomatic && !toAutoClose.contains(key)) {          continue;        }        //remove from cache        remove(key, fs);        if (fs != null) {          try {            fs.close();          }          catch(IOException ioe) {            exceptions.add(ioe);          }        }      }      if (!exceptions.isEmpty()) {        throw MultipleIOException.createIOException(exceptions);      }    }    private class ClientFinalizer implements Runnable {      @Override      public synchronized void run() {        try {          closeAll(true);        } catch (IOException e) {          LOG.info("FileSystem.Cache.closeAll() threw an exception:\n" + e);        }      }    }    synchronized void closeAll(UserGroupInformation ugi) throws IOException {      List<FileSystem> targetFSList = new ArrayList<FileSystem>();      //Make a pass over the list and collect the filesystems to close      //we cannot close inline since close() removes the entry from the Map      for (Map.Entry<Key, FileSystem> entry : map.entrySet()) {        final Key key = entry.getKey();        final FileSystem fs = entry.getValue();        if (ugi.equals(key.ugi) && fs != null) {          targetFSList.add(fs);           }      }      List<IOException> exceptions = new ArrayList<IOException>();      //now make a pass over the target list and close each      for (FileSystem fs : targetFSList) {        try {          fs.close();        }        catch(IOException 
ioe) {          exceptions.add(ioe);        }      }      if (!exceptions.isEmpty()) {        throw MultipleIOException.createIOException(exceptions);      }    }    /** FileSystem.Cache.Key */    static class Key {      final String scheme;      final String authority;      final UserGroupInformation ugi;      final long unique;   // an artificial way to make a key unique      Key(URI uri, Configuration conf) throws IOException {        this(uri, conf, 0);      }      Key(URI uri, Configuration conf, long unique) throws IOException {        scheme = uri.getScheme()==null ?            "" : StringUtils.toLowerCase(uri.getScheme());        authority = uri.getAuthority()==null ?            "" : StringUtils.toLowerCase(uri.getAuthority());        this.unique = unique;                this.ugi = UserGroupInformation.getCurrentUser();      }      @Override      public int hashCode() {        return (scheme + authority).hashCode() + ugi.hashCode() + (int)unique;      }      static boolean isEqual(Object a, Object b) {        return a == b || (a != null && a.equals(b));              }      @Override      public boolean equals(Object obj) {        if (obj == this) {          return true;        }        if (obj != null && obj instanceof Key) {          Key that = (Key)obj;          return isEqual(this.scheme, that.scheme)                 && isEqual(this.authority, that.authority)                 && isEqual(this.ugi, that.ugi)                 && (this.unique == that.unique);        }        return false;              }      @Override      public String toString() {        return "("+ugi.toString() + ")@" + scheme + "://" + authority;              }    }  }


原创粉丝点击