Hadoop FileSystem学习
来源:互联网 发布:excel表格中重复数据 编辑:程序博客网 时间:2024/05/17 01:56
为了给不同的文件系统提供统一的接口,Hadoop提供了一个抽象的文件系统,而Hadoop分布式文件系统(Hadoop Distributed File System, HDFS)只是这个抽象文件系统的一个具体实现。Hadoop抽象文件系统的接口主要由抽象类org.apache.hadoop.fs.FileSystem提供。
默认成员变量:
//默认配置项 "fs.defaultFS"; public static final String FS_DEFAULT_NAME_KEY = CommonConfigurationKeys.FS_DEFAULT_NAME_KEY;// FS_DEFAULT_NAME_DEFAULT = "file:///"; public static final String DEFAULT_FS = CommonConfigurationKeys.FS_DEFAULT_NAME_DEFAULT; public static final Log LOG = LogFactory.getLog(FileSystem.class); /** * Priority of the FileSystem shutdown hook. */ public static final int SHUTDOWN_HOOK_PRIORITY = 10; /** FileSystem cache */ static final Cache CACHE = new Cache(); /** The key this instance is stored under in the cache. */ private Cache.Key key; //记录每个文件系统的统计信息的映射 private static final Map<Class<? extends FileSystem>, Statistics> statisticsTable = new IdentityHashMap<Class<? extends FileSystem>, Statistics>(); //该文件的统计信息 protected Statistics statistics; //当文件系统关闭或者jvm退出之后,需要把cache文件情况,该变量保存了与缓存中文件所对应的path 而且这些path都是排好序的 private Set<Path> deleteOnExit = new TreeSet<Path>(); boolean resolveSymlinks;
内部类Cache
private final Map<Key, FileSystem> map = new HashMap<Key, FileSystem>();
为了能够快速地获取到缓存中的文件系统对象实例,Hadoop将文件系统对象以key-value的方式保存在HashMap里,其中key的类型为Cache内置的静态内部类Key。
static class Cache {private final ClientFinalizer clientFinalizer = new ClientFinalizer(); private final Map<Key, FileSystem> map = new HashMap<Key, FileSystem>(); private final Set<Key> toAutoClose = new HashSet<Key>(); /** A variable that makes all objects in the cache unique */ private static AtomicLong unique = new AtomicLong(1); FileSystem get(URI uri, Configuration conf) throws IOException{ Key key = new Key(uri, conf); return getInternal(uri, conf, key); } /** The objects inserted into the cache using this method are all unique */ FileSystem getUnique(URI uri, Configuration conf) throws IOException{ Key key = new Key(uri, conf, unique.getAndIncrement()); return getInternal(uri, conf, key); } private FileSystem getInternal(URI uri, Configuration conf, Key key) throws IOException{ FileSystem fs; synchronized (this) { fs = map.get(key); } if (fs != null) { return fs; } fs = createFileSystem(uri, conf); synchronized (this) { // refetch the lock again FileSystem oldfs = map.get(key); if (oldfs != null) { // a file system is created while lock is releasing fs.close(); // close the new file system return oldfs; // return the old file system } // now insert the new file system into the map if (map.isEmpty() && !ShutdownHookManager.get().isShutdownInProgress()) { ShutdownHookManager.get().addShutdownHook(clientFinalizer, SHUTDOWN_HOOK_PRIORITY); } fs.key = key; map.put(key, fs); if (conf.getBoolean("fs.automatic.close", true)) { toAutoClose.add(key); } return fs; } } synchronized void remove(Key key, FileSystem fs) { if (map.containsKey(key) && fs == map.get(key)) { map.remove(key); toAutoClose.remove(key); } } synchronized void closeAll() throws IOException { closeAll(false); } /** * Close all FileSystem instances in the Cache. 
* @param onlyAutomatic only close those that are marked for automatic closing */ synchronized void closeAll(boolean onlyAutomatic) throws IOException { List<IOException> exceptions = new ArrayList<IOException>(); // Make a copy of the keys in the map since we'll be modifying // the map while iterating over it, which isn't safe. List<Key> keys = new ArrayList<Key>(); keys.addAll(map.keySet()); for (Key key : keys) { final FileSystem fs = map.get(key); if (onlyAutomatic && !toAutoClose.contains(key)) { continue; } //remove from cache remove(key, fs); if (fs != null) { try { fs.close(); } catch(IOException ioe) { exceptions.add(ioe); } } } if (!exceptions.isEmpty()) { throw MultipleIOException.createIOException(exceptions); } } private class ClientFinalizer implements Runnable { @Override public synchronized void run() { try { closeAll(true); } catch (IOException e) { LOG.info("FileSystem.Cache.closeAll() threw an exception:\n" + e); } } } synchronized void closeAll(UserGroupInformation ugi) throws IOException { List<FileSystem> targetFSList = new ArrayList<FileSystem>(); //Make a pass over the list and collect the filesystems to close //we cannot close inline since close() removes the entry from the Map for (Map.Entry<Key, FileSystem> entry : map.entrySet()) { final Key key = entry.getKey(); final FileSystem fs = entry.getValue(); if (ugi.equals(key.ugi) && fs != null) { targetFSList.add(fs); } } List<IOException> exceptions = new ArrayList<IOException>(); //now make a pass over the target list and close each for (FileSystem fs : targetFSList) { try { fs.close(); } catch(IOException ioe) { exceptions.add(ioe); } } if (!exceptions.isEmpty()) { throw MultipleIOException.createIOException(exceptions); } } /** FileSystem.Cache.Key */ static class Key { final String scheme; final String authority; final UserGroupInformation ugi; final long unique; // an artificial way to make a key unique Key(URI uri, Configuration conf) throws IOException { this(uri, conf, 0); } Key(URI 
uri, Configuration conf, long unique) throws IOException { scheme = uri.getScheme()==null ? "" : StringUtils.toLowerCase(uri.getScheme()); authority = uri.getAuthority()==null ? "" : StringUtils.toLowerCase(uri.getAuthority()); this.unique = unique; this.ugi = UserGroupInformation.getCurrentUser(); } @Override public int hashCode() { return (scheme + authority).hashCode() + ugi.hashCode() + (int)unique; } static boolean isEqual(Object a, Object b) { return a == b || (a != null && a.equals(b)); } @Override public boolean equals(Object obj) { if (obj == this) { return true; } if (obj != null && obj instanceof Key) { Key that = (Key)obj; return isEqual(this.scheme, that.scheme) && isEqual(this.authority, that.authority) && isEqual(this.ugi, that.ugi) && (this.unique == that.unique); } return false; } @Override public String toString() { return "("+ugi.toString() + ")@" + scheme + "://" + authority; } } }
阅读全文
0 0
- Hadoop中的FileSystem学习
- Hadoop学习之FileSystem
- Hadoop FileSystem学习
- Hadoop FileSystem文件系统的概要学习
- Hadoop FileSystem
- Meet Hadoop & Hadoop Filesystem
- Hadoop-2.4.1学习之FileSystem及实战
- Hadoop Filesystem closed Exception
- hadoop FileSystem使用示例
- Hadoop HDFS FileSystem详解
- Hadoop IO RPC FileSystem
- hadoop之FileSystem操作
- The Hadoop Distributed Filesystem
- hadoop FileSystem源码分析
- 一步一步跟我学习hadoop(6)----hadoop利用FileSystem API 执行hadoop文件读写操作
- Hadoop学习----直接使用FileSystem以标准输出格式显示Hadoop文件系统中的文件
- Hadoop系统操作类FileSystem
- Hadoop源码分析之FileSystem
- HDU-6205 card card card(思维+简单线段树)
- git本地项目提交github命令及更新命令
- _STORAGE_WRITE_ERROR_
- 查找字符出现次数
- Android彻底组件化demo发布
- Hadoop FileSystem学习
- Nginx服务器如何处理请求
- Set/ZSet/Hash/List
- PHP中static与yield关键字的思考
- 北大青鸟 SQL第二学期第二章课后题(Library数据库)
- MongoDB 基本命令
- linux服务器(nginx或者apache)限制IP访问的方法
- 植物大战僵尸基于OpenCv实现
- python 3.6.2 使用VScode 安装lxml、ipython、jupyter包(单纯记录、没有难度)