linux内核分析笔记----虚拟文件系统(下)

来源：互联网发布：肯德基网络市场调研编辑：程序博客网时间：2024/05/17 22:28

接着上次的来，我今天讲虚拟文件系统剩下的一点知识.

3.目录项对象.目录项的概念上节已经说了,我就不多说.目录项中也可包括安装点.在路径/mnt/cdrom/foo中，/,mnt,cdrom都属于目录项对象。目录项由dentry结构体表示，定义在文件linux/dcache.h中，描述如下:

?
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
structdentry { 
        atomic_t                 d_count;     /* usage count */
        unsignedlong           d_vfs_flags;  /* dentry cache flags */
        spinlock_t               d_lock;      /* per-dentry lock */
        structinode             *d_inode;     /* associated inode */
        structlist_head         d_lru;        /* unused list */
        structlist_head         d_child;      /* list of dentries within */
        structlist_head         d_subdirs;    /* subdirectories */
        structlist_head         d_alias;      /* list of alias inodes */
        unsignedlong           d_time;       /* revalidate time */
        structdentry_operations *d_op;        /* dentry operations table */
        structsuper_block       *d_sb;        /* superblock of file */
        unsignedint            d_flags;      /* dentry flags */
        int                     d_mounted;    /* is this a mount point? */
        void                    *d_fsdata;    /* filesystem-specific data */
        structrcu_head          d_rcu;        /* RCU locking */
        structdcookie_struct    *d_cookie;    /* cookie */
        structdentry            *d_parent;    /* dentry object of parent */
        structqstr              d_name;       /* dentry name */
        structhlist_node        d_hash;       /* list of hash table entries */
        structhlist_head        *d_bucket;    /* hash bucket */
        unsignedchar           d_iname[DNAME_INLINE_LEN_MIN]; /* short name */
};

由于目录项并非真正保存在磁盘上，所以目录项没有对应的磁盘数据结构，VFS根据字符串形式的路径名现场创建它，目录项结构体也没有是否被修改的标志。目录项对象有三种状态：被使用，未被使用和负状态。一个被使用的目录项对应一个有效的索引节点(即d_inode指向相应的索引节点)并且该对象存在一个或多个使用者(即d_count为正值)。一个未被使用的目录项对应一个有效的索引节点(d_inode指向一个索引节点)，但是VFS当前并未使用它(d_count为0)。该目录项对象仍然指向一个有效对象，而且被保留在内存中以便需要时再使用它。显然这样要比重新创建效率高些。一个负状态的目录项没有对应的有效索引节点(d_inode为NULL)，因为索引节点已被删除了，或路径不再正确了，但是目录项仍然保留，以便快速解析以后的路径查询。虽然负状态的目录项有些用处，但如果需要的话，还是可以将其销毁的。

结构体dentry_operations指明了VFS操作目录项的所有方法，如下：

?
1
2
3
4
5
6
7
8
/*
 * Table of methods the VFS invokes on a dentry; a dentry points to its
 * table through the d_op field.
 */
struct dentry_operations {
        int (*d_revalidate) (struct dentry *, int);
        int (*d_hash) (struct dentry *, struct qstr *);
        int (*d_compare) (struct dentry *, struct qstr *, struct qstr *);
        int (*d_delete) (struct dentry *);
        void (*d_release) (struct dentry *);
        void (*d_iput) (struct dentry *, struct inode *);
};

其实，如果VFS遍历路径名中所有的元素并将它们逐个地解析成目录项对象，将是一件非常耗时的事情。所以内核将目录项对象缓存在目录项缓存(dcache)中，目录项缓存包括三个主要部分：

1.“被使用的”目录项链表，该链表通过索引节点对象中的i_dentry项链接到相关的索引节点上。因为一个给定的索引节点可能有多个链接，所以就可能有多个目录项对象，因此用一个链表来连接它们。
2.“最近被使用的”双向链表。该链表包含未被使用的和负状态的目录项对象。该链表是按时间插入的。
3. 哈希表和相应的哈希函数用来快速地将给定路径解析为相关目录项对象。

哈希表由数组dentry_hashtable表示，其中每一个元素都是一个指向具有相同键值的目录项对象链表的指针。数组的大小取决于系统中物理内存的大小。实际的哈希值由d_hash()计算，文件系统可以通过dentry_operations中的这个方法提供自己特有的哈希函数。查找哈希表要通过d_lookup()函数，如果该函数在dcache中发现了与其相匹配的目录项对象，则匹配对象被返回；否则，返回NULL指针。dcache在一定意义上也提供了对索引节点的缓存。和目录项对象相关的索引节点对象不会被释放，因为目录项会让相关索引节点的使用计数为正，这样就可以确保索引节点留在内存中。只要目录项被缓存，其相应的索引节点也就被缓存了。

4.文件对象:文件对象表示进程已打开的文件。文件对象仅仅在进程观点上代表已打开文件，它指向目录项对象(目录项对象又指向索引节点)，其实只有目录项对象才表示已打开的实际文件。虽然一个文件对应的文件对象不是唯一的，但对应的索引节点和目录项对象无疑是唯一的。文件对象由file结构表示，定义在文件linux/fs.h中，如下:

?
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
structfile { 
        structlist_head       f_list;        /* list of file objects */
        structdentry          *f_dentry;     /* associated dentry object */
        structvfsmount        *f_vfsmnt;     /* associated mounted fs */
        structfile_operations *f_op;         /* file operations table */
        atomic_t               f_count;      /* file object's usage count */
        unsignedint          f_flags;       /* flags specified on open */
        mode_t                 f_mode;       /* file access mode */
        loff_t                 f_pos;        /* file offset (file pointer) */
        structfown_struct     f_owner;       /* owner data for signals */
        unsignedint          f_uid;         /* user's UID */
        unsignedint          f_gid;         /* user's GID */
        int                   f_error;       /* error code */
        structfile_ra_state   f_ra;          /* read-ahead state */
        unsignedlong         f_version;     /* version number */
        void                  *f_security;   /* security module */
        void                  *private_data; /* tty driver hook */
        structlist_head       f_ep_links;    /* list of eventpoll links */
        spinlock_t             f_ep_lock;    /* eventpoll lock */
        structaddress_space   *f_mapping;    /* page cache mapping */
};

文件对象的操作由file_operations结构表示，在linux/fs.h中，如下:

?
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
structfile_operations { 
        structmodule *owner; 
        loff_t (*llseek) (structfile *, loff_t, int);
        ssize_t (*read) (structfile *, char*, size_t, loff_t *);
        ssize_t (*aio_read) (structkiocb *, char*, size_t, loff_t);
        ssize_t (*write) (structfile *, constchar*, size_t, loff_t *);
        ssize_t (*aio_write) (structkiocb *, constchar*, size_t, loff_t);
        int(*readdir) (structfile *, void*, filldir_t); 
        unsignedint(*poll) (structfile *, structpoll_table_struct *); 
        int(*ioctl) (structinode *, structfile *, unsigned int, unsignedlong);
        int(*mmap) (structfile *, structvm_area_struct *); 
        int(*open) (structinode *, structfile *); 
        int(*flush) (structfile *); 
        int(*release) (structinode *, structfile *); 
        int(*fsync) (structfile *, structdentry *, int);
        int(*aio_fsync) (structkiocb *, int);
        int(*fasync) (int,structfile *, int);
        int(*lock) (structfile *, int,structfile_lock *); 
        ssize_t (*readv) (structfile *, conststruct iovec *, 
                          unsignedlong, loff_t *);
        ssize_t (*writev) (structfile *, conststruct iovec *, 
                           unsignedlong, loff_t *);
        ssize_t (*sendfile) (structfile *, loff_t *, size_t,
                             read_actor_t,void*); 
        ssize_t (*sendpage) (structfile *, structpage *, int,
                             size_t, loff_t *,int);
        unsignedlong(*get_unmapped_area) (structfile *, unsigned long,
                                            unsignedlong, unsignedlong,
                                            unsignedlong);
        int(*check_flags) (intflags); 
        int(*dir_notify) (structfile *filp, unsigned longarg); 
        int(*flock) (structfile *filp, intcmd, structfile_lock *fl); 
};

最后，除了以上几种VFS基础对象外，内核还使用了另外一些数据结构来管理文件系统的其它相关数据，如下：

1.file_system_type:因为linux支持众多的文件系统，所以内核必须有一个特殊的结构来描述每种文件系统的功能和行为：

?
1
2
3
4
5
6
7
8
9
10
11
12
structfile_system_type { 
        constchar             *name;     /* filesystem's name */
        structsubsystem        subsys;    /* sysfs subsystem object */
        int                    fs_flags;  /* filesystem type flags */
        /* the following is used to read the superblock off the disk */
        structsuper_block      *(*get_sb) (structfile_system_type *, int,char*, void*); 
        /* the following is used to terminate access to the superblock */
        void                   (*kill_sb) (structsuper_block *); 
        structmodule           *owner;    /* module owning the filesystem */
        structfile_system_type *next;     /* next file_system_type in list */
        structlist_head        fs_supers; /* list of superblock objects */
};

其中，get_sb()函数从磁盘上读取超级块，并且在文件系统被安装时，在内存中组装超级块对象，剩余的函数描述文件系统的属性。每种文件系统，不管有多少个实例安装到系统中，还是根本就没有安装到系统中，都只有一个file_system_type结构。更有趣的是，当文件系统被实际安装时，将有一个vfsmount结构体在安装点被创建。该结构体被用来代表文件系统的实例----换句话说，代表一个安装点.

2.vfsmount结构被定义在linux/mount.h中，下面是具体结构：

?
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
structvfsmount { 
        structlist_head   mnt_hash;        /* hash table list */
        structvfsmount    *mnt_parent;     /* parent filesystem */
        structdentry      *mnt_mountpoint; /* dentry of this mount point */
        structdentry      *mnt_root;       /* dentry of root of this fs */
        structsuper_block *mnt_sb;         /* superblock of this filesystem */
        structlist_head   mnt_mounts;      /* list of children */
        structlist_head   mnt_child;       /* list of children */
        atomic_t           mnt_count;      /* usage count */
        int               mnt_flags;       /* mount flags */
        char              *mnt_devname;    /* device file name */
        structlist_head   mnt_list;        /* list of descriptors */
        structlist_head   mnt_fslink;      /* fs-specific expiry list */
        structnamespace  *mnt_namespace   /* associated namespace */
};

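vfs中维护的各种链表是为了跟踪文件系统和所有其他安装点的关系，mnt_flags保存了安装时指定的标志信息，标准的安装标志如下：

- MNT_NOSUID：禁止该文件系统上的可执行文件设置setuid和setgid标志
- MNT_NODEV：禁止访问该文件系统上的设备文件
- MNT_NOEXEC：禁止执行该文件系统上的可执行文件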

安装那些管理员不充分信任的移动设备时，这些标志很有用处。

系统中每一个进程都有自己的一组打开的文件，有三个数据结构将VFS层和系统中的进程紧密联系在一起，它们分别是files_struct,fs_struct和namespace.

1.files_struct:该结构体由进程描述符中的files域指向，如下：

?
1
2
3
4
5
6
7
8
9
10
11
12
13
structfiles_struct { 
        atomic_t    count;             /* structure's usage count */
        spinlock_t  file_lock;         /* lock protecting this structure */
        int        max_fds;            /* maximum number of file objects */
        int        max_fdset;          /* maximum number of file descriptors */
        int        next_fd;            /* next file descriptor number */
        structfile **fd;               /* array of all file objects */
        fd_set      *close_on_exec;    /* file descriptors to close on exec() */
        fd_set      *open_fds;          /* pointer to open file descriptors */
        fd_set      close_on_exec_init;/* initial files to close on exec() */
        fd_set      open_fds_init;     /* initial set of file descriptors */
        structfile *fd_array[NR_OPEN_DEFAULT]; /* default array of file objects */
};

fd数组指针指向已打开的文件对象链表，默认情况下，指向fd_array数组。NR_OPEN_DEFAULT默认是32，所以该数组可以容纳32个文件对象。如果一个进程所打开的文件对象超过32个，内核将分配一个新数组，并且将fd指针指向它。这个值也是可以调整的。
2.第二个结构体是fs_struct：由进程描述符的fs域指向。它包含文件系统和进程相关的信息，在linux/fs_struct.h中，如下：

?
1
2
3
4
5
6
7
8
9
10
11
structfs_struct { 
        atomic_t        count;      /* structure usage count */
        rwlock_t        lock;       /* lock protecting structure */
        int            umask;       /* default file permissions*/
        structdentry   *root;       /* dentry of the root directory */
        structdentry   *pwd;        /* dentry of the current directory */
        structdentry   *altroot;    /* dentry of the alternative root */
        structvfsmount *rootmnt;    /* mount object of the root directory */
        structvfsmount *pwdmnt;     /* mount object of the current directory */
        structvfsmount *altrootmnt; /* mount object of the alternative root */
};

该结构包含了当前进程的当前工作目录和根目录。
3.最后一个是namespace：由进程描述符namespace域指向，定义在linux/namespace.h中,如下:

?
1
2
3
4
5
6
structnamespace{ 
        atomic_t            count;/* structure usage count */
        structvfsmount     *root; /* mount object of root directory */
        structlist_head    list;  /* list of mount points */
        structrw_semaphore sem;   /* semaphore protecting the namespace */
};

list域是连接已安装文件系统的双向链表，它包含的元素组成了全体命名空间。上述这些数据结构都是通过进程描述符连接起来的。对多数进程来说，它们的描述符都指向唯一的files_struct和fs_struct结构体。但是，对于那些使用克隆标志CLONE_FILES或CLONE_FS创建的进程，会共享这两个结构体。所以多个进程描述符可能指向同一个files_struct或fs_struct结构体。每个结构体都维护一个count域作为引用计数，它防止进程正使用该结构时，该结构被销毁。而namespace却不是这样，默认情况下，所有的进程共享同样的命名空间，也就是说，它们都看到同一个文件系统层次结构。只有在进行clone()操作时使用CLONE_NEWNS标志，才会给进程一个另外的命名空间结构体的拷贝。