vfs的数据结构

来源：互联网发布：林忆莲野风知乎编辑：程序博客网时间：2024/05/22 04:56

每个VFS对象都存放在一个恰当的数据结构中，其中包括对象的属性和指向对象方法表的指针。内核可以动态地修改对象的方法，因此可以为对象建立专用的行为。

超级块对象

struct super_block {    struct list_head    s_list;     /* 所有的super_block都通过s_list链接在一起 */    dev_t           s_dev;      /*用设备编号标识文件系统所在的块设备 */    unsigned char       s_blocksize_bits; //用字节数取2的对数标识文件系统的块尺寸    unsigned long       s_blocksize;  //用字节数标识文件系统的块尺寸    loff_t          s_maxbytes; /* Max file size */    struct file_system_type *s_type; //文件系统类型    const struct super_operations   *s_op; /* 超级块方法 */    const struct dquot_operations   *dq_op;  /* 磁盘限额处理方法 */    const struct quotactl_ops   *s_qcop; /* 磁盘限额管理方法 */    const struct export_operations *s_export_op;   /* 网络文件系统使用的输出操作 */    unsigned long       s_flags; /* 安装标志 */    unsigned long       s_magic;  /* 文件系统的魔数 */    struct dentry       *s_root; /* 文件系统根目录的目录项对象 */    struct rw_semaphore s_umount;  /* 卸载所用的信号量 */    int         s_count;  /* 超级快引用计数器 */    atomic_t        s_active;  /* 超级快次级引用计数器 */#ifdef CONFIG_SECURITY    void                    *s_security;/* 指向超级块安全数据结构的指针 */#endif    const struct xattr_handler **s_xattr; /* 指向超级块扩展属性结构的指针 */    struct list_head    s_inodes;   /* 该文件系统实例内的所有inode，都通过它们的i_sb_list成员挂接到s_inodes起始的双向链表中 */    struct hlist_bl_head    s_anon;     /*  /* 用于处理远程网络文件系统的匿名目录项的链表 */*/    struct list_head    s_mounts;   /* list of mounts; _not_ for fs use */    struct block_device *s_bdev; /*指向块设备在内存中的block_device结构*/    struct backing_dev_info *s_bdi;     struct mtd_info     *s_mtd;    struct hlist_node   s_instances; /* 用于给定文件系统类型的超级块对象链表的指针 */    struct quota_info   s_dquot;    /* 磁盘限额的描述符 */    struct sb_writers   s_writers;    char s_id[32];               /* 包含超级块的块设备名称 */    u8 s_uuid[16];              /* UUID */    void            *s_fs_info;  /* 指向特定文件系统的超级块信息的指针 */    unsigned int        s_max_links;    fmode_t         s_mode;    /* Granularity of c/m/atime in ns.       Cannot be worse than a second */    u32        s_time_gran;    /*     * The next field is for VFS *only*. No filesystems have any business     * even looking at it. You had been warned.     
*/    struct mutex s_vfs_rename_mutex;    /* 当VFS通过目录重命名文件时使用的信号量 */    /*     * Filesystem subtype.  If non-empty the filesystem type field     * in /proc/mounts will be "type.subtype"     */    char *s_subtype;    /*     * Saved mount options for lazy filesystems using     * generic_show_options()     */    char __rcu *s_options;    const struct dentry_operations *s_d_op; /* default d_op for dentries */    /*     * Saved pool identifier for cleancache (-1 means none)     */    int cleancache_poolid;    struct shrinker s_shrink;   /* per-sb shrinker handle */    /* Number of inodes with nlink == 0 but still referenced */    atomic_long_t s_remove_count;    /* Being remounted read-only */    int s_readonly_remount;    /* AIO completions deferred from interrupt context */    struct workqueue_struct *s_dio_done_wq;    /*     * Keep the lru lists last in the structure so they always sit on their     * own individual cachelines.     */    struct list_lru     s_dentry_lru ____cacheline_aligned_in_smp;    struct list_lru     s_inode_lru ____cacheline_aligned_in_smp;    struct rcu_head     rcu;};

s_fs_info字段指向属于具体文件系统的超级块信息；例如，假如超级块对象指的是Ext2文件系统，该字段就指向ext2_sb_info数据结构，该结构包括磁盘分配位掩码和其他与VFS的通用文件模型无关的数据。
通常，为了效率起见，由s_fs_info字段所指向的数据被复制到内存。任何基于磁盘的文件系统都需要访问和更改自己的磁盘分配位图，以便分配或释放磁盘块。VFS允许这些文件系统直接对内存超级块的s_fs_info字段进行操作，而无需访问磁盘。
这种方法带来一个新问题：有可能VFS超级块最终不再与磁盘上相应的超级块同步，即读入内存的s_fs_info数据为脏。因此，有必要引入一个s_dirt标志来表示该超级块是否是脏的——即磁盘上的数据是否必须要更新。Linux是通过周期性地将所有“脏”的超级块写回磁盘来减少该问题带来的危害。

与超级块关联的方法就是所谓的超级块操作。

每一个具体的文件系统都可以定义自己的超级块操作。

/*
 * Superblock operations (excerpt: only two of the methods are listed).
 */
struct super_operations {
	/*
	 * Allocate space for an inode object, including the space required
	 * for the data of the specific filesystem.
	 */
	struct inode *(*alloc_inode)(struct super_block *sb);

	/*
	 * Update a filesystem inode with the contents of the inode object
	 * passed as the parameter. The i_ino field of the inode object
	 * identifies the filesystem inode on disk that is concerned.
	 */
	int (*write_inode) (struct inode *, struct writeback_control *wbc);

	/* ... remaining methods omitted ... */
};

这里只列出了两个比较重要常用的方法。

为了更好地理解，在此举一个具体文件系统ext2的例子：

static const struct super_operations ext2_sops = {    .alloc_inode    = ext2_alloc_inode,    ....    .write_inode    = ext2_write_inode,    ....}

static struct inode *ext2_alloc_inode(struct super_block *sb){    struct ext2_inode_info *ei; //这是ext2文件系统自己定义的ext2版的inode    ei = (struct ext2_inode_info *)kmem_cache_alloc(ext2_inode_cachep, GFP_KERNEL); //分配一个ext2的inode    if (!ei)        return NULL;    ei->i_block_alloc_info = NULL;    ei->vfs_inode.i_version = 1;    return &ei->vfs_inode;//可以看出一般具体文件系统定义的inode结构都在内部包含了vfs的inode数据结构}

/* * second extended file system inode data in memory */struct ext2_inode_info {    __le32  i_data[15];    __u32   i_flags;    __u32   i_faddr;    ....    struct inode    vfs_inode;    struct list_head i_orphan;  /* unlinked but open inodes */};

索引节点对象

inode包含了文件系统中一个文件的所有信息，inode和文件系统中的文件是一一对应的。借助于inode的这些信息，文件系统可以方便的操作文件

struct inode {    umode_t         i_mode;  /* 文件类型与访问权限 */    const struct inode_operations   *i_op;/* 索引节点的操作 */    struct super_block  *i_sb;/*该文件所属的super block*/    struct address_space    *i_mapping; /* 指向缓存address_space对象的指针 */    /* Stat data, not accessed from path walking */    unsigned long       i_ino; /* 索引节点号 */    dev_t           i_rdev; /* 实设备标识符 */    loff_t          i_size; /* 文件的字节数 */    spinlock_t      i_lock; /* i_blocks, i_bytes, maybe i_size */    unsigned short          i_bytes;  /* 文件中最后一个块的字节数 */    unsigned int        i_blkbits; /* 块的位数 */    blkcnt_t        i_blocks;/* 文件的块数 */    unsigned long       i_state; /* 索引节点的状态标志 */    struct mutex        i_mutex;/* 索引节点信号量 */    unsigned long       dirtied_when;   /* 索引节点的弄脏时间（以节拍为单位） */    /*系统中所有的inode都存放在hash_table，以方便对某个inode的快速查找，hash算法是    通过超级块和inode number计算的，这两项组合可以唯一确定一个inode。i_hash用作hash    表的冲突管理。*/    struct hlist_node   i_hash;    struct list_head    i_wb_list;  /* backing dev IO list */    struct list_head    i_lru;      /* inode LRU list */     /*文件必定属于某个文件系统实例，我们可以把super_block看做这个文件系统实例的代表，    该文件系统实例的所有inode都通过i_sb_list挂接到super_block的s_inodes链表上。*/    struct list_head    i_sb_list;    union {        struct hlist_head   i_dentry;        struct rcu_head     i_rcu;    };    u64         i_version;    atomic_t        i_count;//inode的使用计数，inode对应的文件可以被多个进程同时访问。    const struct file_operations    *i_fop; /* 缺省文件操作*/    struct file_lock    *i_flock;/* 指向文件锁链表的指针 */    struct address_space    i_data; /* 嵌入在inode中的文件的address_space对象 */#ifdef CONFIG_QUOTA    struct dquot        *i_dquot[MAXQUOTAS]; /* 索引节点磁盘限额 *#endif    struct list_head    i_devices;  /* 用于具体的字符或块设备索引节点链表的指针 */    union {        struct pipe_inode_info  *i_pipe;        struct block_device *i_bdev; /* 指向块设备驱动程序的指针 */        struct cdev     *i_cdev; /* 指向字符设备驱动程序的指针 */    };    ....};

每个索引节点对象都会复制磁盘索引节点包含的一些数据，比如分配给文件的磁盘块数。我们通常先分配索引节点对象，然后用从磁盘索引节点读取来的数据填充。
索引节点对象也存放在一个称为inode_hashtable的散列表中。散列表加快了对索引节点对象的搜索，前提是系统内核要知道索引节点号及文件所在文件系统对应的超级块对象的地址。由于散列技术可能引发冲突，所以索引节点对象包含一个i_hash字段，该字段中包含向前和向后的两个指针，分别指向散列到同一地址的前一个索引节点和后一个索引节点；该字段因此创建了由这些索引节点组成的一个双向链表。

与索引节点对象关联的方法也叫索引节点操作。它们由inode_operations结构来描述，该结构的地址存放在i_op字段中：

/*
 * Inode operations (excerpt: only some commonly used methods are listed).
 */
struct inode_operations {
	/*
	 * Search a directory for the inode corresponding to the filename
	 * contained in a dentry object.
	 */
	struct dentry * (*lookup) (struct inode *,struct dentry *, unsigned int);

	/*
	 * In some directory, create a new disk inode for a regular file
	 * associated with a dentry object.
	 */
	int (*create) (struct inode *,struct dentry *, umode_t, bool);

	/*
	 * In some directory, create a new inode for a directory associated
	 * with a dentry object.
	 */
	int (*mkdir) (struct inode *,struct dentry *,umode_t);

	/*
	 * Remove a subdirectory from a directory; the name of the
	 * subdirectory is contained in the dentry object.
	 */
	int (*rmdir) (struct inode *,struct dentry *);

	/*
	 * In some directory, create a new disk inode for a special file
	 * associated with a dentry object. The mode and rdev parameters give
	 * the file type and the device's major/minor numbers respectively.
	 */
	int (*mknod) (struct inode *,struct dentry *,umode_t,dev_t);

	/* ... remaining methods omitted ... */
} ____cacheline_aligned;

我只列出了常用的几种方法，多了容易绕昏，可能看了注释你还是不太明白，后面的文章我们将会举具体文件系统的inode_operations例子及使用，来帮助大家更好理解。
上述列举的方法对所有可能的索引节点和文件系统类型都是可用的。不过，只有其中的一个子集应用到某一特定的索引节点和文件系统，未实现的方法对应的字段被置为NULL。

文件对象

文件对象是在文件被打开时创建的，由一个file结构组成。注意，文件对象在磁盘上没有对应的映像，虽然file看起来也是表示一个文件，但是要记住，file是进程相关的；而inode是进程无关的。因此一个文件在系统内只有一个inode；而一个文件在系统内可能有多个file结构，分别属于不同的进程。

struct file {    union {        struct llist_node   fu_llist;        struct rcu_head     fu_rcuhead;    } f_u;  /* 用于通用文件对象链表的指针 */    struct path     f_path;#define f_dentry    f_path.dentry   /* 与文件相关的目录项对象 */    struct inode        *f_inode;   /* cached value */    const struct file_operations    *f_op; /* 指向文件操作表的指针 */    /*     * Protects f_ep_links, f_flags, f_pos vs i_size in lseek SEEK_CUR.     * Must not be taken from IRQ context.     */    spinlock_t      f_lock;    //引用计数，使用file对象的进程数目。比如使用CLONE_FILES创建进程时，这些进程会共享打开的文件，因此会使用相同的file对象。    atomic_long_t       f_count;    unsigned int        f_flags; //打开文件时，传递的打开标志    fmode_t         f_mode; //打开文件时，传递的模式参数    //是一个很重的值，这个值表示文件的读写位置，这个值是不可以放在inode中的，因为一个inode可能对应多个打开的file结构。    loff_t          f_pos;    struct fown_struct  f_owner;    const struct cred   *f_cred;    struct file_ra_state    f_ra;   /* 文件预读状态 */    void            *private_data;#ifdef CONFIG_EPOLL    /* Used by fs/eventpoll.c to link all the hooks to this file */    struct list_head    f_ep_links;  /* 文件的事件轮询等待者链表的头 */    struct list_head    f_tfile_llink;#endif /* #ifdef CONFIG_EPOLL */    struct address_space    *f_mapping;  /* 指向文件地址空间对象的指针 */#endif};

文件对象通过一个名为filp的slab高速缓存分配，filp描述符地址存放在file_cachep变量中。Linux对分配的文件对象数目是有限制的，因此files_stat变量在其max_files字段中指定了可分配文件对象的最大数目，也就是系统可同时访问的最大文件数。

在使用的文件对象包含在由具体文件系统的超级块所确立的几个链表中。每个超级块对象把文件对象链表的头存放在s_files字段中；因此，属于不同文件系统的文件对象就包含在不同的链表中。链表中分别指向前一个元素和后一个元素的指针都存放在文件对象的f_list字段中。

当VFS代表进程必须打开一个文件时，它调用get_empty_filp()函数来分配一个新的文件对象。该函数调用kmem_cache_alloc()从filp高速缓存中获得一个空闲的文件对象，然后初始化这个对象的字段。

每个文件系统都有其自己的文件操作集合，执行诸如读写文件这样的操作。当内核将一个索引节点从磁盘装入内存时，就会把指向这些文件操作的指针存放在file_operations结构中，而该结构的地址存放在该索引节点对象的i_fop字段中。当进程打开这个文件时，VFS就用存放在索引节点中的这个地址初始化新文件对象的f_op字段，使得对文件操作的后续调用能够使用这些函数。如果需要，VFS随后也可以通过在f_op字段存放一个新值而修改文件操作的集合：

/*
 * File operations (excerpt: trailing methods are omitted).
 */
struct file_operations {
	/*
	 * Points to the owning module; this field is mainly used by
	 * filesystems that are provided by modules.
	 */
	struct module *owner;

	/* Updates the file pointer. */
	loff_t (*llseek) (struct file *, loff_t, int);
	ssize_t (*read) (struct file *, char __user *, size_t, loff_t *);
	ssize_t (*write) (struct file *, const char __user *, size_t, loff_t *);
	/* Starts an asynchronous read I/O operation. */
	ssize_t (*aio_read) (struct kiocb *, const struct iovec *, unsigned long, loff_t);
	/* Starts an asynchronous write I/O operation. */
	ssize_t (*aio_write) (struct kiocb *, const struct iovec *, unsigned long, loff_t);
	int (*iterate) (struct file *, struct dir_context *);

	/*
	 * Checks whether there is activity on a file and, if there is none,
	 * sleeps until something happens on it.
	 */
	unsigned int (*poll) (struct file *, struct poll_table_struct *);

	/*
	 * Similar to the ioctl method, but it does not take the big kernel
	 * lock. All device drivers and filesystems are expected to use this
	 * new method instead of the ioctl method.
	 */
	long (*unlocked_ioctl) (struct file *, unsigned int, unsigned long);

	/* Used by 64-bit kernels to carry out the 32-bit ioctl() system call. */
	long (*compat_ioctl) (struct file *, unsigned int, unsigned long);

	/*
	 * Performs a memory mapping of the file and puts the mapping into
	 * the address space of the process.
	 */
	int (*mmap) (struct file *, struct vm_area_struct *);

	/*
	 * Opens a file by creating a new file object and linking it to the
	 * corresponding inode object.
	 */
	int (*open) (struct inode *, struct file *);

	/*
	 * Called when a reference to an open file is closed. What the method
	 * actually does depends on the filesystem.
	 */
	int (*flush) (struct file *, fl_owner_t id);

	/*
	 * Releases the file object. Called when the last reference to an
	 * open file is closed (i.e. when the f_count field of the file
	 * object becomes 0).
	 */
	int (*release) (struct inode *, struct file *);

	/* Writes all cached data of the file to disk. */
	int (*fsync) (struct file *, loff_t, loff_t, int datasync);

	/* Starts an asynchronous I/O flush operation. */
	int (*aio_fsync) (struct kiocb *, int datasync);

	/* Enables or disables I/O event notification by means of signals. */
	int (*fasync) (int, struct file *, int);

	/* Applies a lock to the file. */
	int (*lock) (struct file *, int, struct file_lock *);

	/*
	 * Transfers data from the file to a page of the page cache; this
	 * low-level method is used by sendfile() and by the networking code
	 * for sockets.
	 */
	ssize_t (*sendpage) (struct file *, struct page *, int, size_t, loff_t *, int);

	/* Gets an unused address range to map the file. */
	unsigned long (*get_unmapped_area)(struct file *, unsigned long, unsigned long, unsigned long, unsigned long);
	long (*fallocate)(struct file *file, int mode, loff_t offset,
			  loff_t len);

	/* ... remaining methods omitted ... */
};

以上描述的方法对所有可能的文件类型都是可用的。不过，对于一个具体的文件类型，只使用其中的一个子集；那些未实现的方法对应的字段被置为NULL。

目录项对象

VFS把每个目录看作由若干子目录和文件组成的一个普通文件。磁盘文件系统的目录结构保存在磁盘目录项中，而块设备读取速度慢，需要很长时间才能找到与一个文件名对应的inode，Linux引入了目录项对象来利用之前查找的结果，一旦目录项被读入内存，VFS就把它转换成基于dentry结构的一个目录项对象。对于进程查找的路径名中的每个分量，内核都为其创建一个目录项对象；目录项对象将每个分量与其对应的索引节点相联系。例如，在查找路名/tmp/test时，内核为根目录“/”创建一个目录项对象，为根目录下的tmp项创建一个第二级目录项对象，为/tmp目录下的test项创建一个第三级目录项对象。

struct dentry {    /* RCU lookup touched fields */    unsigned int d_flags;    /* 目录项高速缓存标志 */ */    seqcount_t d_seq;       /* per dentry seqlock */    struct hlist_bl_node d_hash;    /* 内存中所有的dentry都保存在hash表中d_hash是为了处理hash冲突的*/    struct dentry *d_parent;    /* 指向这个dentry的父dentry，对于根目录d_parent指向自身 */    /*指定了文件的名称，qstr是一个包装器，存储了字符串的长度，hash值和字符串本身。****字符    串不是一个绝对路径，而是当前的分量****。如果文件的名称小于DNAME_INLINE_LEN_MIN，那么     d_name->name指向d_iname，否则要通过kmalloc进行分配。*/    struct qstr d_name;    struct inode *d_inode;      /*dentry对应的inode，这个是dentry最中要的成员，因为dentry的主要做用就是通过路径名查找inode*/    unsigned char d_iname[DNAME_INLINE_LEN];    /* small names */    /* Ref lookup also touches following */    struct lockref d_lockref;   /* per-dentry lock and refcount */    const struct dentry_operations *d_op;  /* 目录项方法 */    struct super_block *d_sb;   /* 指向该目录项所在文件系统实例的超级块*/    unsigned long d_time;       /* used by d_revalidate */    void *d_fsdata;         /* fs-specific data */    /*表头是dentry_unused，所有引用计数为0的dentry都会放到这个LRU链表中，并且插在链表前面，因此靠后的节点，表示越老。*/    struct list_head d_lru;     /* LRU list */    /*     * d_child and d_rcu can share memory     */    union {        struct list_head d_child;    /* 对目录而言，用于同一父目录中的目录项链表的指针 */        struct rcu_head d_rcu;  /* 回收目录项对象时，由RCU描述符使用 */    } d_u;    struct list_head d_subdirs; /* 所有的子dentry都通过他们的d_child链接到父亲的d_subdirs*/    struct hlist_node d_alias;  /* 用于与同一索引节点（别名）相关的目录项链表的指针*/};

与目录项对象关联的方法称为目录项操作。这些方法由dentry_operations结构加以描述，该结构的地址存放在目录项对象的d_op字段中。尽管一些文件系统定义了它们自己的目录项方法，但是这些字段通常为NULL，而VFS使用缺省函数代替这些方法。这里就不详细介绍了。

阅读全文

0 0