uclinux内核中根文件系统的建立

来源:互联网 发布:互联网搜索算法 编辑:程序博客网 时间:2024/04/28 04:33

快乐虾

http://blog.csdn.net/lights_joy/

lights@wo.com.cn

  

本文适用于

bfin-uclinux-2009r1.1

代码移植到vs2008

  

欢迎转载,但请保留作者信息

 

 

init_rootfs注册rootfs_fs_type之后,内核调用init_mount_tree函数创建系统中的根目录,此函数位于fs/namespace.c

/*
 * init_mount_tree - create the initial mount tree at boot.
 *
 * Mounts the already-registered "rootfs" filesystem, wraps the new
 * vfsmount in a freshly allocated mnt_namespace, attaches that
 * namespace to init_task, and finally points the current task's
 * root and working directory at the rootfs root dentry.
 * Called once from mnt_init(); any failure here is fatal (panic).
 */
static void __init init_mount_tree(void)

{

     struct vfsmount *mnt;

     struct mnt_namespace *ns;

     struct path root;

 

     /* Mount "rootfs" (registered earlier by init_rootfs). */
     mnt = do_kern_mount("rootfs", 0, "rootfs", NULL);

     if (IS_ERR(mnt))

         panic("Can't create rootfs");

     /* NOTE(review): cast without 'struct' is C++-style -- this copy was
      * ported to VS2008; stock kernel C writes (struct mnt_namespace *). */
     ns = (mnt_namespace *)kmalloc(sizeof(*ns), GFP_KERNEL);

     if (!ns)

         panic("Can't allocate initial namespace");

     atomic_set(&ns->count, 1);

     INIT_LIST_HEAD(&ns->list);

     init_waitqueue_head(&ns->poll);

     ns->event = 0;

     /* Link the mount into the namespace and back-link the namespace. */
     list_add(&mnt->mnt_list, &ns->list);

     ns->root = mnt;

     mnt->mnt_ns = ns;

 

     /* Make this the mount namespace of the very first task. */
     init_task.nsproxy->mnt_ns = ns;

     get_mnt_ns(ns);

 

     root.mnt = ns->root;

     root.dentry = ns->root->mnt_root;

 

     /* Current task's cwd and root both become the rootfs root ("/"). */
     set_fs_pwd(current->fs, &root);

     set_fs_root(current->fs, &root);

}

首先,init_mount_tree() 函数会调用 do_kern_mount("rootfs", 0, "rootfs", NULL) 来挂载前面已经注册了的 rootfs 文件系统。这个函数内部自然会创建我们最关心也是最关键的根目录。

1.1   do_kern_mount

这个函数位于fs/super.c

/*
 * do_kern_mount - look up a filesystem type by name and mount it.
 * @fstype: filesystem name (e.g. "rootfs")
 * @flags:  mount flags
 * @name:   device name handed through to the filesystem
 * @data:   filesystem-private mount options (may be NULL)
 *
 * Returns the new vfsmount on success or an ERR_PTR on failure.
 */
struct vfsmount *

do_kern_mount(const char *fstype, int flags, const char *name, void *data)

{

     struct file_system_type *type = get_fs_type(fstype);

     struct vfsmount *mnt;

     if (!type)

         return (vfsmount *)ERR_PTR(-ENODEV); /* unknown filesystem name */

     mnt = vfs_kern_mount(type, flags, name, data);

     /* For "type.subtype" filesystems, record the subtype on the sb. */
     if (!IS_ERR(mnt) && (type->fs_flags & FS_HAS_SUBTYPE) &&

         !mnt->mnt_sb->s_subtype)

         mnt = fs_set_subtype(mnt, fstype);

     put_filesystem(type); /* drop the reference taken by get_fs_type() */

     return mnt;

}

它的调用栈为:

>     bfin-uclinux-kernel-2009r1.1.dll!do_kern_mount(const char * fstype=0x100b04f0, int flags=0x00000000, const char * name=0x100b04f0, void * data=0x00000000)  968      C++

      bfin-uclinux-kernel-2009r1.1.dll!init_mount_tree()  2288 + 0x13 字节     C++

      bfin-uclinux-kernel-2009r1.1.dll!mnt_init()  2341       C++

      bfin-uclinux-kernel-2009r1.1.dll!vfs_caches_init(unsigned long mempages=0x000037b6)  2353      C++

      bfin-uclinux-kernel-2009r1.1.dll!start_kernel(int size=0x00000040)  687 + 0xb 字节      C++

此函数首先根据名称查找表示文件系统的结构体,对于rootfs,显然type指向

/*
 * Filesystem descriptor for "rootfs": a ramfs-backed pseudo filesystem
 * with no real backing device.  rootfs_get_sb builds the superblock and
 * kill_litter_super tears it (and its dentry tree) down.
 */
static struct file_system_type rootfs_fs_type = {

     .name         = "rootfs",

     .get_sb       = rootfs_get_sb,

     .kill_sb = kill_litter_super,

};

接着将此参数传递给vfs_kern_mount,看看这个函数做的工作:

1.2   vfs_kern_mount

这个函数同样位于fs/super.c

/*
 * vfs_kern_mount - core of the mount operation.
 * @type:  the filesystem type to mount
 * @flags: mount flags
 * @name:  device name recorded as mnt_devname
 * @data:  filesystem-private mount options
 *
 * Allocates the vfsmount, lets the filesystem build/find its superblock
 * via type->get_sb(), and ties mount point and parent to the mount
 * itself (a freshly created mount is initially its own parent).
 * Returns the vfsmount or an ERR_PTR; error paths unwind via gotos.
 */
struct vfsmount *

vfs_kern_mount(struct file_system_type *type, int flags, const char *name, void *data)

{

     struct vfsmount *mnt;

     char *secdata = NULL;

     int error;

 

     if (!type)

         return (vfsmount *)ERR_PTR(-ENODEV);

 

     error = -ENOMEM;

     mnt = alloc_vfsmnt(name);

     if (!mnt)

         goto out;

 

     /* Text-form mount options must pass through the security module. */
     if (data && !(type->fs_flags & FS_BINARY_MOUNTDATA)) {

         secdata = alloc_secdata();

         if (!secdata)

              goto out_mnt;

 

         error = security_sb_copy_data((char *)data, secdata);

         if (error)

              goto out_free_secdata;

     }

 

     /* Filesystem-specific: creates/finds the superblock and sets
      * mnt->mnt_sb / mnt->mnt_root (rootfs_get_sb for rootfs). */
     error = type->get_sb(type, flags, name, data, mnt);

     if (error < 0)

         goto out_free_secdata;

     BUG_ON(!mnt->mnt_sb);

 

     error = security_sb_kern_mount(mnt->mnt_sb, secdata);

     if (error)

         goto out_sb;

 

     /* Not attached anywhere yet: the mount is its own mount point
      * and its own parent. */
     mnt->mnt_mountpoint = mnt->mnt_root;

     mnt->mnt_parent = mnt;

     up_write(&mnt->mnt_sb->s_umount); /* get_sb returned with s_umount held */

     free_secdata(secdata);

     return mnt;

out_sb:

     dput(mnt->mnt_root);

     up_write(&mnt->mnt_sb->s_umount);

     deactivate_super(mnt->mnt_sb);

out_free_secdata:

     free_secdata(secdata);

out_mnt:

     free_vfsmnt(mnt);

out:

     return (vfsmount *)ERR_PTR(error);

}

 

在这个场景里,do_kern_mount() 做的工作主要是:

1.2.1   alloc_vfsmount

在此函数的开头:

     mnt = alloc_vfsmnt(name);

这个函数调用将创建并初始化一个struct vfsmount,初始化完成后各成员的值为:

mnt_hash

{next=0x00ed7820 prev=0x00ed7820 }

list_head

mnt_parent

0x00000000 {mnt_hash={...} mnt_parent=??? mnt_mountpoint=??? ...}

vfsmount *

mnt_mountpoint

0x00000000 {d_count={...} d_flags=??? d_lock={...} ...}

dentry *

mnt_root

0x00000000 {d_count={...} d_flags=??? d_lock={...} ...}

dentry *

mnt_sb

0x00000000 {s_list={...} s_dev=??? s_blocksize=??? ...}

super_block *

mnt_mounts

{next=0x00ed7838 prev=0x00ed7838 }

list_head

mnt_child

{next=0x00ed7840 prev=0x00ed7840 }

list_head

mnt_flags

0x00000000

int

mnt_devname

0x00ecf460 "rootfs"

const char *

mnt_list

{next=0x00ed7850 prev=0x00ed7850 }

list_head

mnt_expire

{next=0x00ed7858 prev=0x00ed7858 }

list_head

mnt_share

{next=0x00ed7860 prev=0x00ed7860 }

list_head

mnt_slave_list

{next=0x00ed7868 prev=0x00ed7868 }

list_head

mnt_slave

{next=0x00ed7870 prev=0x00ed7870 }

list_head

mnt_master

0x00000000 {mnt_hash={...} mnt_parent=??? mnt_mountpoint=??? ...}

vfsmount *

mnt_ns

0x00000000 {count={...} root=??? list={...} ...}

mnt_namespace *

mnt_id

0x00000000

int

mnt_group_id

0x00000000

int

mnt_count

{counter=0x00000001 }

atomic_t

mnt_expiry_mark

0x00000000

int

mnt_pinned

0x00000000

int

mnt_ghosts

0x00000000

int

__mnt_writers

{counter=0x00000000 }

atomic_t

可以看到,它们大部分的值为0

1.2.2   rootfs_get_sb

在创建vfsmount结构体之后,vfs_kern_mount开始创建超级块:

     error = type->get_sb(type, flags, name, data, mnt);

这个函数指针将指向rootfs_get_sb函数(fs/ramfs/inode.c):

/*
 * rootfs_get_sb - get_sb callback for rootfs (fs/ramfs/inode.c).
 * Delegates to get_sb_nodev with ramfs_fill_super: rootfs is just a
 * ramfs instance with no backing device.  MS_NOUSER keeps userspace
 * from mounting it directly.
 */
static int rootfs_get_sb(struct file_system_type *fs_type,

     int flags, const char *dev_name, void *data, struct vfsmount *mnt)

{

     return get_sb_nodev(fs_type, flags|MS_NOUSER, data, ramfs_fill_super,

                  mnt);

}

接着跟踪get_sb_nodev(fs/super.c):

/*
 * get_sb_nodev - build a superblock for a filesystem without a device.
 * @fs_type:    owning filesystem type
 * @flags:      mount flags, stored into s_flags
 * @data:       mount options forwarded to @fill_super
 * @fill_super: filesystem callback that populates the new superblock
 * @mnt:        vfsmount to attach the superblock to
 *
 * sget() is called with a NULL test callback, so a brand-new
 * superblock is always allocated (set_anon_super assigns its s_dev).
 * Returns 0 on success or a negative errno.
 */
int get_sb_nodev(struct file_system_type *fs_type,

     int flags, void *data,

     int (*fill_super)(struct super_block *, void *, int),

     struct vfsmount *mnt)

{

     int error;

     struct super_block *s = sget(fs_type, NULL, set_anon_super, NULL);

 

     if (IS_ERR(s))

         return PTR_ERR(s);

 

     s->s_flags = flags;

 

     /* ramfs_fill_super for rootfs: creates root inode and dentry. */
     error = fill_super(s, data, flags & MS_SILENT ? 1 : 0);

     if (error) {

         up_write(&s->s_umount);

         deactivate_super(s);

         return error;

     }

     s->s_flags |= MS_ACTIVE;

     /* Hook the finished superblock and its root dentry into @mnt. */
     return simple_set_mnt(mnt, s);

}

这个函数完成了几个工作,我们一一看过。

1.2.2.1         sget

get_sb_nodev函数调用的第一个函数就是sget

/**

 *   sget -    find or create a superblock

 *   @type:   filesystem type superblock should belong to

 *   @test:   comparison callback

 *   @set:    setup callback

 *   @data:   argument to each of them

 *
 *   Returns an existing superblock matching @test, or a newly
 *   allocated one initialized by @set; ERR_PTR on failure.
 */

struct super_block *sget(struct file_system_type *type,

              int (*test)(struct super_block *,void *),

              int (*set)(struct super_block *,void *),

              void *data)

{

     struct super_block *s = NULL;

     struct super_block *old;

     int err;

 

retry:

     spin_lock(&sb_lock);

     /* First, look for an existing superblock of this type. */
     if (test) {

         /* NOTE(review): stock kernel list_for_each_entry takes
          * (pos, head, member); the leading type name here is a
          * modification from the VS2008 (C++) port. */
         list_for_each_entry(super_block, old, &type->fs_supers, s_instances) {

              if (!test(old, data))

                   continue;

              if (!grab_super(old))

                   goto retry;

              /* Found one; discard any sb we speculatively allocated. */
              if (s)

                   destroy_super(s);

              return old;

         }

     }

     /* Nothing found: allocate outside the lock, then retry the search
      * in case another CPU raced and created a matching sb meanwhile. */
     if (!s) {

         spin_unlock(&sb_lock);

         s = alloc_super(type);

         if (!s)

              return (super_block *)ERR_PTR(-ENOMEM);

         goto retry;

     }

        

     err = set(s, data);      /* set_anon_super for rootfs: assigns s_dev */

     if (err) {

         spin_unlock(&sb_lock);

         destroy_super(s);

         return (super_block *)ERR_PTR(err);

     }

     /* Tie the new superblock to its type and publish it on the
      * global super_blocks list and the type's fs_supers list. */
     s->s_type = type;

     strlcpy(s->s_id, type->name, sizeof(s->s_id));

     list_add_tail(&s->s_list, &super_blocks);

     list_add(&s->s_instances, &type->fs_supers);

     spin_unlock(&sb_lock);

     get_filesystem(type);

     return s;

}

此函数的调用栈如下:

>    bfin-uclinux-kernel-2009r1.1.dll!sget(file_system_type * type=0x100b6828, int (super_block *, void *)* test=0x00000000, int (super_block *, void *)* set=0x1003a955, void * data=0x00000000)  339    C++

      bfin-uclinux-kernel-2009r1.1.dll!get_sb_nodev(file_system_type * fs_type=0x100b6828, int flags=0x80000000, void * data=0x00000000, int (super_block *, void *, int)* fill_super=0x100a6dd0, vfsmount * mnt=0x00ed7820)  838 + 0x12 字节       C++

      bfin-uclinux-kernel-2009r1.1.dll!rootfs_get_sb(file_system_type * fs_type=0x100b6828, int flags=0x00000000, const char * dev_name=0x100b04f0, void * data=0x00000000, vfsmount * mnt=0x00ed7820)  211 + 0x20 字节   C++

      bfin-uclinux-kernel-2009r1.1.dll!vfs_kern_mount(file_system_type * type=0x100b6828, int flags=0x00000000, const char * name=0x100b04f0, void * data=0x00000000)  914 + 0x1e 字节     C++

      bfin-uclinux-kernel-2009r1.1.dll!do_kern_mount(const char * fstype=0x100b04f0, int flags=0x00000000, const char * name=0x100b04f0, void * data=0x00000000)  972 + 0x15 字节       C++

      bfin-uclinux-kernel-2009r1.1.dll!init_mount_tree()  2288 + 0x13 字节     C++

      bfin-uclinux-kernel-2009r1.1.dll!mnt_init()  2341       C++

      bfin-uclinux-kernel-2009r1.1.dll!vfs_caches_init(unsigned long mempages=0x000037b6)  2353      C++

      bfin-uclinux-kernel-2009r1.1.dll!start_kernel(int size=0x00000040)  687 + 0xb 字节      C++

传递进来的test回调函数为NULL,data也为NULL,type指向rootfs_fs_type这一全局变量,set回调函数则指向set_anon_super(fs/super.c)。

因而此函数将首先调用alloc_super分配一个struct super_block,其值为:

1.2.2.1.1       alloc_super

此函数位于fs/super.c,用以分配一个super_block并进行初始化,初始化完成后这个结构体有如下值:

s_list

{next=0x00000000 prev=0x00000000 }

list_head

s_dev

0x00000000

unsigned int

s_blocksize

0x00000000

unsigned long

s_blocksize_bits

0x00

unsigned char

s_dirt

0x00

unsigned char

s_maxbytes

0x000000007fffffff

unsigned __int64

s_type

0x00000000 {name=??? fs_flags=??? get_sb=??? ...}

file_system_type *

s_op

0x100be278 default_op {alloc_inode=0x00000000 destroy_inode=0x00000000 dirty_inode=0x00000000 ...}

const super_operations *

dq_op

0x00000000 {initialize=??? drop=??? alloc_space=??? ...}

dquot_operations *

s_qcop

0x00000000 {quota_on=??? quota_off=??? quota_sync=??? ...}

quotactl_ops *

s_export_op

0x00000000 {encode_fh=??? fh_to_dentry=??? fh_to_parent=??? ...}

const export_operations *

s_flags

0x00000000

unsigned long

s_magic

0x00000000

unsigned long

s_root

0x00000000 {d_count={...} d_flags=??? d_lock={...} ...}

dentry *

s_umount

{activity=0x00000000 wait_lock={...} wait_list={...} }

rw_semaphore

s_lock

{count={...} wait_lock={...} wait_list={...} }

mutex

s_count

0x40000000

int

s_need_sync_fs

0x00000000

int

s_active

{counter=0x00000001 }

atomic_t

s_xattr

0x00000000

xattr_handler * *

s_inodes

{next=0x00ed5670 prev=0x00ed5670 }

list_head

s_dirty

{next=0x00ed5678 prev=0x00ed5678 }

list_head

s_io

{next=0x00ed5680 prev=0x00ed5680 }

list_head

s_more_io

{next=0x00ed5688 prev=0x00ed5688 }

list_head

s_anon

{first=0x00000000 }

hlist_head

s_files

{next=0x00ed5694 prev=0x00ed5694 }

list_head

s_dentry_lru

{next=0x00ed569c prev=0x00ed569c }

list_head

s_nr_dentry_unused

0x00000000

int

s_bdev

0x00000000 {bd_dev=??? bd_inode=??? bd_openers=??? ...}

block_device *

s_mtd

0x00000000

mtd_info *

s_instances

{next=0x00ed56b0 prev=0x00ed56b0 }

list_head

s_dquot

{flags=0x00000000 dqio_mutex={...} dqonoff_mutex={...} ...}

quota_info

s_frozen

0x00000000

int

s_wait_unfrozen

{lock={...} task_list={...} }

__wait_queue_head

s_id

0x00ed5790 ""

char [32]

s_fs_info

0x00000000

void *

s_mode

0x00000000

unsigned int

s_vfs_rename_mutex

{count={...} wait_lock={...} wait_list={...} }

mutex

s_time_gran

0x3b9aca00

unsigned int

s_subtype

0x00000000 <错误的指针>

char *

s_options

0x00000000 <错误的指针>

char *

此时还看不出file_system_type与super_block之间的关系。

1.2.2.1.2       set_anon_super

在使用alloc_super完成super_block结构体的分配后,sget接着调用set回调函数:

     err = set(s, data);

对于rootfs,此回调函数指向set_anon_super(fs/super.c):

/*

 * Unnamed block devices are dummy devices used by virtual

 * filesystems which don't use real block-devices.  -- jrs

 */

 

/* IDA allocator handing out unique anonymous minor numbers. */
static DEFINE_IDA(unnamed_dev_ida);

static DEFINE_SPINLOCK(unnamed_dev_lock);/* protects the above */

 

/*
 * set_anon_super - sget() 'set' callback for device-less filesystems.
 * Allocates a unique anonymous device number (major 0) and stores it
 * in s->s_dev.  @data is unused.  Returns 0 or a negative errno.
 */
int set_anon_super(struct super_block *s, void *data)

{

     int dev;

     int error;

 

 retry:

     /* Pre-allocate IDA memory, then take the id under the lock. */
     if (ida_pre_get(&unnamed_dev_ida, GFP_ATOMIC) == 0)

         return -ENOMEM;

     spin_lock(&unnamed_dev_lock);

     error = ida_get_new(&unnamed_dev_ida, &dev);

     spin_unlock(&unnamed_dev_lock);

     if (error == -EAGAIN)

         /* We raced and lost with another CPU. */

         goto retry;

     else if (error)

         return -EAGAIN;

 

     /* Out of minor numbers: release the id and fail. */
     if ((dev & MAX_ID_MASK) == (1 << MINORBITS)) {

         spin_lock(&unnamed_dev_lock);

         ida_remove(&unnamed_dev_ida, dev);

         spin_unlock(&unnamed_dev_lock);

         return -EMFILE;

     }

     s->s_dev = MKDEV(0, dev & MINORMASK); /* major 0 = anonymous */

     return 0;

}

这个函数最重要的工作是设置s_dev的值,对于rootfs,这个值为0

1.2.2.1.3       关联super_block与file_system_type

在完成super_block的初始化后,sget开始关联super_block与file_system_type:

     s->s_type = type;

     strlcpy(s->s_id, type->name, sizeof(s->s_id));

     list_add_tail(&s->s_list, &super_blocks);

     list_add(&s->s_instances, &type->fs_supers);

可以看到super_block结构体中的s_type指向了包含此超级块的file_system_type,且s_id复制了文件系统的名称。在这里super_blocks是一个全局变量,用以链接系统中所有的超级块。

从这里也可以看到一个super_block只有一个对应的file_system_type,但是一个file_system_type却可以有多个super_block

1.2.2.1.4       get_filesystem

sget最后调用了一个叫get_filesystem的函数,但是由于未启用MODULE支持,此函数相当于什么也没做。

/* WARNING: This can be used only if we _already_ own a reference */

/*
 * Pin the module implementing @fs.  With module support disabled
 * (as in this build) __module_get expands to a no-op.
 */
void get_filesystem(struct file_system_type *fs)

{

     __module_get(fs->owner);

}

 

1.2.2.2         ramfs_fill_super

在完成sget调用后,get_sb_nodev通过传入的fill_super指针调用ramfs_fill_super函数(fs/ramfs/inode.c):

/*
 * ramfs_fill_super - populate a fresh ramfs superblock.
 * @sb:     superblock allocated by sget()
 * @data:   mount options (unused here)
 * @silent: suppress error messages if non-zero (unused here)
 *
 * Sets the ramfs limits/ops on @sb, creates the root directory inode
 * (mode drwxr-xr-x) and its "/" dentry, and stores the dentry in
 * sb->s_root.  Returns 0 or -ENOMEM.
 */
static int ramfs_fill_super(struct super_block * sb, void * data, int silent)

{

     struct inode * inode;

     struct dentry * root;

 

     sb->s_maxbytes = MAX_LFS_FILESIZE;

     sb->s_blocksize = PAGE_CACHE_SIZE;

     sb->s_blocksize_bits = PAGE_CACHE_SHIFT;

     sb->s_magic = RAMFS_MAGIC;

     sb->s_op = &ramfs_ops;       /* replace alloc_super's default_op */

     sb->s_time_gran = 1;

     /* Root of the filesystem: a directory inode, mode 0755. */
     inode = ramfs_get_inode(sb, S_IFDIR | 0755, 0);

     if (!inode)

         return -ENOMEM;

 

     root = d_alloc_root(inode);

     if (!root) {

         iput(inode);         /* drop the inode if no dentry for it */

         return -ENOMEM;

     }

     sb->s_root = root;

     return 0;

}

1.2.2.2.1       设置s_op

alloc_super函数中将super_blocks_op指向了default_op,什么功能也没有,因而在这里首先替换了s_op,使其指向ramfs_ops

/*
 * Superblock operations for ramfs.  Positional initializers (a VS2008
 * port artifact; stock kernel uses designated initializers): only
 * drop_inode and statfs are provided, everything else stays NULL so
 * the VFS falls back to its generic behavior.
 */
const struct super_operations ramfs_ops = {

     NULL,//struct inode *(*alloc_inode)(struct super_block *sb);

     NULL,//void (*destroy_inode)(struct inode *);

     NULL,//void (*dirty_inode) (struct inode *);

     NULL,//int (*write_inode) (struct inode *, int);

     /*.drop_inode =*/ generic_delete_inode,

     NULL,//void (*delete_inode) (struct inode *);

     NULL,//void (*put_super) (struct super_block *);

     NULL,//void (*write_super) (struct super_block *);

     NULL,//int (*sync_fs)(struct super_block *sb, int wait);

     NULL,//void (*write_super_lockfs) (struct super_block *);

     NULL,//void (*unlockfs) (struct super_block *);

     /*.statfs     =*/ simple_statfs,

};

 

1.2.2.2.2       ramfs_get_inode

ramfs_fill_super调用的第一个函数是ramfs_get_inodefs/ramfs/inode.c

/*
 * ramfs_get_inode - allocate and set up a ramfs inode.
 * @sb:   owning superblock
 * @mode: file type and permission bits (S_IFDIR | 0755 for the root)
 * @dev:  device number, used only for special files
 *
 * Gets a generic inode from new_inode() and then fills in the
 * ramfs-specific parts: address-space ops, backing_dev_info, and the
 * per-file-type inode/file operations.  Returns the inode or NULL.
 */
struct inode *ramfs_get_inode(struct super_block *sb, int mode, dev_t dev)

{

     struct inode * inode = new_inode(sb);

 

     if (inode) {

         inode->i_mode = mode;

         inode->i_uid = current->fsuid;

         inode->i_gid = current->fsgid;

         inode->i_blocks = 0;

         inode->i_mapping->a_ops = &ramfs_aops;

         inode->i_mapping->backing_dev_info = &ramfs_backing_dev_info;

         mapping_set_gfp_mask(inode->i_mapping, GFP_HIGHUSER);

         /* ramfs pages are never written back, so never reclaim them. */
         mapping_set_unevictable(inode->i_mapping);

         inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME;

         /* Pick operations based on the file type bits of @mode. */
         switch (mode & S_IFMT) {

         default:

              init_special_inode(inode, mode, dev);

              break;

         case S_IFREG:

              inode->i_op = &ramfs_file_inode_operations;

              inode->i_fop = &ramfs_file_operations;

              break;

         case S_IFDIR:

              inode->i_op = &ramfs_dir_inode_operations;

              inode->i_fop = &simple_dir_operations;

 

              /* directory inodes start off with i_nlink == 2 (for "." entry) */

              inc_nlink(inode);

              break;

         case S_IFLNK:

              inode->i_op = &page_symlink_inode_operations;

              break;

         }

     }

     return inode;

}

此函数执行完后,将得到这样的一个inode

i_hash

{next=0x00000000 pprev=0x00000000 }

hlist_node

i_list

{next=0x100b6348 prev=0x100b6348 }

list_head

i_sb_list

{next=0x00ed5670 prev=0x00ed5670 }

list_head

i_dentry

{next=0x04800de8 prev=0x04800de8 }

list_head

i_ino

0x00000001

unsigned long

i_count

{counter=0x00000001 }

atomic_t

i_nlink

0x00000002

unsigned int

i_uid

0x00000000

unsigned int

i_gid

0x00000000

unsigned int

i_rdev

0x00000000

unsigned int

i_version

0x0000000000000000

unsigned __int64

i_size

0x0000000000000000

__int64

i_atime

{tv_sec=0x00000000 tv_nsec=0x00000000 }

timespec

i_mtime

{tv_sec=0x00000000 tv_nsec=0x00000000 }

timespec

i_ctime

{tv_sec=0x00000000 tv_nsec=0x00000000 }

timespec

i_blkbits

0x0000000c

unsigned int

i_blocks

0x00000000

unsigned long

i_bytes

0x0000

unsigned short

i_mode

0x41ed

unsigned short

i_lock

{raw_lock={...} }

spinlock_t

i_mutex

{count={...} wait_lock={...} wait_list={...} }

mutex

i_alloc_sem

{activity=0x00000000 wait_lock={...} wait_list={...} }

rw_semaphore

i_op

0x100b09b8 struct inode_operations const ramfs_dir_inode_operations {create=0x100a6a40 lookup=0x1003a400 link=0x100394dd ...}

const inode_operations *

i_fop

0x100b0188 struct file_operations const simple_dir_operations {owner=0x00000000 llseek=0x1003a450 read=0x10039c71 ...}

const file_operations *

i_sb

0x00ed5600 {s_list={...} s_dev=0x00000000 s_blocksize=0x00001000 ...}

super_block *

i_flock

0x00000000 {fl_next=??? fl_link={...} fl_block={...} ...}

file_lock *

i_mapping

0x04800e74 {host=0x04800dd0 page_tree={...} tree_lock={...} ...}

address_space *

i_data

{host=0x04800dd0 page_tree={...} tree_lock={...} ...}

address_space

i_devices

{next=0x04800ec8 prev=0x04800ec8 }

list_head

i_pipe

0x00000000 {wait={...} nrbufs=??? curbuf=??? ...}

pipe_inode_info *

i_bdev

0x00000000 {bd_dev=??? bd_inode=??? bd_openers=??? ...}

block_device *

i_cdev

0x00000000 {kobj={...} owner=??? ops=??? ...}

cdev *

i_cindex

0x00000000

int

i_generation

0x00000000

unsigned int

i_state

0x00000000

unsigned long

dirtied_when

0x00000000

unsigned long

i_flags

0x00000000

unsigned int

i_writecount

{counter=0x00000000 }

atomic_t

i_private

0x00000000

void *

 

1.2.2.2.2.1 new_inode

ramfs_get_inode函数调用的第一个函数就是new_inode(fs/inode.c):

/**

 *   new_inode     - obtain an inode

 *   @sb: superblock

 *

 *   Allocates a new inode for given superblock. The default gfp_mask

 *   for allocations related to inode->i_mapping is GFP_HIGHUSER_PAGECACHE.

 *   If HIGHMEM pages are unsuitable or it is known that pages allocated

 *   for the page cache are not reclaimable or migratable,

 *   mapping_set_gfp_mask() must be called with suitable flags on the

 *   newly created inode's mapping

 *

 */

struct inode *new_inode(struct super_block *sb)

{

     /*

      * On a 32bit, non LFS stat() call, glibc will generate an EOVERFLOW

      * error if st_ino won't fit in target struct field. Use 32bit counter

      * here to attempt to avoid that.

      */

     static unsigned int last_ino;

     struct inode * inode;

 

     spin_lock_prefetch(&inode_lock);

    

     inode = alloc_inode(sb);

     if (inode) {

         spin_lock(&inode_lock);

         inodes_stat.nr_inodes++;

         /* Publish the inode: global in-use list plus the owning
          * superblock's s_inodes list. */
         list_add(&inode->i_list, &inode_in_use);

         list_add(&inode->i_sb_list, &sb->s_inodes);

         inode->i_ino = ++last_ino;   /* monotonically increasing ino */

         inode->i_state = 0;

         spin_unlock(&inode_lock);

     }

     return inode;

}

 

1.2.2.2.2.1.1   alloc_inode

new_inode调用的第一个函数为alloc_inode

/*
 * alloc_inode - allocate and default-initialize one inode for @sb.
 *
 * Uses the filesystem's own alloc_inode op when it provides one,
 * otherwise the generic inode_cachep slab.  Every field is reset to a
 * neutral default; the i_mapping pointer is aimed at the inode's own
 * embedded i_data address_space.  Returns the inode or NULL on
 * allocation/security failure.
 */
static struct inode *alloc_inode(struct super_block *sb)

{

     static const struct address_space_operations empty_aops;

     static struct inode_operations empty_iops;

     static const struct file_operations empty_fops;

     struct inode *inode;

 

     /* Filesystem-specific allocator if present, slab otherwise. */
     if (sb->s_op->alloc_inode)

         inode = sb->s_op->alloc_inode(sb);

     else

         inode = (struct inode *) kmem_cache_alloc(inode_cachep, GFP_KERNEL);

 

     if (inode) {

         struct address_space * const mapping = &inode->i_data;

 

         inode->i_sb = sb;    /* inode -> superblock link (one-way) */

         inode->i_blkbits = sb->s_blocksize_bits;

         inode->i_flags = 0;

         atomic_set(&inode->i_count, 1);

         /* Empty op tables: safe placeholders until the fs installs
          * its real inode/file operations. */
         inode->i_op = &empty_iops;

         inode->i_fop = &empty_fops;

         inode->i_nlink = 1;

         atomic_set(&inode->i_writecount, 0);

         inode->i_size = 0;

         inode->i_blocks = 0;

         inode->i_bytes = 0;

         inode->i_generation = 0;

#ifdef CONFIG_QUOTA

         memset(&inode->i_dquot, 0, sizeof(inode->i_dquot));

#endif

         inode->i_pipe = NULL;

         inode->i_bdev = NULL;

         inode->i_cdev = NULL;

         inode->i_rdev = 0;

         inode->dirtied_when = 0;

         /* LSM veto: free the inode again the same way it was made. */
         if (security_inode_alloc(inode)) {

              if (inode->i_sb->s_op->destroy_inode)

                   inode->i_sb->s_op->destroy_inode(inode);

              else

                   kmem_cache_free(inode_cachep, (inode));

              return NULL;

         }

 

         spin_lock_init(&inode->i_lock);

         lockdep_set_class(&inode->i_lock, &sb->s_type->i_lock_key);

 

         mutex_init(&inode->i_mutex);

         lockdep_set_class(&inode->i_mutex, &sb->s_type->i_mutex_key);

 

         init_rwsem(&inode->i_alloc_sem);

         lockdep_set_class(&inode->i_alloc_sem, &sb->s_type->i_alloc_sem_key);

 

         /* Set up the embedded address_space with generic defaults. */
         mapping->a_ops = &empty_aops;

         mapping->host = inode;

         mapping->flags = 0;

         mapping_set_gfp_mask(mapping, GFP_HIGHUSER_PAGECACHE);

         mapping->assoc_mapping = NULL;

         mapping->backing_dev_info = &default_backing_dev_info;

         mapping->writeback_index = 0;

 

         /*

          * If the block_device provides a backing_dev_info for client

          * inodes then use that.  Otherwise the inode share the bdev's

          * backing_dev_info.

          */

         if (sb->s_bdev) {

              struct backing_dev_info *bdi;

 

              bdi = sb->s_bdev->bd_inode_backing_dev_info;

              if (!bdi)

                   bdi = sb->s_bdev->bd_inode->i_mapping->backing_dev_info;

              mapping->backing_dev_info = bdi;

         }

         inode->i_private = NULL;

         inode->i_mapping = mapping;   /* point at the embedded i_data */

     }

     return inode;

}

这个函数首先分配一个inode结构体,然后关联inode和super_block:

         inode->i_sb = sb;

也由此可以看到inode与super_block之间是单向联系的!

还有inode的i_mapping初始化时将指向自身的i_data。

初始化完成后inode将有以下值:

i_hash

{next=0x00000000 pprev=0x00000000 }

hlist_node

i_list

{next=0x00000000 prev=0x00000000 }

list_head

i_sb_list

{next=0x00000000 prev=0x00000000 }

list_head

i_dentry

{next=0x04800de8 prev=0x04800de8 }

list_head

i_ino

0x00000000

unsigned long

i_count

{counter=0x00000001 }

atomic_t

i_nlink

0x00000001

unsigned int

i_uid

0x00000000

unsigned int

i_gid

0x00000000

unsigned int

i_rdev

0x00000000

unsigned int

i_version

0x0000000000000000

unsigned __int64

i_size

0x0000000000000000

__int64

i_atime

{tv_sec=0x00000000 tv_nsec=0x00000000 }

timespec

i_mtime

{tv_sec=0x00000000 tv_nsec=0x00000000 }

timespec

i_ctime

{tv_sec=0x00000000 tv_nsec=0x00000000 }

timespec

i_blkbits

0x0000000c

unsigned int

i_blocks

0x00000000

unsigned long

i_bytes

0x0000

unsigned short

i_mode

0x0000

unsigned short

i_lock

{raw_lock={...} }

spinlock_t

i_mutex

{count={...} wait_lock={...} wait_list={...} }

mutex

i_alloc_sem

{activity=0x00000000 wait_lock={...} wait_list={...} }

rw_semaphore

i_op

0x100be158 empty_iops {create=0x00000000 lookup=0x00000000 link=0x00000000 ...}

const inode_operations *

i_fop

0x100b0078 empty_fops {owner=0x00000000 llseek=0x00000000 read=0x00000000 ...}

const file_operations *

i_sb

0x00ed5600 {s_list={...} s_dev=0x00000000 s_blocksize=0x00001000 ...}

super_block *

i_flock

0x00000000 {fl_next=??? fl_link={...} fl_block={...} ...}

file_lock *

i_mapping

0x04800e74 {host=0x04800dd0 page_tree={...} tree_lock={...} ...}

address_space *

i_data

{host=0x04800dd0 page_tree={...} tree_lock={...} ...}

address_space

i_devices

{next=0x04800ec8 prev=0x04800ec8 }

list_head

i_pipe

0x00000000 {wait={...} nrbufs=??? curbuf=??? ...}

pipe_inode_info *

i_bdev

0x00000000 {bd_dev=??? bd_inode=??? bd_openers=??? ...}

block_device *

i_cdev

0x00000000 {kobj={...} owner=??? ops=??? ...}

cdev *

i_cindex

0x00000000

int

i_generation

0x00000000

unsigned int

i_state

0x00000000

unsigned long

dirtied_when

0x00000000

unsigned long

i_flags

0x00000000

unsigned int

i_writecount

{counter=0x00000000 }

atomic_t

i_private

0x00000000

void *

 

1.2.2.2.2.1.2   关联inode和super_block

在alloc_inode分配并初始化完成后new_inode将关联inode和super_block:

         list_add(&inode->i_list, &inode_in_use);

         list_add(&inode->i_sb_list, &sb->s_inodes);

         inode->i_ino = ++last_ino;

从这几行代码可以看出每个super_block都可以有多个inode,而每一个inode都只能对应一个super_block

且每个inodei_ino都不重复地按顺序递增。

1.2.2.2.2.2 配置ramfs相关的inode属性

在使用new_inode配置完成通用的inode属性后,ramfs_get_inode将配置与具体文件系统相关的属性:

         inode->i_mapping->a_ops = &ramfs_aops;

         inode->i_mapping->backing_dev_info = &ramfs_backing_dev_info;

              inode->i_op = &ramfs_dir_inode_operations;

              inode->i_fop = &simple_dir_operations;

 

              /* directory inodes start off with i_nlink == 2 (for "." entry) */

              inc_nlink(inode);

 

1.2.2.2.3       d_alloc_root

在创建完成inode之后,开始在此inode之上创建dentry

     root = d_alloc_root(inode);

     if (!root) {

         iput(inode);

         return -ENOMEM;

     }

     sb->s_root = root;

此函数位于fs/dcache.c

/**

 * d_alloc_root - allocate root dentry

 * @root_inode: inode to allocate the root for

 *

 * Allocate a root ("/") dentry for the inode given. The inode is

 * instantiated and returned. %NULL is returned if there is insufficient

 * memory or the inode passed is %NULL.

 */

 

struct dentry * d_alloc_root(struct inode * root_inode)

{

     struct dentry *res = NULL;

 

     if (root_inode) {

         /* Positional qstr init with hash 0 (VS2008 port of the
          * kernel's QSTR_INIT-style designated initializer). */
         static const struct qstr name = { 0, /*.len =*/ 1, /*.name =*/ (unsigned char*)"/" };

 

         res = d_alloc(NULL, &name);   /* NULL parent: topmost dentry */

         if (res) {

              /* No parent to inherit from, so wire the superblock
               * by hand and make the root its own parent. */
              res->d_sb = root_inode->i_sb;

              res->d_parent = res;

              d_instantiate(res, root_inode);

         }

     }

     return res;

}

 

1.2.2.2.3.1 d_alloc

此函数位于fs/dcache.c

/**

 * d_alloc    -    allocate a dcache entry

 * @parent: parent of entry to allocate

 * @name: qstr of the name

 *

 * Allocates a dentry. It returns %NULL if there is insufficient memory

 * available. On a success the dentry is returned. The name passed in is

 * copied and the copy passed in may be reused after this call.

 */

 

struct dentry *d_alloc(struct dentry * parent, const struct qstr *name)

{

     struct dentry *dentry;

     char *dname;

 

     dentry = (struct dentry *)kmem_cache_alloc(dentry_cache, GFP_KERNEL);

     if (!dentry)

         return NULL;

 

     /* Short names live in the dentry's own d_iname buffer; longer
      * ones get a separate kmalloc'd copy. */
     if (name->len > DNAME_INLINE_LEN-1) {

         dname = (char*)kmalloc(name->len + 1, GFP_KERNEL);

         if (!dname) {

              kmem_cache_free(dentry_cache, dentry);

              return NULL;

         }

     } else  {

         dname = (char*)dentry->d_iname;

     }   

     dentry->d_name.name = (unsigned char*)dname;

 

     dentry->d_name.len = name->len;

     dentry->d_name.hash = name->hash;

     memcpy(dname, name->name, name->len);

     dname[name->len] = 0;     /* NUL-terminate the copied name */

 

     /* Fresh dentry: one reference, unhashed, no inode attached yet
      * (a "negative" dentry until d_instantiate). */
     atomic_set(&dentry->d_count, 1);

     dentry->d_flags = DCACHE_UNHASHED;

     spin_lock_init(&dentry->d_lock);

     dentry->d_inode = NULL;

     dentry->d_parent = NULL;

     dentry->d_sb = NULL;

     dentry->d_op = NULL;

     dentry->d_fsdata = NULL;

     dentry->d_mounted = 0;

#ifdef CONFIG_PROFILING

     dentry->d_cookie = NULL;

#endif

     INIT_HLIST_NODE(&dentry->d_hash);

     INIT_LIST_HEAD(&dentry->d_lru);

     INIT_LIST_HEAD(&dentry->d_subdirs);

     INIT_LIST_HEAD(&dentry->d_alias);

 

     /* With a parent: take a reference on it and inherit its sb.
      * Without one (the root case) just init the child link. */
     if (parent) {

         dentry->d_parent = dget(parent);

         dentry->d_sb = parent->d_sb;

     } else {

         INIT_LIST_HEAD(&dentry->d_u.d_child);

     }

 

     spin_lock(&dcache_lock);

     if (parent)

         list_add(&dentry->d_u.d_child, &parent->d_subdirs);

     dentry_stat.nr_dentry++;

     spin_unlock(&dcache_lock);

 

     return dentry;

}

由此函数可以看出,每个dentry都可以有parent,最顶层的dentry则没有。

每一个dentry都有一个子dentry的链表。

每个dentry都保存了指向super_block的指针。

经过此函数可以得到这样一个根dentry

d_count

{counter=0x00000001 }

atomic_t

d_flags

0x00000010

unsigned int

d_lock

{raw_lock={...} }

spinlock_t

d_inode

0x00000000 {i_hash={...} i_list={...} i_sb_list={...} ...}

inode *

d_hash

{next=0x00000000 pprev=0x00000000 }

hlist_node

d_parent

0x00000000 {d_count={...} d_flags=??? d_lock={...} ...}

dentry *

d_name

{hash=0x00000000 len=0x00000001 name=0x04801874 "/" }

qstr

d_lru

{next=0x04801840 prev=0x04801840 }

list_head

d_u

{d_child={...} d_rcu={...} }

dentry::<unnamed-type-d_u>

d_subdirs

{next=0x04801850 prev=0x04801850 }

list_head

d_alias

{next=0x04801858 prev=0x04801858 }

list_head

d_time

0xcdcdcdcd

unsigned long

d_op

0x00000000 {d_revalidate=??? d_hash=??? d_compare=??? ...}

dentry_operations *

d_sb

0x00000000 {s_list={...} s_dev=??? s_blocksize=??? ...}

super_block *

d_fsdata

0x00000000

void *

d_mounted

0x00000000

int

d_iname

0x04801874 "/"

unsigned char [36]

 

1.2.2.2.3.2 关联super_block和dentry

在完成dentry的分配后,d_alloc_root开始关联super_block和dentry:

              res->d_sb = root_inode->i_sb;

              res->d_parent = res;

可见,对于最上层的dentry,其父指针指向自己。

1.2.2.2.3.3 d_instantiate

最后,d_alloc_root调用d_instantiate关联inode和dentry:

/**

 * d_instantiate - fill in inode information for a dentry

 * @entry: dentry to complete

 * @inode: inode to attach to this dentry

 *

 * Fill in inode information in the entry.

 *

 * This turns negative dentries into productive full members

 * of society.

 *

 * NOTE! This assumes that the inode count has been incremented

 * (or otherwise set) by the caller to indicate that it is now

 * in use by the dcache.

 */

 

void d_instantiate(struct dentry *entry, struct inode * inode)

{

     /* The dentry must not already be aliased to an inode. */
     BUG_ON(!list_empty(&entry->d_alias));

     spin_lock(&dcache_lock);

     __d_instantiate(entry, inode);   /* actual linking, under the lock */

     spin_unlock(&dcache_lock);

     security_d_instantiate(entry, inode);

}

其操作由__d_instantiate函数完成:

/* the caller must hold dcache_lock */

/*
 * Link @dentry to @inode: add the dentry to the inode's alias list
 * and point d_inode at the inode.  @inode may be NULL (the dentry
 * then stays negative).
 */
static void __d_instantiate(struct dentry *dentry, struct inode *inode)

{

     if (inode)

         list_add(&dentry->d_alias, &inode->i_dentry);

     dentry->d_inode = inode;

     fsnotify_d_instantiate(dentry, inode);

}

很简单的关联。

1.2.2.3         simple_set_mnt

在最后,get_sb_nodev调用了simple_set_mnt:

/*
 * simple_set_mnt - attach a finished superblock to a vfsmount.
 * Points mnt->mnt_sb at @sb and mnt->mnt_root at the superblock's
 * root dentry (taking a reference via dget).  Always returns 0.
 */
int simple_set_mnt(struct vfsmount *mnt, struct super_block *sb)

{

     mnt->mnt_sb = sb;

     mnt->mnt_root = dget(sb->s_root);

     return 0;

}

这个函数将vfsmountsuper_block关联起来,同时将vfsmountdentry根节点关联起来。

1.2.3   关联vfsmount和dentry

在创建super_block完成后,vfs_kern_mount将关联dentry

     mnt->mnt_mountpoint = mnt->mnt_root;

     mnt->mnt_parent = mnt;

注意这里mnt_parent的设置。

1.3   创建mnt_namespace

创建根目录的最后一步就是创建mnt_namespace

     ns = (mnt_namespace *)kmalloc(sizeof(*ns), GFP_KERNEL);

     if (!ns)

         panic("Can't allocate initial namespace");

     atomic_set(&ns->count, 1);

     INIT_LIST_HEAD(&ns->list);

     init_waitqueue_head(&ns->poll);

     ns->event = 0;

     list_add(&mnt->mnt_list, &ns->list);

     ns->root = mnt;

     mnt->mnt_ns = ns;

 

     init_task.nsproxy->mnt_ns = ns;

     get_mnt_ns(ns);

 

     root.mnt = ns->root;

     root.dentry = ns->root->mnt_root;

 

     set_fs_pwd(current->fs, &root);

     set_fs_root(current->fs, &root);

这里的关键一点是将创建出来的ns保存到了init_task中,这样如果要遍历整个目录树,只要从init_task.nsproxy->mnt_ns开始就可以了。

 

 

 

近日,我家6岁的小姑娘参加了第六届POP全国少儿英语风采大赛,拉票进行中(2011-6-15前)。

请帮忙点击新东方网站的链接:

http://popdasai.xdf.cn/toupiao.php?do=space&uid=4237

投她一票,谢谢!

原创粉丝点击