[IO系统]04 节点路径搜索

来源：互联网发布：vsco淘宝内购原理编辑：程序博客网时间：2024/06/05 15:18

当进程必须识别一个文件时，就把它的文件路径名传递给某个VFS系统调用，如open()、mkdir()、rename()或stat()等。那么，怎样根据给定的文件路径名，在内存中找到或建立代表目标文件或目录的dentry结构和inode结构呢？

简要概括如下：

1. 将路径名按照“/”拆分为一系列分量，除了最后一个分量，其他分量必然为目录。

2. 如果路径名以“/”开头，则表明为绝对路径，从current->fs->root所标识的目录开始查找；否则为相对路径，从current->fs->pwd开始搜索。然后对每一分量依次进行处理。

3. 如果为“..”则跳转到父目录，更新nameidata数据结构；此处可能涉及到文件系统的切换（跨越挂载点）；

4. 如果为“.”，则继续下一个分量处理。

5. 非“..”和“.”的情况，则先在目录项高速缓存中搜索，如果查找到，则更新nameidata数据结构；否则从存储设备读取目录信息，再更新nameidata数据结构，同时也要将inode和dentry信息更新到目录项高速缓存和索引节点高速缓存。

6.最终，搜索结果记录在nameidata结构体中。

更为详细的如下：

1.1 数据结构

搜索过程中使用到的数据结构。

Nameidata结构主要是记录与路径名查找操作相关的数据。

struct nameidata {   structpath path;/* 将目录结构和mount结构封装在path结构中 */   structqstr last;/* 路径名的最后一个分量 */   structpath root; /* 根目录信息 */   structinode    *inode; /* path.dentry.d_inode */   unsignedint    flags;   unsigned    seq, m_seq;/* 相关目录项的顺序锁序号,m_seq挂载点*/   int     last_type; /* 路径名最后一个分量的类型 */   unsigned    depth;     /* 符号链接嵌套的当前级别 */   int     total_link_count;   structsaved {       structpath link;       structdelayed_call done;       constchar *name;       unsignedseq;   }*stack, internal[EMBEDDED_LEVELS]; /* 记录相应递归深度的符号链接的路径 */   structfilename *name;   structnameidata *saved;   structinode    *link_inode;   unsigned    root_seq;   int     dfd;};

另一个结构体：

/* * "quick string" -- eases parameterpassing, but more importantly * saves "metadata" about the string(ie length and the hash). * * hash comes first so it snuggles againstd_parent in the * dentry. */struct qstr { /* 用来存放路径名中当前节点的杂凑值以及节点名的长度 */   union{       struct{           HASH_LEN_DECLARE;       };       u64hash_len;   };   constunsigned char *name;};

1.2 路径名搜索

4.0版本以上内核，已经不存在path_lookup函数，但是节点路径检索思路基本还是相同的。

1.2.1 初始化

从filename_lookup开始分析，主要步骤如下：

1. 检查文件路径名的有效性

2. 初始化nd：如果给定了根目录，则设置搜索标识位LOOKUP_ROOT；并设置与进程相关的信息。

static int filename_lookup(int dfd,struct filename *name, unsigned flags,              struct path *path, struct path *root){   intretval;   structnameidata nd;/* 记录与节点搜索相关的数据信息 */   if(IS_ERR(name))/* 检查路径名是否有效 */       returnPTR_ERR(name);   if(unlikely(root)) {/*存在根目录，则将根目录记录在nd中，并设置搜索标识LOOKUP_ROOT */       nd.root= *root;       flags|= LOOKUP_ROOT;   }   set_nameidata(&nd,dfd, name);/* 初始化nd，并与进程交互 *//* 开始实质的搜索 */   retval= path_lookupat(&nd, flags | LOOKUP_RCU, path);   if(unlikely(retval == -ECHILD))       retval= path_lookupat(&nd, flags, path);   if(unlikely(retval == -ESTALE))       retval= path_lookupat(&nd, flags | LOOKUP_REVAL, path);    if(likely(!retval))       audit_inode(name,path->dentry, flags & LOOKUP_PARENT);   restore_nameidata();   putname(name);   returnretval;}

在path_lookupat中会首先执行path_init.

path_init主要是初始化查询，设置nd结构指向查询开始处的文件，这里分两种情况：

1. 绝对路径(以／开始)，获得根目录的dentry。它存储在task_struct中fs指向的fs_struct结构中。

2. 相对路径，直接从当前进程的task_struct结构中获得指针fs，它指向一个fs_struct，fs_struct中有一个指向“当前工作目录”的dentry。

步骤如下：

1. 设置nd的一些字段，用于标识搜索过程和搜索结果。

2. 若搜索是基于根目录，即已经设置了LOOKUP_ROOT，则根据root的dentry和inode初始化nd，然后返回。

3. 否则基于文件路径名进行查找，分为下面三种情况；open系统调用通常属于其中的前两种（3.1和3.2）。

3.1若路径名的第一个字符为“/”，那么查找必须从根目录开始，获取相应的已安装文件系统对象（current->fs）和目录项对象（current->fs->root记录到nd->root中）；

3.2 若路径名的第一个字符不为“/”，且dfd为AT_FDCWD（即基于当前进程的工作目录），则根据current->fs->pwd来初始化nd->path和nd->inode。

3.3 否则（dfd不是AT_FDCWD），以文件描述符dfd所对应的目录作为起点，据此初始化nd->path和nd->inode。

代码片段：

static const char *path_init(struct nameidata *nd,unsigned flags){    int retval= 0;    const char*s = nd->name->name;/** 在搜索的过程中，这个字段的值会随着路径名当前搜索结果而变；      * 例如，如果成功找到目标文件，那么这个字段的值就变成了LAST_NORM      * 而如果最后停留在了一个.上，则变成LAST_DOT    */    nd->last_type= LAST_ROOT; /* if there are only slashes... */    nd->flags= flags | LOOKUP_JUMPED | LOOKUP_PARENT;    nd->depth= 0;    if (flags& LOOKUP_ROOT) {    /*若搜索是基于根目录，即已经设置了LOOKUP_ROOT，则根据root的dentry和inode初始化nd，然后返回*/        structdentry *root = nd->root.dentry;        structinode *inode = root->d_inode;        if(*s) {            if(!d_can_lookup(root))                returnERR_PTR(-ENOTDIR);            retval= inode_permission(inode, MAY_EXEC);            if(retval)                returnERR_PTR(retval);        }        nd->path= nd->root;        nd->inode= inode;        if(flags & LOOKUP_RCU) {            rcu_read_lock();            nd->seq= __read_seqcount_begin(&nd->path.dentry->d_seq);            nd->root_seq= nd->seq;            nd->m_seq= read_seqbegin(&mount_lock);        } else{            path_get(&nd->path);        }        returns;    }     nd->root.mnt= NULL;    nd->path.mnt= NULL;    nd->path.dentry= NULL;     nd->m_seq= read_seqbegin(&mount_lock);    if (*s =='/') { /* 路径名以'/'开头 */         if(flags & LOOKUP_RCU)            rcu_read_lock();        set_root(nd);/*设置nd的root为当前进程fs的root*/         if(likely(!nd_jump_root(nd)))            returns;        nd->root.mnt= NULL;        rcu_read_unlock();        returnERR_PTR(-ECHILD);    } else if(nd->dfd == AT_FDCWD) {/* open系统调用中基本都为AT_FDCWD* 若路径名的第一个字符不为“/”但指定了当前进程的工作目录，* 则根据current->fs->pwd来初始化nd->path和nd-inode*/        if(flags & LOOKUP_RCU) {            structfs_struct *fs = current->fs;            unsignedseq;             rcu_read_lock();             do{                seq= read_seqcount_begin(&fs->seq);                nd->path= fs->pwd; /*保存当前路径*/                nd->inode= nd->path.dentry->d_inode;                nd->seq= 
__read_seqcount_begin(&nd->path.dentry->d_seq);            }while (read_seqcount_retry(&fs->seq, seq));        } else{            get_fs_pwd(current->fs,&nd->path);            nd->inode= nd->path.dentry->d_inode;        }        returns;    } else { /* 在openat系统调用dfd基本不为AT_FDCWD*/        /*Caller must check execute permissions on the starting path component */        structfd f = fdget_raw(nd->dfd);        structdentry *dentry;         if(!f.file)            returnERR_PTR(-EBADF);         dentry= f.file->f_path.dentry;         if(*s) {            if(!d_can_lookup(dentry)) {                fdput(f);                returnERR_PTR(-ENOTDIR);            }        }         nd->path= f.file->f_path;        if(flags & LOOKUP_RCU) {            rcu_read_lock();            nd->inode= nd->path.dentry->d_inode;            nd->seq= read_seqcount_begin(&nd->path.dentry->d_seq);        } else{            path_get(&nd->path);            nd->inode= nd->path.dentry->d_inode;        }        fdput(f);        returns;    }}

1.2.2 标准路径搜索

path_init之后，就是执行link_path_walk，步骤如下：

1. 跳过路径名前的任何斜杠“/”。

2. 如果剩余路径名为空，则返回0；此时nameidata数据结构中的dentry和inode字段指向原路径名最后一个已解析分量所对应的对象。

3. 执行一个循环，把路径名参数分解为分量（以“/”为分隔符），对于每一个分量执行如下操作：

3.1 检查nd->inode代表的分量是否允许执行检索（在unix中，只有目录才是可执行检索的），若不允许，则返回错误

3.2 考虑要解析的下一个分量，根据名字，计算出一个与目录项高速缓存散列表有关的32位散列值。

3.3 如果分量名是“..”，则通过handle_dots->follow_dotdot更新nd->inode和nd->path.dentry：

代码片段：

/* * Nameresolution. * This isthe basic name resolution function, turning a pathname into * the finaldentry. We expect 'base' to be positive and a directory. * * Returns 0and nd will have valid dentry and mnt on success. * Returnserror and drops reference to input namei data on failure. */static int link_path_walk(const char *name, structnameidata *nd){    int err;     while(*name=='/') /* 若为绝对路径，则跳过符号“/” */        name++;    if(!*name) /* 若查找绝对路径，则直接返回成功*/        return0;     /* At thispoint we know we have a real path component. */    for(;;) {        u64hash_len;        inttype;         /* 检查当前分量inode是否允许检索 */        err =may_lookup(nd);        if (err)            returnerr;         hash_len= hash_name(name);/* 计算出一个与目录项高速缓存散列表有关的32位散列值*/         type =LAST_NORM;        if(name[0] == '.') switch (hashlen_len(hash_len)) {            case2:                if(name[1] == '.') {/*分量为“..”*/                    type= LAST_DOTDOT;                    nd->flags|= LOOKUP_JUMPED;                }                break;            case1:                type= LAST_DOT; /*分量为“.”*/        }        if(likely(type == LAST_NORM)) {/*不是“.”也不是“..”*/            …        }         nd->last.hash_len= hash_len;        nd->last.name= name;        nd->last_type= type;         name+= hashlen_len(hash_len);        if(!*name)            gotoOK;        /*         * If it wasn't NUL, we know it was '/'. Skipthat         * slash, and continue until no more slashes.         
*/        do {            name++;        }while (unlikely(*name == '/'));        if(unlikely(!*name)) {OK:            /*pathname body, done */            if(!nd->depth)                return0;            name= nd->stack[nd->depth - 1].name;            /*trailing symlink, done */            if(!name)                return0;            /*last component of nested symlink */            err= walk_component(nd, WALK_GET | WALK_PUT);        } else{            err= walk_component(nd, WALK_GET);        }        if (err< 0)            returnerr;         …    }}

3.3.1 如果最近解析的目录是当前的根目录，即nd->path.dentry == nd->root.dentry && nd->path.mnt== nd->root.mnt，那么就不需要再继续往上追踪了，在最近一个分量上调用follow_mount函数，继续追踪下一个分量。

3.3.2 如果最近的一个目录是文件系统的根目录，即nd->path.dentry == nd->path.mnt->mnt_root，并且这个文件系统没有安装在其他的文件系统之上（follow_up返回0），在最近一个分量上调用follow_mount函数，继续追踪下一个分量。

3.3.3 如果最近的一个目录是文件系统的根目录，即nd->path.dentry == nd->path.mnt->mnt_root，并且这个文件系统安装在其他的文件系统之上，那么就需要执行文件系统切换，更新nd->path.dentry和nd->path.mnt，继续追踪父目录。

3.3.4 如果最近的一个目录不是文件系统的根目录，继续追踪父目录。

static int follow_dotdot(struct nameidata *nd){    while(1) {        structdentry *old = nd->path.dentry;/* 如果已经达到当前进程的根节点，这时不能再往上追踪了*/        if(nd->path.dentry == nd->root.dentry &&            nd->path.mnt == nd->root.mnt) {            break;        }/* 已经到达节点与其父节点在同一个设备上。* 在这种情况下既然已经到达的这个节点的dentry结构已经建立，* 则其父节点的dentry结构也必然已经建立在内存中，* 而且dentry结构中的指针d_parent就指向其父节点，所以往上跑一层是很简单的事情*/        if(nd->path.dentry != nd->path.mnt->mnt_root) {            /*rare case of legitimate dget_parent()... */            nd->path.dentry= dget_parent(nd->path.dentry); /*往上走一层，并且对应用计数加一*/            dput(old);            if(unlikely(!path_connected(&nd->path)))                return-ENOENT;            break;        }         /*是否要交换文件系统 */        if(!follow_up(&nd->path))            break;    }    follow_mount(&nd->path);    nd->inode= nd->path.dentry->d_inode;    return 0;}

follow_mount检查path->dentry是否为某个文件系统的安装点，如果是，则调用lookup_mnt查找安装在该目录上的文件系统，把path->mnt更新为该已安装文件系统对象，并把path->dentry更新为其根目录（mounted->mnt_root），然后重复整个操作（因为可能有多个文件系统安装到同一个安装点）。

/* * Skip totop of mountpoint pile in refwalk mode for follow_dotdot() */static void follow_mount(struct path *path){    while(d_mountpoint(path->dentry)) { /* 目录是否为安装点 */        structvfsmount *mounted = lookup_mnt(path); /* 在目录项告诉缓存中查找已安装的根目录 */        if(!mounted)            break;        dput(path->dentry);        mntput(path->mnt);        path->mnt= mounted; /* 更新path中的安装点对象地址 */        path->dentry= dget(mounted->mnt_root); /* 更新path的dentry信息*/    }}

3.4 如果分量名是“.”，则继续判断下一个分量。（“.”指的是当前目录，没有任何效果）

3.5 如果分量名不是“.”，也不是“..”，则需要在目录项高速缓存中查找；如果具体文件系统有自定义的d_hash方法，则调用它来修正hash_name计算出的散列值。

static int link_path_walk(const char *name, structnameidata *nd){    int err;     while (*name=='/')/* 若为绝对路径，则跳过符号“/” */        name++;    if(!*name) /* 若查找结束，则返回成功*/        return0;     /* At thispoint we know we have a real path component. */    for(;;) {        u64hash_len;        inttype;         …         hash_len= hash_name(name);/* 计算出一个与目录项高速缓存散列表有关的32位散列值*/         type =LAST_NORM;        if(name[0] == '.') switch (hashlen_len(hash_len)) {/*是否为”.”或“..”*/            …        }        if(likely(type == LAST_NORM)) {/* 纠正hash值 */            structdentry *parent = nd->path.dentry;            nd->flags&= ~LOOKUP_JUMPED;            if(unlikely(parent->d_flags & DCACHE_OP_HASH)) {                structqstr this = { { .hash_len = hash_len }, .name = name };                err= parent->d_op->d_hash(parent, &this);                if(err < 0)                    returnerr;                hash_len= this.hash_len;                name= this.name;            }        }         nd->last.hash_len= hash_len;        nd->last.name= name;        nd->last_type= type;         name+= hashlen_len(hash_len);        if(!*name)            gotoOK;        /*         * If it wasn't NUL, we know it was '/'. Skipthat         * slash, and continue until no more slashes.         
*/        do {            name++;        } while(unlikely(*name == '/'));        if(unlikely(!*name)) {OK:            /*pathname body, done */            if(!nd->depth)                return0;            name= nd->stack[nd->depth - 1].name;            /*trailing symlink, done */            if(!name)                return0;            /*last component of nested symlink */              /* 通过walk_component函数找到解析字符串对应的inode，并且将nd->inode改称最新inode，准备继续解析后面的字符串信息。因为目录项所管理的inode在系统中通过hash表进行维护，因此，通过hash值可以很容易的找到inode。如果内存中还不存在inode对象，对于ext3文件系统会通过ext3_lookup函数从磁盘上获取inode的元数据信息，并且构造目录项中所有的inode对象。 */              err= walk_component(nd, WALK_GET | WALK_PUT);        } else{            err= walk_component(nd, WALK_GET);        }          …    }}

3.6 调用walk_component，进行分量实际的查找工作。

static int walk_component(struct nameidata *nd, intflags){    structpath path;    structinode *inode;    unsignedseq;    int err;    /*     * "." and ".." are special- ".." especially so because it has     * to be able to know about the current rootdirectory and     * parent relationships.     */    if(unlikely(nd->last_type != LAST_NORM)) {        err =handle_dots(nd, nd->last_type);        if(flags & WALK_PUT)            put_link(nd);        returnerr;    }    err =lookup_fast(nd, &path, &inode, &seq);    if(unlikely(err <= 0)) {        if(err < 0)            returnerr;        path.dentry= lookup_slow(&nd->last, nd->path.dentry,                      nd->flags);        if(IS_ERR(path.dentry))            returnPTR_ERR(path.dentry);         path.mnt= nd->path.mnt;        err =follow_managed(&path, nd);        if(unlikely(err < 0))            returnerr;         if(unlikely(d_is_negative(path.dentry))) {            path_to_nameidata(&path,nd);            return-ENOENT;        }         seq =0;    /* we are already out of RCU mode */        inode= d_backing_inode(path.dentry);    }     if (flags& WALK_PUT)        put_link(nd);    err =should_follow_link(nd, &path, flags & WALK_GET, inode, seq);    if(unlikely(err))        returnerr;    path_to_nameidata(&path,nd);    nd->inode= inode;    nd->seq= seq;    return 0;}

该函数的本质是调用lookup_fast-> __d_lookup在目录项高速缓存中搜索分量的目录项对象。如果查找到，则更新nd->path.mnt和nd->path.dentry.

static int lookup_fast(struct nameidata *nd,               struct path *path, struct inode **inode,               unsigned *seqp){    structvfsmount *mnt = nd->path.mnt;    structdentry *dentry, *parent = nd->path.dentry;…    if(nd->flags & LOOKUP_RCU) {        ...    } else {         /* 在目录项高速缓存中查找目录项对象 */        dentry= __d_lookup(parent, &nd->last);        if(unlikely(!dentry))            return0;          …    }     …    path->mnt= mnt;    path->dentry= dentry;    err =follow_managed(path, nd);     …}

查找高速缓存：

    list_bl_for_each_entry_rcu(dentry,node, b, d_hash) {         if(dentry->d_name.hash != hash)            continue;         spin_lock(&dentry->d_lock);        if(dentry->d_parent != parent)            gotonext;        if(d_unhashed(dentry))            gotonext;         /*         * It is safe to compare names since d_move()cannot         * change the qstr (protected by d_lock).         */        if(parent->d_flags & DCACHE_OP_COMPARE) {            inttlen = dentry->d_name.len;            constchar *tname = dentry->d_name.name;            if(parent->d_op->d_compare(parent, dentry, tlen, tname, name))                gotonext;        } else{            if(dentry->d_name.len != len)                gotonext;            if(dentry_cmp(dentry, str, len))                gotonext;        }         dentry->d_lockref.count++;        found= dentry;        spin_unlock(&dentry->d_lock);        break;next:        spin_unlock(&dentry->d_lock);

否则调用lookup_slow->lookup_real从磁盘读取目录，创建一个新的目录项对象，并把其插入到目录项高速缓存中，然后创建一个新的索引节点插入到索引节点高速缓存中。

struct dentry *d_alloc(struct dentry * parent,const struct qstr *name){    structdentry *dentry = __d_alloc(parent->d_sb, name);    if(!dentry)        returnNULL;    dentry->d_flags|= DCACHE_RCUACCESS;    spin_lock(&parent->d_lock);    /*     * don't need child lock because it is notsubject     * to concurrency here     */    __dget_dlock(parent);    dentry->d_parent= parent;    list_add(&dentry->d_child,&parent->d_subdirs);    spin_unlock(&parent->d_lock);     returndentry;}

然后更新nd->path.dentry和nd->inode，继续下一个分量的查找。

4. 现在，原路径名的所有分量都被解析了。

0 0