彻底的系统调用---open函数

来源:互联网 发布:网络爬虫 翻译 编辑:程序博客网 时间:2024/04/30 04:11
先写个简单的test.c#include <linux/module.h>#include <linux/init.h>#include <linux/fs.h>#include <linux/cdev.h>#include <linux/errno.h>struct cdev test_cdev;dev_t devno;unsigned int major = 0;unsigned int minor = 0;int test_open (struct inode *nod, struct file *filp){    printk("<kernel> %s\n", __FUNCTION__);                return 0;}struct file_operations test_ops = {    .open = test_open,};int init_test(void){    int err = 0;    err = alloc_chrdev_region(&devno, 0, 1, "alloc register");    if(err){        printk("<kernel> cdev_add failed\n");                    err = -EBUSY;        goto fail;    }    major = MAJOR(devno);    minor = MINOR(devno);    printk("major is [%d], minor is [%d]\n", major, minor);    cdev_init(&test_cdev, &test_ops);    err = cdev_add(&test_cdev, devno, 1);        if(err){        printk("<kernel> cdev_add failed\n");                    err = -ENODEV;        goto fail1;    }    printk("<kernel>init \n");            return 0;fail:    return err;fail1:    unregister_chrdev_region(devno, 1);            return err;}void exit_test(void){    cdev_del(&test_cdev);    unregister_chrdev_region(devno, 1);            printk("bye\n");        }module_init(init_test);module_exit(exit_test);MODULE_LICENSE("GPL");MODULE_AUTHOR("Jesse");MODULE_DESCRIPTION("this is a test module");MODULE_VERSION("v0.1");复制代码  仅一个简单的open,应该不会有更简单的字符设备驱动了。app 层还应该有这么个东西。fd = open("/dev/test", O_RDWR);好了,上面下面都有了。那,中间是怎么个回事?大致的过程:  fd = open("/dev/test", O_RDWR);sys_opentest_open    这个sys_open()可不是一个简单的函数,它包括了文件路径查找,文件权限判断等各种复杂BT的步骤。况且,不知何时起,内核里的sys_open已不是曾经的那个光明磊落的sys_open,tag不到,即便find到,也是一些bt的形式,早已面目全非。     -- fs/open.c -- SYSCALL_DEFINE3(open, const char __user *, filename, int, flags, int, mode){    long ret;     if (force_o_largefile())        flags |= O_LARGEFILE;    ret = do_sys_open(AT_FDCWD, filename, flags, mode);        //==>bb    /* avoid REGPARM breakage on x86: */    asmlinkage_protect(3, ret, filename, flags, mode);    return ret; 
}复制代码有人问了,这个SYSCALL_DEFINE3是个什么东西,“你最好不要追究这样的问题”。内核里的各种宏定义,不是一般的有才。简单的gcc -E一下 简单的瞧瞧。    #define __SYSCALL_DEFINEx(x, name, ...)                 \    asmlinkage long sys##name(__SC_DECL##x(__VA_ARGS__));       \    static inline long SYSC##name(__SC_DECL##x(__VA_ARGS__));   \    asmlinkage long SyS##name(__SC_LONG##x(__VA_ARGS__))        \    {                               \        __SC_TEST##x(__VA_ARGS__);              \        return (long) SYSC##name(__SC_CAST##x(__VA_ARGS__));    \    }                               \    SYSCALL_ALIAS(sys##name, SyS##name);                \    static inline long SYSC##name(__SC_DECL##x(__VA_ARGS__))#define SYSCALL_DEFINEx(x, sname, ...)              \    static const char *types_##sname[] = {          \        __SC_STR_TDECL##x(__VA_ARGS__)          \    };                          \    static const char *args_##sname[] = {           \        __SC_STR_ADECL##x(__VA_ARGS__)          \    };                          \    SYSCALL_METADATA(sname, x);             \    __SYSCALL_DEFINEx(x, sname, __VA_ARGS__)#define SYSCALL_DEFINE3(name, ...) 
SYSCALL_DEFINEx(3, _##name, __VA_ARGS__)int main(void){    SYSCALL_DEFINE3(open, const char __user *, filename, int, flags, int, mode);}复制代码展开真面目:int main(void){    static const char *types__open[] = { __SC_STR_TDECL3(const char __user *, filename, int, flags, int, mode) };    static const char *args__open[] = { __SC_STR_ADECL3(const char __user *, filename, int, flags, int, mode) };     SYSCALL_METADATA(_open, 3);     asmlinkage long sys_open(__SC_DECL3(const char __user *, filename, int, flags, int, mode));     static inline long SYSC_open(__SC_DECL3(const char __user *, filename, int, flags, int, mode));     asmlinkage long SyS_open(__SC_LONG3(const char __user *, filename, int, flags, int, mode))     {         __SC_TEST3(const char __user *, filename, int, flags, int, mode);          return (long) SYSC_open(__SC_CAST3(const char __user *, filename, int, flags, int, mode));     }    SYSCALL_ALIAS(sys_open, SyS_open);     static inline long SYSC_open(__SC_DECL3(const char __user *, filename, int, flags, int, mode));}复制代码一些宏还未展开,点到为止,见好就收吧。       我们继续往下看。     bb:long do_sys_open(int dfd, const char __user *filename, int flags, int mode){    char *tmp = getname(filename);    //filename复制到了内核空间,即 *tmp ==>cc    int   fd  = PTR_ERR(tmp);        //return (long) ptr;    if (!IS_ERR(tmp)) {        fd = get_unused_fd_flags(flags);    //得到一个有效的fd ==>dd        if (fd >= 0) {            struct file *f = do_filp_open(dfd, tmp, flags, mode, 0);    // ==>ee            if (IS_ERR(f)) {                put_unused_fd(fd);                fd = PTR_ERR(f);            } else {                fsnotify_open(f->f_path.dentry);    //==>ff                fd_install(fd, f);    //将 fd 与file结构关联,以便 read write 等系统调用使用 ==>gg            }            }            putname(tmp);    //分配完毕,释放掉暂时保存filename的内核空间:kmem_cache_free    }        return fd;}复制代码         cc:#define __getname_gfp(gfp)  kmem_cache_alloc(names_cachep, (gfp))#define __getname()     __getname_gfp(GFP_KERNEL)char * getname(const 
char __user * filename){    char *tmp, *result;    result = ERR_PTR(-ENOMEM);    tmp = __getname();        // kmem_cache_alloc: 内存分配出一块空间    if (tmp)  {        int retval = do_getname(filename, tmp);    //copy filenames to the kernel data space(*tmp) before using them        result = tmp;        if (retval < 0) {            __putname(tmp);            result = ERR_PTR(retval);        }    }    audit_getname(result);    return result;}复制代码dd:#define get_unused_fd_flags(flags) alloc_fd(0, (flags))-- fs/file.c --/* * allocate a file descriptor, mark it busy. */int alloc_fd(unsigned start, unsigned flags){       struct files_struct *files = current->files;    unsigned int fd;    int error;    struct fdtable *fdt;    spin_lock(&files->file_lock);repeat:    fdt = files_fdtable(files);    fd = start;    if (fd < files->next_fd)        fd = files->next_fd;    if (fd < fdt->max_fds)        fd = find_next_zero_bit(fdt->open_fds->fds_bits,                                fdt->max_fds,                                 fd);    //这个很熟悉的函数==>ddD    error = expand_files(files, fd);    if (error < 0)        goto out;    /*     * If we needed to expand the fs array we     * might have blocked - try again.     
*/    if (error)        goto repeat;    if (start <= files->next_fd)        files->next_fd = fd + 1;    FD_SET(fd, fdt->open_fds);    if (flags & O_CLOEXEC)        FD_SET(fd, fdt->close_on_exec);    else        FD_CLR(fd, fdt->close_on_exec);    error = fd;#if 1    /* Sanity check */    if (rcu_dereference(fdt->fd[fd]) != NULL) {        printk(KERN_WARNING "alloc_fd: slot %d not NULL!\n", fd);        rcu_assign_pointer(fdt->fd[fd], NULL);    }#endifout:    spin_unlock(&files->file_lock);    return error;}复制代码ddD:  一个出镜率很高的函数,常用于各种什么符(比如文件描述符)的分配。当然了,这些符都是按顺序分配的,用类似数组的位图形式,其中为0的位表示未分配,然后遍历去找这些0。unsigned long find_next_zero_bit(const unsigned long *addr,                               unsigned long size,                                 unsigned long offset){    const unsigned long *p = addr + BITOP_WORD(offset);    //p = addr    unsigned long result = offset & ~(BITS_PER_LONG-1);    //result = 0    unsigned long tmp;    if (offset >= size)        return size;    size   -= result;            offset %= BITS_PER_LONG;     if (offset) {        tmp = *(p++);        tmp |= ~0UL >> (BITS_PER_LONG - offset);        if (size < BITS_PER_LONG)            goto found_first;        if (~tmp)            goto found_middle;        size -= BITS_PER_LONG;        result += BITS_PER_LONG;    }    while (size & ~(BITS_PER_LONG-1)) {        if (~(tmp = *(p++)))            goto found_middle;        result += BITS_PER_LONG;        size -= BITS_PER_LONG;    }    if (!size)        return result;    tmp = *p;found_first:    tmp |= ~0UL << size;    if (tmp == ~0UL)    /* Are any bits zero? */        return result + size;   /* Nope. */found_middle:    return result + ffz(tmp);}复制代码           下面是理解的重点,也是一调到底的精髓。重点在于struct file的分配。         ee:/* * Note that the low bits of the passed in "open_flag" * are not the same as in the local variable "flag". See * open_to_namei_flags() for more details. 
*/struct file *do_filp_open(int dfd, const char *pathname,                                  int open_flag, int mode, int acc_mode){    struct file *filp;    struct nameidata nd;    int error;    struct path path;    struct dentry *dir;    int count = 0;    int will_write;    int flag = open_to_namei_flags(open_flag);    /* 设置open的 mode */    if (!acc_mode)        acc_mode = MAY_OPEN | ACC_MODE(flag);                    /* O_TRUNC implies we need access checks for write permissions */    if (flag & O_TRUNC)        acc_mode |= MAY_WRITE;                 /* Allow the LSM permission hook to distinguish append        access from general write access. */    if (flag & O_APPEND)        acc_mode |= MAY_APPEND;    /*     * The simplest case - just a plain lookup.     */    if (!(flag & O_CREAT)) {        error = path_lookup_open(dfd, pathname, lookup_flags(flag),                                 &nd, flag);            if (error)            return ERR_PTR(error);        goto ok;    }    ...    ...ok:    /*     * Consider:     * 1. may_open() truncates a file     * 2. a rw->ro mount transition occurs     * 3. nameidata_to_filp() fails due to     *    the ro mount.     * That would be inconsistent, and should     * be avoided. Taking this mnt write here     * ensures that (2) can not occur.     */    will_write = open_will_write_to_fs(flag, nd.path.dentry->d_inode);    if (will_write) {        error = mnt_want_write(nd.path.mnt);        if (error)            goto exit;    }    error = may_open(&nd.path, acc_mode, flag);    //**    if (error) {        if (will_write)            mnt_drop_write(nd.path.mnt);        goto exit;    }    filp = nameidata_to_filp(&nd, open_flag);    //分配struct file,得到filp    if (IS_ERR(filp))        ima_counts_put(&nd.path,                       acc_mode & (MAY_READ | MAY_WRITE | MAY_EXEC));    /*     * It is now safe to drop the mnt write     * because the filp has had a write taken     * on its behalf.     
*/    if (will_write)        mnt_drop_write(nd.path.mnt);    if (nd.root.mnt)        path_put(&nd.root);    return filp;    ...    ...}复制代码            struct file *nameidata_to_filp(struct nameidata *nd, int flags){    const struct cred *cred = current_cred();    struct file *filp;    /* Pick up the filp from the open intent */    filp = nd->intent.open.file;    /* Has the filesystem initialised the file for us? */    if (filp->f_path.dentry == NULL)        filp = __dentry_open(nd->path.dentry, nd->path.mnt, flags, filp,                     NULL, cred);  //!!!    else         path_put(&nd->path);    return filp;}复制代码           static struct file *__dentry_open(struct dentry *dentry, struct vfsmount *mnt,                    int flags, struct file *f,                    int (*open)(struct inode *, struct file *),                    const struct cred *cred){    struct inode *inode;    int error;    f->f_flags = flags;    f->f_mode = (__force fmode_t)((flags+1) & O_ACCMODE) | FMODE_LSEEK |                FMODE_PREAD | FMODE_PWRITE;    inode = dentry->d_inode;    if (f->f_mode & FMODE_WRITE) {        error = __get_file_write_access(inode, mnt);        if (error)            goto cleanup_file;        if (!special_file(inode->i_mode))            file_take_write(f);    }    f->f_mapping = inode->i_mapping;    f->f_path.dentry = dentry;    f->f_path.mnt = mnt;    f->f_pos = 0;    f->f_op = fops_get(inode->i_fop);  //!!! !!!    file_move(f, &inode->i_sb->s_files);    error = security_dentry_open(f, cred);    if (error)        goto cleanup_all;    if (!open && f->f_op)  //f->f_op若有,则执行open        open = f->f_op->open;      if (open) {        error = open(inode, f);        if (error)            goto cleanup_all;    }    f->f_flags &= ~(O_CREAT | O_EXCL | O_NOCTTY | O_TRUNC);    file_ra_state_init(&f->f_ra, f->f_mapping->host->i_mapping);复制代码          说下六个感叹号的地方。记得我们在注册字符设备的时候是否有个cdev_init ? 
它的体内是不是有个 cdev->ops = fops ?inode里是不是有个i_cdev ?这里,file的f_op是不是被赋了inode的i_fop ?(补充一步:对字符设备而言,inode->i_fop 起初指向通用的 def_chr_fops,它的 open 是 chrdev_open;chrdev_open 按设备号找到我们用 cdev_add 注册进去的 cdev,记到 inode->i_cdev 里,再把 filp->f_op 换成 cdev->ops 并调用其中的 open,也就是 test_open。)打开struct file, struct inode的定义处,多瞧上两眼。这里就不贴了。         就这样,fd = open("/dev/test", O_RDWR) 最终还是调到了test_open 。             最后就是个收尾函数,将得到的fd和struct file关联起来。         gg:void fd_install(unsigned int fd, struct file *file){       struct files_struct *files = current->files;    struct fdtable *fdt;    spin_lock(&files->file_lock);    fdt = files_fdtable(files);    BUG_ON(fdt->fd[fd] != NULL);    rcu_assign_pointer(fdt->fd[fd], file);    spin_unlock(&files->file_lock);}复制代码           do_sys_open 的结尾 return fd;  返回给 app。  fd = open("/dev/test", O_RDWR)             你懂的。

原创粉丝点击