先写个简单的 test.c:

```c
#include <linux/module.h>
#include <linux/init.h>
#include <linux/fs.h>
#include <linux/cdev.h>
#include <linux/errno.h>

struct cdev test_cdev;
dev_t devno;
unsigned int major = 0;
unsigned int minor = 0;

int test_open (struct inode *nod, struct file *filp)
{
    printk("<kernel> %s\n", __FUNCTION__);
    return 0;
}

struct file_operations test_ops = {
    .open = test_open,
};

int init_test(void)
{
    int err = 0;

    err = alloc_chrdev_region(&devno, 0, 1, "alloc register");
    if(err){
        printk("<kernel> cdev_add failed\n");
        err = -EBUSY;
        goto fail;
    }
    major = MAJOR(devno);
    minor = MINOR(devno);
    printk("major is [%d], minor is [%d]\n", major, minor);

    cdev_init(&test_cdev, &test_ops);
    err = cdev_add(&test_cdev, devno, 1);
    if(err){
        printk("<kernel> cdev_add failed\n");
        err = -ENODEV;
        goto fail1;
    }
    printk("<kernel>init \n");
    return 0;
fail:
    return err;
fail1:
    unregister_chrdev_region(devno, 1);
    return err;
}

void exit_test(void)
{
    cdev_del(&test_cdev);
    unregister_chrdev_region(devno, 1);
    printk("bye\n");
}

module_init(init_test);
module_exit(exit_test);

MODULE_LICENSE("GPL");
MODULE_AUTHOR("Jesse");
MODULE_DESCRIPTION("this is a test module");
MODULE_VERSION("v0.1");
```

仅一个简单的 open,应该不会有更简单的字符设备驱动了。

app 层还应该有这么个东西:

```c
fd = open("/dev/test", O_RDWR);
```

好了,上面下面都有了。那,中间是怎么个回事?

大致的过程:

```
fd = open("/dev/test", O_RDWR);
    → sys_open
        → test_open
```

这个 sys_open() 可不是一个简单的函数,它包括了文件路径查找、文件权限判断等各种复杂BT的步骤。况且,不知何时起,内核里的 sys_open 已不是曾经的那个光明磊落的 sys_open,tag 不到,即便 find 到,也是一些bt的形式,早已面目全非。

-- fs/open.c --

```c
SYSCALL_DEFINE3(open, const char __user *, filename, int, flags, int, mode)
{
    long ret;

    if (force_o_largefile())
        flags |= O_LARGEFILE;

    ret = do_sys_open(AT_FDCWD, filename, flags, mode); //==>bb
    /* avoid REGPARM breakage on x86: */
    asmlinkage_protect(3, ret, filename, flags, mode);
    return ret;
}
```

有人问了,这个 SYSCALL_DEFINE3 是个什么东西,“你最好不要追究这样的问题”。内核里的各种宏定义,不是一般的有才。简单地 gcc -E 一下,简单地瞧瞧。

```c
#define __SYSCALL_DEFINEx(x, name, ...) \
    asmlinkage long sys##name(__SC_DECL##x(__VA_ARGS__)); \
    static inline long SYSC##name(__SC_DECL##x(__VA_ARGS__)); \
    asmlinkage long SyS##name(__SC_LONG##x(__VA_ARGS__)) \
    { \
        __SC_TEST##x(__VA_ARGS__); \
        return (long) SYSC##name(__SC_CAST##x(__VA_ARGS__)); \
    } \
    SYSCALL_ALIAS(sys##name, SyS##name); \
    static inline long SYSC##name(__SC_DECL##x(__VA_ARGS__))

#define SYSCALL_DEFINEx(x, sname, ...) \
    static const char *types_##sname[] = { \
        __SC_STR_TDECL##x(__VA_ARGS__) \
    }; \
    static const char *args_##sname[] = { \
        __SC_STR_ADECL##x(__VA_ARGS__) \
    }; \
    SYSCALL_METADATA(sname, x); \
    __SYSCALL_DEFINEx(x, sname, __VA_ARGS__)

#define SYSCALL_DEFINE3(name, ...) SYSCALL_DEFINEx(3, _##name, __VA_ARGS__)

int main(void)
{
    SYSCALL_DEFINE3(open, const char __user *, filename, int, flags, int, mode);
}
```

展开真面目:

```c
int main(void)
{
    static const char *types__open[] = { __SC_STR_TDECL3(const char __user *, filename, int, flags, int, mode) };
    static const char *args__open[] = { __SC_STR_ADECL3(const char __user *, filename, int, flags, int, mode) };
    SYSCALL_METADATA(_open, 3);
    asmlinkage long sys_open(__SC_DECL3(const char __user *, filename, int, flags, int, mode));
    static inline long SYSC_open(__SC_DECL3(const char __user *, filename, int, flags, int, mode));
    asmlinkage long SyS_open(__SC_LONG3(const char __user *, filename, int, flags, int, mode))
    {
        __SC_TEST3(const char __user *, filename, int, flags, int, mode);
        return (long) SYSC_open(__SC_CAST3(const char __user *, filename, int, flags, int, mode));
    }
    SYSCALL_ALIAS(sys_open, SyS_open);
    static inline long SYSC_open(__SC_DECL3(const char __user *, filename, int, flags, int, mode));
}
```

一些宏还未展开,点到为止,见好就收吧。

我们继续往下看。

bb:

```c
long do_sys_open(int dfd, const char __user *filename, int flags, int mode)
{
    char *tmp = getname(filename); //filename复制到了内核空间,即 *tmp ==>cc
    int fd = PTR_ERR(tmp); //return (long) ptr;

    if (!IS_ERR(tmp)) {
        fd = get_unused_fd_flags(flags); //得到一个有效的fd ==>dd
        if (fd >= 0) {
            struct file *f = do_filp_open(dfd, tmp, flags, mode, 0); // ==>ee
            if (IS_ERR(f)) {
                put_unused_fd(fd);
                fd = PTR_ERR(f);
            } else {
                fsnotify_open(f->f_path.dentry); //==>ff
                fd_install(fd, f); //将 fd 与file结构关联,以便 read write 等系统调用使用 ==>gg
            }
        }
        putname(tmp); //分配完毕,释放掉暂时保存filename的内核空间:kmem_cache_free
    }
    return fd;
}
```

cc:

```c
#define __getname_gfp(gfp) kmem_cache_alloc(names_cachep, (gfp))
#define __getname() __getname_gfp(GFP_KERNEL)

char * getname(const char __user * filename)
{
    char *tmp, *result;

    result = ERR_PTR(-ENOMEM);
    tmp = __getname(); // kmem_cache_alloc: 内存分配出一块空间
    if (tmp) {
        int retval = do_getname(filename, tmp); //copy filenames to the kernel data space(*tmp) before using them

        result = tmp;
        if (retval < 0) {
            __putname(tmp);
            result = ERR_PTR(retval);
        }
    }
    audit_getname(result);
    return result;
}
```

dd:

```c
#define get_unused_fd_flags(flags) alloc_fd(0, (flags))
```

-- fs/file.c --

```c
/*
 * allocate a file descriptor, mark it busy.
 */
int alloc_fd(unsigned start, unsigned flags)
{
    struct files_struct *files = current->files;
    unsigned int fd;
    int error;
    struct fdtable *fdt;

    spin_lock(&files->file_lock);
repeat:
    fdt = files_fdtable(files);
    fd = start;
    if (fd < files->next_fd)
        fd = files->next_fd;

    if (fd < fdt->max_fds)
        fd = find_next_zero_bit(fdt->open_fds->fds_bits, fdt->max_fds, fd); //这个很熟悉的函数==>ddD

    error = expand_files(files, fd);
    if (error < 0)
        goto out;

    /*
     * If we needed to expand the fs array we
     * might have blocked - try again.
     */
    if (error)
        goto repeat;

    if (start <= files->next_fd)
        files->next_fd = fd + 1;

    FD_SET(fd, fdt->open_fds);
    if (flags & O_CLOEXEC)
        FD_SET(fd, fdt->close_on_exec);
    else
        FD_CLR(fd, fdt->close_on_exec);
    error = fd;
#if 1
    /* Sanity check */
    if (rcu_dereference(fdt->fd[fd]) != NULL) {
        printk(KERN_WARNING "alloc_fd: slot %d not NULL!\n", fd);
        rcu_assign_pointer(fdt->fd[fd], NULL);
    }
#endif

out:
    spin_unlock(&files->file_lock);
    return error;
}
```

ddD:

一个出镜率很高的函数,常用于各种什么符的分配。当然了,这些符都是按顺序分配的,用类似数组的形式,数组里的 0 表示未分配,然后遍历去找这些 0。

```c
unsigned long find_next_zero_bit(const unsigned long *addr, unsigned long size, unsigned long offset)
{
    const unsigned long *p = addr + BITOP_WORD(offset); //p = addr
    unsigned long result = offset & ~(BITS_PER_LONG-1); //result = 0
    unsigned long tmp;

    if (offset >= size)
        return size;
    size -= result;
    offset %= BITS_PER_LONG;
    if (offset) {
        tmp = *(p++);
        tmp |= ~0UL >> (BITS_PER_LONG - offset);
        if (size < BITS_PER_LONG)
            goto found_first;
        if (~tmp)
            goto found_middle;
        size -= BITS_PER_LONG;
        result += BITS_PER_LONG;
    }
    while (size & ~(BITS_PER_LONG-1)) {
        if (~(tmp = *(p++)))
            goto found_middle;
        result += BITS_PER_LONG;
        size -= BITS_PER_LONG;
    }
    if (!size)
        return result;
    tmp = *p;

found_first:
    tmp |= ~0UL << size;
    if (tmp == ~0UL) /* Are any bits zero? */
        return result + size; /* Nope. */
found_middle:
    return result + ffz(tmp);
}
```

下面是理解的重点,也是一调到底的精髓。重点在于 struct file 的分配。

ee:

```c
/*
 * Note that the low bits of the passed in "open_flag"
 * are not the same as in the local variable "flag". See
 * open_to_namei_flags() for more details.
 */
struct file *do_filp_open(int dfd, const char *pathname, int open_flag, int mode, int acc_mode)
{
    struct file *filp;
    struct nameidata nd;
    int error;
    struct path path;
    struct dentry *dir;
    int count = 0;
    int will_write;
    int flag = open_to_namei_flags(open_flag);

    /* 设置open的 mode */
    if (!acc_mode)
        acc_mode = MAY_OPEN | ACC_MODE(flag);

    /* O_TRUNC implies we need access checks for write permissions */
    if (flag & O_TRUNC)
        acc_mode |= MAY_WRITE;

    /* Allow the LSM permission hook to distinguish append
       access from general write access. */
    if (flag & O_APPEND)
        acc_mode |= MAY_APPEND;

    /*
     * The simplest case - just a plain lookup.
     */
    if (!(flag & O_CREAT)) {
        error = path_lookup_open(dfd, pathname, lookup_flags(flag), &nd, flag);
        if (error)
            return ERR_PTR(error);
        goto ok;
    }
    ... ...
ok:
    /*
     * Consider:
     * 1. may_open() truncates a file
     * 2. a rw->ro mount transition occurs
     * 3. nameidata_to_filp() fails due to
     *    the ro mount.
     * That would be inconsistent, and should
     * be avoided. Taking this mnt write here
     * ensures that (2) can not occur.
     */
    will_write = open_will_write_to_fs(flag, nd.path.dentry->d_inode);
    if (will_write) {
        error = mnt_want_write(nd.path.mnt);
        if (error)
            goto exit;
    }
    error = may_open(&nd.path, acc_mode, flag); //**
    if (error) {
        if (will_write)
            mnt_drop_write(nd.path.mnt);
        goto exit;
    }
    filp = nameidata_to_filp(&nd, open_flag); //分配struct file,得到filp
    if (IS_ERR(filp))
        ima_counts_put(&nd.path, acc_mode & (MAY_READ | MAY_WRITE | MAY_EXEC));
    /*
     * It is now safe to drop the mnt write
     * because the filp has had a write taken
     * on its behalf.
     */
    if (will_write)
        mnt_drop_write(nd.path.mnt);
    if (nd.root.mnt)
        path_put(&nd.root);
    return filp;
    ... ...
}
```

```c
struct file *nameidata_to_filp(struct nameidata *nd, int flags)
{
    const struct cred *cred = current_cred();
    struct file *filp;

    /* Pick up the filp from the open intent */
    filp = nd->intent.open.file;
    /* Has the filesystem initialised the file for us? */
    if (filp->f_path.dentry == NULL)
        filp = __dentry_open(nd->path.dentry, nd->path.mnt, flags, filp, NULL, cred); //!!!
    else
        path_put(&nd->path);
    return filp;
}
```

```c
static struct file *__dentry_open(struct dentry *dentry, struct vfsmount *mnt, int flags, struct file *f, int (*open)(struct inode *, struct file *), const struct cred *cred)
{
    struct inode *inode;
    int error;

    f->f_flags = flags;
    f->f_mode = (__force fmode_t)((flags+1) & O_ACCMODE) | FMODE_LSEEK | FMODE_PREAD | FMODE_PWRITE;
    inode = dentry->d_inode;
    if (f->f_mode & FMODE_WRITE) {
        error = __get_file_write_access(inode, mnt);
        if (error)
            goto cleanup_file;
        if (!special_file(inode->i_mode))
            file_take_write(f);
    }

    f->f_mapping = inode->i_mapping;
    f->f_path.dentry = dentry;
    f->f_path.mnt = mnt;
    f->f_pos = 0;
    f->f_op = fops_get(inode->i_fop); //!!! !!!
    file_move(f, &inode->i_sb->s_files);

    error = security_dentry_open(f, cred);
    if (error)
        goto cleanup_all;

    if (!open && f->f_op) //f->f_op若有,则执行open
        open = f->f_op->open;
    if (open) {
        error = open(inode, f);
        if (error)
            goto cleanup_all;
    }

    f->f_flags &= ~(O_CREAT | O_EXCL | O_NOCTTY | O_TRUNC);

    file_ra_state_init(&f->f_ra, f->f_mapping->host->i_mapping);
```

说下六个感叹号的地方。记得我们在注册字符设备的时候是否有个 cdev_init?她的体内是不是有个 cdev->ops = fops?inode 里是不是有个 i_cdev?这里,file 的 f_op 是不是被赋了 inode 的 i_fop?打开 struct file、struct inode 的定义处,多瞧上两眼。这里就不贴了。

补充一步:对字符设备节点来说,inode 的 i_fop 在 init_special_inode() 里被设成了 def_chr_fops,它的 open 是 chrdev_open();chrdev_open() 根据设备号找到我们注册的 cdev,把 filp->f_op 换成 cdev->ops(也就是 test_ops),然后再调用其中的 open。

就这样,fd = open("/dev/test", O_RDWR) 最终还是调到了 test_open。

最后就是个收尾函数,将得到的 fd 和 struct file 关联起来。

gg:

```c
void fd_install(unsigned int fd, struct file *file)
{
    struct files_struct *files = current->files;
    struct fdtable *fdt;

    spin_lock(&files->file_lock);
    fdt = files_fdtable(files);
    BUG_ON(fdt->fd[fd] != NULL);
    rcu_assign_pointer(fdt->fd[fd], file);
    spin_unlock(&files->file_lock);
}
```

do_sys_open 的结尾 return fd; 返回给 app。

fd = open("/dev/test", O_RDWR) 你懂的。