fuse用户态、内核态通信机制分析
来源:互联网 发布:php与mysql web开发五 编辑:程序博客网 时间:2024/05/02 00:25
关于fuse用户态文件系统的文章有很多,比如http://my.debugman.net/program/fuse-180.html,就写得很全面。但关于fuse用户态、内核态通信的文章还比较少,我现在发现的一篇是http://blog.chinaunix.net/uid-20687780-id-313603.html,主要讲解了用户态、内核态的通信协议。
这里主要分析一下fuse的内核态用户态通信机制。fuse的主要运行流程如下图所示:
当用户态程序执行了POSIX的文件系统操作时,该操作经过glibc变换为系统调用传递给vfs,vfs再将其传给FUSE的内核模块;FUSE的内核模块根据系统调用的类型,将请求发送到用户态的FUSE进程,并等待用户态进程的应答。FUSE内核模块在收到应答后,将其返回给vfs,最终把运行结果呈现给用户态程序。
那FUSE是如何让用户态与内核态通信的呢?这个在源代码中可以看得比较清楚。
首先,在内核代码fs/fuse/dev.c中:
/* Misc device descriptor for FUSE. */
static struct miscdevice fuse_miscdevice = {
	.minor = FUSE_MINOR,
	.name = "fuse",	/* the resulting misc device shows up as /dev/fuse */
	.fops = &fuse_dev_operations,
};

/*
 * Set up the FUSE device: create the "fuse_request" slab cache (objects
 * sized for struct fuse_req) and then register fuse_miscdevice.
 *
 * Returns 0 on success, -ENOMEM if the cache cannot be created, or the
 * error code returned by misc_register(). When registration fails, the
 * cache created above is destroyed again before returning.
 */
int __init fuse_dev_init(void)
{
	int err = -ENOMEM;

	fuse_req_cachep = kmem_cache_create("fuse_request", sizeof(struct fuse_req), 0, 0, NULL);
	if (!fuse_req_cachep)
		goto out;

	err = misc_register(&fuse_miscdevice);	/* register as a misc device; misc devices use major number 10 */
	if (err)
		goto out_cache_clean;

	return 0;

 out_cache_clean:
	kmem_cache_destroy(fuse_req_cachep);
 out:
	return err;
}
调用fuse_dev_init函数后,将会注册一个misc设备(misc设备本质上是字符设备,主设备号固定为10,并且会根据设备名在/dev/目录下自动生成设备文件),对应的设备文件为/dev/fuse。用户态代码通过open打开这个设备文件,并通过如下函数注册与fuse内核态通信的函数:
/*
 * Create a channel on top of the file descriptor fd.
 *
 * The channel uses fuse_kern_chan_receive / fuse_kern_chan_send /
 * fuse_kern_chan_destroy as its receive, send and destroy callbacks.
 * The buffer size passed on is one page plus 0x1000 bytes, raised to
 * MIN_BUFSIZE if that sum is smaller.
 *
 * Returns whatever fuse_chan_new() returns.
 *
 * NOTE(review): fd is presumably the descriptor obtained by opening
 * /dev/fuse - confirm against the caller.
 * NOTE(review): op lives on this function's stack; this assumes
 * fuse_chan_new() copies the ops table rather than keeping the pointer -
 * TODO confirm.
 */
struct fuse_chan *fuse_kern_chan_new(int fd)
{
	struct fuse_chan_ops op = {
		.receive = fuse_kern_chan_receive,
		.send = fuse_kern_chan_send,
		.destroy = fuse_kern_chan_destroy,
	};
	size_t bufsize = getpagesize() + 0x1000;
	/* never go below the minimum buffer size */
	bufsize = bufsize < MIN_BUFSIZE ? MIN_BUFSIZE : bufsize;
	return fuse_chan_new(&op, fd, bufsize, NULL);
}
fuse_kern_chan_receive函数通过res = read(fuse_chan_fd(ch), buf, size);从/dev/fuse中读取内核发来的请求,fuse_kern_chan_send函数则通过ssize_t res = writev(fuse_chan_fd(ch), iov, count);将数据发送到内核模块。
再回到内核模块,还是在fs/fuse/dev.c文件中,FUSE通过为/dev/fuse设备文件注册以下操作回调,来支持用户态对其的读写操作:
/* File operations registered for the FUSE device file. */
const struct file_operations fuse_dev_operations = {
	.owner		= THIS_MODULE,
	.llseek		= no_llseek,		/* seeking is not supported */
	.read		= do_sync_read,		/* generic synchronous read helper */
	.aio_read	= fuse_dev_read,	/* FUSE's asynchronous handler for reads from userspace */
	.write		= do_sync_write,	/* generic synchronous write helper */
	.aio_write	= fuse_dev_write,	/* FUSE's asynchronous handler for writes from userspace */
	.poll		= fuse_dev_poll,	/* check whether there is activity on the file; if not, sleep until there is */
	.release	= fuse_dev_release,	/* userspace closed the fd that refers to this device file */
	.fasync		= fuse_dev_fasync,	/* enable or disable signal-driven I/O event notification */
};
其中,do_sync_read中调用了ret = filp->f_op->aio_read(&kiocb, &iov, 1, kiocb.ki_pos),同样,do_sync_write函数中也调用了ret = filp->f_op->aio_write(&kiocb, &iov, 1, kiocb.ki_pos),所以FUSE只需实现aio_read和aio_write,同步的read和write不用单独实现。
在FUSE内核中,存在一个fuse_conn结构体,为用户态、内核态通信服务,其结构为:
/**
 * A Fuse connection.
 *
 * This structure is created, when the filesystem is mounted, and is
 * destroyed, when the client device is closed and the filesystem is
 * unmounted.
 */
struct fuse_conn {
	/** Lock protecting accesses to members of this structure */
	spinlock_t lock;

	/** Mutex protecting against directory alias creation */
	struct mutex inst_mutex;

	/** Refcount (reference count of this structure) */
	atomic_t count;

	/** The user id for this mount */
	uid_t user_id;

	/** The group id for this mount */
	gid_t group_id;

	/** The fuse mount flags for this mount (mount options) */
	unsigned flags;

	/** Maximum read size (in bytes) */
	unsigned max_read;

	/** Maximum write size (in bytes) */
	unsigned max_write;

	/** Readers of the connection are waiting on this (wait queue for readers) */
	wait_queue_head_t waitq;

	/** The list of pending requests (requests waiting to be read) */
	struct list_head pending;

	/** The list of requests being processed */
	struct list_head processing;

	/** The list of requests under I/O */
	struct list_head io;

	/** The next unique kernel file handle */
	u64 khctr;

	/** rbtree of fuse_files waiting for poll events indexed by ph */
	struct rb_root polled_files;

	/** Maximum number of outstanding background requests */
	unsigned max_background;

	/** Number of background requests at which congestion starts */
	unsigned congestion_threshold;

	/** Number of requests currently in the background */
	unsigned num_background;

	/** Number of background requests currently queued for userspace */
	unsigned active_background;

	/** The list of background requests set aside for later queuing */
	struct list_head bg_queue;

	/** Pending interrupts (list of interrupt requests) */
	struct list_head interrupts;

	/** Flag indicating if connection is blocked.  This will be
	    the case before the INIT reply is received, and if there
	    are too many outstanding background requests */
	int blocked;

	/** waitq for blocked connection */
	wait_queue_head_t blocked_waitq;

	/** waitq for reserved requests */
	wait_queue_head_t reserved_req_waitq;

	/** The next unique request id */
	u64 reqctr;

	/** Connection established, cleared on umount, connection
	    abort and device release */
	unsigned connected;

	/** Connection failed (version mismatch).  Cannot race with
	    setting other bitfields since it is only set once in INIT
	    reply, before any other request, and never cleared */
	unsigned conn_error:1;

	/** Connection successful.  Only set in INIT */
	unsigned conn_init:1;

	/** Do readpages asynchronously?  Only set in INIT */
	unsigned async_read:1;

	/** Do not send separate SETATTR request before open(O_TRUNC) */
	unsigned atomic_o_trunc:1;

	/** Filesystem supports NFS exporting.  Only set in INIT */
	unsigned export_support:1;

	/** Set if bdi is valid */
	unsigned bdi_initialized:1;

	/*
	 * The following bitfields are only for optimization purposes
	 * and hence races in setting them will not cause malfunction
	 */

	/** Is fsync not implemented by fs? */
	unsigned no_fsync:1;

	/** Is fsyncdir not implemented by fs? */
	unsigned no_fsyncdir:1;

	/** Is flush not implemented by fs? */
	unsigned no_flush:1;

	/** Is setxattr not implemented by fs? */
	unsigned no_setxattr:1;

	/** Is getxattr not implemented by fs? */
	unsigned no_getxattr:1;

	/** Is listxattr not implemented by fs? */
	unsigned no_listxattr:1;

	/** Is removexattr not implemented by fs? */
	unsigned no_removexattr:1;

	/** Are file locking primitives not implemented by fs? */
	unsigned no_lock:1;

	/** Is access not implemented by fs? */
	unsigned no_access:1;

	/** Is create not implemented by fs? */
	unsigned no_create:1;

	/** Is interrupt not implemented by fs? */
	unsigned no_interrupt:1;

	/** Is bmap not implemented by fs? */
	unsigned no_bmap:1;

	/** Is poll not implemented by fs? */
	unsigned no_poll:1;

	/** Do multi-page cached writes */
	unsigned big_writes:1;

	/** Don't apply umask to creation modes */
	unsigned dont_mask:1;

	/** The number of requests waiting for completion */
	atomic_t num_waiting;

	/** Negotiated minor version */
	unsigned minor;

	/** Backing dev info */
	struct backing_dev_info bdi;

	/** Entry on the fuse_conn_list */
	struct list_head entry;

	/** Device ID from super block */
	dev_t dev;

	/** Dentries in the control filesystem */
	struct dentry *ctl_dentry[FUSE_CTL_NUM_DENTRIES];

	/** number of dentries used in the above array */
	int ctl_ndents;

	/** O_ASYNC requests */
	struct fasync_struct *fasync;

	/** Key for lock owner ID scrambling */
	u32 scramble_key[4];

	/** Reserved request for the DESTROY message */
	struct fuse_req *destroy_req;

	/** Version counter for attribute changes */
	u64 attr_version;

	/** Called on final put */
	void (*release)(struct fuse_conn *);

	/** Super block for this connection. */
	struct super_block *sb;

	/** Read/write semaphore to hold when accessing sb. */
	struct rw_semaphore killsb;
};
fuse_conn结构体的指针将会保存在file->private_data中,每次内核态向用户态发送请求时都会用到fuse_conn结构体。fuse_dev_read函数的主要处理流程如下:
/*
 * Read handler of the FUSE device (abridged excerpt: variable declarations
 * and some error handling are elided, marked by the dotted lines).
 *
 * Hands one queued request to the userspace reader: a queued interrupt is
 * delivered first if there is one; otherwise the first entry of fc->pending
 * is moved to fc->io and its header and arguments are copied out with the
 * fuse_copy_* helpers. Afterwards the request is either ended (no reply
 * expected) or moved to fc->processing.
 *
 * Returns the request length (in->h.len) on the normal path, the result of
 * fuse_read_interrupt() on the interrupt path, -EPERM when no fuse_conn is
 * attached, and -EAGAIN on the non-blocking early exit.
 *
 * NOTE(review): the request_end() and fuse_read_interrupt() paths return
 * without a visible spin_unlock(&fc->lock); presumably those helpers drop
 * the lock themselves - confirm against their definitions.
 * NOTE(review): the "restart" label has no visible user; the matching goto
 * is presumably in the elided "request too large" handling.
 */
static ssize_t fuse_dev_read(struct kiocb *iocb, const struct iovec *iov, unsigned long nr_segs, loff_t pos)
{
	// variable declarations omitted
	struct fuse_conn *fc = fuse_get_conn(file); /* obtain the fuse_conn pointer */
	if (!fc)
		return -EPERM;

 restart:
	spin_lock(&fc->lock);
	err = -EAGAIN;
	// in non-blocking mode, check whether any request is waiting to be handled; if none, return right away
	if ((file->f_flags & O_NONBLOCK) && fc->connected && !request_pending(fc))
		goto err_unlock;
	request_wait(fc); // block until a request from the kernel side has arrived
	......
	if (!list_empty(&fc->interrupts)) { // if an interrupt request is queued, send it first
		req = list_entry(fc->interrupts.next, struct fuse_req, intr_entry);
		return fuse_read_interrupt(fc, req, iov, nr_segs);
	}
	req = list_entry(fc->pending.next, struct fuse_req, list); // take the next request to send from the pending list
	req->state = FUSE_REQ_READING;
	list_move(&req->list, &fc->io); // move the request to the list of requests under I/O
	in = &req->in;
	reqsize = in->h.len;
	/* If request is too large, reply with an error and restart the read */
	........
	spin_unlock(&fc->lock);
	fuse_copy_init(&cs, fc, 1, req, iov, nr_segs); // prepare for copying the request to userspace
	err = fuse_copy_one(&cs, &in->h, sizeof(in->h)); // copy the request header to userspace
	if (!err)
		err = fuse_copy_args(&cs, in->numargs, in->argpages, (struct fuse_arg *) in->args, 0); // copy the request body to userspace
	fuse_copy_finish(&cs); // finish the copy and release memory
	spin_lock(&fc->lock);
	req->locked = 0;
	// error checks on the send path omitted ....
	if (!req->isreply) // no reply is expected, so end the request here
		request_end(fc, req);
	else {
		req->state = FUSE_REQ_SENT; // this request needs userspace to return a result
		list_move_tail(&req->list, &fc->processing); // so move it to the processing list, to be handled by fuse_dev_write
		if (req->interrupted)
			queue_interrupt(fc, req);
		spin_unlock(&fc->lock);
	}
	return reqsize;

 err_unlock:
	spin_unlock(&fc->lock);
	return err;
}