Linux块设备驱动(四)————块设备的数据结构与相关操作及I/O调度器
来源:互联网 发布:金山软件大厦 编辑:程序博客网 时间:2024/06/06 05:38
0、数据从内存到磁盘的过程
内存是一个线性的结构,Linux系统将内存分为页。一页最大可以是64KB,但是目前主流的系统页的大小都是4KB。每一页的数据会被先封装成一个段,用bio_vec表示。多个页会被封装成多个段,这些段组成一个以bio_vec为元素的数组,这个数组由bio结构中的bi_io_vec指针指向。
bi_io_vec是bio中的一个指针。一个或者多个bio会组成一个request请求描述符。request将被连接到请求队列request_queue中,或者被合并到请求队列request_queue中已有的request里。合并的条件是两个request请求所表示的扇区位置相邻。最后这个请求队列被处理,将数据写入磁盘。
1、块I/O请求(bio)
数据从内存到磁盘或者从磁盘到内存的过程,叫做I/O操作。内核使用一个核心数据结构bio来描述I/O操作。bio结构通过bi_io_vec指针指向一个段(bio_vec)数组,这些段中的数据就是要操作的数据。
/*
 * main unit of I/O for the block layer and lower layers (ie drivers and
 * stacking drivers)
 */
struct bio {
	/* first sector to transfer */
	sector_t bi_sector; /* device address in 512 byte sectors */
	/* pointer to another bio (not a sector) */
	struct bio *bi_next; /* request queue link */
	struct block_device *bi_bdev; /* block device this bio is for */
	unsigned long bi_flags; /* status, command, etc */
	unsigned long bi_rw; /* bottom bits READ/WRITE,
				 * top bits priority
				 */

	unsigned short bi_vcnt; /* how many bio_vec's */
	unsigned short bi_idx; /* current index into bvl_vec */

	/* Number of segments in this BIO after
	 * physical address coalescing is performed.
	 */
	unsigned short bi_phys_segments;

	/* Number of segments after physical and DMA remapping
	 * hardware coalescing is performed.
	 */
	unsigned short bi_hw_segments;

	unsigned int bi_size; /* residual I/O count */

	/*
	 * To keep track of the max hw size, we account for the
	 * sizes of the first and last virtually mergeable segments
	 * in this bio
	 */
	unsigned int bi_hw_front_size;
	unsigned int bi_hw_back_size;

	unsigned int bi_max_vecs; /* max bvl_vecs we can hold */

	struct bio_vec *bi_io_vec; /* the actual vec list */

	bio_end_io_t *bi_end_io;
	atomic_t bi_cnt; /* pin count */

	void *bi_private;

	bio_destructor_t *bi_destructor; /* destructor */
};
与bio相关的宏
/*用于获取目前的页指针*/ bio_page(bio) /*用于获取目前的页的偏移*/ bio_offset(bio) /*用于获取当前段中要传送的扇区数*/ bio_cur_sectors(bio)
2、请求结构(request)
几个连续的页面会组成一个bio结构,几个相邻的bio结构就会组成一个请求结构(request)。这样就不需要大幅度移动磁头了,节省了I/O操作的时间。
/*
 * try to put the fields that are referenced together in the same cacheline
 */
/* request structure */
struct request {
	struct list_head queuelist; /* list linkage into the request queue (request_queue) */
	struct list_head donelist;

	request_queue_t *q;

	unsigned int cmd_flags;
	enum rq_cmd_type_bits cmd_type;

	/* Maintain bio traversal state for part by part I/O submission.
	 * hard_* are block layer internals, no driver should touch them!
	 */

	/* first sector still to be transferred */
	sector_t sector; /* next sector to submit */
	sector_t hard_sector; /* next sector to complete */
	unsigned long nr_sectors; /* no. of sectors left to submit */
	unsigned long hard_nr_sectors; /* no. of sectors left to complete */
	/* no. of sectors left to submit in the current segment */
	unsigned int current_nr_sectors;

	/* no. of sectors left to complete in the current segment */
	unsigned int hard_cur_sectors;

	struct bio *bio; /* first bio of the request that has not completed */
	struct bio *biotail;/* last bio in the request's list */

	struct hlist_node hash; /* merge hash */
	/*
	 * The rb_node is only used inside the io scheduler, requests
	 * are pruned when moved to the dispatch queue. So let the
	 * completion_data share space with the rb_node.
	 */
	union {
		struct rb_node rb_node; /* sort/lookup */
		void *completion_data;
	};

	/*
	 * two pointers are available for the IO schedulers, if they need
	 * more they have to dynamically allocate it.
	 */
	void *elevator_private; /* I/O scheduler private data, pointer 1 */
	void *elevator_private2;/* I/O scheduler private data, pointer 2 */

	struct gendisk *rq_disk; /* disk this request is directed at */
	unsigned long start_time;

	/* Number of scatter-gather DMA addr+len pairs after
	 * physical address coalescing is performed.
	 */
	unsigned short nr_phys_segments;/* number of physical segments in the request */

	/* Number of scatter-gather addr+len pairs after
	 * physical and DMA remapping hardware coalescing is performed.
	 * This is the number of scatter-gather entries the driver
	 * will actually have to deal with after DMA mapping is done.
	 */
	unsigned short nr_hw_segments;

	unsigned short ioprio;

	void *special;
	char *buffer;

	int tag;
	int errors;

	int ref_count;

	/*
	 * when request is used as a packet command carrier
	 */
	unsigned int cmd_len;
	unsigned char cmd[BLK_MAX_CDB];

	unsigned int data_len;
	unsigned int sense_len;
	void *data;
	void *sense;

	unsigned int timeout;
	int retries;

	/*
	 * completion callback.
	 */
	rq_end_io_fn *end_io;
	void *end_io_data;
};
3、请求队列(request_queue)
请求队列主要是用来连接对同一块设备的多个request请求结构。还包含块设备所支持的请求类型信息、请求的个数、段的大小、硬件扇区数等与设备相关的信息。
/*
 * The kernel organizes the request queue (request_queue) as a doubly
 * linked list that links request structures together.
 */
struct request_queue{
	/*
	 * Together with queue_head for cacheline sharing
	 */
	struct list_head queue_head;/* links to request structures: the pending requests */
	struct request *last_merge;
	elevator_t *elevator; /* pointer to the elevator (I/O scheduling) algorithm */

	/*
	 * the queue request freelist, one for reads and one for writes
	 */
	struct request_list rq;/* data structure used to allocate request descriptors */

	/* driver function that processes requests */
	request_fn_proc *request_fn;
	/* method that inserts a new request into the request queue */
	make_request_fn *make_request_fn;
	prep_rq_fn *prep_rq_fn;
	unplug_fn *unplug_fn;
	merge_bvec_fn *merge_bvec_fn;
	issue_flush_fn *issue_flush_fn;
	prepare_flush_fn *prepare_flush_fn;
	softirq_done_fn *softirq_done_fn;

	/*
	 * Dispatch queue sorting
	 */
	sector_t end_sector;
	struct request *boundary_rq;

	/*
	 * Auto-unplugging state
	 */
	struct timer_list unplug_timer;
	int unplug_thresh; /* After this many requests */
	unsigned long unplug_delay; /* After this many jiffies */
	struct work_struct unplug_work;

	struct backing_dev_info backing_dev_info;

	/*
	 * The queue owner gets to use this for whatever they like.
	 * ll_rw_blk doesn't touch it.
	 */
	void *queuedata;/* pointer to the block device driver's private data */

	/*
	 * queue needs bounce pages for pages above this limit
	 */
	unsigned long bounce_pfn;
	gfp_t bounce_gfp;

	/*
	 * various queue flags, see QUEUE_* below
	 */
	unsigned long queue_flags;

	/*
	 * protects queue structures from reentrancy. ->__queue_lock should
	 * _never_ be used directly, it is queue private. always use
	 * ->queue_lock.
	 */
	spinlock_t __queue_lock;
	spinlock_t *queue_lock;

	/*
	 * queue kobject
	 */
	struct kobject kobj;

	/*
	 * queue settings
	 */
	unsigned long nr_requests; /* Max # of requests */
	unsigned int nr_congestion_on;
	unsigned int nr_congestion_off;
	unsigned int nr_batching;

	unsigned int max_sectors;
	unsigned int max_hw_sectors;
	unsigned short max_phys_segments;
	unsigned short max_hw_segments;
	unsigned short hardsect_size;
	unsigned int max_segment_size;

	unsigned long seg_boundary_mask;
	unsigned int dma_alignment;

	struct blk_queue_tag *queue_tags;

	unsigned int nr_sorted;
	unsigned int in_flight;

	/*
	 * sg stuff
	 */
	unsigned int sg_timeout;
	unsigned int sg_reserved_size;
	int node;
#ifdef CONFIG_BLK_DEV_IO_TRACE
	struct blk_trace *blk_trace;
#endif
	/*
	 * reserved for flush operations
	 */
	unsigned int ordered, next_ordered, ordseq;
	int orderr, ordcolor;
	struct request pre_flush_rq, bar_rq, post_flush_rq;
	struct request *orig_bar_rq;
	unsigned int bi_size;

	struct mutex sysfs_lock;
};
4、总结
请求队列(request_queue)、请求结构(request)、bio等之间的关系
5、四种调度算法(电梯算法)
内核需要一种调度,使物理相邻的请求尽可能先后执行,这样就可以减少寻找扇区的时间,这种调度就叫做I/O调度。
预期算法(Anticipatory)、最后期限算法(Deadline)、CFQ完全公平队列算法(Completely Fair Queuing)、Noop无操作算法。
- Linux块设备驱动(四)————块设备的数据结构与相关操作及I/O调度器
- linux 块设备驱动(二)——块设备数据结构
- Linux块设备驱动(一)————块设备的结构及磁盘的结构
- Linux块设备驱动(二)————块设备的体系架构
- Linux块设备驱动(三)————块设备驱动程序的框架
- 块设备驱动注册和注销、加载与卸载、块设备驱动的I/O请求
- 块设备驱动注册和注销、加载与卸载、块设备驱动的I/O请求
- 块设备驱动注册和注销、加载与卸载、块设备驱动的I/O请求
- linux驱动学习--第二十四天:第十三章:Linux 块设备驱动(一):块设备的 I/O 操作特点 和 block_device_operations 结构体
- linux块设备驱动(一)——块设备概念介绍
- 块设备I/O调度程序
- 块设备I/O调度程序
- 块设备I/O调度程序
- [linux驱动]linux块设备学习笔记(四)——请求处理
- Linux块设备驱动(1)---块驱动中相关的结构体及其操作
- 【tips】flash,字符设备与块设备 I/O 操作的不同
- Linux块设备驱动(五)————通用块层
- Linux内核块设备I/O子系统
- 从0开始复习java(3)
- 小程序思维导图,让小程序不再难懂(二)
- 编程语言是部车,你选了其中哪辆?
- 【qscoj】喵哈哈村与哗啦啦村的大战(一)
- 使用ajax提交bootstrap表单数据
- Linux块设备驱动(四)————块设备的数据结构与相关操作及I/O调度器
- 小程序思维导图,让小程序不再难懂(一)
- ssh 事务小计
- 展示远程服务器文件夹内容的C++实现
- Github搭建个人网站并上传个人项目
- 一款不错的jQuery分页插件--pagination
- 表格的背景色
- 论文免费查重
- 简化代码量——写优雅代码