ext2_new_blocks解析

来源:互联网 发布:淘宝访客数公式 编辑:程序博客网 时间:2024/04/27 18:39

        前面我们在分析ext2_allocate_blocks()函数的时候说过,该函数的实现严重依赖ext2_new_blocks()。ext2_new_blocks()才是分配磁盘块的核心函数,这篇博客的目的就是分析该函数的实现。

        在分析这个函数之前,需要给出两点说明:1. 该函数不保证能分配所请求数量的磁盘块;2. 该函数保证分配的磁盘块一定连续。明白这两点对于理解该函数非常重要。

        让我们来看看这个函数的具体实现,这个函数会很长,而且与ext2预留窗口机制结合在一起,看起来可能会比较晦涩,请作好心理准备:

/*
 * ext2_new_blocks() -- core block allocation routine
 * @inode:	file the blocks are allocated for
 * @goal:	filesystem-wide block number the caller would like to
 *		allocate at/near; an out-of-range value is replaced by the
 *		first data block
 * @count:	in: number of blocks requested;
 *		out (on success): number of blocks actually allocated
 * @errp:	receives 0 on success, otherwise a negative errno
 *		(-ENOSPC, -EIO, or the quota error)
 *
 * Charges quota for *count blocks up front, then tries the block group that
 * contains @goal first and afterwards walks all remaining groups in a
 * circular fashion.  If nothing could be allocated while a reservation
 * window was in use, the whole search is repeated once without reservation.
 *
 * Returns the filesystem-wide number of the first allocated block, or 0 on
 * failure.  Fewer blocks than requested may be returned; the surplus quota
 * charge is given back before returning.
 */
ext2_fsblk_t ext2_new_blocks(struct inode *inode, ext2_fsblk_t goal,
		    unsigned long *count, int *errp)
{
	struct buffer_head *bitmap_bh = NULL;
	struct buffer_head *gdp_bh;
	int group_no;
	int goal_group;
	ext2_grpblk_t grp_target_blk;	/* blockgroup relative goal block */
	ext2_grpblk_t grp_alloc_blk;	/* blockgroup-relative allocated block */
	ext2_fsblk_t ret_block;		/* filesystem-wide allocated block */
	int bgi;			/* blockgroup iteration index */
	int performed_allocation = 0;
	ext2_grpblk_t free_blocks;	/* number of free blocks in a group */
	struct super_block *sb;
	struct ext2_group_desc *gdp;
	struct ext2_super_block *es;
	struct ext2_sb_info *sbi;
	struct ext2_reserve_window_node *my_rsv = NULL;
	struct ext2_block_alloc_info *block_i;
	unsigned short windowsz = 0;
	unsigned long ngroups;
	unsigned long num = *count;
	int ret;

	*errp = -ENOSPC;
	sb = inode->i_sb;
	if (!sb) {
		printk("ext2_new_blocks: nonexistent device");
		return 0;
	}

	/*
	 * Check quota for allocation of this block.
	 */
	ret = dquot_alloc_block(inode, num);
	if (ret) {
		*errp = ret;
		return 0;
	}

	sbi = EXT2_SB(sb);
	es = EXT2_SB(sb)->s_es;
	ext2_debug("goal=%lu.\n", goal);
	/*
	 * Allocate a block from reservation only when
	 * filesystem is mounted with reservation(default,-o reservation), and
	 * it's a regular file, and
	 * the desired window size is greater than 0 (One could use ioctl
	 * command EXT2_IOC_SETRSVSZ to set the window size to 0 to turn off
	 * reservation on that particular file)
	 */
	block_i = EXT2_I(inode)->i_block_alloc_info;
	if (block_i) {
		windowsz = block_i->rsv_window_node.rsv_goal_size;
		if (windowsz > 0)
			my_rsv = &block_i->rsv_window_node;
	}

	if (!ext2_has_free_blocks(sbi)) {
		*errp = -ENOSPC;
		goto out;
	}

	/*
	 * First, test whether the goal block is free.
	 *
	 * What actually happens here is a sanity check of the caller-supplied
	 * goal: if it lies outside the filesystem's data area it is replaced
	 * by the first data block.
	 */
	if (goal < le32_to_cpu(es->s_first_data_block) ||
	    goal >= le32_to_cpu(es->s_blocks_count))
		goal = le32_to_cpu(es->s_first_data_block);
	group_no = (goal - le32_to_cpu(es->s_first_data_block)) /
			EXT2_BLOCKS_PER_GROUP(sb);
	goal_group = group_no;

retry_alloc:
	gdp = ext2_get_group_desc(sb, group_no, &gdp_bh);
	if (!gdp)
		goto io_error;

	free_blocks = le16_to_cpu(gdp->bg_free_blocks_count);
	/*
	 * if there is not enough free blocks to make a new reservation
	 * turn off reservation for this allocation
	 *
	 * This test decides whether the reservation mechanism is used: when
	 * the group has some free blocks but fewer than a new window would
	 * need, and the file does not own a window yet, reservation is
	 * switched off.  A file that already owns a window keeps using it.
	 */
	if (my_rsv && (free_blocks < windowsz)
		&& (free_blocks > 0)
		&& (rsv_is_empty(&my_rsv->rsv_window)))
		my_rsv = NULL;

	if (free_blocks > 0) {
		/* grp_target_blk is relative to group_no */
		grp_target_blk = ((goal - le32_to_cpu(es->s_first_data_block)) %
				EXT2_BLOCKS_PER_GROUP(sb));
		/*
		 * When we get here through "goto retry_alloc", bitmap_bh may
		 * still reference the bitmap read on the previous pass.
		 * Release it before it is overwritten, otherwise that buffer
		 * reference is leaked.  brelse(NULL) is a no-op on the first
		 * pass.
		 */
		brelse(bitmap_bh);
		bitmap_bh = read_block_bitmap(sb, group_no);
		if (!bitmap_bh)
			goto io_error;
		/* Key point: this call is the heart of the allocation. */
		grp_alloc_blk = ext2_try_to_allocate_with_rsv(sb, group_no,
					bitmap_bh, grp_target_blk,
					my_rsv, &num);
		if (grp_alloc_blk >= 0)
			goto allocated;
	}

	ngroups = EXT2_SB(sb)->s_groups_count;
	smp_rmb();

	/*
	 * Now search the rest of the groups.  We assume that
	 * group_no and gdp correctly point to the last group visited.
	 */
	for (bgi = 0; bgi < ngroups; bgi++) {
		group_no++;
		if (group_no >= ngroups)
			group_no = 0;
		gdp = ext2_get_group_desc(sb, group_no, &gdp_bh);
		if (!gdp)
			goto io_error;

		free_blocks = le16_to_cpu(gdp->bg_free_blocks_count);
		/*
		 * skip this group (and avoid loading bitmap) if there
		 * are no free blocks
		 */
		if (!free_blocks)
			continue;
		/*
		 * skip this group if the number of
		 * free blocks is less than half of the reservation
		 * window size.
		 */
		if (my_rsv && (free_blocks <= (windowsz/2)))
			continue;

		brelse(bitmap_bh);
		bitmap_bh = read_block_bitmap(sb, group_no);
		if (!bitmap_bh)
			goto io_error;
		/*
		 * try to allocate block(s) from this group, without a goal(-1).
		 */
		grp_alloc_blk = ext2_try_to_allocate_with_rsv(sb, group_no,
					bitmap_bh, -1, my_rsv, &num);
		if (grp_alloc_blk >= 0)
			goto allocated;
	}
	/*
	 * We may end up a bogus earlier ENOSPC error due to
	 * filesystem is "full" of reservations, but
	 * there maybe indeed free blocks available on disk
	 * In this case, we just forget about the reservations
	 * just do block allocation as without reservations.
	 */
	if (my_rsv) {
		my_rsv = NULL;
		windowsz = 0;
		group_no = goal_group;
		goto retry_alloc;
	}
	/* No space left on the device */
	*errp = -ENOSPC;
	goto out;

allocated:

	ext2_debug("using block group %d(%d)\n",
			group_no, gdp->bg_free_blocks_count);

	/* Convert the group-relative result into a filesystem-wide block. */
	ret_block = grp_alloc_blk + ext2_group_first_block_no(sb, group_no);

	if (in_range(le32_to_cpu(gdp->bg_block_bitmap), ret_block, num) ||
	    in_range(le32_to_cpu(gdp->bg_inode_bitmap), ret_block, num) ||
	    in_range(ret_block, le32_to_cpu(gdp->bg_inode_table),
		      EXT2_SB(sb)->s_itb_per_group) ||
	    in_range(ret_block + num - 1, le32_to_cpu(gdp->bg_inode_table),
		      EXT2_SB(sb)->s_itb_per_group)) {
		ext2_error(sb, "ext2_new_blocks",
			    "Allocating block in system zone - "
			    "blocks from "E2FSBLK", length %lu",
			    ret_block, num);
		/*
		 * ext2_try_to_allocate marked the blocks we allocated as in
		 * use.  So we may want to selectively mark some of the blocks
		 * as free
		 *
		 * num was overwritten with the length of the rejected run;
		 * restore the caller's request so the retry does not ask for
		 * fewer blocks than originally wanted.
		 */
		num = *count;
		goto retry_alloc;
	}

	/* From here on the quota charge must not be undone at "out". */
	performed_allocation = 1;

	if (ret_block + num - 1 >= le32_to_cpu(es->s_blocks_count)) {
		ext2_error(sb, "ext2_new_blocks",
			    "block("E2FSBLK") >= blocks count(%d) - "
			    "block_group = %d, es == %p ", ret_block,
			le32_to_cpu(es->s_blocks_count), group_no, es);
		goto out;
	}

	group_adjust_blocks(sb, group_no, gdp, gdp_bh, -num);
	percpu_counter_sub(&sbi->s_freeblocks_counter, num);

	mark_buffer_dirty(bitmap_bh);
	if (sb->s_flags & MS_SYNCHRONOUS)
		sync_dirty_buffer(bitmap_bh);

	*errp = 0;
	brelse(bitmap_bh);
	/* Give back the quota charged for blocks we did not get. */
	dquot_free_block_nodirty(inode, *count-num);
	mark_inode_dirty(inode);
	/* Report how many blocks were actually allocated. */
	*count = num;
	return ret_block;

io_error:
	*errp = -EIO;
out:
	/*
	 * Undo the block allocation
	 */
	if (!performed_allocation) {
		dquot_free_block_nodirty(inode, *count);
		mark_inode_dirty(inode);
	}
	brelse(bitmap_bh);
	return 0;
}
这个函数真TMD的长,照例,让我们先看看该函数的参数以及返回值情况:

  1. inode:代表该需要分配磁盘块的文件;
  2. goal:代表调用者建议最好从哪开始分配,以达到文件数据块的最佳连续性;
  3. count:调用者传入的欲分配的磁盘块数量,同时这个值也作为返回值告知调用者实际分配的磁盘块数量;
  4. errp:用于返回该函数执行过程中可能产生的错误码。
函数的返回值为分配到的起始物理磁盘块号,*count则记录了本次共分配了多少个物理磁盘块。
接下来我们就来研究下该函数的具体实现原理。
        要来分析这个函数真是痛苦,如果不掺和ext2的预留窗口机制倒也还好,但现在预留窗口也杂糅其中,给分析和理解带来了不少的困难。其实我的本意是将ext2的预留窗口在另一篇博客中作较为详尽的分析。不管这么多了,既来之,则安之吧,所以各位,如果你想更好地理解下面的内容,建议你先阅读ext2预留窗口的相关内容吧。
        我们知道,ext2文件系统将磁盘划分成大小相等的块组,每个块组有inode区,位图区以及数据块区,这样做的目的是尽量将相关文件(如相同目录下的)和文件的数据块存放在一起,以提升文件IO效率。
        其实说起来,这个函数的想法比较简单,那就是优先从建议块所在的块组分配磁盘块,如果这个块组中无法分配到我们想要的磁盘块,那么就从该块组的下一个块组开始分配,采用这种循环遍历的方式来分配,直到在某个块组中我们如愿分配到磁盘块,或者遍历一圈所有的块组下来,我们发现还是无法分配到我们想要的数量的磁盘块,那么我们只能降低条件进行第二轮分配,可能第一轮中我们想要分配10个连续磁盘块,那么第二轮我们可能降低要求,只要分配到5个连续磁盘块即可。
        整体思想其实就如上面描述的那样,比较简单,但是在实现的时候杂糅了预留窗口机制,所谓的预留窗口机制,意思是:每次分配的时候,ext2文件系统并不是根据块组内的位图从数据块区查找空闲磁盘块作为分配结果,而是优先从文件预留窗口中去分配,当然,初始的时候我们得为该文件初始化一个预留窗口并分配一定数量的磁盘块给该窗口,在分配过程中我们可能还需要扩充该预留窗口。当然,你也可以通过一定的手段来禁止预留窗口机制,比如通过ioctl来设置特定文件的预留窗口大小为0,或者在挂载的时候就不设置该挂载选项,此时分配的过程就变成了最传统的通过位图查找空闲磁盘块了。
        接下来让我们老老实实地看看代码,从代码中琢磨ext2分配磁盘块的思想吧。
retry_alloc:gdp = ext2_get_group_desc(sb, group_no, &gdp_bh);if (!gdp)goto io_error;free_blocks = le16_to_cpu(gdp->bg_free_blocks_count);/* * if there is not enough free blocks to make a new resevation * turn off reservation for this allocation *//* 这个判断决定是否使用预分配机制:** 如果当前的块组中空闲块数量不足以分配一个新的预分配窗口** 并且该文件的预分配窗口尚未分配(如果之前已经有预分配窗口** 那么还得继续使用之前的预留分配窗口)** 那么就关闭预分配机制*/if (my_rsv && (free_blocks < windowsz)&& (free_blocks > 0)&& (rsv_is_empty(&my_rsv->rsv_window)))my_rsv = NULL;if (free_blocks > 0) {//@grp_target_blk is relative to @group_nogrp_target_blk = ((goal - le32_to_cpu(es->s_first_data_block)) %EXT2_BLOCKS_PER_GROUP(sb));bitmap_bh = read_block_bitmap(sb, group_no);if (!bitmap_bh)goto io_error;//key point//这个函数是分配的核心grp_alloc_blk = ext2_try_to_allocate_with_rsv(sb, group_no,bitmap_bh, grp_target_blk,my_rsv, &num);if (grp_alloc_blk >= 0)goto allocated;}
 上面的这段代码,就是优先从goal所在的块组中分配磁盘块,首先计算该块组的空闲磁盘块数量,接下来有一个很重要的判断:如果该文件的预分配窗口为空(即还未使用预留窗口)并且当前空闲磁盘块数量少于预留窗口的大小,那么我们对该文件在这个块组中分配就不使用预留窗口(my_rsv = NULL),因为用了也白用,该块组没预留窗口需要的那么多的空闲磁盘块。接下来就是从该块组中去分配磁盘块,当然,这里的分配可能是从文件的预留窗口中分配或者从块组中直接分配,这个由函数ext2_try_to_allocate_with_rsv()来决定。
        如果上面分配成功了(无论分配了多少,是从预留窗口中分配还是块组中直接分配),那么无话可说,我们直接返回即可(goto allocated),但我这里有个疑问:只要块组的free_blocks > 0,这次分配岂不是一定会成功么?其实不然,假如该文件需要在块组中分配预留窗口的话,这个过程可能会失败,此时怎么办,直接返回在该块组分配失败么还是作其他处理?这个问题有待确认。
        无论怎么说,假如在优先块组中分配失败,那我们接下来的策略就是从该块组后面的块组开始,循环遍历每个块组,在该块组中尝试分配,这就是接下来这个for循环所要做的事情:
 /* Now search the rest of the groups.  We assume that * group_no and gdp correctly point to the last group visited. */for (bgi = 0; bgi < ngroups; bgi++) {group_no++;if (group_no >= ngroups)group_no = 0;gdp = ext2_get_group_desc(sb, group_no, &gdp_bh);if (!gdp)goto io_error;free_blocks = le16_to_cpu(gdp->bg_free_blocks_count);/* * skip this group (and avoid loading bitmap) if there * are no free blocks */if (!free_blocks)continue;/* * skip this group if the number of * free blocks is less than half of the reservation * window size. */if (my_rsv && (free_blocks <= (windowsz/2)))continue;brelse(bitmap_bh);bitmap_bh = read_block_bitmap(sb, group_no);if (!bitmap_bh)goto io_error;/* * try to allocate block(s) from this group, without a goal(-1). */grp_alloc_blk = ext2_try_to_allocate_with_rsv(sb, group_no,bitmap_bh, -1, my_rsv, &num);if (grp_alloc_blk >= 0)goto allocated;}/*
这段代码写的还是很清楚的,相信应该不用我作深入分析了,其分配过程基本上和前面一模一样。
        接下来程序走到这里,表示分配还没有成功,那看来我们就要降低标准了,前面我们在分配的时候文件的预留窗口有一定的大小,前面一圈都分配失败很可能就是因为每个块组中恰好都没有预留窗口所需的那么多的连续的磁盘块,因此,我们只能降低标准,将文件预留窗口大小设置为0,告诉分配者,我现在不要求什么预留窗口了,你有多少连续的就分配多少连续的吧。
if (my_rsv) {/* 降低标准无下限*/my_rsv = NULL;windowsz = 0;group_no = goal_group;goto retry_alloc;}
降低标准后再进行一轮的重试,如果还是分配不到哪怕一个磁盘块,那只能意味着真的没磁盘空间了,只能返回失败。
/* No space left on the device */*errp = -ENOSPC;goto out;
如果分配成功了,那真是一件值得开心的事情,我们需要把这个好消息告诉给调用者,
allocated:ext2_debug("using block group %d(%d)\n",group_no, gdp->bg_free_blocks_count);/* 磁盘块的起始块号计算: 分配成功的起始块号是相对块组而言的** 我们需要将其转化为相对文件系统起始块而言*/ret_block = grp_alloc_blk + ext2_group_first_block_no(sb, group_no);if (in_range(le32_to_cpu(gdp->bg_block_bitmap), ret_block, num) ||    in_range(le32_to_cpu(gdp->bg_inode_bitmap), ret_block, num) ||    in_range(ret_block, le32_to_cpu(gdp->bg_inode_table),      EXT2_SB(sb)->s_itb_per_group) ||    in_range(ret_block + num - 1, le32_to_cpu(gdp->bg_inode_table),      EXT2_SB(sb)->s_itb_per_group)) {ext2_error(sb, "ext2_new_blocks",    "Allocating block in system zone - "    "blocks from "E2FSBLK", length %lu",    ret_block, num);/* * ext2_try_to_allocate marked the blocks we allocated as in * use.  So we may want to selectively mark some of the blocks * as free */goto retry_alloc;}performed_allocation = 1;if (ret_block + num - 1 >= le32_to_cpu(es->s_blocks_count)) {ext2_error(sb, "ext2_new_blocks",    "block("E2FSBLK") >= blocks count(%d) - "    "block_group = %d, es == %p ", ret_block,le32_to_cpu(es->s_blocks_count), group_no, es);goto out;}group_adjust_blocks(sb, group_no, gdp, gdp_bh, -num);percpu_counter_sub(&sbi->s_freeblocks_counter, num);mark_buffer_dirty(bitmap_bh);if (sb->s_flags & MS_SYNCHRONOUS)sync_dirty_buffer(bitmap_bh);*errp = 0;brelse(bitmap_bh);dquot_free_block_nodirty(inode, *count-num);mark_inode_dirty(inode);/* 本次分配了num个空闲磁盘块*/*count = num;return ret_block;
分配成功的逻辑也很简单,无非就是设置下返回值,告诉调用者分配到的磁盘块从哪开始,有多少,如此而已,不用详述。
        因此,纵观这个分配函数,它也只是ext2_try_to_allocate_with_rsv()的复杂封装而已,所以,要深入理解分配过程,必须得啃下这个硬骨头,这也是我们下一篇博客所要征服的大山。