EXT2 file system ialloc.c with my comments

来源:互联网 发布:上瘾网络剧韩国 编辑:程序博客网 时间:2024/05/22 15:18
/*
 *  linux/fs/ext2/ialloc.c
 *
 * Copyright (C) 1992, 1993, 1994, 1995
 * Remy Card (card@masi.ibp.fr)
 * Laboratoire MASI - Institut Blaise Pascal
 * Universite Pierre et Marie Curie (Paris VI)
 *
 *  BSD ufs-inspired inode and directory allocation by
 *  Stephen Tweedie (sct@dcs.ed.ac.uk), 1993
 *  Big-endian to little-endian byte-swapping/bitmaps by
 *        David S. Miller (davem@caip.rutgers.edu), 1995
 */

#include <linux/quotaops.h>
#include <linux/sched.h>
#include <linux/backing-dev.h>
#include <linux/buffer_head.h>
#include <linux/random.h>
#include "ext2.h"
#include "xattr.h"
#include "acl.h"

/*
 * ialloc.c contains the inodes allocation and deallocation routines
 */

/*
 * The free inodes are managed by bitmaps.  A file system contains several
 * blocks groups.  Each group contains 1 bitmap block for blocks, 1 bitmap
 * block for inodes, N blocks for the inode table and data blocks.
 *
 * The file system contains group descriptors which are located after the
 * super block.  Each descriptor contains the number of the bitmap block and
 * the free blocks count in the block.
 */


/*
 * Read the inode allocation bitmap block of @block_group.
 *
 * The block group has already been chosen by the caller; this function only
 * looks up the group descriptor and reads the block it names in
 * bg_inode_bitmap.
 *
 * Returns the buffer_head holding the bitmap, or NULL when the group
 * descriptor cannot be obtained or the block cannot be read (the read
 * failure is reported through ext2_error()).  Callers in this file drop the
 * returned buffer with brelse().
 */
static struct buffer_head *
read_inode_bitmap(struct super_block * sb, unsigned long block_group)
{
	struct ext2_group_desc *desc;
	struct buffer_head *bh = NULL;

	desc = ext2_get_group_desc(sb, block_group, NULL);
	if (!desc)
		goto error_out;

	/* Buffer head of the block that stores this group's inode bitmap. */
	bh = sb_bread(sb, le32_to_cpu(desc->bg_inode_bitmap));
	if (!bh)
		ext2_error(sb, "read_inode_bitmap",
			    "Cannot read inode bitmap - "
			    "block_group = %lu, inode_bitmap = %u",
			    block_group, le32_to_cpu(desc->bg_inode_bitmap));
error_out:
	return bh;
}

/*
 * Account for one inode that has just been released in @group.
 *
 * The caller has already cleared the inode's bit in the bitmap; this
 * function only fixes up the counters: the group's free-inode count, the
 * filesystem-wide free-inode counter and, when @dir is non-zero, the
 * per-group and filesystem-wide directory counts.
 */
static void ext2_release_inode(struct super_block *sb, int group, int dir)
{
	struct ext2_group_desc * desc;
	struct buffer_head *bh;

	desc = ext2_get_group_desc(sb, group, &bh);
	if (!desc) {
		ext2_error(sb, "ext2_release_inode",
			"can't get descriptor for group %d", group);
		return;
	}

	/* The on-disk descriptor fields are protected by the group lock. */
	spin_lock(sb_bgl_lock(EXT2_SB(sb), group));
	le16_add_cpu(&desc->bg_free_inodes_count, 1);
	if (dir)
		le16_add_cpu(&desc->bg_used_dirs_count, -1);
	spin_unlock(sb_bgl_lock(EXT2_SB(sb), group));

	/*
	 * ext2_new_inode() decrements s_freeinodes_counter on allocation, so
	 * it has to be incremented again here; otherwise the counter only
	 * ever shrinks and find_group_orlov()'s averages drift away from the
	 * descriptors.
	 */
	percpu_counter_inc(&EXT2_SB(sb)->s_freeinodes_counter);
	if (dir)
		percpu_counter_dec(&EXT2_SB(sb)->s_dirs_counter);
	sb->s_dirt = 1;
	/* The descriptor block was modified above: queue it for write-back. */
	mark_buffer_dirty(bh);
}

/*
 * NOTE! When we get the inode, we're the only people
 * that have access to it, and as such there are no
 * race conditions we have to worry about. The inode
 * is not on the hash-lists, and it cannot be reached
 * through the filesystem because the directory entry
 * has been deleted earlier.
 *
 * HOWEVER: we must make sure that we get no aliases,
 * which means that we have to call "clear_inode()"
 * _before_ we mark the inode not in use in the inode
 * bitmaps. Otherwise a newly created file might use
 * the same inode number (not actually the same pointer
 * though), and then we'd have two inodes sharing the
 * same inode number and space on the harddisk.
 */
void ext2_free_inode (struct inode * inode)
{
	struct super_block * sb = inode->i_sb;
	int is_directory;
	unsigned long ino;
	struct buffer_head *bitmap_bh;
	unsigned long block_group;
	unsigned long bit;
	struct ext2_super_block * es;

	ino = inode->i_ino;
	ext2_debug ("freeing inode %lu\n", ino);

	/*
	 * Note: we must free any quota before locking the superblock,
	 * as writing the quota to disk may need the lock as well.
	 */
	/* Quota is already initialized in iput() */
	ext2_xattr_delete_inode(inode);
	dquot_free_inode(inode);
	dquot_drop(inode);

	es = EXT2_SB(sb)->s_es;
	is_directory = S_ISDIR(inode->i_mode);

	if (ino < EXT2_FIRST_INO(sb) ||
	    ino > le32_to_cpu(es->s_inodes_count)) {
		ext2_error (sb, "ext2_free_inode",
			    "reserved or nonexistent inode %lu", ino);
		return;
	}

	/* Inode numbers start at 1: derive the group and the bit inside it. */
	block_group = (ino - 1) / EXT2_INODES_PER_GROUP(sb);
	bit = (ino - 1) % EXT2_INODES_PER_GROUP(sb);
	bitmap_bh = read_inode_bitmap(sb, block_group);
	if (!bitmap_bh)
		return;

	/* Ok, now we can actually update the inode bitmaps.. */
	if (!ext2_clear_bit_atomic(sb_bgl_lock(EXT2_SB(sb), block_group),
				bit, (void *) bitmap_bh->b_data))
		ext2_error (sb, "ext2_free_inode",
			      "bit already cleared for inode %lu", ino);
	else
		/* The bit is clear now; bring the counters in line with it. */
		ext2_release_inode(sb, block_group, is_directory);
	mark_buffer_dirty(bitmap_bh);
	/* On a synchronously mounted filesystem write the bitmap out now. */
	if (sb->s_flags & MS_SYNCHRONOUS)
		sync_dirty_buffer(bitmap_bh);

	brelse(bitmap_bh);
}

/*
 * We perform asynchronous prereading of the new inode's inode block when
 * we create the inode, in the expectation that the inode will be written
 * back soon.  There are two reasons:
 *
 * - When creating a large number of files, the async prereads will be
 *   nicely merged into large reads
 * - When writing out a large number of inodes, we don't need to keep on
 *   stalling the writes while we read the inode block.
 *
 * FIXME: ext2_get_group_desc() needs to be simplified.
 */
static void ext2_preread_inode(struct inode *inode)
{
	unsigned long block_group;
	unsigned long offset;
	unsigned long block;
	struct ext2_group_desc * gdp;
	struct backing_dev_info *bdi;

	/* Skip the read-ahead entirely while the backing device is congested. */
	bdi = inode->i_mapping->backing_dev_info;
	if (bdi_read_congested(bdi))
		return;
	if (bdi_write_congested(bdi))
		return;

	block_group = (inode->i_ino - 1) / EXT2_INODES_PER_GROUP(inode->i_sb);
	gdp = ext2_get_group_desc(inode->i_sb, block_group, NULL);
	if (gdp == NULL)
		return;

	/*
	 * Figure out the offset within the block group inode table
	 */
	offset = ((inode->i_ino - 1) % EXT2_INODES_PER_GROUP(inode->i_sb)) *
				EXT2_INODE_SIZE(inode->i_sb);
	/* Block of the inode table that contains this inode's slot. */
	block = le32_to_cpu(gdp->bg_inode_table) +
				(offset >> EXT2_BLOCK_SIZE_BITS(inode->i_sb));
	sb_breadahead(inode->i_sb, block);
}

/*
 * There are two policies for allocating an inode.  If the new inode is
 * a directory, then a forward search is made for a block group with both
 * free space and a low directory-to-inode ratio; if that fails, then of
 * the groups with above-average free space, that group with the fewest
 * directories already is chosen.
 *
 * For other inodes, search forward from the parent directory's block
 * group to find a free inode.
 */

/*
 * Old-style (OLDALLOC) group selection for a new directory.
 *
 * Among the groups whose free-inode count is at least the filesystem
 * average, pick the one with the most free blocks.  @parent is not used.
 *
 * Returns the chosen group number, or -1 when no group qualifies.
 */
static int find_group_dir(struct super_block *sb, struct inode *parent)
{
	int ngroups = EXT2_SB(sb)->s_groups_count;
	/* Average number of free inodes per block group. */
	int avefreei = ext2_count_free_inodes(sb) / ngroups;
	struct ext2_group_desc *desc, *best_desc = NULL;
	int group, best_group = -1;

	for (group = 0; group < ngroups; group++) {
		desc = ext2_get_group_desc (sb, group, NULL);
		/* No descriptor or no free inode at all: skip. */
		if (!desc || !desc->bg_free_inodes_count)
			continue;
		/* Fewer free inodes than average: skip. */
		if (le16_to_cpu(desc->bg_free_inodes_count) < avefreei)
			continue;
		/* Keep the candidate with the most free blocks. */
		if (!best_desc ||
		    (le16_to_cpu(desc->bg_free_blocks_count) >
		     le16_to_cpu(best_desc->bg_free_blocks_count))) {
			best_group = group;
			best_desc = desc;
		}
	}
	if (!best_desc)
		return -1;

	return best_group;
}

/*
 * Orlov's allocator for directories.
 *
 * We always try to spread first-level directories.
 *
 * If there are blockgroups with both free inodes and free blocks counts
 * not worse than average we return one with smallest directory count.
 * Otherwise we simply return a random group.
 *
 * For the rest rules look so:
 *
 * It's OK to put directory into a group unless
 * it has too many directories already (max_dirs) or
 * it has too few free inodes left (min_inodes) or
 * it has too few free blocks left (min_blocks) or
 * it's already running too large debt (max_debt).
 * Parent's group is preferred, if it doesn't satisfy these
 * conditions we search cyclically through the rest. If none
 * of the groups look good we just look for a group with more
 * free inodes than average (starting at parent's group).
 *
 * Debt is incremented each time we allocate a directory and decremented
 * when we allocate an inode, within 0--255.
 */

#define INODE_COST 64
#define BLOCK_COST 256

/*
 * Returns the chosen group number, or -1 when no group with a free inode
 * could be found.
 */
static int find_group_orlov(struct super_block *sb, struct inode *parent)
{
	int parent_group = EXT2_I(parent)->i_block_group;
	struct ext2_sb_info *sbi = EXT2_SB(sb);
	struct ext2_super_block *es = sbi->s_es;
	int ngroups = sbi->s_groups_count;
	int inodes_per_group = EXT2_INODES_PER_GROUP(sb);
	int freei;
	int avefreei;
	int free_blocks;
	int avefreeb;
	int blocks_per_dir;
	int ndirs;
	int max_debt, max_dirs, min_blocks, min_inodes;
	int group = -1, i;
	struct ext2_group_desc *desc;

	/* Filesystem-wide totals and the per-group averages derived from them. */
	freei = percpu_counter_read_positive(&sbi->s_freeinodes_counter);
	avefreei = freei / ngroups;
	free_blocks = percpu_counter_read_positive(&sbi->s_freeblocks_counter);
	avefreeb = free_blocks / ngroups;
	ndirs = percpu_counter_read_positive(&sbi->s_dirs_counter);

	/* Parent is the root directory or is flagged as a top-level dir. */
	if ((parent == sb->s_root->d_inode) ||
	    (EXT2_I(parent)->i_flags & EXT2_TOPDIR_FL)) {
		struct ext2_group_desc *best_desc = NULL;
		int best_ndir = inodes_per_group;
		int best_group = -1;

		/* Start the scan at a random group instead of the parent's. */
		get_random_bytes(&group, sizeof(group));
		parent_group = (unsigned)group % ngroups;
		for (i = 0; i < ngroups; i++) {
			group = (parent_group + i) % ngroups;
			desc = ext2_get_group_desc (sb, group, NULL);
			if (!desc || !desc->bg_free_inodes_count)
				continue;
			/* Not fewer directories than the best candidate so far. */
			if (le16_to_cpu(desc->bg_used_dirs_count) >= best_ndir)
				continue;
			/* Below-average free inodes. */
			if (le16_to_cpu(desc->bg_free_inodes_count) < avefreei)
				continue;
			/* Below-average free blocks. */
			if (le16_to_cpu(desc->bg_free_blocks_count) < avefreeb)
				continue;
			best_group = group;
			best_ndir = le16_to_cpu(desc->bg_used_dirs_count);
			best_desc = desc;
		}
		if (best_group >= 0) {
			desc = best_desc;
			group = best_group;
			goto found;
		}
		goto fallback;
	}

	if (ndirs == 0)
		ndirs = 1;	/* percpu_counters are approximate... */

	/* Average number of used blocks per directory. */
	blocks_per_dir = (le32_to_cpu(es->s_blocks_count)-free_blocks) / ndirs;

	max_dirs = ndirs / ngroups + inodes_per_group / 16;
	min_inodes = avefreei - inodes_per_group / 4;
	min_blocks = avefreeb - EXT2_BLOCKS_PER_GROUP(sb) / 4;

	max_debt = EXT2_BLOCKS_PER_GROUP(sb) / max(blocks_per_dir, BLOCK_COST);
	if (max_debt * INODE_COST > inodes_per_group)
		max_debt = inodes_per_group / INODE_COST;
	if (max_debt > 255)
		max_debt = 255;
	if (max_debt == 0)
		max_debt = 1;

	/* Cyclic scan from the parent's group; first acceptable group wins. */
	for (i = 0; i < ngroups; i++) {
		group = (parent_group + i) % ngroups;
		desc = ext2_get_group_desc (sb, group, NULL);
		if (!desc || !desc->bg_free_inodes_count)
			continue;
		if (sbi->s_debts[group] >= max_debt)
			continue;
		if (le16_to_cpu(desc->bg_used_dirs_count) >= max_dirs)
			continue;
		if (le16_to_cpu(desc->bg_free_inodes_count) < min_inodes)
			continue;
		if (le16_to_cpu(desc->bg_free_blocks_count) < min_blocks)
			continue;
		goto found;
	}

fallback:
	/* Nothing looked good: accept any group with >= avefreei free inodes. */
	for (i = 0; i < ngroups; i++) {
		group = (parent_group + i) % ngroups;
		desc = ext2_get_group_desc (sb, group, NULL);
		if (!desc || !desc->bg_free_inodes_count)
			continue;
		if (le16_to_cpu(desc->bg_free_inodes_count) >= avefreei)
			goto found;
	}

	if (avefreei) {
		/*
		 * The free-inodes counter is approximate, and for really small
		 * filesystems the above test can fail to find any blockgroups
		 */
		avefreei = 0;
		goto fallback;
	}

	return -1;

found:
	return group;
}

/*
 * Group selection for a new non-directory inode.
 *
 * Tries the parent's group first, then a quadratic probe seeded with the
 * parent's inode number, then a linear scan that accepts a group even if
 * it has no free blocks.
 *
 * Returns the chosen group number, or -1 when no group has a free inode.
 */
static int find_group_other(struct super_block *sb, struct inode *parent)
{
	int parent_group = EXT2_I(parent)->i_block_group;
	int ngroups = EXT2_SB(sb)->s_groups_count;
	struct ext2_group_desc *desc;
	int group, i;

	/*
	 * Try to place the inode in its parent directory
	 */
	group = parent_group;
	desc = ext2_get_group_desc (sb, group, NULL);
	/* The parent's group still has both a free inode and a free block. */
	if (desc && le16_to_cpu(desc->bg_free_inodes_count) &&
			le16_to_cpu(desc->bg_free_blocks_count))
		goto found;

	/*
	 * We're going to place this inode in a different blockgroup from its
	 * parent.  We want to cause files in a common directory to all land in
	 * the same blockgroup.  But we want files which are in a different
	 * directory which shares a blockgroup with our parent to land in a
	 * different blockgroup.
	 *
	 * So add our directory's i_ino into the starting point for the hash.
	 */
	group = (group + parent->i_ino) % ngroups;

	/*
	 * Use a quadratic hash to find a group with a free inode and some
	 * free blocks.
	 */
	for (i = 1; i < ngroups; i <<= 1) {
		group += i;
		if (group >= ngroups)
			group -= ngroups;
		desc = ext2_get_group_desc (sb, group, NULL);
		if (desc && le16_to_cpu(desc->bg_free_inodes_count) &&
				le16_to_cpu(desc->bg_free_blocks_count))
			goto found;
	}

	/*
	 * That failed: try linear search for a free inode, even if that group
	 * has no free blocks.
	 */
	group = parent_group;
	for (i = 0; i < ngroups; i++) {
		if (++group >= ngroups)
			group = 0;
		desc = ext2_get_group_desc (sb, group, NULL);
		if (desc && le16_to_cpu(desc->bg_free_inodes_count))
			goto found;
	}

	return -1;

found:
	return group;
}

/*
 * Allocate and initialise a new inode under directory @dir.
 *
 * @dir:  parent directory; supplies the superblock, the starting block
 *        group for the search and the inherited flags/ownership.
 * @mode: file mode of the new inode; S_ISDIR(mode) selects the directory
 *        allocation policy and the directory accounting.
 * @qstr: passed through to ext2_init_security().
 *
 * Returns the new inode, or an ERR_PTR(): -ENOMEM if new_inode() fails,
 * -ENOSPC if no free inode is found, -EIO if a bitmap cannot be read or
 * the resulting inode number is out of range, -EINVAL if
 * insert_inode_locked() fails, or the error returned by the quota, ACL or
 * security initialisation.
 */
struct inode *ext2_new_inode(struct inode *dir, int mode,
			     const struct qstr *qstr)
{
	struct super_block *sb;
	struct buffer_head *bitmap_bh = NULL;
	struct buffer_head *bh2;
	int group, i;
	ino_t ino = 0;
	struct inode * inode;
	struct ext2_group_desc *gdp;
	struct ext2_super_block *es;
	struct ext2_inode_info *ei;
	struct ext2_sb_info *sbi;
	int err;

	sb = dir->i_sb;
	inode = new_inode(sb);
	if (!inode)
		return ERR_PTR(-ENOMEM);

	ei = EXT2_I(inode);
	sbi = EXT2_SB(sb);
	es = sbi->s_es;

	/* Pick the block group according to the inode type and mount option. */
	if (S_ISDIR(mode)) {
		if (test_opt(sb, OLDALLOC))
			group = find_group_dir(sb, dir);
		else
			group = find_group_orlov(sb, dir);
	} else
		group = find_group_other(sb, dir);

	if (group == -1) {
		err = -ENOSPC;
		goto fail;
	}

	/*
	 * Starting at the chosen group, visit every group at most once until
	 * a bit in an inode bitmap can be claimed.
	 */
	for (i = 0; i < sbi->s_groups_count; i++) {
		gdp = ext2_get_group_desc(sb, group, &bh2);
		if (!gdp) {
			/*
			 * gdp and bh2 are used after "got:", so a group
			 * without a descriptor must not be selected.
			 */
			if (++group == sbi->s_groups_count)
				group = 0;
			continue;
		}
		/* Drop the previous group's bitmap (brelse(NULL) on first pass). */
		brelse(bitmap_bh);
		bitmap_bh = read_inode_bitmap(sb, group);
		if (!bitmap_bh) {
			err = -EIO;
			goto fail;
		}
		ino = 0;

repeat_in_this_group:
		/* First zero bit at or after position "ino" in this bitmap. */
		ino = ext2_find_next_zero_bit((unsigned long *)bitmap_bh->b_data,
					      EXT2_INODES_PER_GROUP(sb), ino);
		if (ino >= EXT2_INODES_PER_GROUP(sb)) {
			/*
			 * Rare race: find_group_xx() decided that there were
			 * free inodes in this group, but by the time we tried
			 * to allocate one, they're all gone.  This can also
			 * occur because the counters which find_group_orlov()
			 * uses are approximate.  So just go and search the
			 * next block group.
			 */
			if (++group == sbi->s_groups_count)
				group = 0;
			continue;
		}
		/* Non-zero return: the bit was already set by someone else. */
		if (ext2_set_bit_atomic(sb_bgl_lock(sbi, group),
						ino, bitmap_bh->b_data)) {
			/* we lost this inode */
			if (++ino >= EXT2_INODES_PER_GROUP(sb)) {
				/* this group is exhausted, try next group */
				if (++group == sbi->s_groups_count)
					group = 0;
				continue;
			}
			/* try to find free inode in the same group */
			goto repeat_in_this_group;
		}
		goto got;
	}

	/*
	 * Scanned all blockgroups.
	 */
	/* Release the last bitmap read by the loop before bailing out. */
	brelse(bitmap_bh);
	err = -ENOSPC;
	goto fail;
got:
	mark_buffer_dirty(bitmap_bh);
	/* On a synchronously mounted filesystem write the bitmap out now. */
	if (sb->s_flags & MS_SYNCHRONOUS)
		sync_dirty_buffer(bitmap_bh);
	brelse(bitmap_bh);

	/* Convert the bit index within the group into a global inode number. */
	ino += group * EXT2_INODES_PER_GROUP(sb) + 1;
	if (ino < EXT2_FIRST_INO(sb) || ino > le32_to_cpu(es->s_inodes_count)) {
		ext2_error (sb, "ext2_new_inode",
			    "reserved inode or inode > inodes count - "
			    "block_group = %d,inode=%lu", group,
			    (unsigned long) ino);
		err = -EIO;
		goto fail;
	}

	/* One free inode fewer filesystem-wide; one more directory if S_ISDIR. */
	percpu_counter_add(&sbi->s_freeinodes_counter, -1);
	if (S_ISDIR(mode))
		percpu_counter_inc(&sbi->s_dirs_counter);

	/* Per-group descriptor fields and debts are updated under the group lock. */
	spin_lock(sb_bgl_lock(sbi, group));
	le16_add_cpu(&gdp->bg_free_inodes_count, -1);
	if (S_ISDIR(mode)) {
		/* A directory raises the group's debt (capped at 255)... */
		if (sbi->s_debts[group] < 255)
			sbi->s_debts[group]++;
		le16_add_cpu(&gdp->bg_used_dirs_count, 1);
	} else {
		/* ...any other inode lowers it (floored at 0). */
		if (sbi->s_debts[group])
			sbi->s_debts[group]--;
	}
	spin_unlock(sb_bgl_lock(sbi, group));

	sb->s_dirt = 1;
	mark_buffer_dirty(bh2);

	/* From here on: fill in the in-memory inode. */
	if (test_opt(sb, GRPID)) {
		inode->i_mode = mode;
		inode->i_uid = current_fsuid();
		inode->i_gid = dir->i_gid;
	} else
		inode_init_owner(inode, dir, mode);

	inode->i_ino = ino;
	inode->i_blocks = 0;
	inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME_SEC;
	memset(ei->i_data, 0, sizeof(ei->i_data));
	ei->i_flags =
		ext2_mask_flags(mode, EXT2_I(dir)->i_flags & EXT2_FL_INHERITED);
	ei->i_faddr = 0;
	ei->i_frag_no = 0;
	ei->i_frag_size = 0;
	ei->i_file_acl = 0;
	ei->i_dir_acl = 0;
	ei->i_dtime = 0;
	ei->i_block_alloc_info = NULL;
	ei->i_block_group = group;
	ei->i_dir_start_lookup = 0;
	ei->i_state = EXT2_STATE_NEW;
	ext2_set_inode_flags(inode);
	spin_lock(&sbi->s_next_gen_lock);
	inode->i_generation = sbi->s_next_generation++;
	spin_unlock(&sbi->s_next_gen_lock);
	if (insert_inode_locked(inode) < 0) {
		err = -EINVAL;
		goto fail_drop;
	}

	/* Set up quota for the inode and charge the new inode against it. */
	dquot_initialize(inode);
	err = dquot_alloc_inode(inode);
	if (err)
		goto fail_drop;

	err = ext2_init_acl(inode, dir);
	if (err)
		goto fail_free_drop;

	err = ext2_init_security(inode, dir, qstr);
	if (err)
		goto fail_free_drop;

	mark_inode_dirty(inode);
	ext2_debug("allocating inode %lu\n", inode->i_ino);
	ext2_preread_inode(inode);
	return inode;

fail_free_drop:
	/* Undo the quota charge made by dquot_alloc_inode(). */
	dquot_free_inode(inode);

fail_drop:
	dquot_drop(inode);
	inode->i_flags |= S_NOQUOTA;
	clear_nlink(inode);
	unlock_new_inode(inode);
	iput(inode);
	return ERR_PTR(err);

fail:
	make_bad_inode(inode);
	iput(inode);
	return ERR_PTR(err);
}

/*
 * Sum the free-inode counts stored in all group descriptors.
 *
 * With EXT2FS_DEBUG defined, additionally counts the free bits in every
 * inode bitmap and prints the stored and counted values for comparison.
 * Groups whose descriptor cannot be obtained are skipped.
 *
 * Returns the sum of the descriptors' bg_free_inodes_count fields.
 */
unsigned long ext2_count_free_inodes (struct super_block * sb)
{
	struct ext2_group_desc *desc;
	unsigned long desc_count = 0;
	int i;

#ifdef EXT2FS_DEBUG
	struct ext2_super_block *es;
	unsigned long bitmap_count = 0;
	struct buffer_head *bitmap_bh = NULL;

	es = EXT2_SB(sb)->s_es;
	for (i = 0; i < EXT2_SB(sb)->s_groups_count; i++) {
		unsigned x;

		desc = ext2_get_group_desc (sb, i, NULL);
		if (!desc)
			continue;
		desc_count += le16_to_cpu(desc->bg_free_inodes_count);
		/* Drop the previous iteration's bitmap (NULL is accepted). */
		brelse(bitmap_bh);
		bitmap_bh = read_inode_bitmap(sb, i);
		if (!bitmap_bh)
			continue;

		x = ext2_count_free(bitmap_bh, EXT2_INODES_PER_GROUP(sb) / 8);
		printk("group %d: stored = %d, counted = %u\n",
			i, le16_to_cpu(desc->bg_free_inodes_count), x);
		bitmap_count += x;
	}
	brelse(bitmap_bh);
	/* Cast so the counter value matches the %lu conversion. */
	printk("ext2_count_free_inodes: stored = %lu, computed = %lu, %lu\n",
		(unsigned long)
		percpu_counter_read(&EXT2_SB(sb)->s_freeinodes_counter),
		desc_count, bitmap_count);
	return desc_count;
#else
	/* Non-debug build: only the descriptor counts are summed. */
	for (i = 0; i < EXT2_SB(sb)->s_groups_count; i++) {
		desc = ext2_get_group_desc (sb, i, NULL);
		if (!desc)
			continue;
		desc_count += le16_to_cpu(desc->bg_free_inodes_count);
	}
	return desc_count;
#endif
}

/* Called at mount-time, super-block is locked */
/*
 * Sum the used-directory counts stored in all group descriptors.  Groups
 * whose descriptor cannot be obtained are skipped.
 */
unsigned long ext2_count_dirs (struct super_block * sb)
{
	unsigned long count = 0;
	int i;

	for (i = 0; i < EXT2_SB(sb)->s_groups_count; i++) {
		struct ext2_group_desc *gdp = ext2_get_group_desc (sb, i, NULL);
		if (!gdp)
			continue;
		count += le16_to_cpu(gdp->bg_used_dirs_count);
	}
	return count;
}

0 0
原创粉丝点击