crush 核心函数 crush_do_rule

来源:互联网 发布:社会网络的同义词 编辑:程序博客网 时间:2024/06/06 02:30

crush 核心函数

crush_do_rule
位置：crush/mapper.c

重要过程

  • crush_do_rule: execute the steps of a crush rule one after another
  • crush_choose_firstn: choose buckets or devices of specified type recursively
  • crush_bucket_choose: directly choose one child item of the input bucket

步骤操作标志

/*
 * Opcodes for the individual steps of a crush rule.
 *
 * The numeric values are fixed; note that 5 is not assigned here.
 */
enum {
    CRUSH_RULE_NOOP = 0,

    /* arg1: the value to start with */
    CRUSH_RULE_TAKE = 1,

    /* arg1: number of items to pick, arg2: type */
    CRUSH_RULE_CHOOSE_FIRSTN = 2,

    /* same arguments as CRUSH_RULE_CHOOSE_FIRSTN */
    CRUSH_RULE_CHOOSE_INDEP = 3,

    /* takes no arguments */
    CRUSH_RULE_EMIT = 4,

    CRUSH_RULE_CHOOSELEAF_FIRSTN = 6,
    CRUSH_RULE_CHOOSELEAF_INDEP = 7,

    /* overrides choose_total_tries */
    CRUSH_RULE_SET_CHOOSE_TRIES = 8,

    /* overrides chooseleaf_descend_once */
    CRUSH_RULE_SET_CHOOSELEAF_TRIES = 9,

    CRUSH_RULE_SET_CHOOSE_LOCAL_TRIES = 10,
    CRUSH_RULE_SET_CHOOSE_LOCAL_FALLBACK_TRIES = 11,
    CRUSH_RULE_SET_CHOOSELEAF_VARY_R = 12,
    CRUSH_RULE_SET_CHOOSELEAF_STABLE = 13
};

最核心的函数

/** * crush_do_rule - calculate a mapping with the given input and rule * @map: the crush_map * @ruleno: the rule id * @x: hash input * @result: pointer to result vector * @result_max: maximum result size * @weight: weight vector (for map leaves) * @weight_max: size of weight vector * @scratch: scratch vector for private use; must be >= 3 * result_max */int crush_do_rule(const struct crush_map *map,          int ruleno, int x, int *result, int result_max,          const __u32 *weight, int weight_max,          int *scratch){    int result_len;    int *a = scratch;    int *b = scratch + result_max;    int *c = scratch + result_max*2;    int recurse_to_leaf;    int *w;    int wsize = 0;    int *o;    int osize;    int *tmp;    struct crush_rule *rule;    __u32 step;    int i, j;    int numrep;    int out_size;    /*     * the original choose_total_tries value was off by one (it     * counted "retries" and not "tries").  add one.     */    int choose_tries = map->choose_total_tries + 1;    int choose_leaf_tries = 0;    /*     * the local tries values were counted as "retries", though,     * and need no adjustment     */    int choose_local_retries = map->choose_local_tries;    int choose_local_fallback_retries = map->choose_local_fallback_tries;    int vary_r = map->chooseleaf_vary_r;    int stable = map->chooseleaf_stable;    if ((__u32)ruleno >= map->max_rules) {        dprintk(" bad ruleno %d\n", ruleno);        return 0;    }    rule = map->rules[ruleno];    result_len = 0;    w = a;    o = b;    for (step = 0; step < rule->len; step++) {        int firstn = 0;        struct crush_rule_step *curstep = &rule->steps[step];        switch (curstep->op) {        case CRUSH_RULE_TAKE:            if ((curstep->arg1 >= 0 &&                 curstep->arg1 < map->max_devices) ||                (-1-curstep->arg1 >= 0 &&                 -1-curstep->arg1 < map->max_buckets &&                 map->buckets[-1-curstep->arg1])) {                w[0] = curstep->arg1;                
wsize = 1;            } else {                dprintk(" bad take value %d\n", curstep->arg1);            }            break;        case CRUSH_RULE_SET_CHOOSE_TRIES:            if (curstep->arg1 > 0)                choose_tries = curstep->arg1;            break;        case CRUSH_RULE_SET_CHOOSELEAF_TRIES:            if (curstep->arg1 > 0)                choose_leaf_tries = curstep->arg1;            break;        case CRUSH_RULE_SET_CHOOSE_LOCAL_TRIES:            if (curstep->arg1 >= 0)                choose_local_retries = curstep->arg1;            break;        case CRUSH_RULE_SET_CHOOSE_LOCAL_FALLBACK_TRIES:            if (curstep->arg1 >= 0)                choose_local_fallback_retries = curstep->arg1;            break;        case CRUSH_RULE_SET_CHOOSELEAF_VARY_R:            if (curstep->arg1 >= 0)                vary_r = curstep->arg1;            break;        case CRUSH_RULE_SET_CHOOSELEAF_STABLE:            if (curstep->arg1 >= 0)                stable = curstep->arg1;            break;        case CRUSH_RULE_CHOOSELEAF_FIRSTN:        case CRUSH_RULE_CHOOSE_FIRSTN:            firstn = 1;            /* fall through */        case CRUSH_RULE_CHOOSELEAF_INDEP:        case CRUSH_RULE_CHOOSE_INDEP:            if (wsize == 0)                break;            recurse_to_leaf =                curstep->op ==                 CRUSH_RULE_CHOOSELEAF_FIRSTN ||                curstep->op ==                CRUSH_RULE_CHOOSELEAF_INDEP;            /* reset output */            osize = 0;            for (i = 0; i < wsize; i++) {                int bno;                /*                 * see CRUSH_N, CRUSH_N_MINUS macros.                 
* basically, numrep <= 0 means relative to                 * the provided result_max                 */                numrep = curstep->arg1;                if (numrep <= 0) {                    numrep += result_max;                    if (numrep <= 0)                        continue;                }                j = 0;                /* make sure bucket id is valid */                bno = -1 - w[i];                if (bno < 0 || bno >= map->max_buckets) {                    // w[i] is probably CRUSH_ITEM_NONE                    dprintk("  bad w[i] %d\n", w[i]);                    continue;                }                if (firstn) {                    int recurse_tries;                    if (choose_leaf_tries)                        recurse_tries =                            choose_leaf_tries;                    else if (map->chooseleaf_descend_once)                        recurse_tries = 1;                    else                        recurse_tries = choose_tries;                    osize += crush_choose_firstn(                        map,                        map->buckets[bno],                        weight, weight_max,                        x, numrep,                        curstep->arg2,                        o+osize, j,                        result_max-osize,                        choose_tries,                        recurse_tries,                        choose_local_retries,                        choose_local_fallback_retries,                        recurse_to_leaf,                        vary_r,                        stable,                        c+osize,                        0);                } else {                    out_size = ((numrep < (result_max-osize)) ?                            
numrep : (result_max-osize));                    crush_choose_indep(                        map,                        map->buckets[bno],                        weight, weight_max,                        x, out_size, numrep,                        curstep->arg2,                        o+osize, j,                        choose_tries,                        choose_leaf_tries ?                           choose_leaf_tries : 1,                        recurse_to_leaf,                        c+osize,                        0);                    osize += out_size;                }            }            if (recurse_to_leaf)                /* copy final _leaf_ values to output set */                memcpy(o, c, osize*sizeof(*o));            /* swap o and w arrays */            tmp = o;            o = w;            w = tmp;            wsize = osize;            break;        case CRUSH_RULE_EMIT:            for (i = 0; i < wsize && result_len < result_max; i++) {                result[result_len] = w[i];                result_len++;            }            wsize = 0;            break;        default:            dprintk(" unknown op %d at step %d\n",                curstep->op, step);            break;        }    }    return result_len;}

crush_choose_firstn - choose numrep distinct items of given type

/** * crush_choose_firstn - choose numrep distinct items of given type * @map: the crush_map * @bucket: the bucket we are choose an item from * @x: crush input value * @numrep: the number of items to choose * @type: the type of item to choose * @out: pointer to output vector * @outpos: our position in that vector * @out_size: size of the out vector * @tries: number of attempts to make * @recurse_tries: number of attempts to have recursive chooseleaf make * @local_retries: localized retries * @local_fallback_retries: localized fallback retries * @recurse_to_leaf: true if we want one device under each item of given type (chooseleaf instead of choose) * @stable: stable mode starts rep=0 in the recursive call for all replicas * @vary_r: pass r to recursive calls * @out2: second output vector for leaf items (if @recurse_to_leaf) * @parent_r: r value passed from the parent */static int crush_choose_firstn(const struct crush_map *map,                   struct crush_bucket *bucket,                   const __u32 *weight, int weight_max,                   int x, int numrep, int type,                   int *out, int outpos,                   int out_size,                   unsigned int tries,                   unsigned int recurse_tries,                   unsigned int local_retries,                   unsigned int local_fallback_retries,                   int recurse_to_leaf,                   unsigned int vary_r,                   unsigned int stable,                   int *out2,                   int parent_r){    int rep;    unsigned int ftotal, flocal;    int retry_descent, retry_bucket, skip_rep;    struct crush_bucket *in = bucket;    int r;    int i;    int item = 0;    int itemtype;    int collide, reject;    int count = out_size;    dprintk("CHOOSE%s bucket %d x %d outpos %d numrep %d tries %d \recurse_tries %d local_retries %d local_fallback_retries %d \parent_r %d stable %d\n",        recurse_to_leaf ? 
"_LEAF" : "",        bucket->id, x, outpos, numrep,        tries, recurse_tries, local_retries, local_fallback_retries,        parent_r, stable);    for (rep = stable ? 0 : outpos; rep < numrep && count > 0 ; rep++) {        /* keep trying until we get a non-out, non-colliding item */        ftotal = 0;        skip_rep = 0;        do {            retry_descent = 0;            in = bucket;               /* initial bucket */            /* choose through intervening buckets */            flocal = 0;            do {                collide = 0;                retry_bucket = 0;                r = rep + parent_r;                /* r' = r + f_total */                r += ftotal;                /* bucket choose */                if (in->size == 0) {                    reject = 1;                    goto reject;                }                if (local_fallback_retries > 0 &&                    flocal >= (in->size>>1) &&                    flocal > local_fallback_retries)                    item = bucket_perm_choose(in, x, r);                else                    item = crush_bucket_choose(in, x, r);                if (item >= map->max_devices) {                    dprintk("   bad item %d\n", item);                    skip_rep = 1;                    break;                }                /* desired type? */                if (item < 0)                    itemtype = map->buckets[-1-item]->type;                else                    itemtype = 0;                dprintk("  item %d type %d\n", item, itemtype);                /* keep going? 
*/                if (itemtype != type) {                    if (item >= 0 ||                        (-1-item) >= map->max_buckets) {                        dprintk("   bad item type %d\n", type);                        skip_rep = 1;                        break;                    }                    in = map->buckets[-1-item];                    retry_bucket = 1;                    continue;                }                /* collision? */                for (i = 0; i < outpos; i++) {                    if (out[i] == item) {                        collide = 1;                        break;                    }                }                reject = 0;                if (!collide && recurse_to_leaf) {                    if (item < 0) {                        int sub_r;                        if (vary_r)                            sub_r = r >> (vary_r-1);                        else                            sub_r = 0;                        if (crush_choose_firstn(map,                             map->buckets[-1-item],                             weight, weight_max,                             x, stable ? 1 : outpos+1, 0,                             out2, outpos, count,                             recurse_tries, 0,                             local_retries,                             local_fallback_retries,                             0,                             vary_r,                             stable,                             NULL,                             sub_r) <= outpos)                            /* didn't get leaf */                            reject = 1;                    } else {                        /* we already have a leaf! */                        out2[outpos] = item;                    }                }                if (!reject) {                    /* out? 
*/                    if (itemtype == 0)                        reject = is_out(map, weight,                                weight_max,                                item, x);                    else                        reject = 0;                }reject:                if (reject || collide) {                    ftotal++;                    flocal++;                    if (collide && flocal <= local_retries)                        /* retry locally a few times */                        retry_bucket = 1;                    else if (local_fallback_retries > 0 &&                         flocal <= in->size + local_fallback_retries)                        /* exhaustive bucket search */                        retry_bucket = 1;                    else if (ftotal < tries)                        /* then retry descent */                        retry_descent = 1;                    else                        /* else give up */                        skip_rep = 1;                    dprintk("  reject %d  collide %d  "                        "ftotal %u  flocal %u\n",                        reject, collide, ftotal,                        flocal);                }            } while (retry_bucket);        } while (retry_descent);        if (skip_rep) {            dprintk("skip rep\n");            continue;        }        dprintk("CHOOSE got %d\n", item);        out[outpos] = item;        outpos++;        count--;#ifndef __KERNEL__        if (map->choose_tries && ftotal <= map->choose_total_tries)            map->choose_tries[ftotal]++;#endif    }    dprintk("CHOOSE returns %d\n", outpos);    return outpos;}

crush_bucket_choose

static int crush_bucket_choose(struct crush_bucket *in, int x, int r){    dprintk(" crush_bucket_choose %d x=%d r=%d\n", in->id, x, r);    BUG_ON(in->size == 0);    switch (in->alg) {    case CRUSH_BUCKET_UNIFORM:        return bucket_uniform_choose((struct crush_bucket_uniform *)in,                      x, r);    case CRUSH_BUCKET_LIST:        return bucket_list_choose((struct crush_bucket_list *)in,                      x, r);    case CRUSH_BUCKET_TREE:        return bucket_tree_choose((struct crush_bucket_tree *)in,                      x, r);    case CRUSH_BUCKET_STRAW:        return bucket_straw_choose((struct crush_bucket_straw *)in,                       x, r);    case CRUSH_BUCKET_STRAW2:        return bucket_straw2_choose((struct crush_bucket_straw2 *)in,                        x, r);    default:        dprintk("unknown bucket %d alg %d\n", in->id, in->alg);        return in->items[0];    }}

bucket_straw_choose

/*
 * Straw selection: every item in the bucket draws a value, and the item
 * with the largest draw is returned.
 *
 * An item's draw is the low 16 bits of crush_hash32_3() over (@x, item,
 * @r), multiplied by that item's entry in bucket->straws.  On equal draws
 * the item that appears first in the bucket is kept.
 */
static int bucket_straw_choose(struct crush_bucket_straw *bucket,
                               int x, int r)
{
    int best_idx = 0;
    __u64 best_draw = 0;
    __u32 idx;

    for (idx = 0; idx < bucket->h.size; idx++) {
        __u64 draw = crush_hash32_3(bucket->h.hash, x,
                                    bucket->h.items[idx], r);

        draw = (draw & 0xffff) * bucket->straws[idx];

        /* only a strictly larger draw replaces the current best */
        if (idx != 0 && draw <= best_draw)
            continue;

        best_idx = idx;
        best_draw = draw;
    }

    return bucket->h.items[best_idx];
}
1 0
原创粉丝点击