max pooling in caffe

来源:互联网 发布:淘宝护肤品优惠信息 编辑:程序博客网 时间:2024/05/20 21:46

我们来看max pooling 在caffe 中怎么实现的吧

reshape

首先看 reshape 的时候:

  // If max pooling, we will initialize the vector index part.  if (this->layer_param_.pooling_param().pool() ==      PoolingParameter_PoolMethod_MAX && top.size() == 1) {    max_idx_.Reshape(bottom[0]->num(), channels_, pooled_height_,        pooled_width_);  }

如果是 max pooling(且只有一个 top 输出),则需要 reshape max_idx_,用来记录每次 max pooling 取到的最大值在输入中的位置。
其大小为 num × channels × pooled_height × pooled_width。

forward

再看forward:

case PoolingParameter_PoolMethod_MAX:    // Initialize 如果top有两个分支,就有top_mask 没研究这个。遇到再说,目前是进else分支    if (use_top_mask) {      top_mask = top[1]->mutable_cpu_data();      caffe_set(top_count, Dtype(-1), top_mask);    } else {    //get 到 max_idx_的指针      mask = max_idx_.mutable_cpu_data();      caffe_set(top_count, -1, mask);    }    //top_data 全部变成大浮点数的相反数。方便后面的取max运算    caffe_set(top_count, Dtype(-FLT_MAX), top_data);    // The main loop 找最大值    for (int n = 0; n < bottom[0]->num(); ++n) {      for (int c = 0; c < channels_; ++c) {        for (int ph = 0; ph < pooled_height_; ++ph) {          for (int pw = 0; pw < pooled_width_; ++pw) {            int hstart = ph * stride_h_ - pad_h_;            int wstart = pw * stride_w_ - pad_w_;            int hend = min(hstart + kernel_h_, height_);            int wend = min(wstart + kernel_w_, width_);            hstart = max(hstart, 0);            wstart = max(wstart, 0);            const int pool_index = ph * pooled_width_ + pw;            for (int h = hstart; h < hend; ++h) {              for (int w = wstart; w < wend; ++w) {                const int index = h * width_ + w;                if (bottom_data[index] > top_data[pool_index]) {                  top_data[pool_index] = bottom_data[index];                  if (use_top_mask) {                    top_mask[pool_index] = static_cast<Dtype>(index);                  } else {                    mask[pool_index] = index;                  }                }              }            }          }        }        // compute offset 移动指针位置        bottom_data += bottom[0]->offset(0, 1);        top_data += top[0]->offset(0, 1);        if (use_top_mask) {          top_mask += top[0]->offset(0, 1);        } else {          mask += top[0]->offset(0, 1);        }      }    }    break;

其中offset函数是这样定义的:

  // Returns the flattened index of element (n, c, h, w), computed as
  // ((n * channels() + c) * height() + h) * width() + w.
  //
  // The checks below allow each argument to equal its dimension size
  // (upper bounds are inclusive). Only n is checked against a lower bound of
  // zero; for c, h and w it is the dimension size itself that is checked to
  // be non-negative.
  inline int offset(const int n, const int c = 0, const int h = 0,
                    const int w = 0) const {
    CHECK_GE(n, 0);
    CHECK_LE(n, num());
    CHECK_GE(channels(), 0);
    CHECK_LE(c, channels());
    CHECK_GE(height(), 0);
    CHECK_LE(h, height());
    CHECK_GE(width(), 0);
    CHECK_LE(w, width());
    // Accumulate one axis at a time, outermost (n) to innermost (w).
    int flat_index = n * channels() + c;
    flat_index = flat_index * height() + h;
    flat_index = flat_index * width() + w;
    return flat_index;
  }

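代入的参数是 (0, 1),即 n = 0、c = 1,返回值为 height × width,也就是每次把指针平移一个 channel 的大小(对 bottom 是 height × width,对 top 是 pooled_height × pooled_width)。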

backward

case PoolingParameter_PoolMethod_MAX:    // The main loop    if (use_top_mask) {      top_mask = top[1]->cpu_data();    } else {      mask = max_idx_.cpu_data();    }    for (int n = 0; n < top[0]->num(); ++n) {      for (int c = 0; c < channels_; ++c) {        for (int ph = 0; ph < pooled_height_; ++ph) {          for (int pw = 0; pw < pooled_width_; ++pw) {            const int index = ph * pooled_width_ + pw;            //找到对应位置 把上层的梯度加上去就好了            const int bottom_index =                use_top_mask ? top_mask[index] : mask[index];            bottom_diff[bottom_index] += top_diff[index];          }        }        bottom_diff += bottom[0]->offset(0, 1);        top_diff += top[0]->offset(0, 1);        if (use_top_mask) {          top_mask += top[0]->offset(0, 1);        } else {          mask += top[0]->offset(0, 1);        }      }    }    break;