Caffe source code: pooling_layer.cpp


@brief Backward propagation through the PoolingLayer (conv2 -> pool2)


In max pooling, gradients are propagated only to the neurons that contributed to the loss, i.e. to the input positions whose indices were recorded as the maximum of each pooling window.

Core code

    bottom_diff[bottom_index] += top_diff[index];  // accumulate the top gradient into bottom_diff at the argmax position
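To make this scatter concrete, here is a minimal, self-contained sketch of the same idea (not Caffe code: the plain arrays top_diff, mask, and bottom_diff stand in for the corresponding blob buffers, and the 4x4 input / 2x2 window shapes are made up for illustration). Each top gradient is added to the single bottom position whose flattened index the forward pass recorded in the mask; every other bottom position keeps a zero gradient.

// Minimal sketch of the max-pooling backward scatter (not Caffe code):
// plain arrays stand in for the bottom/top blob buffers.
#include <cstdio>

int main() {
  const int height = 4, width = 4;         // bottom is 4x4
  const int pooled_h = 2, pooled_w = 2;    // 2x2 kernel, stride 2 -> top is 2x2
  // Argmax indices recorded by the forward pass (flattened bottom offsets),
  // one per top element -- this plays the role of max_idx_ / mask.
  const int mask[pooled_h * pooled_w] = {5, 2, 9, 15};
  const float top_diff[pooled_h * pooled_w] = {0.1f, 0.2f, 0.3f, 0.4f};
  float bottom_diff[height * width] = {0};  // start from zero, as caffe_set does

  // Each top gradient flows only to the bottom position that won the max.
  for (int index = 0; index < pooled_h * pooled_w; ++index) {
    bottom_diff[mask[index]] += top_diff[index];
  }

  for (int h = 0; h < height; ++h) {
    for (int w = 0; w < width; ++w) {
      printf("%4.1f ", bottom_diff[h * width + w]);
    }
    printf("\n");
  }
  return 0;
}

Running it prints a 4x4 grid that is zero everywhere except at the four recorded argmax positions, which is exactly the sparsity pattern the Backward_cpu MAX branch below produces.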

Full code

#include <algorithm>
#include <cfloat>
#include <vector>

#include "caffe/layers/pooling_layer.hpp"
#include "caffe/util/math_functions.hpp"

namespace caffe {

using std::min;
using std::max;

template <typename Dtype>
void PoolingLayer<Dtype>::LayerSetUp(const vector<Blob<Dtype>*>& bottom,
      const vector<Blob<Dtype>*>& top) {
  PoolingParameter pool_param = this->layer_param_.pooling_param();
  if (pool_param.global_pooling()) {
    CHECK(!(pool_param.has_kernel_size() ||
      pool_param.has_kernel_h() || pool_param.has_kernel_w()))
      << "With Global_pooling: true Filter size cannot specified";
  } else {
    CHECK(!pool_param.has_kernel_size() !=
      !(pool_param.has_kernel_h() && pool_param.has_kernel_w()))
      << "Filter size is kernel_size OR kernel_h and kernel_w; not both";
    CHECK(pool_param.has_kernel_size() ||
      (pool_param.has_kernel_h() && pool_param.has_kernel_w()))
      << "For non-square filters both kernel_h and kernel_w are required.";
  }
  CHECK((!pool_param.has_pad() && pool_param.has_pad_h()
      && pool_param.has_pad_w())
      || (!pool_param.has_pad_h() && !pool_param.has_pad_w()))
      << "pad is pad OR pad_h and pad_w are required.";
  CHECK((!pool_param.has_stride() && pool_param.has_stride_h()
      && pool_param.has_stride_w())
      || (!pool_param.has_stride_h() && !pool_param.has_stride_w()))
      << "Stride is stride OR stride_h and stride_w are required.";
  global_pooling_ = pool_param.global_pooling();
  if (global_pooling_) {
    kernel_h_ = bottom[0]->height();
    kernel_w_ = bottom[0]->width();
  } else {
    if (pool_param.has_kernel_size()) {
      kernel_h_ = kernel_w_ = pool_param.kernel_size();  // kernel_size implies a square kernel
    } else {
      kernel_h_ = pool_param.kernel_h();
      kernel_w_ = pool_param.kernel_w();
    }
  }
  CHECK_GT(kernel_h_, 0) << "Filter dimensions cannot be zero.";
  CHECK_GT(kernel_w_, 0) << "Filter dimensions cannot be zero.";
  if (!pool_param.has_pad_h()) {
    pad_h_ = pad_w_ = pool_param.pad();  // defaults to 0
  } else {
    pad_h_ = pool_param.pad_h();
    pad_w_ = pool_param.pad_w();
  }
  if (!pool_param.has_stride_h()) {
    stride_h_ = stride_w_ = pool_param.stride();  // same stride in both directions by default
  } else {
    stride_h_ = pool_param.stride_h();
    stride_w_ = pool_param.stride_w();
  }
  if (global_pooling_) {
    CHECK(pad_h_ == 0 && pad_w_ == 0 && stride_h_ == 1 && stride_w_ == 1)
      << "With Global_pooling: true; only pad = 0 and stride = 1";
  }
  if (pad_h_ != 0 || pad_w_ != 0) {
    CHECK(this->layer_param_.pooling_param().pool()
        == PoolingParameter_PoolMethod_AVE
        || this->layer_param_.pooling_param().pool()
        == PoolingParameter_PoolMethod_MAX)
        << "Padding implemented only for average and max pooling.";
    CHECK_LT(pad_h_, kernel_h_);
    CHECK_LT(pad_w_, kernel_w_);
  }
}

template <typename Dtype>
void PoolingLayer<Dtype>::Reshape(const vector<Blob<Dtype>*>& bottom,
      const vector<Blob<Dtype>*>& top) {
  CHECK_EQ(4, bottom[0]->num_axes()) << "Input must have 4 axes, "
      << "corresponding to (num, channels, height, width)";
  channels_ = bottom[0]->channels();
  height_ = bottom[0]->height();
  width_ = bottom[0]->width();
  if (global_pooling_) {
    kernel_h_ = bottom[0]->height();
    kernel_w_ = bottom[0]->width();
  }
  // Compute the 2D size of the pooled feature map.
  pooled_height_ = static_cast<int>(ceil(static_cast<float>(
      height_ + 2 * pad_h_ - kernel_h_) / stride_h_)) + 1;
  pooled_width_ = static_cast<int>(ceil(static_cast<float>(
      width_ + 2 * pad_w_ - kernel_w_) / stride_w_)) + 1;
  if (pad_h_ || pad_w_) {
    // If we have padding, ensure that the last pooling starts strictly
    // inside the image (instead of at the padding); otherwise clip the last.
    if ((pooled_height_ - 1) * stride_h_ >= height_ + pad_h_) {
      --pooled_height_;
    }
    if ((pooled_width_ - 1) * stride_w_ >= width_ + pad_w_) {
      --pooled_width_;
    }
    CHECK_LT((pooled_height_ - 1) * stride_h_, height_ + pad_h_);
    CHECK_LT((pooled_width_ - 1) * stride_w_, width_ + pad_w_);
  }
  top[0]->Reshape(bottom[0]->num(), channels_, pooled_height_,
      pooled_width_);
  if (top.size() > 1) {
    top[1]->ReshapeLike(*top[0]);
  }
  // If max pooling, we will initialize the vector index part.
  if (this->layer_param_.pooling_param().pool() ==
      PoolingParameter_PoolMethod_MAX && top.size() == 1) {
    max_idx_.Reshape(bottom[0]->num(), channels_, pooled_height_,
        pooled_width_);
  }
  // If stochastic pooling, we will initialize the random index part.
  if (this->layer_param_.pooling_param().pool() ==
      PoolingParameter_PoolMethod_STOCHASTIC) {
    rand_idx_.Reshape(bottom[0]->num(), channels_, pooled_height_,
      pooled_width_);
  }
}

// TODO(Yangqing): Is there a faster way to do pooling in the channel-first
// case?
template <typename Dtype>
void PoolingLayer<Dtype>::Forward_cpu(const vector<Blob<Dtype>*>& bottom,
      const vector<Blob<Dtype>*>& top) {
  const Dtype* bottom_data = bottom[0]->cpu_data();  // const pointer to the input data
  Dtype* top_data = top[0]->mutable_cpu_data();  // mutable pointer to the output data
  const int top_count = top[0]->count();
  // We'll output the mask to top[1] if it's of size >1.
  const bool use_top_mask = top.size() > 1;  // the mask records the argmax of each pooling window
  int* mask = NULL;  // suppress warnings about uninitialized variables
  Dtype* top_mask = NULL;
  // Different pooling methods. We explicitly do the switch outside the for
  // loop to save time, although this results in more code.
  switch (this->layer_param_.pooling_param().pool()) {
  case PoolingParameter_PoolMethod_MAX:
    // Initialize
    if (use_top_mask) {
      top_mask = top[1]->mutable_cpu_data();
      caffe_set(top_count, Dtype(-1), top_mask);
    } else {
      mask = max_idx_.mutable_cpu_data();  // indices of the max value inside each pooling window
      caffe_set(top_count, -1, mask);
    }
    caffe_set(top_count, Dtype(-FLT_MAX), top_data);  // initialize every output to -FLT_MAX
    // The main loop
    for (int n = 0; n < bottom[0]->num(); ++n) {  // loop over the batch
      for (int c = 0; c < channels_; ++c) {  // loop over channels
        for (int ph = 0; ph < pooled_height_; ++ph) {  // scan the input vertically, once per output row
          for (int pw = 0; pw < pooled_width_; ++pw) {  // scan the input horizontally, once per output column
            int hstart = ph * stride_h_ - pad_h_;  // top row of the pooling window
            int wstart = pw * stride_w_ - pad_w_;  // leftmost column of the pooling window
            int hend = min(hstart + kernel_h_, height_);  // bottom of the window: hstart + kernel_h_, clipped to height_
            int wend = min(wstart + kernel_w_, width_);  // right edge of the window, clipped to width_
            hstart = max(hstart, 0);
            wstart = max(wstart, 0);  // [hstart, hend) x [wstart, wend) delimits the pooling window
            const int pool_index = ph * pooled_width_ + pw;  // index of the output element
            for (int h = hstart; h < hend; ++h) {  // (h, w) walks over every point inside the pooling window
              for (int w = wstart; w < wend; ++w) {
                const int index = h * width_ + w;  // position of (h, w) in the input feature map
                if (bottom_data[index] > top_data[pool_index]) {
                  top_data[pool_index] = bottom_data[index];  // update the running maximum
                  if (use_top_mask) {
                    top_mask[pool_index] = static_cast<Dtype>(index);
                  } else {
                    mask[pool_index] = index;  // record the index of the max input within the window
                  }
                }
              }
            }
          }
        }
        // compute offset: after finishing one channel,
        bottom_data += bottom[0]->offset(0, 1);  // advance bottom_data to the next channel of this sample
        top_data += top[0]->offset(0, 1);  // likewise for the output pointer
        if (use_top_mask) {
          top_mask += top[0]->offset(0, 1);
        } else {
          mask += top[0]->offset(0, 1);  // and for the mask
        }
      }
    }
    break;
  case PoolingParameter_PoolMethod_AVE:
    for (int i = 0; i < top_count; ++i) {
      top_data[i] = 0;
    }
    // The main loop
    for (int n = 0; n < bottom[0]->num(); ++n) {
      for (int c = 0; c < channels_; ++c) {
        for (int ph = 0; ph < pooled_height_; ++ph) {
          for (int pw = 0; pw < pooled_width_; ++pw) {
            int hstart = ph * stride_h_ - pad_h_;
            int wstart = pw * stride_w_ - pad_w_;
            int hend = min(hstart + kernel_h_, height_ + pad_h_);
            int wend = min(wstart + kernel_w_, width_ + pad_w_);
            int pool_size = (hend - hstart) * (wend - wstart);
            hstart = max(hstart, 0);
            wstart = max(wstart, 0);
            hend = min(hend, height_);
            wend = min(wend, width_);
            for (int h = hstart; h < hend; ++h) {
              for (int w = wstart; w < wend; ++w) {
                top_data[ph * pooled_width_ + pw] +=
                    bottom_data[h * width_ + w];
              }
            }
            top_data[ph * pooled_width_ + pw] /= pool_size;
          }
        }
        // compute offset
        bottom_data += bottom[0]->offset(0, 1);
        top_data += top[0]->offset(0, 1);
      }
    }
    break;
  case PoolingParameter_PoolMethod_STOCHASTIC:
    NOT_IMPLEMENTED;
    break;
  default:
    LOG(FATAL) << "Unknown pooling method.";
  }
}

template <typename Dtype>
void PoolingLayer<Dtype>::Backward_cpu(const vector<Blob<Dtype>*>& top,
      const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom) {
  if (!propagate_down[0]) {
    return;
  }
  const Dtype* top_diff = top[0]->cpu_diff();  // const pointer to the gradient arriving from the top
  Dtype* bottom_diff = bottom[0]->mutable_cpu_diff();  // mutable pointer to the gradient to propagate to the bottom
  // Different pooling methods. We explicitly do the switch outside the for
  // loop to save time, although this results in more code.
  caffe_set(bottom[0]->count(), Dtype(0), bottom_diff);  // zero out bottom_diff
  // We'll output the mask to top[1] if it's of size >1.
  const bool use_top_mask = top.size() > 1;
  const int* mask = NULL;  // suppress warnings about uninitialized variables
  const Dtype* top_mask = NULL;
  switch (this->layer_param_.pooling_param().pool()) {
  case PoolingParameter_PoolMethod_MAX:
    // The main loop
    if (use_top_mask) {
      top_mask = top[1]->cpu_data();
    } else {
      mask = max_idx_.cpu_data();  // the max indices identify which input neurons contributed to the loss
    }
    for (int n = 0; n < top[0]->num(); ++n) {  // loop over the batch
      for (int c = 0; c < channels_; ++c) {  // loop over channels
        for (int ph = 0; ph < pooled_height_; ++ph) {
          for (int pw = 0; pw < pooled_width_; ++pw) {
            const int index = ph * pooled_width_ + pw;  // 2D position of the incoming top gradient
            const int bottom_index =
                use_top_mask ? top_mask[index] : mask[index];
            bottom_diff[bottom_index] += top_diff[index];  // accumulate the gradient into bottom_diff at the argmax
          }
        }
        bottom_diff += bottom[0]->offset(0, 1);
        top_diff += top[0]->offset(0, 1);
        if (use_top_mask) {
          top_mask += top[0]->offset(0, 1);
        } else {
          mask += top[0]->offset(0, 1);
        }
      }
    }
    break;
  case PoolingParameter_PoolMethod_AVE:
    // The main loop
    for (int n = 0; n < top[0]->num(); ++n) {
      for (int c = 0; c < channels_; ++c) {
        for (int ph = 0; ph < pooled_height_; ++ph) {
          for (int pw = 0; pw < pooled_width_; ++pw) {
            int hstart = ph * stride_h_ - pad_h_;
            int wstart = pw * stride_w_ - pad_w_;
            int hend = min(hstart + kernel_h_, height_ + pad_h_);
            int wend = min(wstart + kernel_w_, width_ + pad_w_);
            int pool_size = (hend - hstart) * (wend - wstart);
            hstart = max(hstart, 0);
            wstart = max(wstart, 0);
            hend = min(hend, height_);
            wend = min(wend, width_);
            for (int h = hstart; h < hend; ++h) {
              for (int w = wstart; w < wend; ++w) {
                bottom_diff[h * width_ + w] +=
                  top_diff[ph * pooled_width_ + pw] / pool_size;
              }
            }
          }
        }
        // offset
        bottom_diff += bottom[0]->offset(0, 1);
        top_diff += top[0]->offset(0, 1);
      }
    }
    break;
  case PoolingParameter_PoolMethod_STOCHASTIC:
    NOT_IMPLEMENTED;
    break;
  default:
    LOG(FATAL) << "Unknown pooling method.";
  }
}

#ifdef CPU_ONLY
STUB_GPU(PoolingLayer);
#endif

INSTANTIATE_CLASS(PoolingLayer);

}  // namespace caffe
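A closing note on the sizing done in Reshape(): each spatial dimension pools to ceil((dim + 2*pad - kernel) / stride) + 1, and when padding is used the result is clipped so that the last window starts strictly inside the image. The standalone sketch below reproduces that arithmetic (it is not part of Caffe; pooled_dim is a hypothetical helper written only for this example).

// Standalone sketch (not Caffe code) of the output-size rule used in Reshape():
// pooled = ceil((dim + 2*pad - kernel) / stride) + 1, then clipped so the last
// window starts strictly inside the image when padding is used.
#include <cmath>
#include <cstdio>

// pooled_dim() is a hypothetical helper for illustration only.
int pooled_dim(int dim, int kernel, int pad, int stride) {
  int pooled = static_cast<int>(std::ceil(
      static_cast<float>(dim + 2 * pad - kernel) / stride)) + 1;
  if (pad) {
    if ((pooled - 1) * stride >= dim + pad) {
      --pooled;  // clip: the last window must start inside the image, not in the padding
    }
  }
  return pooled;
}

int main() {
  // e.g. a 13x13 map with a 3x3 kernel and stride 2 pools to 6x6
  printf("%d\n", pooled_dim(13, 3, /*pad=*/0, /*stride=*/2));
  // e.g. a 28x28 map with a 2x2 kernel and stride 2 pools to 14x14
  printf("%d\n", pooled_dim(28, 2, /*pad=*/0, /*stride=*/2));
  return 0;
}

These are the same numbers the layer writes into top[0] via top[0]->Reshape(num, channels_, pooled_height_, pooled_width_).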