Caffe源码解读:pooling_layer的前向传播与反向传播

来源:互联网 发布:java 书 编辑:程序博客网 时间:2024/05/18 21:42

池化层分为最大池化和平均池化

正向传播的原理就不说了。

误差反向传播原理如下:

  假设第l(小写的l,不要看成数字'1'了)层为卷积层,第l+1层为pooling层,且pooling层的误差敏感项为 δ_j^{l+1},卷积层的误差敏感项为 δ_j^l,则两者的关系表达式为:

    δ_j^l = upsample(δ_j^{l+1}) ⊙ f'(u_j^l)

  这里符号 ⊙ 表示的是矩阵的点积操作,即对应元素的乘积。卷积层和upsample()后的pooling层节点是一一对应的,所以下标都是用j表示。后面的 f'(u_j^l) 表示的是第l层第j个节点处激发函数的导数(对节点输入 u_j^l 的导数)。

  其中的函数upsample()为上采样过程,其具体的操作得看是采用的什么pooling方法了。但upsample的大概思想为:pooling层的每个节点是由卷积层中多个节点(一般为一个矩形区域)共同计算得到,所以pooling层每个节点的误差敏感值也是由卷积层中多个节点的误差敏感值共同产生的,只需满足两层间各自的误差敏感值总和相等,下面以mean-pooling和max-pooling为例来说明。

  假设卷积层的矩形大小为4×4, pooling区域大小为2×2, 很容易知道pooling后得到的矩形大小也为2*2(本文默认pooling过程是没有重叠的,卷积过程是每次移动一个像素,即是有重叠的,后续不再声明),如果此时pooling后的矩形误差敏感值如下:

    δ^{l+1} = [ 1  3
                2  4 ]

  则按照mean-pooling,首先得到的卷积层应该是4×4大小,其值分布为(等值复制):

    [ 1  1  3  3
      1  1  3  3
      2  2  4  4
      2  2  4  4 ]

  因为得满足反向传播时各层间误差敏感值总和不变,所以卷积层对应每个值需要平摊(除以pooling区域大小即可,这里pooling区域大小为2×2=4),最后的卷积层值

分布为:

    [ 1/4  1/4  3/4  3/4
      1/4  1/4  3/4  3/4
      2/4  2/4  4/4  4/4
      2/4  2/4  4/4  4/4 ]

  mean-pooling时的upsample操作可以使用matlab中的函数kron()来实现,因为是采用的矩阵Kronecker乘积。C=kron(A, B)表示的是矩阵B分别与矩阵A中每个元素相乘,然后将相乘的结果放在C中对应的位置。比如:

    kron([1 3; 2 4], [1 1; 1 1]/4) = [ 1/4  1/4  3/4  3/4
                                       1/4  1/4  3/4  3/4
                                       2/4  2/4  4/4  4/4
                                       2/4  2/4  4/4  4/4 ]

  如果是max-pooling,则需要记录前向传播过程中pooling区域中最大值的位置,这里假设pooling层值1,3,2,4对应的pooling区域位置分别为右下、右上、左上、左下。则此时对应卷积层误差敏感值分布为:

    [ 0  0  0  3
      0  1  0  0
      2  0  0  0
      0  0  4  0 ]

  当然了,上面2种结果还需要点乘卷积层激发函数对应位置的导数值了,这里省略掉。

 

caffe中pooling_layer的实现如下:

// 前向传播template <typename Dtype>void PoolingLayer<Dtype>::Forward_cpu(const vector<Blob<Dtype>*>& bottom,      const vector<Blob<Dtype>*>& top) {  const Dtype* bottom_data = bottom[0]->cpu_data();  Dtype* top_data = top[0]->mutable_cpu_data();  const int top_count = top[0]->count();  // We'll output the mask to top[1] if it's of size >1.  const bool use_top_mask = top.size() > 1;  int* mask = NULL;  // suppress warnings about uninitalized variables  Dtype* top_mask = NULL;  // Different pooling methods. We explicitly do the switch outside the for  // loop to save time, although this results in more code.  switch (this->layer_param_.pooling_param().pool()) {  //最大池化  case PoolingParameter_PoolMethod_MAX:    // Initialize    if (use_top_mask) {      top_mask = top[1]->mutable_cpu_data();      caffe_set(top_count, Dtype(-1), top_mask);    } else {      mask = max_idx_.mutable_cpu_data();      caffe_set(top_count, -1, mask);    }// 初始化top_data为float的最小值    caffe_set(top_count, Dtype(-FLT_MAX), top_data);    // The main loop    for (int n = 0; n < bottom[0]->num(); ++n) {      for (int c = 0; c < channels_; ++c) {        for (int ph = 0; ph < pooled_height_; ++ph) {          for (int pw = 0; pw < pooled_width_; ++pw) {            //计算图中被池化的范围坐标            int hstart = ph * stride_h_ - pad_h_;            int wstart = pw * stride_w_ - pad_w_;            int hend = min(hstart + kernel_h_, height_);            int wend = min(wstart + kernel_w_, width_);            hstart = max(hstart, 0);            wstart = max(wstart, 0);            const int pool_index = ph * pooled_width_ + pw;            for (int h = hstart; h < hend; ++h) {              for (int w = wstart; w < wend; ++w) {                const int index = h * width_ + w;//最大值保存到topdata值内                if (bottom_data[index] > top_data[pool_index]) {                  top_data[pool_index] = bottom_data[index];                  if (use_top_mask) {                    top_mask[pool_index] = static_cast<Dtype>(index);     
             } else {                    mask[pool_index] = index;                  }                }              }            }          }        }        // compute offset// 移动bottom_data和top_data指针,进行下一个位置的池化        bottom_data += bottom[0]->offset(0, 1);        top_data += top[0]->offset(0, 1);        if (use_top_mask) {          top_mask += top[0]->offset(0, 1);        } else {          mask += top[0]->offset(0, 1);        }      }    }    break;  //平均池化  case PoolingParameter_PoolMethod_AVE:    for (int i = 0; i < top_count; ++i) {      top_data[i] = 0;    }    // The main loop    for (int n = 0; n < bottom[0]->num(); ++n) {      for (int c = 0; c < channels_; ++c) {        for (int ph = 0; ph < pooled_height_; ++ph) {          for (int pw = 0; pw < pooled_width_; ++pw) {//计算图中被池化的范围坐标            int hstart = ph * stride_h_ - pad_h_;            int wstart = pw * stride_w_ - pad_w_;            int hend = min(hstart + kernel_h_, height_ + pad_h_);            int wend = min(wstart + kernel_w_, width_ + pad_w_);            int pool_size = (hend - hstart) * (wend - wstart);            hstart = max(hstart, 0);            wstart = max(wstart, 0);            hend = min(hend, height_);            wend = min(wend, width_);//计算平均值            for (int h = hstart; h < hend; ++h) {              for (int w = wstart; w < wend; ++w) {                top_data[ph * pooled_width_ + pw] +=                    bottom_data[h * width_ + w];              }            }            top_data[ph * pooled_width_ + pw] /= pool_size;          }        }        // compute offset// 移动bottom_data和top_data指针,进行下一个位置的池化        bottom_data += bottom[0]->offset(0, 1);        top_data += top[0]->offset(0, 1);      }    }    break;  case PoolingParameter_PoolMethod_STOCHASTIC:    NOT_IMPLEMENTED;    break;  default:    LOG(FATAL) << "Unknown pooling method.";  }}//反向传播template <typename Dtype>void PoolingLayer<Dtype>::Backward_cpu(const vector<Blob<Dtype>*>& top,      const vector<bool>& 
propagate_down, const vector<Blob<Dtype>*>& bottom) {  if (!propagate_down[0]) {    return;  }  const Dtype* top_diff = top[0]->cpu_diff();  Dtype* bottom_diff = bottom[0]->mutable_cpu_diff();  // Different pooling methods. We explicitly do the switch outside the for  // loop to save time, although this results in more codes.  // 初始化bottom_diff为0  caffe_set(bottom[0]->count(), Dtype(0), bottom_diff);  // We'll output the mask to top[1] if it's of size >1.  const bool use_top_mask = top.size() > 1;  const int* mask = NULL;  // suppress warnings about uninitialized variables  const Dtype* top_mask = NULL;  switch (this->layer_param_.pooling_param().pool()) {  //最大池化的情况  case PoolingParameter_PoolMethod_MAX:    // The main loop    if (use_top_mask) {      top_mask = top[1]->cpu_data();    } else {      mask = max_idx_.cpu_data();    }    for (int n = 0; n < top[0]->num(); ++n) {      for (int c = 0; c < channels_; ++c) {        for (int ph = 0; ph < pooled_height_; ++ph) {          for (int pw = 0; pw < pooled_width_; ++pw) {            const int index = ph * pooled_width_ + pw;//bottom_index是原图中最大值的位置,只更新该位置的残差            const int bottom_index =                use_top_mask ? 
top_mask[index] : mask[index];            bottom_diff[bottom_index] += top_diff[index];          }        }// 移动bottom_diff和top_diff指针,进行下一个位置的反传        bottom_diff += bottom[0]->offset(0, 1);        top_diff += top[0]->offset(0, 1);        if (use_top_mask) {          top_mask += top[0]->offset(0, 1);        } else {          mask += top[0]->offset(0, 1);        }      }    }    break;  //平均池化的情况  case PoolingParameter_PoolMethod_AVE:    // The main loop    for (int n = 0; n < top[0]->num(); ++n) {      for (int c = 0; c < channels_; ++c) {        for (int ph = 0; ph < pooled_height_; ++ph) {          for (int pw = 0; pw < pooled_width_; ++pw) {            int hstart = ph * stride_h_ - pad_h_;            int wstart = pw * stride_w_ - pad_w_;            int hend = min(hstart + kernel_h_, height_ + pad_h_);            int wend = min(wstart + kernel_w_, width_ + pad_w_);            int pool_size = (hend - hstart) * (wend - wstart);            hstart = max(hstart, 0);            wstart = max(wstart, 0);            hend = min(hend, height_);            wend = min(wend, width_);//原图中的某位置的残差等于对应池化层位置残差除pool_size//原图中pool_size个位置对应池化层中的一个位置            for (int h = hstart; h < hend; ++h) {              for (int w = wstart; w < wend; ++w) {                bottom_diff[h * width_ + w] +=                  top_diff[ph * pooled_width_ + pw] / pool_size;              }            }          }        }        // offset// 移动bottom_diff和top_diff指针,进行下一个位置的反传        bottom_diff += bottom[0]->offset(0, 1);        top_diff += top[0]->offset(0, 1);      }    }    break;  case PoolingParameter_PoolMethod_STOCHASTIC:    NOT_IMPLEMENTED;    break;  default:    LOG(FATAL) << "Unknown pooling method.";  }}


0 0
原创粉丝点击