Caffe源码解读：conv_layer的前向传播与反向传播

来源：互联网　发布：天刀如何导出捏脸数据　编辑：程序博客网　时间：2024/05/19 02:20

正向传播原理请见： http://blog.csdn.net/xg123321123/article/details/53319080

误差反向传播原理请见：https://zhuanlan.zhihu.com/p/22860936

下面直接上conv_layer.cpp代码（其中forward_cpu_gemm是基类BaseConvolutionLayer的成员函数，定义在base_conv_layer.cpp中）:

//前向传播template <typename Dtype>void ConvolutionLayer<Dtype>::Forward_cpu(const vector<Blob<Dtype>*>& bottom,      const vector<Blob<Dtype>*>& top) {  //blobs_[0]保存权值, blobs_[1]保存偏置  const Dtype* weight = this->blobs_[0]->cpu_data();  //bottom.size()是bottom中blob的数量，等于top中blob的数量  for (int i = 0; i < bottom.size(); ++i) {    //获取输入，输出数据指针    const Dtype* bottom_data = bottom[i]->cpu_data();    Dtype* top_data = top[i]->mutable_cpu_data();//第n张图片    for (int n = 0; n < this->num_; ++n) {      //卷积操作，采用矩阵乘积实现      this->forward_cpu_gemm(bottom_data + n * this->bottom_dim_, weight,          top_data + n * this->top_dim_);      if (this->bias_term_) {        const Dtype* bias = this->blobs_[1]->cpu_data();//加上偏置        this->forward_cpu_bias(top_data + n * this->top_dim_, bias);      }    }  }}//反向传播template <typename Dtype>void ConvolutionLayer<Dtype>::Backward_cpu(const vector<Blob<Dtype>*>& top,      const vector<bool>& propagate_down/*是否反传*/, const vector<Blob<Dtype>*>& bottom) {  const Dtype* weight = this->blobs_[0]->cpu_data();  Dtype* weight_diff = this->blobs_[0]->mutable_cpu_diff();  for (int i = 0; i < top.size(); ++i) {//上一层传下来的导数    const Dtype* top_diff = top[i]->cpu_diff();    const Dtype* bottom_data = bottom[i]->cpu_data();//传给下一层的导数    Dtype* bottom_diff = bottom[i]->mutable_cpu_diff();    // Bias gradient, if necessary.// 更新偏置，直接加上残差(每个偏置所对应的图内所有残差之和)    if (this->bias_term_ && this->param_propagate_down_[1]) {      Dtype* bias_diff = this->blobs_[1]->mutable_cpu_diff();      for (int n = 0; n < this->num_; ++n) {        this->backward_cpu_bias(bias_diff, top_diff + n * this->top_dim_);      }    }    if (this->param_propagate_down_[0] || propagate_down[i]) {      for (int n = 0; n < this->num_; ++n) {        // gradient w.r.t. weight. 
Note that we will accumulate diffs.// 对weight 计算导数（用来更新weight）        // /将下一层残差与weight进行相关计算，得到卷积层的残差        if (this->param_propagate_down_[0]) {          this->weight_cpu_gemm(bottom_data + n * this->bottom_dim_,              top_diff + n * this->top_dim_, weight_diff);        }        // gradient w.r.t. bottom data, if necessary.// 对bottom数据计算导数（传给下一层）// bottom_data与top_diff做相关计算，得到w权值更新量        if (propagate_down[i]) {          this->backward_cpu_gemm(top_diff + n * this->top_dim_, weight,              bottom_diff + n * this->bottom_dim_);        }      }    }  }}

//卷积操作//用矩阵乘法实现的template <typename Dtype>void BaseConvolutionLayer<Dtype>::forward_cpu_gemm(const Dtype* input,    const Dtype* weights, Dtype* output, bool skip_im2col) {  const Dtype* col_buff = input;  if (!is_1x1_) {    if (!skip_im2col) {  // 如果没有1x1卷积，也没有skip_im2col        // 则使用conv_im2col_cpu对使用卷积核滑动过程中的每一个kernel大小的图像块      // 变成一个列向量，其中height=kernel_dim_        // width = 卷积后图像heght*卷积后图像width        conv_im2col_cpu(input, col_buffer_.mutable_cpu_data());    }    col_buff = col_buffer_.cpu_data();  }  //使用caffe的cpu_gemm(调用cblas的矩阵乘法)来进行计算    for (int g = 0; g < group_; ++g) {// 分组分别进行计算      // conv_out_channels_ / group_是每个卷积组的输出的channel      // kernel_dim_ = input channels per-group x kernel height x kernel width      // 计算的是output[output_offset_ * g] =      // weights[weight_offset_ * g] X col_buff[col_offset_ * g]      // weights的形状是 [conv_out_channel x kernel_dim_]      // col_buff的形状是[kernel_dim_ x (卷积后图像高度乘以卷积后图像宽度)]      // 所以output的形状自然就是conv_out_channel X (卷积后图像高度乘以卷积后图像宽度)    caffe_cpu_gemm<Dtype>(CblasNoTrans, CblasNoTrans, conv_out_channels_ /        group_, conv_out_spatial_dim_, kernel_dim_,        (Dtype)1., weights + weight_offset_ * g, col_buff + col_offset_ * g,        (Dtype)0., output + output_offset_ * g);  }}

0 0