Caffe source code: base_conv_layer


caffe.proto

message ConvolutionParameter {
  optional uint32 num_output = 1; // The number of outputs for the layer
  optional bool bias_term = 2 [default = true]; // whether to have bias terms
  // Pad, kernel size, and stride are all given as a single value for equal
  // dimensions in all spatial dimensions, or once per spatial dimension.
  repeated uint32 pad = 3; // The padding size; defaults to 0
  repeated uint32 kernel_size = 4; // The kernel size
  repeated uint32 stride = 6; // The stride; defaults to 1
  // Factor used to dilate the kernel, (implicitly) zero-filling the resulting
  // holes. (Kernel dilation is sometimes referred to by its use in the
  // algorithme à trous from Holschneider et al. 1987.)
  repeated uint32 dilation = 18; // The dilation; defaults to 1
  // For 2D convolution only, the *_h and *_w versions may also be used to
  // specify both spatial dimensions.
  optional uint32 pad_h = 9 [default = 0]; // The padding height (2D only)
  optional uint32 pad_w = 10 [default = 0]; // The padding width (2D only)
  optional uint32 kernel_h = 11; // The kernel height (2D only)
  optional uint32 kernel_w = 12; // The kernel width (2D only)
  optional uint32 stride_h = 13; // The stride height (2D only)
  optional uint32 stride_w = 14; // The stride width (2D only)
  optional uint32 group = 5 [default = 1]; // The group size for group conv
  optional FillerParameter weight_filler = 7; // The filler for the weight
  optional FillerParameter bias_filler = 8; // The filler for the bias
  enum Engine {
    DEFAULT = 0;
    CAFFE = 1;
    CUDNN = 2;
  }
  optional Engine engine = 15 [default = DEFAULT];
  // The axis to interpret as "channels" when performing convolution.
  // Preceding dimensions are treated as independent inputs;
  // succeeding dimensions are treated as "spatial".
  // With (N, C, H, W) inputs, and axis == 1 (the default), we perform
  // N independent 2D convolutions, sliding C-channel (or (C/g)-channels, for
  // groups g>1) filters across the spatial axes (H, W) of the input.
  // With (N, C, D, H, W) inputs, and axis == 1, we perform
  // N independent 3D convolutions, sliding (C/g)-channels
  // filters across the spatial axes (D, H, W) of the input.
  optional int32 axis = 16 [default = 1];
  // Whether to force use of the general ND convolution, even if a specific
  // implementation for blobs of the appropriate number of spatial dimensions
  // is available. (Currently, there is only a 2D-specific convolution
  // implementation; for input blobs with num_axes != 2, this option is
  // ignored and the ND implementation will be used.)
  optional bool force_nd_im2col = 17 [default = false];
}
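A note on how these fields are consumed: protoc compiles this message into a C++ class with a set_* accessor for each optional field and an add_* accessor for each repeated field, and LayerSetUp below reads the values back through the matching getters (kernel_size_size(), kernel_size(i), has_kernel_h(), and so on). A minimal sketch of building a 3x3 convolution parameter programmatically, relying only on the standard protobuf-generated accessors; the helper name MakeConv3x3 and the concrete sizes are illustrative, not from Caffe:

#include "caffe/proto/caffe.pb.h"

caffe::LayerParameter MakeConv3x3(int num_output) {
  caffe::LayerParameter layer;
  layer.set_type("Convolution");
  caffe::ConvolutionParameter* conv = layer.mutable_convolution_param();
  conv->set_num_output(num_output);  // number of output channels
  conv->add_kernel_size(3);  // a single value applies to all spatial dims
  conv->add_pad(1);
  conv->add_stride(1);
  return layer;
}

The same configuration is more commonly written in prototxt as convolution_param { num_output: 64 kernel_size: 3 pad: 1 stride: 1 }.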

base_conv_layer.hpp

#ifndef CAFFE_BASE_CONVOLUTION_LAYER_HPP_
#define CAFFE_BASE_CONVOLUTION_LAYER_HPP_

#include <vector>

#include "caffe/blob.hpp"
#include "caffe/layer.hpp"
#include "caffe/proto/caffe.pb.h"
#include "caffe/util/im2col.hpp"

namespace caffe {

/**
 * @brief Abstract base class that factors out the BLAS code common to
 *        ConvolutionLayer and DeconvolutionLayer.
 */
template <typename Dtype>
class BaseConvolutionLayer : public Layer<Dtype> {
 public:
  explicit BaseConvolutionLayer(const LayerParameter& param)
      : Layer<Dtype>(param) {}
  virtual void LayerSetUp(const vector<Blob<Dtype>*>& bottom,
      const vector<Blob<Dtype>*>& top);
  virtual void Reshape(const vector<Blob<Dtype>*>& bottom,
      const vector<Blob<Dtype>*>& top);

  virtual inline int MinBottomBlobs() const { return 1; }
  virtual inline int MinTopBlobs() const { return 1; }
  virtual inline bool EqualNumBottomTopBlobs() const { return true; }

 protected:
  // Helper functions that abstract away the column buffer and gemm arguments.
  // The last argument in forward_cpu_gemm is so that we can skip the im2col if
  // we just called weight_cpu_gemm with the same input.
  void forward_cpu_gemm(const Dtype* input, const Dtype* weights,
      Dtype* output, bool skip_im2col = false);
  void forward_cpu_bias(Dtype* output, const Dtype* bias);
  void backward_cpu_gemm(const Dtype* input, const Dtype* weights,
      Dtype* output);
  void weight_cpu_gemm(const Dtype* input, const Dtype* output,
      Dtype* weights);
  void backward_cpu_bias(Dtype* bias, const Dtype* input);

#ifndef CPU_ONLY
  void forward_gpu_gemm(const Dtype* col_input, const Dtype* weights,
      Dtype* output, bool skip_im2col = false);
  void forward_gpu_bias(Dtype* output, const Dtype* bias);
  void backward_gpu_gemm(const Dtype* input, const Dtype* weights,
      Dtype* col_output);
  void weight_gpu_gemm(const Dtype* col_input, const Dtype* output,
      Dtype* weights);
  void backward_gpu_bias(Dtype* bias, const Dtype* input);
#endif

  /// @brief The i-th dimension of the input, counted from the channel axis.
  /// i=0 returns the number of channels; for 2D convolution, i=1 returns the
  /// height and i=2 the width.
  inline int input_shape(int i) {
    return (*bottom_shape_)[channel_axis_ + i];
  }
  // reverse_dimensions should return true iff we are implementing deconv, so
  // that conv helpers know which dimensions are which.
  virtual bool reverse_dimensions() = 0;
  // Compute height_out_ and width_out_ from other parameters.
  virtual void compute_output_shape() = 0;

  /// @brief The spatial dimensions of a filter kernel.
  /// kernel_shape_ = [kernel_h, kernel_w]
  Blob<int> kernel_shape_;
  /// @brief The spatial dimensions of the stride.
  /// stride_ = [stride_h, stride_w]
  Blob<int> stride_;
  /// @brief The spatial dimensions of the padding.
  /// pad_ = [pad_h, pad_w]
  Blob<int> pad_;
  /// @brief The spatial dimensions of the dilation.
  /// dilation_ = [dilation_h, dilation_w]
  Blob<int> dilation_;
  /// @brief The spatial dimensions of the convolution input.
  /// conv_input_shape_ = [channels, h, w]
  Blob<int> conv_input_shape_;
  /// @brief The dimensions of the col_buffer.
  /// col_buffer_shape_ = [kernel_dim_ * group_, out_h, out_w]
  /// where:
  ///   kernel_dim_ = (conv_in_channels_ / group_) * kernel_h * kernel_w
  ///   conv_out_spatial_dim_ = out_h * out_w
  vector<int> col_buffer_shape_;
  /// @brief The spatial dimensions of the output.
  vector<int> output_shape_;
  // The shape of the input blob.
  const vector<int>* bottom_shape_;
  // Number of spatial axes (2 for 2D convolution).
  int num_spatial_axes_;
  // bottom_dim_ = input_channels * input_h * input_w
  int bottom_dim_;
  // top_dim_ = output_channels * output_h * output_w
  int top_dim_;
  // The axis index of the "channels" dimension of the input.
  int channel_axis_;
  // Batch size.
  int num_;
  // Number of input channels.
  int channels_;
  // Number of groups for grouped convolution.
  int group_;
  // Spatial dimension of the output: out_spatial_dim_ = out_h * out_w
  int out_spatial_dim_;
  // Offset between the weight slices of consecutive groups:
  // weight_offset_ = conv_out_channels_ * kernel_dim_ / group_
  int weight_offset_;
  // Number of output channels.
  int num_output_;
  // Whether to use a bias term.
  bool bias_term_;
  // Whether this is a 1x1 convolution with stride 1 and no padding, in which
  // case im2col is the identity and can be skipped.
  bool is_1x1_;
  // Whether to force use of the general ND im2col implementation.
  bool force_nd_im2col_;

 private:
  // wrap im2col/col2im so we don't have to remember the (long) argument lists
  inline void conv_im2col_cpu(const Dtype* data, Dtype* col_buff) {
    if (!force_nd_im2col_ && num_spatial_axes_ == 2) {
      im2col_cpu(data, conv_in_channels_,
          conv_input_shape_.cpu_data()[1], conv_input_shape_.cpu_data()[2],
          kernel_shape_.cpu_data()[0], kernel_shape_.cpu_data()[1],
          pad_.cpu_data()[0], pad_.cpu_data()[1],
          stride_.cpu_data()[0], stride_.cpu_data()[1],
          dilation_.cpu_data()[0], dilation_.cpu_data()[1], col_buff);
    } else {
      im2col_nd_cpu(data, num_spatial_axes_, conv_input_shape_.cpu_data(),
          col_buffer_shape_.data(), kernel_shape_.cpu_data(),
          pad_.cpu_data(), stride_.cpu_data(), dilation_.cpu_data(), col_buff);
    }
  }
  inline void conv_col2im_cpu(const Dtype* col_buff, Dtype* data) {
    if (!force_nd_im2col_ && num_spatial_axes_ == 2) {
      col2im_cpu(col_buff, conv_in_channels_,
          conv_input_shape_.cpu_data()[1], conv_input_shape_.cpu_data()[2],
          kernel_shape_.cpu_data()[0], kernel_shape_.cpu_data()[1],
          pad_.cpu_data()[0], pad_.cpu_data()[1],
          stride_.cpu_data()[0], stride_.cpu_data()[1],
          dilation_.cpu_data()[0], dilation_.cpu_data()[1], data);
    } else {
      col2im_nd_cpu(col_buff, num_spatial_axes_, conv_input_shape_.cpu_data(),
          col_buffer_shape_.data(), kernel_shape_.cpu_data(),
          pad_.cpu_data(), stride_.cpu_data(), dilation_.cpu_data(), data);
    }
  }
#ifndef CPU_ONLY
  inline void conv_im2col_gpu(const Dtype* data, Dtype* col_buff) {
    if (!force_nd_im2col_ && num_spatial_axes_ == 2) {
      im2col_gpu(data, conv_in_channels_,
          conv_input_shape_.cpu_data()[1], conv_input_shape_.cpu_data()[2],
          kernel_shape_.cpu_data()[0], kernel_shape_.cpu_data()[1],
          pad_.cpu_data()[0], pad_.cpu_data()[1],
          stride_.cpu_data()[0], stride_.cpu_data()[1],
          dilation_.cpu_data()[0], dilation_.cpu_data()[1], col_buff);
    } else {
      im2col_nd_gpu(data, num_spatial_axes_, num_kernels_im2col_,
          conv_input_shape_.gpu_data(), col_buffer_.gpu_shape(),
          kernel_shape_.gpu_data(), pad_.gpu_data(),
          stride_.gpu_data(), dilation_.gpu_data(), col_buff);
    }
  }
  inline void conv_col2im_gpu(const Dtype* col_buff, Dtype* data) {
    if (!force_nd_im2col_ && num_spatial_axes_ == 2) {
      col2im_gpu(col_buff, conv_in_channels_,
          conv_input_shape_.cpu_data()[1], conv_input_shape_.cpu_data()[2],
          kernel_shape_.cpu_data()[0], kernel_shape_.cpu_data()[1],
          pad_.cpu_data()[0], pad_.cpu_data()[1],
          stride_.cpu_data()[0], stride_.cpu_data()[1],
          dilation_.cpu_data()[0], dilation_.cpu_data()[1], data);
    } else {
      col2im_nd_gpu(col_buff, num_spatial_axes_, num_kernels_col2im_,
          conv_input_shape_.gpu_data(), col_buffer_.gpu_shape(),
          kernel_shape_.gpu_data(), pad_.gpu_data(), stride_.gpu_data(),
          dilation_.gpu_data(), data);
    }
  }
#endif

  int num_kernels_im2col_;
  int num_kernels_col2im_;
  // Number of convolution output channels.
  int conv_out_channels_;
  // Number of convolution input channels.
  int conv_in_channels_;
  // conv_out_spatial_dim_ = out_h * out_w
  int conv_out_spatial_dim_;
  // kernel_dim_ = (conv_in_channels_ / group_) * kernel_h * kernel_w
  int kernel_dim_;
  // Per-group offsets into the column buffer and the output blob.
  int col_offset_;
  int output_offset_;
  // im2col buffer, holding one image at a time.
  Blob<Dtype> col_buffer_;
  // All-ones vector used to broadcast the bias across all spatial positions
  // with a single GEMM call.
  Blob<Dtype> bias_multiplier_;
};

}  // namespace caffe

#endif  // CAFFE_BASE_CONVOLUTION_LAYER_HPP_
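The two pure-virtual hooks are what distinguish convolution from deconvolution: reverse_dimensions() swaps the roles of input and output, and compute_output_shape() fills output_shape_. In the derived ConvolutionLayer the latter reduces to one line of arithmetic per spatial axis. A standalone sketch of that formula (the function name conv_out_dim is mine, not Caffe's):

// Output extent of one spatial axis, as ConvolutionLayer's
// compute_output_shape computes it: a dilated kernel spans
// dilation * (kernel - 1) + 1 input pixels.
inline int conv_out_dim(int input_dim, int kernel, int pad,
                        int stride, int dilation) {
  const int kernel_extent = dilation * (kernel - 1) + 1;
  return (input_dim + 2 * pad - kernel_extent) / stride + 1;
}
// Example: conv_out_dim(13, 3, /*pad=*/1, /*stride=*/1, /*dilation=*/1) == 13,
// so a 3x3 kernel with pad 1 and stride 1 preserves the spatial size.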

base_conv_layer.cpp

#include <algorithm>
#include <vector>

#include "caffe/filler.hpp"
#include "caffe/layers/base_conv_layer.hpp"
#include "caffe/util/im2col.hpp"
#include "caffe/util/math_functions.hpp"

namespace caffe {

template <typename Dtype>
void BaseConvolutionLayer<Dtype>::LayerSetUp(const vector<Blob<Dtype>*>& bottom,
      const vector<Blob<Dtype>*>& top) {
  // Configure the kernel size, padding, stride, and inputs.
  ConvolutionParameter conv_param = this->layer_param_.convolution_param();
  force_nd_im2col_ = conv_param.force_nd_im2col();
  channel_axis_ = bottom[0]->CanonicalAxisIndex(conv_param.axis());
  const int first_spatial_axis = channel_axis_ + 1;
  const int num_axes = bottom[0]->num_axes();
  num_spatial_axes_ = num_axes - first_spatial_axis;
  CHECK_GE(num_spatial_axes_, 0);
  vector<int> bottom_dim_blob_shape(1, num_spatial_axes_ + 1);
  vector<int> spatial_dim_blob_shape(1, std::max(num_spatial_axes_, 1));
  // Setup filter kernel dimensions (kernel_shape_).
  kernel_shape_.Reshape(spatial_dim_blob_shape);
  int* kernel_shape_data = kernel_shape_.mutable_cpu_data();
  if (conv_param.has_kernel_h() || conv_param.has_kernel_w()) {
    CHECK_EQ(num_spatial_axes_, 2)
        << "kernel_h & kernel_w can only be used for 2D convolution.";
    CHECK_EQ(0, conv_param.kernel_size_size())
        << "Either kernel_size or kernel_h/w should be specified; not both.";
    kernel_shape_data[0] = conv_param.kernel_h();
    kernel_shape_data[1] = conv_param.kernel_w();
  } else {
    const int num_kernel_dims = conv_param.kernel_size_size();
    CHECK(num_kernel_dims == 1 || num_kernel_dims == num_spatial_axes_)
        << "kernel_size must be specified once, or once per spatial dimension "
        << "(kernel_size specified " << num_kernel_dims << " times; "
        << num_spatial_axes_ << " spatial dims).";
    for (int i = 0; i < num_spatial_axes_; ++i) {
      kernel_shape_data[i] =
          conv_param.kernel_size((num_kernel_dims == 1) ? 0 : i);
    }
  }
  for (int i = 0; i < num_spatial_axes_; ++i) {
    CHECK_GT(kernel_shape_data[i], 0) << "Filter dimensions must be nonzero.";
  }
  // Setup stride dimensions (stride_).
  stride_.Reshape(spatial_dim_blob_shape);
  int* stride_data = stride_.mutable_cpu_data();
  if (conv_param.has_stride_h() || conv_param.has_stride_w()) {
    CHECK_EQ(num_spatial_axes_, 2)
        << "stride_h & stride_w can only be used for 2D convolution.";
    CHECK_EQ(0, conv_param.stride_size())
        << "Either stride or stride_h/w should be specified; not both.";
    stride_data[0] = conv_param.stride_h();
    stride_data[1] = conv_param.stride_w();
  } else {
    const int num_stride_dims = conv_param.stride_size();
    CHECK(num_stride_dims == 0 || num_stride_dims == 1 ||
          num_stride_dims == num_spatial_axes_)
        << "stride must be specified once, or once per spatial dimension "
        << "(stride specified " << num_stride_dims << " times; "
        << num_spatial_axes_ << " spatial dims).";
    const int kDefaultStride = 1;
    for (int i = 0; i < num_spatial_axes_; ++i) {
      stride_data[i] = (num_stride_dims == 0) ? kDefaultStride :
          conv_param.stride((num_stride_dims == 1) ? 0 : i);
      CHECK_GT(stride_data[i], 0) << "Stride dimensions must be nonzero.";
    }
  }
  // Setup pad dimensions (pad_).
  pad_.Reshape(spatial_dim_blob_shape);
  int* pad_data = pad_.mutable_cpu_data();
  if (conv_param.has_pad_h() || conv_param.has_pad_w()) {
    CHECK_EQ(num_spatial_axes_, 2)
        << "pad_h & pad_w can only be used for 2D convolution.";
    CHECK_EQ(0, conv_param.pad_size())
        << "Either pad or pad_h/w should be specified; not both.";
    pad_data[0] = conv_param.pad_h();
    pad_data[1] = conv_param.pad_w();
  } else {
    const int num_pad_dims = conv_param.pad_size();
    CHECK(num_pad_dims == 0 || num_pad_dims == 1 ||
          num_pad_dims == num_spatial_axes_)
        << "pad must be specified once, or once per spatial dimension "
        << "(pad specified " << num_pad_dims << " times; "
        << num_spatial_axes_ << " spatial dims).";
    const int kDefaultPad = 0;
    for (int i = 0; i < num_spatial_axes_; ++i) {
      pad_data[i] = (num_pad_dims == 0) ? kDefaultPad :
          conv_param.pad((num_pad_dims == 1) ? 0 : i);
    }
  }
  // Setup dilation dimensions (dilation_).
  dilation_.Reshape(spatial_dim_blob_shape);
  int* dilation_data = dilation_.mutable_cpu_data();
  const int num_dilation_dims = conv_param.dilation_size();
  CHECK(num_dilation_dims == 0 || num_dilation_dims == 1 ||
        num_dilation_dims == num_spatial_axes_)
      << "dilation must be specified once, or once per spatial dimension "
      << "(dilation specified " << num_dilation_dims << " times; "
      << num_spatial_axes_ << " spatial dims).";
  const int kDefaultDilation = 1;
  for (int i = 0; i < num_spatial_axes_; ++i) {
    dilation_data[i] = (num_dilation_dims == 0) ? kDefaultDilation :
                       conv_param.dilation((num_dilation_dims == 1) ? 0 : i);
  }
  // Special case: im2col is the identity for 1x1 convolution with stride 1
  // and no padding, so flag for skipping the buffer and transformation.
  is_1x1_ = true;
  for (int i = 0; i < num_spatial_axes_; ++i) {
    is_1x1_ &=
        kernel_shape_data[i] == 1 && stride_data[i] == 1 && pad_data[i] == 0;
    if (!is_1x1_) { break; }
  }
  // Configure output channels and groups.
  channels_ = bottom[0]->shape(channel_axis_);
  num_output_ = this->layer_param_.convolution_param().num_output();
  CHECK_GT(num_output_, 0);
  group_ = this->layer_param_.convolution_param().group();
  CHECK_EQ(channels_ % group_, 0);
  CHECK_EQ(num_output_ % group_, 0)
      << "Number of output should be multiples of group.";
  if (reverse_dimensions()) {
    conv_out_channels_ = channels_;
    conv_in_channels_ = num_output_;
  } else {
    conv_out_channels_ = num_output_;
    conv_in_channels_ = channels_;
  }
  // Handle the parameters: weights and biases.
  // - blobs_[0] holds the filter weights
  // - blobs_[1] holds the biases (optional)
  vector<int> weight_shape(2);
  weight_shape[0] = conv_out_channels_;
  weight_shape[1] = conv_in_channels_ / group_;
  for (int i = 0; i < num_spatial_axes_; ++i) {
    weight_shape.push_back(kernel_shape_data[i]);
  }
  bias_term_ = this->layer_param_.convolution_param().bias_term();
  vector<int> bias_shape(bias_term_, num_output_);
  if (this->blobs_.size() > 0) {
    CHECK_EQ(1 + bias_term_, this->blobs_.size())
        << "Incorrect number of weight blobs.";
    if (weight_shape != this->blobs_[0]->shape()) {
      Blob<Dtype> weight_shaped_blob(weight_shape);
      LOG(FATAL) << "Incorrect weight shape: expected shape "
          << weight_shaped_blob.shape_string() << "; instead, shape was "
          << this->blobs_[0]->shape_string();
    }
    if (bias_term_ && bias_shape != this->blobs_[1]->shape()) {
      Blob<Dtype> bias_shaped_blob(bias_shape);
      LOG(FATAL) << "Incorrect bias shape: expected shape "
          << bias_shaped_blob.shape_string() << "; instead, shape was "
          << this->blobs_[1]->shape_string();
    }
    LOG(INFO) << "Skipping parameter initialization";
  } else {
    if (bias_term_) {
      this->blobs_.resize(2);
    } else {
      this->blobs_.resize(1);
    }
    // Initialize and fill the weights:
    // output channels x input channels per-group x kernel height x kernel width
    this->blobs_[0].reset(new Blob<Dtype>(weight_shape));
    shared_ptr<Filler<Dtype> > weight_filler(GetFiller<Dtype>(
        this->layer_param_.convolution_param().weight_filler()));
    weight_filler->Fill(this->blobs_[0].get());
    // If necessary, initialize and fill the biases.
    if (bias_term_) {
      this->blobs_[1].reset(new Blob<Dtype>(bias_shape));
      shared_ptr<Filler<Dtype> > bias_filler(GetFiller<Dtype>(
          this->layer_param_.convolution_param().bias_filler()));
      bias_filler->Fill(this->blobs_[1].get());
    }
  }
  kernel_dim_ = this->blobs_[0]->count(1);
  weight_offset_ = conv_out_channels_ * kernel_dim_ / group_;
  // Propagate gradients to the parameters (as directed by backward pass).
  this->param_propagate_down_.resize(this->blobs_.size(), true);
}

template <typename Dtype>
void BaseConvolutionLayer<Dtype>::Reshape(const vector<Blob<Dtype>*>& bottom,
      const vector<Blob<Dtype>*>& top) {
  const int first_spatial_axis = channel_axis_ + 1;
  CHECK_EQ(bottom[0]->num_axes(), first_spatial_axis + num_spatial_axes_)
      << "bottom num_axes may not change.";
  num_ = bottom[0]->count(0, channel_axis_);
  CHECK_EQ(bottom[0]->shape(channel_axis_), channels_)
      << "Input size incompatible with convolution kernel.";
  // TODO: generalize to handle inputs of different shapes.
  for (int bottom_id = 1; bottom_id < bottom.size(); ++bottom_id) {
    CHECK(bottom[0]->shape() == bottom[bottom_id]->shape())
        << "All inputs must have the same shape.";
  }
  // Shape the tops.
  bottom_shape_ = &bottom[0]->shape();
  compute_output_shape();
  vector<int> top_shape(bottom[0]->shape().begin(),
      bottom[0]->shape().begin() + channel_axis_);
  top_shape.push_back(num_output_);
  for (int i = 0; i < num_spatial_axes_; ++i) {
    top_shape.push_back(output_shape_[i]);
  }
  for (int top_id = 0; top_id < top.size(); ++top_id) {
    top[top_id]->Reshape(top_shape);
  }
  if (reverse_dimensions()) {
    conv_out_spatial_dim_ = bottom[0]->count(first_spatial_axis);
  } else {
    conv_out_spatial_dim_ = top[0]->count(first_spatial_axis);
  }
  col_offset_ = kernel_dim_ * conv_out_spatial_dim_;
  output_offset_ = conv_out_channels_ * conv_out_spatial_dim_ / group_;
  // Setup input dimensions (conv_input_shape_).
  vector<int> bottom_dim_blob_shape(1, num_spatial_axes_ + 1);
  conv_input_shape_.Reshape(bottom_dim_blob_shape);
  int* conv_input_shape_data = conv_input_shape_.mutable_cpu_data();
  for (int i = 0; i < num_spatial_axes_ + 1; ++i) {
    if (reverse_dimensions()) {
      conv_input_shape_data[i] = top[0]->shape(channel_axis_ + i);
    } else {
      conv_input_shape_data[i] = bottom[0]->shape(channel_axis_ + i);
    }
  }
  // The im2col result buffer will only hold one image at a time to avoid
  // overly large memory usage. In the special case of 1x1 convolution
  // it goes lazily unused to save memory.
  col_buffer_shape_.clear();
  col_buffer_shape_.push_back(kernel_dim_ * group_);
  for (int i = 0; i < num_spatial_axes_; ++i) {
    if (reverse_dimensions()) {
      col_buffer_shape_.push_back(input_shape(i + 1));
    } else {
      col_buffer_shape_.push_back(output_shape_[i]);
    }
  }
  col_buffer_.Reshape(col_buffer_shape_);
  bottom_dim_ = bottom[0]->count(channel_axis_);
  top_dim_ = top[0]->count(channel_axis_);
  num_kernels_im2col_ = conv_in_channels_ * conv_out_spatial_dim_;
  num_kernels_col2im_ = reverse_dimensions() ? top_dim_ : bottom_dim_;
  // Set up the all ones "bias multiplier" for adding biases by BLAS
  out_spatial_dim_ = top[0]->count(first_spatial_axis);
  if (bias_term_) {
    vector<int> bias_multiplier_shape(1, out_spatial_dim_);
    bias_multiplier_.Reshape(bias_multiplier_shape);
    caffe_set(bias_multiplier_.count(), Dtype(1),
        bias_multiplier_.mutable_cpu_data());
  }
}

template <typename Dtype>
void BaseConvolutionLayer<Dtype>::forward_cpu_gemm(const Dtype* input,
    const Dtype* weights, Dtype* output, bool skip_im2col) {
  const Dtype* col_buff = input;
  if (!is_1x1_) {
    if (!skip_im2col) {
      conv_im2col_cpu(input, col_buffer_.mutable_cpu_data());
    }
    col_buff = col_buffer_.cpu_data();
  }
  for (int g = 0; g < group_; ++g) {
    caffe_cpu_gemm<Dtype>(CblasNoTrans, CblasNoTrans, conv_out_channels_ /
        group_, conv_out_spatial_dim_, kernel_dim_,
        (Dtype)1., weights + weight_offset_ * g, col_buff + col_offset_ * g,
        (Dtype)0., output + output_offset_ * g);
  }
}

template <typename Dtype>
void BaseConvolutionLayer<Dtype>::forward_cpu_bias(Dtype* output,
    const Dtype* bias) {
  caffe_cpu_gemm<Dtype>(CblasNoTrans, CblasNoTrans, num_output_,
      out_spatial_dim_, 1, (Dtype)1., bias, bias_multiplier_.cpu_data(),
      (Dtype)1., output);
}

template <typename Dtype>
void BaseConvolutionLayer<Dtype>::backward_cpu_gemm(const Dtype* output,
    const Dtype* weights, Dtype* input) {
  Dtype* col_buff = col_buffer_.mutable_cpu_data();
  if (is_1x1_) {
    col_buff = input;
  }
  for (int g = 0; g < group_; ++g) {
    caffe_cpu_gemm<Dtype>(CblasTrans, CblasNoTrans, kernel_dim_,
        conv_out_spatial_dim_, conv_out_channels_ / group_,
        (Dtype)1., weights + weight_offset_ * g, output + output_offset_ * g,
        (Dtype)0., col_buff + col_offset_ * g);
  }
  if (!is_1x1_) {
    conv_col2im_cpu(col_buff, input);
  }
}

template <typename Dtype>
void BaseConvolutionLayer<Dtype>::weight_cpu_gemm(const Dtype* input,
    const Dtype* output, Dtype* weights) {
  const Dtype* col_buff = input;
  if (!is_1x1_) {
    conv_im2col_cpu(input, col_buffer_.mutable_cpu_data());
    col_buff = col_buffer_.cpu_data();
  }
  for (int g = 0; g < group_; ++g) {
    caffe_cpu_gemm<Dtype>(CblasNoTrans, CblasTrans, conv_out_channels_ / group_,
        kernel_dim_, conv_out_spatial_dim_,
        (Dtype)1., output + output_offset_ * g, col_buff + col_offset_ * g,
        (Dtype)1., weights + weight_offset_ * g);
  }
}

template <typename Dtype>
void BaseConvolutionLayer<Dtype>::backward_cpu_bias(Dtype* bias,
    const Dtype* input) {
  caffe_cpu_gemv<Dtype>(CblasNoTrans, num_output_, out_spatial_dim_, 1.,
      input, bias_multiplier_.cpu_data(), 1., bias);
}

#ifndef CPU_ONLY

template <typename Dtype>
void BaseConvolutionLayer<Dtype>::forward_gpu_gemm(const Dtype* input,
    const Dtype* weights, Dtype* output, bool skip_im2col) {
  const Dtype* col_buff = input;
  if (!is_1x1_) {
    if (!skip_im2col) {
      conv_im2col_gpu(input, col_buffer_.mutable_gpu_data());
    }
    col_buff = col_buffer_.gpu_data();
  }
  for (int g = 0; g < group_; ++g) {
    caffe_gpu_gemm<Dtype>(CblasNoTrans, CblasNoTrans, conv_out_channels_ /
        group_, conv_out_spatial_dim_, kernel_dim_,
        (Dtype)1., weights + weight_offset_ * g, col_buff + col_offset_ * g,
        (Dtype)0., output + output_offset_ * g);
  }
}

template <typename Dtype>
void BaseConvolutionLayer<Dtype>::forward_gpu_bias(Dtype* output,
    const Dtype* bias) {
  caffe_gpu_gemm<Dtype>(CblasNoTrans, CblasNoTrans, num_output_,
      out_spatial_dim_, 1, (Dtype)1., bias, bias_multiplier_.gpu_data(),
      (Dtype)1., output);
}

template <typename Dtype>
void BaseConvolutionLayer<Dtype>::backward_gpu_gemm(const Dtype* output,
    const Dtype* weights, Dtype* input) {
  Dtype* col_buff = col_buffer_.mutable_gpu_data();
  if (is_1x1_) {
    col_buff = input;
  }
  for (int g = 0; g < group_; ++g) {
    caffe_gpu_gemm<Dtype>(CblasTrans, CblasNoTrans, kernel_dim_,
        conv_out_spatial_dim_, conv_out_channels_ / group_,
        (Dtype)1., weights + weight_offset_ * g, output + output_offset_ * g,
        (Dtype)0., col_buff + col_offset_ * g);
  }
  if (!is_1x1_) {
    conv_col2im_gpu(col_buff, input);
  }
}

template <typename Dtype>
void BaseConvolutionLayer<Dtype>::weight_gpu_gemm(const Dtype* input,
    const Dtype* output, Dtype* weights) {
  const Dtype* col_buff = input;
  if (!is_1x1_) {
    conv_im2col_gpu(input, col_buffer_.mutable_gpu_data());
    col_buff = col_buffer_.gpu_data();
  }
  for (int g = 0; g < group_; ++g) {
    caffe_gpu_gemm<Dtype>(CblasNoTrans, CblasTrans, conv_out_channels_ / group_,
        kernel_dim_, conv_out_spatial_dim_,
        (Dtype)1., output + output_offset_ * g, col_buff + col_offset_ * g,
        (Dtype)1., weights + weight_offset_ * g);
  }
}

template <typename Dtype>
void BaseConvolutionLayer<Dtype>::backward_gpu_bias(Dtype* bias,
    const Dtype* input) {
  caffe_gpu_gemv<Dtype>(CblasNoTrans, num_output_, out_spatial_dim_, 1.,
      input, bias_multiplier_.gpu_data(), 1., bias);
}

#endif  // !CPU_ONLY

INSTANTIATE_CLASS(BaseConvolutionLayer);

}  // namespace caffe
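All four gemm helpers walk the groups with the same three offsets, so the shapes are worth checking once by hand. In forward_cpu_gemm, each group multiplies an M x K weight slice by a K x N slice of the column buffer into an M x N slice of the output, where M = conv_out_channels_ / group_, K = kernel_dim_, and N = conv_out_spatial_dim_. Below is a self-contained sanity check of those offset formulas with made-up sizes (256 -> 384 channels, 3x3 kernel, 13x13 output, 2 groups; these numbers are illustrative and not tied to any particular network):

#include <cassert>

int main() {
  // Hypothetical layer configuration.
  const int conv_in_channels = 256, conv_out_channels = 384, group = 2;
  const int kernel_h = 3, kernel_w = 3, out_h = 13, out_w = 13;

  // The three GEMM dimensions per group.
  const int kernel_dim = (conv_in_channels / group) * kernel_h * kernel_w;  // K = 1152
  const int conv_out_spatial_dim = out_h * out_w;                           // N = 169
  const int M = conv_out_channels / group;                                  // M = 192

  // Offsets used to step from group g to group g+1, mirroring the layer:
  const int weight_offset = M * kernel_dim;                  // within blobs_[0]
  const int col_offset = kernel_dim * conv_out_spatial_dim;  // within col_buffer_
  const int output_offset = M * conv_out_spatial_dim;        // within the top blob

  // The full column buffer holds group * K rows for one image at a time.
  assert(kernel_dim * group == conv_in_channels * kernel_h * kernel_w);
  assert(weight_offset == 192 * 1152);
  assert(col_offset == 1152 * 169);
  assert(output_offset == 192 * 169);
  return 0;
}

The backward helpers reuse the exact same offsets, only transposing one GEMM operand: backward_cpu_gemm computes a (K x M) x (M x N) product to rebuild the column buffer, and weight_cpu_gemm computes an (M x N) x (N x K) product to accumulate the weight gradient, which is why a single set of offsets suffices for forward and backward.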

For a detailed walkthrough, see:

Caffe code reading 10: implementation details of convolution in Caffe (covering BaseConvolutionLayer, ConvolutionLayer, im2col, etc.), 2016-04-03 (in Chinese)
