caffe的concat层

来源：互联网发布：淘宝天猫培训编辑：程序博客网时间：2024/06/05 11:24

Concat layer

在Deep Neural Network中，最主要的两种提高模型性能的优化方向就是使模型wider or deeper。
在使模型变宽时，常需要把多个分支合并起来作为后续层的输入。完成这一合并操作的，就是今天要介绍的concat layer。

按照惯例，我们先来看下concat layer的参数。

message ConcatParameter {
  // The axis along which to concatenate -- may be negative to index from the
  // end (e.g., -1 for the last axis).  Other axes must have the
  // same dimension for all the bottom blobs.
  // By default, ConcatLayer concatenates blobs along the "channels" axis (1).
  // In Caffe, blobs are usually laid out as NxCxHxW, so the default axis
  // concatenates along the channel dimension.
  optional int32 axis = 2 [default = 1];

  // DEPRECATED: alias for "axis" -- does not support negative indexing.
  optional uint32 concat_dim = 1 [default = 1];
}

concat作为连接多个输入的工具层，其参数很少，只有一个用于指定沿哪个维度（例如N维度或C维度）进行连接的参数。该层通常有两个或更多的输入，即bottom的size >= 2（从下文的实现可以看到，只有一个输入时前向和后向都直接返回，不做任何拷贝）。假设有k个形状相同的输入，则输入与输出的形状如下所示：

$$
\begin{aligned}
x_1 &:= N \times C \times H \times W \\
x_2 &:= N \times C \times H \times W \\
&\;\;\vdots \\
x_k &:= N \times C \times H \times W \\
\text{output} &:= kN \times C \times H \times W \quad \text{or} \quad \text{output} := N \times kC \times H \times W
\end{aligned}
$$

至此，我们大致了解了concat层怎么用了。接下来，我们介绍一下它的实现。

前向传播时，实现比较简单。

template <typename Dtype>void ConcatLayer<Dtype>::Forward_cpu(const vector<Blob<Dtype>*>& bottom,      const vector<Blob<Dtype>*>& top) {  if (bottom.size() == 1) { return; }     \\如果只有一个输入，不执行操作  Dtype* top_data = top[0]->mutable_cpu_data();  int offset_concat_axis = 0;  const int top_concat_axis = top[0]->shape(concat_axis_);  for (int i = 0; i < bottom.size(); ++i) {    const Dtype* bottom_data = bottom[i]->cpu_data();  \\第i个输入的读指针    const int bottom_concat_axis = bottom[i]->shape(concat_axis_);    for (int n = 0; n < num_concats_; ++n) {      caffe_copy(bottom_concat_axis * concat_input_size_,          bottom_data + n * bottom_concat_axis * concat_input_size_,          top_data + (n * top_concat_axis + offset_concat_axis)              * concat_input_size_);   \\把所有输入根据指定的axis连接起来    }    offset_concat_axis += bottom_concat_axis;  }

单看主要函数显然有些不清不楚，接下来我们看看layersetup和reshape就能明白它具体是怎么做的了。

template <typename Dtype>void ConcatLayer<Dtype>::LayerSetUp(const vector<Blob<Dtype>*>& bottom,      const vector<Blob<Dtype>*>& top) {  const ConcatParameter& concat_param = this->layer_param_.concat_param();  \\获取concat参数，即axis或者concat_dim，不能同时指定。  CHECK(!(concat_param.has_axis() && concat_param.has_concat_dim()))      << "Either axis or concat_dim should be specified; not both.";}template <typename Dtype>void ConcatLayer<Dtype>::Reshape(const vector<Blob<Dtype>*>& bottom,      const vector<Blob<Dtype>*>& top) {  const int num_axes = bottom[0]->num_axes();                               \\获取输入维度数  const ConcatParameter& concat_param = this->layer_param_.concat_param();  if (concat_param.has_concat_dim()) {                                      \\如果指定concat_dim,判断是否非负    concat_axis_ = static_cast<int>(concat_param.concat_dim());    // Don't allow negative indexing for concat_dim, a uint32 -- almost    // certainly unintended.    CHECK_GE(concat_axis_, 0) << "casting concat_dim from uint32 to int32 "        << "produced negative result; concat_dim must satisfy "        << "0 <= concat_dim < " << kMaxBlobAxes;    CHECK_LT(concat_axis_, num_axes) << "concat_dim out of range.";         \\concat_dim不能超过输入的维度数  } else {    concat_axis_ = bottom[0]->CanonicalAxisIndex(concat_param.axis());      \\指定了axis，转换成非负索引得到concat_axis  }  // Initialize with the first blob.  
vector<int> top_shape = bottom[0]->shape();                               \\初始化输出，shape与输入一致  num_concats_ = bottom[0]->count(0, concat_axis_);                         \\需要concat的个数，  concat_input_size_ = bottom[0]->count(concat_axis_ + 1);                  \\每个concat的数据量大小  int bottom_count_sum = bottom[0]->count();                                \\输入总的特征值个数，初始时只有第一个输入的个数  for (int i = 1; i < bottom.size(); ++i) {                                 \\    CHECK_EQ(num_axes, bottom[i]->num_axes())                               \\判断每个输入维度是否一致        << "All inputs must have the same #axes.";    for (int j = 0; j < num_axes; ++j) {                                    \\除了进行concat的那个维度外，其他维度的大小是否保持一致      if (j == concat_axis_) { continue; }      CHECK_EQ(top_shape[j], bottom[i]->shape(j))          << "All inputs must have the same shape, except at concat_axis.";    }    bottom_count_sum += bottom[i]->count();                                 \\累加第i个输入的个数    top_shape[concat_axis_] += bottom[i]->shape(concat_axis_);              \\累加输出的指定axis的值  }  top[0]->Reshape(top_shape);                                               \\reshape输出blob  CHECK_EQ(bottom_count_sum, top[0]->count());                              \\检查bottom_count_sum和top_count的数据量是否一致  if (bottom.size() == 1) {    top[0]->ShareData(*bottom[0]);                                          \\只有一个输入，直接复制成输出    top[0]->ShareDiff(*bottom[0]);                                          \\梯度shape也和输入一致  }

源码解析到这里，基本上就明白concat层的原理了，最后我们来看下它的后向传播。其原理十分简单，把输出求得的梯度直接复制给对应的输入即可。

/**
 * CPU backward pass of the concat layer.
 *
 * Copies the matching region of top[0]'s diff into the diff of each bottom
 * blob for which propagate_down is set.
 *
 * @param top            output blobs; top[0]'s diff is read
 * @param propagate_down per-input flag; inputs with a false flag are skipped
 * @param bottom         input blobs whose diffs are written
 */
template <typename Dtype>
void ConcatLayer<Dtype>::Backward_cpu(const vector<Blob<Dtype>*>& top,
      const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom) {
  // With a single input no copy is performed.
  if (bottom.size() == 1) { return; }
  const Dtype* top_diff = top[0]->cpu_diff();
  // Running position along the concat axis of the output where the region
  // belonging to the next input starts.
  int offset_concat_axis = 0;
  const int top_concat_axis = top[0]->shape(concat_axis_);
  for (int i = 0; i < bottom.size(); ++i) {
    const int bottom_concat_axis = bottom[i]->shape(concat_axis_);
    if (propagate_down[i]) {
      // Copy the output gradient straight into the corresponding input.
      Dtype* bottom_diff = bottom[i]->mutable_cpu_diff();
      for (int n = 0; n < num_concats_; ++n) {
        caffe_copy(bottom_concat_axis * concat_input_size_, top_diff +
            (n * top_concat_axis + offset_concat_axis) * concat_input_size_,
            bottom_diff + n * bottom_concat_axis * concat_input_size_);
      }
    }
    // Advance the offset even for skipped inputs so that later inputs read
    // from the right region of the output diff.
    offset_concat_axis += bottom_concat_axis;
  }
}

阅读全文

0 0