- Inner_Product Layer.hpp
先看Inner_Product Layer.hpp:
template <typename Dtype>
class InnerProductLayer : public Layer<Dtype> {
public:
explicit InnerProductLayer(const LayerParameter& param)
: Layer<Dtype>(param) {}
virtual void LayerSetUp(const vector<Blob<Dtype>*>& bottom,
const vector<Blob<Dtype>*>& top);
virtual void Reshape(const vector<Blob<Dtype>*>& bottom,
const vector<Blob<Dtype>*>& top);
很明显,内积层(InnerProductLayer)继承自Layer。LayerSetUp负责层的建立(初始化),接收bottom和top两个参数;Reshape根据bottom的形状来设置top的形状,在LayerSetUp之后会被调用,所以每个层在实现LayerSetUp的同时也必须实现Reshape。
virtual inline const char* type() const { return "InnerProduct"; } virtual inline int ExactNumBottomBlobs() const { return 1; } virtual inline int ExactNumTopBlobs() const { return 1; }
这段代码给出了层的类型名("InnerProduct"),并规定了bottom Blob和top Blob的个数:这里是全连接层,所以bottom和top的Blob都恰好只有一个。
protected: virtual void Forward_cpu(const vector<Blob<Dtype>*>& bottom, const vector<Blob<Dtype>*>& top); virtual void Forward_gpu(const vector<Blob<Dtype>*>& bottom, const vector<Blob<Dtype>*>& top); virtual void Backward_cpu(const vector<Blob<Dtype>*>& top, const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom); virtual void Backward_gpu(const vector<Blob<Dtype>*>& top, const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom);
这部分声明了前向传播(Forward)和反向传播(Backward)的CPU、GPU两个版本,具体实现见Inner_product Layer.cpp。最后是成员变量:
int M_; int K_; int N_; bool bias_term_; Blob<Dtype> bias_multiplier_; bool transpose_;
这里定义了各个成员变量:M_是样本个数(即mini-batch的大小),K_是单个样本从axis开始展平后的输入维数,N_是输出维数num_output;bias_term_表示是否有bias项;bias_multiplier_暂时不说,后面解释;transpose_表示权重矩阵w是否以转置的形式(K_×N_而不是N_×K_)存储。到此Inner_product层提供了哪些接口已经说明,具体的实现则需要去Inner_product Layer.cpp中一探究竟。
- Inner_Product Layer.cpp
const int num_output = this->layer_param_.inner_product_param().num_output() bias_term_ = this->layer_param_.inner_product_param().bias_term() transpose_ = this->layer_param_.inner_product_param().transpose()
这里从层参数中读取了输出维数num_output(即N_)、是否使用bias(bias_term_)以及权重是否转置(transpose_)。
N_ = num_output; const int axis = bottom[0]->CanonicalAxisIndex( this->layer_param_.inner_product_param().axis()); // Dimensions starting from "axis" are "flattened" into a single // length K_ vector. For example, if bottom[0]'s shape is (N, C, H, W), // and axis == 1, N inner products with dimension CHW are performed. K_ = bottom[0]->count(axis);
这部分写的很清楚了,注释里有,就不解释了。
接下来再看下面的代码:
if (this->blobs_.size() > 0) { LOG(INFO) << "Skipping parameter initialization"; } else { if (bias_term_) { this->blobs_.resize(2); } else { this->blobs_.resize(1); } vector<int> weight_shape(2); if (transpose_) { weight_shape[0] = K_; weight_shape[1] = N_; } else { weight_shape[0] = N_; weight_shape[1] = K_; }
上面的代码片段其实是在为w和b的初始化做准备。初始化有两种形式,一种是随机初始化,一种是利用现有保存的model进行初始化:如果blobs_里已经有内容,就跳过初始化("Skipping parameter initialization");否则根据是否有bias项把blobs_的大小设为2或1,并根据transpose_决定权重的形状(K_×N_或N_×K_)。
this->blobs_[0].reset(new Blob<Dtype>(weight_shape)); shared_ptr<Filler<Dtype> > weight_filler(GetFiller<Dtype>( this->layer_param_.inner_product_param().weight_filler())); weight_filler->Fill(this->blobs_[0].get());
先按weight_shape给权重Blob分配内存,然后通过GetFiller获取层参数中指定的filler(weight_filler),再用这个filler去填充(Fill)权重Blob。bias的填充代码同理,就不贴了。
} this->param_propagate_down_.resize(this->blobs_.size(), true);}
这里把param_propagate_down_的大小设为参数Blob(blobs_)的个数,新增的元素都置为true,也就是标记每个参数Blob在后向传播时都需要计算梯度。
/**
 * Recomputes the shape-dependent state of the layer for the current input.
 *
 * Verifies that the flattened input length still equals K_, updates M_,
 * reshapes top[0] to the bottom's leading axes followed by one axis of size
 * N_, and, when a bias term is used, rebuilds bias_multiplier_ as a vector of
 * M_ ones.
 *
 * @param bottom input blobs; only bottom[0] is read.
 * @param top    output blobs; only top[0] is reshaped.
 */
template <typename Dtype>
void InnerProductLayer<Dtype>::Reshape(const vector<Blob<Dtype>*>& bottom,
                                       const vector<Blob<Dtype>*>& top) {
  Blob<Dtype>* const input = bottom[0];
  const int axis = input->CanonicalAxisIndex(
      this->layer_param_.inner_product_param().axis());

  // The weights were sized from K_ during setup, so the flattened input
  // length (all dimensions from `axis` onwards) is not allowed to change.
  // NOTE: the local keeps the name new_K because CHECK_EQ reports its
  // operand names in the failure message.
  const int new_K = input->count(axis);
  CHECK_EQ(K_, new_K)
      << "Input size incompatible with inner product parameters.";

  // All dimensions in front of `axis` together form the number of rows.
  M_ = input->count(0, axis);

  // Output shape: the leading axes of the input, then a single axis of N_.
  vector<int> output_shape = input->shape();
  output_shape.resize(axis + 1);
  output_shape[axis] = N_;
  top[0]->Reshape(output_shape);

  if (!bias_term_) {
    return;
  }

  // Length-M_ vector filled with ones, used by the bias computations.
  bias_multiplier_.Reshape(vector<int>(1, M_));
  caffe_set(M_, Dtype(1), bias_multiplier_.mutable_cpu_data());
}
代码分析注释在源码里了。下面看前向传播和后向传播:
// NOTE(review): the matrix comments below assume caffe_cpu_gemm and
// caffe_cpu_gemv follow the BLAS convention
//   C = alpha * op(A) * op(B) + beta * C   (gemm, sizes given as M, N, K)
//   y = alpha * op(A) * x + beta * y       (gemv)
// -- confirm against Caffe's math_functions.

// CPU forward pass: computes top[0] from bottom[0], the weight blob
// (blobs_[0]) and, when bias_term_ is set, the bias blob (blobs_[1]).
template <typename Dtype>
void InnerProductLayer<Dtype>::Forward_cpu(const vector<Blob<Dtype>*>& bottom,
    const vector<Blob<Dtype>*>& top) {
  const Dtype* bottom_data = bottom[0]->cpu_data();
  Dtype* top_data = top[0]->mutable_cpu_data();
  const Dtype* weight = this->blobs_[0]->cpu_data();
  // top (M_ x N_) = bottom (M_ x K_) * W. The weight is used transposed
  // unless transpose_ is set, matching the N_ x K_ vs. K_ x N_ weight shapes
  // chosen at setup. beta = 0, so top is overwritten.
  caffe_cpu_gemm<Dtype>(CblasNoTrans, transpose_ ? CblasNoTrans : CblasTrans,
      M_, N_, K_, (Dtype)1.,
      bottom_data, weight, (Dtype)0., top_data);
  if (bias_term_) {
    // Inner dimension is 1: bias_multiplier_ (M_ ones, set in Reshape) times
    // the bias blob, added onto top (beta = 1), i.e. the bias is added to
    // every one of the M_ rows.
    caffe_cpu_gemm<Dtype>(CblasNoTrans, CblasNoTrans, M_, N_, 1, (Dtype)1.,
        bias_multiplier_.cpu_data(),
        this->blobs_[1]->cpu_data(), (Dtype)1., top_data);
  }
}

// CPU backward pass. Three independent parts, each guarded by its own flag:
// the gradient w.r.t. the weights, the gradient w.r.t. the bias, and the
// gradient w.r.t. the bottom data.
template <typename Dtype>
void InnerProductLayer<Dtype>::Backward_cpu(const vector<Blob<Dtype>*>& top,
    const vector<bool>& propagate_down,
    const vector<Blob<Dtype>*>& bottom) {
  if (this->param_propagate_down_[0]) {
    const Dtype* top_diff = top[0]->cpu_diff();
    const Dtype* bottom_data = bottom[0]->cpu_data();
    // Gradient with respect to weight. beta = 1, so the result is added to
    // whatever is already stored in the weight diff.
    if (transpose_) {
      // Weight diff is K_ x N_: bottom^T (K_ x M_) * top_diff (M_ x N_).
      caffe_cpu_gemm<Dtype>(CblasTrans, CblasNoTrans,
          K_, N_, M_,
          (Dtype)1., bottom_data, top_diff,
          (Dtype)1., this->blobs_[0]->mutable_cpu_diff());
    } else {
      // Weight diff is N_ x K_: top_diff^T (N_ x M_) * bottom (M_ x K_).
      caffe_cpu_gemm<Dtype>(CblasTrans, CblasNoTrans,
          N_, K_, M_,
          (Dtype)1., top_diff, bottom_data,
          (Dtype)1., this->blobs_[0]->mutable_cpu_diff());
    }
  }
  if (bias_term_ && this->param_propagate_down_[1]) {
    const Dtype* top_diff = top[0]->cpu_diff();
    // Gradient with respect to bias: top_diff^T times the vector of ones,
    // i.e. top_diff summed over its M_ rows; added to the bias diff (beta = 1).
    caffe_cpu_gemv<Dtype>(CblasTrans, M_, N_, (Dtype)1., top_diff,
        bias_multiplier_.cpu_data(), (Dtype)1.,
        this->blobs_[1]->mutable_cpu_diff());
  }
  if (propagate_down[0]) {
    const Dtype* top_diff = top[0]->cpu_diff();
    // Gradient with respect to bottom data (M_ x K_). beta = 0, so the
    // bottom diff is overwritten rather than accumulated.
    if (transpose_) {
      // Weight stored as K_ x N_: top_diff (M_ x N_) * W^T (N_ x K_).
      caffe_cpu_gemm<Dtype>(CblasNoTrans, CblasTrans,
          M_, K_, N_,
          (Dtype)1., top_diff, this->blobs_[0]->cpu_data(),
          (Dtype)0., bottom[0]->mutable_cpu_diff());
    } else {
      // Weight stored as N_ x K_: top_diff (M_ x N_) * W (N_ x K_).
      caffe_cpu_gemm<Dtype>(CblasNoTrans, CblasNoTrans,
          M_, K_, N_,
          (Dtype)1., top_diff, this->blobs_[0]->cpu_data(),
          (Dtype)0., bottom[0]->mutable_cpu_diff());
    }
  }
}
backwards里的代码实现了三个东西,第一对w的导数,第二对b的导数,第三求求前一层delta。