Caffe source code: inner_product_layer.cpp


@brief: Backpropagation through Caffe's fully connected layers, ip2 -> ip1 (the ReLULayer is not covered here)


Take backpropagation through LeNet's fully connected layers, ip2 -> ip1, as the example: there are 500 input neurons, 10 output neurons, and the batch size is 64, so the weight matrix to be updated is 10 x 500 (N_ x K_ in Caffe's default non-transposed layout).
For each output neuron, $y_i = \sum_j w_{ij} x_j + \text{bias\_multiplier} \cdot b_i$, so $\partial y_i / \partial w_{ij} = x_j$. The $b_i$ are the bias terms, 10 in total, and $\partial y_i / \partial b_i = \text{bias\_multiplier} = 1$ (Caffe sets the bias multiplier to 1).
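To make the indexing concrete, here is a minimal standalone sketch of the forward computation for a single sample. This is not Caffe code; the function name and plain-array layout are made up for illustration, with W stored row-major as N x K:

#include <vector>

// Hypothetical sketch: forward pass for one sample.
// x: K inputs, W: N x K weights (row-major), b: N biases.
std::vector<float> inner_product_forward(const std::vector<float>& x,
                                         const std::vector<float>& W,
                                         const std::vector<float>& b,
                                         int N, int K) {
  std::vector<float> y(N);
  for (int i = 0; i < N; ++i) {
    float acc = b[i];  // bias multiplier is 1, so the bias enters unscaled
    for (int j = 0; j < K; ++j) {
      acc += W[i * K + j] * x[j];  // y_i = sum_j w_ij * x_j + b_i
    }
    y[i] = acc;
  }
  return y;
}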
Core code for the gradient of the weight matrix:

// this->blobs_[0]->mutable_cpu_diff() = 1.0 * top_diff^T * bottom_data
//                                     + 1.0 * this->blobs_[0]->mutable_cpu_diff()
// top_diff is M_ x N_ (CblasTrans feeds it into the product as N_ x M_),
// bottom_data is M_ x K_, this->blobs_[0]->mutable_cpu_diff() is N_ x K_.
// caffe_cpu_gemm computes C = alpha * op(A) * op(B) + beta * C.
caffe_cpu_gemm<Dtype>(CblasTrans, CblasNoTrans,
    N_, K_, M_,
    (Dtype)1., top_diff, bottom_data,
    (Dtype)1., this->blobs_[0]->mutable_cpu_diff());

Here top_diff holds the incoming gradients of the outputs $y_i$, $i \in \{1, \dots, 10\}$, stored as a $64 \times 10$ matrix (fed into the GEMM transposed, as $10 \times 64$); bottom_data holds the inputs $x_d$, $d \in \{1, \dots, 500\}$, a $64 \times 500$ matrix; and this->blobs_[0]->mutable_cpu_diff() is the $10 \times 500$ gradient matrix of $W$ to be updated.
In matrix form, the whole batch is handled in one operation, computing the gradient and accumulating it into the weight diff:

this->blobs_[0]->mutable_cpu_diff() = 1.0 * top_diff^T * bottom_data + 1.0 * this->blobs_[0]->mutable_cpu_diff()
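As a cross-check, the GEMM above is equivalent to the following naive triple loop (a hypothetical standalone sketch, not the Caffe implementation), which accumulates, for every sample, the outer product of its top gradient and its input:

// Naive-loop equivalent of the weight-gradient GEMM (hypothetical sketch).
// top_diff: M x N (row-major), bottom_data: M x K, weight_diff: N x K.
void weight_gradient(const float* top_diff, const float* bottom_data,
                     float* weight_diff, int M, int N, int K) {
  for (int m = 0; m < M; ++m) {      // sum over the batch
    for (int i = 0; i < N; ++i) {    // output neuron
      for (int j = 0; j < K; ++j) {  // input neuron
        // dL/dw_ij += dL/dy_i (sample m) * x_j (sample m)
        weight_diff[i * K + j] += top_diff[m * N + i] * bottom_data[m * K + j];
      }
    }
  }
}

The beta = 1 in the GEMM corresponds to the += here: the weight diff accumulates rather than being overwritten.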

Core code for the gradient of the bias term:

if (bias_term_ && this->param_propagate_down_[1]) {
  const Dtype* top_diff = top[0]->cpu_diff();
  // Gradient with respect to bias:
  // this->blobs_[1]->mutable_cpu_diff() (length N_) =
  //     alpha * top_diff^T (N_ x M_) * bias_multiplier_ (length M_)
  //   + beta * this->blobs_[1]->mutable_cpu_diff() (length N_)
  caffe_cpu_gemv<Dtype>(CblasTrans, M_, N_, (Dtype)1., top_diff,
      bias_multiplier_.cpu_data(), (Dtype)1.,
      this->blobs_[1]->mutable_cpu_diff());
}
// The bias term is learned; its multiplier is set to 1, one multiplier per sample.
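Because the multiplier vector is all ones, the GEMV just sums top_diff over the batch for each output neuron; a naive equivalent (hypothetical sketch):

// Naive-loop equivalent of the bias-gradient GEMV (hypothetical sketch).
// top_diff: M x N (row-major), bias_diff: length N.
void bias_gradient(const float* top_diff, float* bias_diff, int M, int N) {
  for (int m = 0; m < M; ++m) {
    for (int i = 0; i < N; ++i) {
      // dL/db_i += dL/dy_i (sample m) * 1  (the bias multiplier is 1)
      bias_diff[i] += top_diff[m * N + i];
    }
  }
}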


Core code for the gradient of the input:
By the chain rule, each input $x_j$ contributes to every output $y_i$, so the incoming gradients of all the $y_i$ must be propagated back onto $x_j$: $\partial L / \partial x_j = \sum_i (\partial L / \partial y_i) \, w_{ij}$.

if (propagate_down[0]) {
  const Dtype* top_diff = top[0]->cpu_diff();
  // Gradient with respect to bottom data
  if (transpose_) {
    caffe_cpu_gemm<Dtype>(CblasNoTrans, CblasTrans,
        M_, K_, N_,
        (Dtype)1., top_diff, this->blobs_[0]->cpu_data(),
        (Dtype)0., bottom[0]->mutable_cpu_diff());
  } else {
    caffe_cpu_gemm<Dtype>(CblasNoTrans, CblasNoTrans,
        M_, K_, N_,
        (Dtype)1., top_diff, this->blobs_[0]->cpu_data(),
        (Dtype)0., bottom[0]->mutable_cpu_diff());
  }
}
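The non-transposed branch is equivalent to this naive loop (hypothetical sketch): each sample's bottom-gradient row is its top-gradient row times the weight matrix, and beta = 0 means the result overwrites bottom_diff instead of accumulating:

// Naive-loop equivalent of the bottom-gradient GEMM, non-transposed branch
// (hypothetical sketch). top_diff: M x N, weight: N x K, bottom_diff: M x K.
void bottom_gradient(const float* top_diff, const float* weight,
                     float* bottom_diff, int M, int N, int K) {
  for (int m = 0; m < M; ++m) {
    for (int j = 0; j < K; ++j) {
      float acc = 0.f;  // beta = 0: overwrite rather than accumulate
      for (int i = 0; i < N; ++i) {
        acc += top_diff[m * N + i] * weight[i * K + j];  // dL/dx_j = sum_i dL/dy_i * w_ij
      }
      bottom_diff[m * K + j] = acc;
    }
  }
}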


Full annotated source of inner_product_layer.cpp:

#include <vector>

#include "caffe/filler.hpp"
#include "caffe/layers/inner_product_layer.hpp"
#include "caffe/util/math_functions.hpp"

namespace caffe {

template <typename Dtype>
void InnerProductLayer<Dtype>::LayerSetUp(const vector<Blob<Dtype>*>& bottom,
      const vector<Blob<Dtype>*>& top) {
  // Number of inner product outputs.
  const int num_output = this->layer_param_.inner_product_param().num_output();
  bias_term_ = this->layer_param_.inner_product_param().bias_term();  // defaults to true
  transpose_ = this->layer_param_.inner_product_param().transpose();  // defaults to false
  N_ = num_output;  // number of output neurons of this fully connected layer
  const int axis = bottom[0]->CanonicalAxisIndex(
      this->layer_param_.inner_product_param().axis());
  // Dimensions starting from "axis" are "flattened" into a single
  // length K_ vector. For example, if bottom[0]'s shape is (N, C, H, W),
  // and axis == 1, N inner products with dimension CHW are performed.
  K_ = bottom[0]->count(axis);
  // conv -> FC: the dimensions from "axis" onward are flattened into one
  //             vector, e.g. for ip1 the bottom is 64*50*4*4, so K_ = 50*4*4 = 800.
  // FC -> FC:   K_ is simply the number of input neurons.
  // Check if we need to set up the weights
  if (this->blobs_.size() > 0) {
    LOG(INFO) << "Skipping parameter initialization";
  } else {
    if (bias_term_) {
      this->blobs_.resize(2);
    } else {
      this->blobs_.resize(1);
    }
    // Initialize the weights
    vector<int> weight_shape(2);
    if (transpose_) {
      weight_shape[0] = K_;
      weight_shape[1] = N_;
    } else {
      weight_shape[0] = N_;
      weight_shape[1] = K_;
    }
    this->blobs_[0].reset(new Blob<Dtype>(weight_shape));
    // fill the weights
    shared_ptr<Filler<Dtype> > weight_filler(GetFiller<Dtype>(
        this->layer_param_.inner_product_param().weight_filler()));
    weight_filler->Fill(this->blobs_[0].get());  // initialize the FC weight matrix
    // debug info
    /*Blob<Dtype>* blobip = this->blobs_[0].get();
    Dtype* pointerip = blobip->mutable_cpu_data();
    for (int i = 0; i < 400000; i++)
        std::cout << "The num at " << i << " is " << *(pointerip + i);*/
    // If necessary, initialize and fill the bias term
    if (bias_term_) {
      vector<int> bias_shape(1, N_);
      this->blobs_[1].reset(new Blob<Dtype>(bias_shape));
      shared_ptr<Filler<Dtype> > bias_filler(GetFiller<Dtype>(
          this->layer_param_.inner_product_param().bias_filler()));
      bias_filler->Fill(this->blobs_[1].get());  // initialize the FC bias
      // debug info
      /*Blob<Dtype>* blobip = this->blobs_[1].get();
      Dtype* pointerip = blobip->mutable_cpu_data();
      for (int i = 0; i < blobip->count(); i++)
        std::cout << "The num at " << i << " is " << *(pointerip + i);*/
    }
  }  // parameter initialization
  this->param_propagate_down_.resize(this->blobs_.size(), true);
}

template <typename Dtype>
void InnerProductLayer<Dtype>::Reshape(const vector<Blob<Dtype>*>& bottom,
      const vector<Blob<Dtype>*>& top) {
  // Figure out the dimensions
  const int axis = bottom[0]->CanonicalAxisIndex(
      this->layer_param_.inner_product_param().axis());
  const int new_K = bottom[0]->count(axis);
  CHECK_EQ(K_, new_K)
      << "Input size incompatible with inner product parameters.";
  // The first "axis" dimensions are independent inner products; the total
  // number of these is M_, the product over these dimensions.
  M_ = bottom[0]->count(0, axis);
  // conv -> FC: the number of input feature-map tensors, i.e. the number of samples.
  // FC -> FC:   likewise the number of samples.
  // The top shape will be the bottom shape with the flattened axes dropped,
  // and replaced by a single axis with dimension num_output (N_).
  vector<int> top_shape = bottom[0]->shape();
  top_shape.resize(axis + 1);
  top_shape[axis] = N_;
  top[0]->Reshape(top_shape);
  // Set up the bias multiplier
  if (bias_term_) {
    vector<int> bias_shape(1, M_);
    bias_multiplier_.Reshape(bias_shape);
    caffe_set(M_, Dtype(1), bias_multiplier_.mutable_cpu_data());  // set the bias multipliers to 1
  }
}

template <typename Dtype>
void InnerProductLayer<Dtype>::Forward_cpu(const vector<Blob<Dtype>*>& bottom,
    const vector<Blob<Dtype>*>& top) {
  const Dtype* bottom_data = bottom[0]->cpu_data();
  Dtype* top_data = top[0]->mutable_cpu_data();
  const Dtype* weight = this->blobs_[0]->cpu_data();
  caffe_cpu_gemm<Dtype>(CblasNoTrans, transpose_ ? CblasNoTrans : CblasTrans,
      M_, N_, K_, (Dtype)1.,
      bottom_data, weight, (Dtype)0., top_data);  // compute the output neuron values
  if (bias_term_) {  // add the bias to the output neuron values
    caffe_cpu_gemm<Dtype>(CblasNoTrans, CblasNoTrans, M_, N_, 1, (Dtype)1.,
        bias_multiplier_.cpu_data(),
        this->blobs_[1]->cpu_data(), (Dtype)1., top_data);
  }
}

template <typename Dtype>
void InnerProductLayer<Dtype>::Backward_cpu(const vector<Blob<Dtype>*>& top,
    const vector<bool>& propagate_down,
    const vector<Blob<Dtype>*>& bottom) {
  if (this->param_propagate_down_[0]) {
    const Dtype* top_diff = top[0]->cpu_diff();        // pointer to the top gradient
    const Dtype* bottom_data = bottom[0]->cpu_data();  // pointer to the input neurons
    // Gradient with respect to weight
    if (transpose_) {
      caffe_cpu_gemm<Dtype>(CblasTrans, CblasNoTrans,
          K_, N_, M_,
          (Dtype)1., bottom_data, top_diff,
          (Dtype)1., this->blobs_[0]->mutable_cpu_diff());
    } else {
      // weight_diff = 1.0 * top_diff^T * bottom_data + 1.0 * weight_diff
      // top_diff is M_ x N_, bottom_data is M_ x K_,
      // this->blobs_[0]->mutable_cpu_diff() is N_ x K_.
      // C = alpha * op(A) * op(B) + beta * C; math_functions orders the sizes as M, N, K.
      caffe_cpu_gemm<Dtype>(CblasTrans, CblasNoTrans,
          N_, K_, M_,
          (Dtype)1., top_diff, bottom_data,
          (Dtype)1., this->blobs_[0]->mutable_cpu_diff());  // stores the weight gradient
    }
  }
  if (bias_term_ && this->param_propagate_down_[1]) {
    const Dtype* top_diff = top[0]->cpu_diff();
    // Gradient with respect to bias:
    // bias_diff (N_) = alpha * top_diff^T (N_ x M_) * bias_multiplier_ (M_) + beta * bias_diff (N_)
    caffe_cpu_gemv<Dtype>(CblasTrans, M_, N_, (Dtype)1., top_diff,
        bias_multiplier_.cpu_data(), (Dtype)1.,
        this->blobs_[1]->mutable_cpu_diff());
  }  // the bias is learned; bias_multiplier_ is an all-ones vector, one entry per sample
  // debug info
  /*const Dtype* p = bias_multiplier_.cpu_data();
  std::cout << *(p + 1);*/
  if (propagate_down[0]) {
    const Dtype* top_diff = top[0]->cpu_diff();
    // Gradient with respect to bottom data
    if (transpose_) {
      caffe_cpu_gemm<Dtype>(CblasNoTrans, CblasTrans,
          M_, K_, N_,
          (Dtype)1., top_diff, this->blobs_[0]->cpu_data(),
          (Dtype)0., bottom[0]->mutable_cpu_diff());
    } else {
      // bottom_diff (M_ x K_) = top_diff (M_ x N_) * weight (N_ x K_)
      caffe_cpu_gemm<Dtype>(CblasNoTrans, CblasNoTrans,
          M_, K_, N_,  // strictly following the parameter order in math_functions
          (Dtype)1., top_diff, this->blobs_[0]->cpu_data(),
          (Dtype)0., bottom[0]->mutable_cpu_diff());
    }
  }
}

#ifdef CPU_ONLY
STUB_GPU(InnerProductLayer);
#endif

INSTANTIATE_CLASS(InnerProductLayer);
REGISTER_LAYER_CLASS(InnerProduct);

}  // namespace caffe
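Several comments above note that math_functions orders the GEMM sizes as M, N, K. For reference, caffe_cpu_gemm is a thin wrapper over cblas_sgemm that derives the row-major leading dimensions from the transpose flags; the sketch below reconstructs that mapping from memory (the function name cpu_gemm_float is hypothetical), so treat the details as an approximation rather than the exact source:

#include <cblas.h>

// Hedged sketch of how caffe_cpu_gemm<float> maps onto cblas_sgemm,
// reconstructed from math_functions.cpp.
void cpu_gemm_float(const CBLAS_TRANSPOSE TransA, const CBLAS_TRANSPOSE TransB,
                    const int M, const int N, const int K, const float alpha,
                    const float* A, const float* B, const float beta, float* C) {
  // For row-major storage, each input's leading dimension is its
  // pre-transpose column count; C is always M x N with leading dimension N.
  int lda = (TransA == CblasNoTrans) ? K : M;
  int ldb = (TransB == CblasNoTrans) ? N : K;
  cblas_sgemm(CblasRowMajor, TransA, TransB, M, N, K, alpha, A, lda, B,
              ldb, beta, C, N);
}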