Caffe Source Code: inner_product_layer.cpp
@brief: Backward pass of Caffe's fully connected layers, ip2 → ip1 (the ReluLayer is not covered here).
Take LeNet's fully connected layers as the example, ip2 → ip1: the layer has 500 input neurons and 10 output neurons, and the batch size is 64. The weight matrix to be updated therefore connects 500 inputs to 10 outputs and is stored by Caffe as an N_ × K_ = 10 × 500 blob (M_ = 64, K_ = 500, N_ = 10).
For each output neuron, the weight gradient accumulates a contribution from every sample in the batch. Core code for the gradient with respect to the weight matrix:
// weight_diff = 1.0 * top_diff^T * bottom_data + 1.0 * weight_diff
// top_diff^T is N_ x M_, bottom_data is M_ x K_, weight_diff is N_ x K_
// caffe_cpu_gemm computes C = alpha * A * B + beta * C
caffe_cpu_gemm<Dtype>(CblasTrans, CblasNoTrans,
    N_, K_, M_,
    (Dtype)1., top_diff, bottom_data,
    (Dtype)1., this->blobs_[0]->mutable_cpu_diff());
Here top_diff is the gradient arriving from the layer above, stored as an M_ × N_ matrix (one row per sample, one column per output neuron); the CblasTrans flag makes the GEMM read it as N_ × M_.
In matrix form, the call computes and accumulates:
this->blobs_[0]->mutable_cpu_diff() = 1.0 * top_diff^T * bottom_data + 1.0 * this->blobs_[0]->mutable_cpu_diff()
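To make the index bookkeeping concrete, here is a minimal stand-alone sketch that replays the same transposed product with plain loops. It is not Caffe code; the sizes M = 2, K = 3, N = 2 and the constant fill values are made up for illustration.

#include <iostream>
#include <vector>

const int M = 2, K = 3, N = 2;  // toy stand-ins for Caffe's M_, K_, N_

int main() {
  // Row-major storage, as Caffe blobs use.
  std::vector<float> top_diff(M * N, 0.5f);     // gradient from above, M x N
  std::vector<float> bottom_data(M * K, 1.0f);  // input activations, M x K
  std::vector<float> weight_diff(N * K, 0.0f);  // accumulated gradient, N x K

  // weight_diff += top_diff^T * bottom_data: entry (n, k) sums the
  // contribution of every sample m, exactly what the CblasTrans GEMM does.
  for (int n = 0; n < N; ++n)
    for (int k = 0; k < K; ++k)
      for (int m = 0; m < M; ++m)
        weight_diff[n * K + k] += top_diff[m * N + n] * bottom_data[m * K + k];

  for (int n = 0; n < N; ++n) {
    for (int k = 0; k < K; ++k) std::cout << weight_diff[n * K + k] << ' ';
    std::cout << '\n';  // every entry is 2 * 0.5 * 1.0 = 1
  }
  return 0;
}

Note that beta = 1 in the real call, so the gradient is accumulated into the existing diff rather than overwriting it; the solver zeroes the diffs between iterations.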
Core code for the gradient with respect to the bias:
if (bias_term_ && this->param_propagate_down_[1]) {
  const Dtype* top_diff = top[0]->cpu_diff();
  // Gradient with respect to bias:
  // bias_diff (N_) = alpha * top_diff^T (N_ x M_) * bias_multiplier (M_)
  //                + beta * bias_diff (N_)
  caffe_cpu_gemv<Dtype>(CblasTrans, M_, N_, (Dtype)1., top_diff,
      bias_multiplier_.cpu_data(), (Dtype)1.,
      this->blobs_[1]->mutable_cpu_diff());
}
// The bias is learned; the bias multipliers are set to 1, one per sample.
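Multiplying by the all-ones vector simply sums each output's gradient over the batch. A small stand-alone sketch (M = 2, N = 3 and the values are made up for illustration) shows the equivalence:

#include <iostream>
#include <vector>

const int M = 2, N = 3;  // toy batch size and output count

int main() {
  std::vector<float> top_diff = {1, 2, 3,
                                 4, 5, 6};      // M x N, row-major
  std::vector<float> bias_multiplier(M, 1.0f);  // the all-ones vector
  std::vector<float> bias_diff(N, 0.0f);        // bias gradient, length N

  // bias_diff += top_diff^T * bias_multiplier: with ones as the weights this
  // is just a column sum of top_diff, which is what the gemv call computes.
  for (int n = 0; n < N; ++n)
    for (int m = 0; m < M; ++m)
      bias_diff[n] += top_diff[m * N + n] * bias_multiplier[m];

  for (int n = 0; n < N; ++n) std::cout << bias_diff[n] << ' ';  // 5 7 9
  std::cout << '\n';
  return 0;
}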
Core code for the gradient with respect to the input (bottom data):
By the chain rule, each input neuron's gradient is the sum of the output gradients weighted by the connections it feeds into: bottom_diff (M_ × K_) = top_diff (M_ × N_) × weights (N_ × K_).
if (propagate_down[0]) {
  const Dtype* top_diff = top[0]->cpu_diff();
  // Gradient with respect to bottom data
  if (transpose_) {
    caffe_cpu_gemm<Dtype>(CblasNoTrans, CblasTrans,
        M_, K_, N_,
        (Dtype)1., top_diff, this->blobs_[0]->cpu_data(),
        (Dtype)0., bottom[0]->mutable_cpu_diff());
  } else {
    caffe_cpu_gemm<Dtype>(CblasNoTrans, CblasNoTrans,
        M_, K_, N_,
        (Dtype)1., top_diff, this->blobs_[0]->cpu_data(),
        (Dtype)0., bottom[0]->mutable_cpu_diff());
  }
}
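The same computation as a plain-loop sketch (sizes and numbers are made up for illustration; this is not the library call):

#include <iostream>
#include <vector>

const int M = 1, K = 3, N = 2;  // toy sizes

int main() {
  std::vector<float> top_diff = {1, 2};         // M x N
  std::vector<float> weight = {1, 2, 3,
                               4, 5, 6};        // N x K, non-transposed layout
  std::vector<float> bottom_diff(M * K, 0.0f);  // M x K

  // bottom_diff = top_diff * weight: each input's gradient is the weighted
  // sum of the output gradients it feeds, matching the NoTrans/NoTrans GEMM.
  for (int m = 0; m < M; ++m)
    for (int k = 0; k < K; ++k)
      for (int n = 0; n < N; ++n)
        bottom_diff[m * K + k] += top_diff[m * N + n] * weight[n * K + k];

  for (int k = 0; k < K; ++k) std::cout << bottom_diff[k] << ' ';  // 9 12 15
  std::cout << '\n';
  return 0;
}

Note that beta = 0 here: unlike the parameter gradients, bottom_diff is overwritten rather than accumulated.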
The complete annotated source:

#include <vector>

#include "caffe/filler.hpp"
#include "caffe/layers/inner_product_layer.hpp"
#include "caffe/util/math_functions.hpp"

namespace caffe {

template <typename Dtype>
void InnerProductLayer<Dtype>::LayerSetUp(const vector<Blob<Dtype>*>& bottom,
    const vector<Blob<Dtype>*>& top) {
  const int num_output = this->layer_param_.inner_product_param().num_output();  // number of inner-product outputs
  bias_term_ = this->layer_param_.inner_product_param().bias_term();  // defaults to true
  transpose_ = this->layer_param_.inner_product_param().transpose();  // defaults to false
  N_ = num_output;  // number of output neurons of the fully connected layer
  const int axis = bottom[0]->CanonicalAxisIndex(
      this->layer_param_.inner_product_param().axis());
  // Dimensions starting from "axis" are "flattened" into a single
  // length K_ vector. For example, if bottom[0]'s shape is (N, C, H, W),
  // and axis == 1, N inner products with dimension CHW are performed.
  K_ = bottom[0]->count(axis);
  // FC after a conv layer: the dimensions from axis onward are flattened into
  // one vector, e.g. ip1's input is 64*50*4*4, so K_ = 50*4*4 = 800.
  // FC after an FC layer: K_ is the number of input neurons.
  // Check if we need to set up the weights
  if (this->blobs_.size() > 0) {
    LOG(INFO) << "Skipping parameter initialization";
  } else {
    if (bias_term_) {
      this->blobs_.resize(2);
    } else {
      this->blobs_.resize(1);
    }
    // Initialize the weights
    vector<int> weight_shape(2);
    if (transpose_) {
      weight_shape[0] = K_;
      weight_shape[1] = N_;
    } else {
      weight_shape[0] = N_;
      weight_shape[1] = K_;
    }
    this->blobs_[0].reset(new Blob<Dtype>(weight_shape));
    // fill the weights
    shared_ptr<Filler<Dtype> > weight_filler(GetFiller<Dtype>(
        this->layer_param_.inner_product_param().weight_filler()));
    weight_filler->Fill(this->blobs_[0].get());  // initialize the FC layer's weight matrix
    // debug info
    /*Blob<Dtype>* blobip = this->blobs_[0].get();
    Dtype* pointerip = blobip->mutable_cpu_data();
    for (int i = 0; i < 400000; i++)
      std::cout << "The num at " << i << " is " << *(pointerip + i);*/
    // If necessary, initialize and fill the bias term
    if (bias_term_) {
      vector<int> bias_shape(1, N_);
      this->blobs_[1].reset(new Blob<Dtype>(bias_shape));
      shared_ptr<Filler<Dtype> > bias_filler(GetFiller<Dtype>(
          this->layer_param_.inner_product_param().bias_filler()));
      bias_filler->Fill(this->blobs_[1].get());  // initialize the FC layer's bias
      //// debug info
      //Blob<Dtype>* blobip = this->blobs_[1].get();
      //Dtype* pointerip = blobip->mutable_cpu_data();
      //for (int i = 0; i < blobip->count(); i++)
      //  std::cout << "The num at " << i << " is " << *(pointerip + i);
    }
  }  // parameter initialization
  this->param_propagate_down_.resize(this->blobs_.size(), true);
}

template <typename Dtype>
void InnerProductLayer<Dtype>::Reshape(const vector<Blob<Dtype>*>& bottom,
    const vector<Blob<Dtype>*>& top) {
  // Figure out the dimensions
  const int axis = bottom[0]->CanonicalAxisIndex(
      this->layer_param_.inner_product_param().axis());
  const int new_K = bottom[0]->count(axis);
  CHECK_EQ(K_, new_K)
      << "Input size incompatible with inner product parameters.";
  // The first "axis" dimensions are independent inner products; the total
  // number of these is M_, the product over these dimensions.
  M_ = bottom[0]->count(0, axis);  // conv -> FC: the number of input feature-map stacks
                                   // (3-D tensors), i.e. the number of samples;
                                   // FC -> FC: the number of samples
  // The top shape will be the bottom shape with the flattened axes dropped,
  // and replaced by a single axis with dimension num_output (N_).
  vector<int> top_shape = bottom[0]->shape();
  top_shape.resize(axis + 1);
  top_shape[axis] = N_;
  top[0]->Reshape(top_shape);
  // Set up the bias multiplier
  if (bias_term_) {
    vector<int> bias_shape(1, M_);
    bias_multiplier_.Reshape(bias_shape);
    caffe_set(M_, Dtype(1), bias_multiplier_.mutable_cpu_data());  // set the bias multipliers to 1
  }
}

template <typename Dtype>
void InnerProductLayer<Dtype>::Forward_cpu(const vector<Blob<Dtype>*>& bottom,
    const vector<Blob<Dtype>*>& top) {
  const Dtype* bottom_data = bottom[0]->cpu_data();
  Dtype* top_data = top[0]->mutable_cpu_data();
  const Dtype* weight = this->blobs_[0]->cpu_data();
  caffe_cpu_gemm<Dtype>(CblasNoTrans, transpose_ ? CblasNoTrans : CblasTrans,
      M_, N_, K_, (Dtype)1.,
      bottom_data, weight, (Dtype)0., top_data);  // compute the output neuron values
  if (bias_term_) {  // add the bias to the output neuron values
    caffe_cpu_gemm<Dtype>(CblasNoTrans, CblasNoTrans, M_, N_, 1, (Dtype)1.,
        bias_multiplier_.cpu_data(),
        this->blobs_[1]->cpu_data(), (Dtype)1., top_data);
  }
}

template <typename Dtype>
void InnerProductLayer<Dtype>::Backward_cpu(const vector<Blob<Dtype>*>& top,
    const vector<bool>& propagate_down,
    const vector<Blob<Dtype>*>& bottom) {
  if (this->param_propagate_down_[0]) {
    const Dtype* top_diff = top[0]->cpu_diff();  // pointer to the top gradient
    const Dtype* bottom_data = bottom[0]->cpu_data();  // pointer to the input neurons
    // Gradient with respect to weight
    if (transpose_) {
      caffe_cpu_gemm<Dtype>(CblasTrans, CblasNoTrans,
          K_, N_, M_,
          (Dtype)1., bottom_data, top_diff,
          (Dtype)1., this->blobs_[0]->mutable_cpu_diff());
    } else {
      // weight_diff = 1.0 * top_diff^T * bottom_data + 1.0 * weight_diff
      // top_diff^T is N_ x M_, bottom_data is M_ x K_, weight_diff is N_ x K_
      // C = alpha * A * B + beta * C
      caffe_cpu_gemm<Dtype>(CblasTrans, CblasNoTrans,  // math_functions orders the size arguments as M, N, K
          N_, K_, M_,
          (Dtype)1., top_diff, bottom_data,
          (Dtype)1., this->blobs_[0]->mutable_cpu_diff());  // mutable_cpu_diff() holds the weight gradient
    }
  }
  if (bias_term_ && this->param_propagate_down_[1]) {
    const Dtype* top_diff = top[0]->cpu_diff();
    // Gradient with respect to bias:
    // bias_diff (N_) = alpha * top_diff^T (N_ x M_) * bias_multiplier (M_)
    //                + beta * bias_diff (N_)
    caffe_cpu_gemv<Dtype>(CblasTrans, M_, N_, (Dtype)1., top_diff,
        bias_multiplier_.cpu_data(), (Dtype)1.,
        this->blobs_[1]->mutable_cpu_diff());
  }  // the bias is learned; bias_multiplier_ is an all-ones vector, one multiplier per sample
  //const Dtype* p = bias_multiplier_.cpu_data();
  //std::cout << *(p + 1);
  if (propagate_down[0]) {
    const Dtype* top_diff = top[0]->cpu_diff();
    // Gradient with respect to bottom data
    if (transpose_) {
      caffe_cpu_gemm<Dtype>(CblasNoTrans, CblasTrans,
          M_, K_, N_,
          (Dtype)1., top_diff, this->blobs_[0]->cpu_data(),
          (Dtype)0., bottom[0]->mutable_cpu_diff());
    } else {
      caffe_cpu_gemm<Dtype>(CblasNoTrans, CblasNoTrans,
          M_, K_, N_,  // argument order strictly follows math_functions
          (Dtype)1., top_diff, this->blobs_[0]->cpu_data(),
          (Dtype)0., bottom[0]->mutable_cpu_diff());
      // bottom_diff (M_ x K_) = top_diff (M_ x N_) * weights (N_ x K_)
    }
  }
}

#ifdef CPU_ONLY
STUB_GPU(InnerProductLayer);
#endif

INSTANTIATE_CLASS(InnerProductLayer);
REGISTER_LAYER_CLASS(InnerProduct);

}  // namespace caffe
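For completeness, here is a toy re-implementation of what the two GEMMs in Forward_cpu compute for the non-transposed weight layout; all sizes and values are made up, and this is a sketch of the math rather than the library calls:

#include <iostream>
#include <vector>

const int M = 2, K = 3, N = 2;  // toy sizes

int main() {
  std::vector<float> bottom = {1, 1, 1,
                               2, 2, 2};   // M x K
  std::vector<float> weight = {1, 0, 1,
                               0, 1, 0};   // N x K (transpose_ == false layout)
  std::vector<float> bias = {0.5f, -0.5f}; // length N
  std::vector<float> top(M * N, 0.0f);     // M x N

  for (int m = 0; m < M; ++m)
    for (int n = 0; n < N; ++n) {
      // First GEMM: top = bottom * weight^T (NoTrans, Trans).
      for (int k = 0; k < K; ++k)
        top[m * N + n] += bottom[m * K + k] * weight[n * K + k];
      // Second GEMM: the rank-1 product ones * bias^T adds bias[n] to every row.
      top[m * N + n] += bias[n];
    }

  for (int m = 0; m < M; ++m) {
    for (int n = 0; n < N; ++n) std::cout << top[m * N + n] << ' ';
    std::cout << '\n';  // prints: 2.5 0.5 / 4.5 1.5
  }
  return 0;
}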