caffe之SoftmaxWithLoss层自定义实现

来源：互联网发布：淘宝网怎么是电脑版编辑：程序博客网时间：2024/05/22 17:14

caffe中的各层实现，因为封装了各种函数和为了扩展，在提升了效率的同时，降低了一定的代码可读性，这里，为了更好地理解softmax以及caffe中前向传播和反向传播的原理，我用通俗易懂的代码实现了SoftmaxWithLoss层（以下简称loss层），进行前向传播和反向传播，得到的训练结果和内置的代码结果是一样的。

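这里定义batch_size为网络输入的批大小，label_num表示标签的类别数。loss层的输入blob有两个：一个是全连接层的输出，维度是batch_size*label_num；另一个是标签层，维度是batch_size*1（每个样本对应一个标签）。为了通俗易懂，我们举个例子，比如mnist问题的LeNet网络，是一个10类的分类问题（数字0~9），训练时，每个batch大小为64，所以，这里的batch_size=64，label_num=10。这里Softmax层的各种原理，以及根据loss反向传播时的梯度推导，因为这里写公式不方便，我就在word里写了，如下图。核心公式为：设样本 $i$ 的全连接层输出为 $z_{i,j}$，真实标签为 $y_i$，批大小为 $N$，则置信度 $p_{i,j}=\dfrac{e^{z_{i,j}-\max_k z_{i,k}}}{\sum_{k} e^{z_{i,k}-\max_k z_{i,k}}}$，损失 $L=-\dfrac{1}{N}\sum_{i=1}^{N}\log p_{i,y_i}$，梯度 $\dfrac{\partial L}{\partial z_{i,j}}=\dfrac{1}{N}\left(p_{i,j}-\mathbb{1}[j=y_i]\right)$。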

然后，贴代码吧：

头文件：

#ifndef CAFFE_MY_LOSS_LAYER_HPP_#define CAFFE_MY_LOSS_LAYER_HPP_#include <vector>#include "caffe/blob.hpp"#include "caffe/layer.hpp"#include "caffe/proto/caffe.pb.h"#include "caffe/layers/loss_layer.hpp"#include "caffe/layers/softmax_layer.hpp"namespace caffe {template <typename Dtype>class MyLossLayer : public LossLayer<Dtype> { public:  explicit MyLossLayer(const LayerParameter& param)      : LossLayer<Dtype>(param) {}  virtual void LayerSetUp(const vector<Blob<Dtype>*>& bottom,      const vector<Blob<Dtype>*>& top);  virtual void Reshape(const vector<Blob<Dtype>*>& bottom,      const vector<Blob<Dtype>*>& top);  virtual inline const char* type() const { return "MyLoss"; }  virtual inline int ExactNumTopBlobs() const { return 1; }  virtual inline int MinTopBlobs() const { return 1; }  virtual inline int MaxTopBlobs() const { return 2; } protected:  virtual void Forward_cpu(const vector<Blob<Dtype>*>& bottom,      const vector<Blob<Dtype>*>& top);  virtual void Backward_cpu(const vector<Blob<Dtype>*>& top,      const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom);  vector<vector<Dtype> > prob_;   //保存置信度  int label_num;    //标签个数  int batch_size;   //批大小};}  // namespace caffe#endif  // CAFFE_MY_LOSS_LAYER_HPP_

源文件，反向传播时，按照公式更新梯度就好了

#include <algorithm>#include <cfloat>#include <vector>#include "caffe/layers/my_loss_layer.hpp"#include "caffe/util/math_functions.hpp"using namespace std;namespace caffe {template <typename Dtype>void MyLossLayer<Dtype>::LayerSetUp(    const vector<Blob<Dtype>*>& bottom, const vector<Blob<Dtype>*>& top) {  LossLayer<Dtype>::LayerSetUp(bottom, top);}template <typename Dtype>void MyLossLayer<Dtype>::Reshape(    const vector<Blob<Dtype>*>& bottom, const vector<Blob<Dtype>*>& top) {  LossLayer<Dtype>::Reshape(bottom, top);  this->label_num=bottom[0]->channels();   //标签数 ，比如mnist为10  this->batch_size=bottom[0]->num();       //batch大小，比如mnist 一次输入64个  this->prob_=vector<vector<Dtype> >(batch_size,vector<Dtype>(label_num,Dtype(0)));  //置信度数组 64*10}template <typename Dtype>void MyLossLayer<Dtype>::Forward_cpu(    const vector<Blob<Dtype>*>& bottom, const vector<Blob<Dtype>*>& top) {//为了避免数值问题，计算prob_时，先减最大值，再按照softmax公式计算各置信度for(int i=0;i<batch_size;++i){//求最大值，并减最大值Dtype mmax=-10000000;for(int j=0;j<label_num;++j)mmax=max<Dtype>(mmax,bottom[0]->data_at(i,j,0,0));for(int j=0;j<label_num;++j)prob_[i][j]=bottom[0]->data_at(i,j,0,0)-mmax;Dtype sum=0.0;   //求出分母for(int j=0;j<label_num;++j)sum+=exp(prob_[i][j]);for(int j=0;j<label_num;++j)   //计算各个置信度prob_[i][j]=exp(prob_[i][j])/sum;}//根据计算好的置信度，计算lossDtype loss=0.0;    const Dtype* label = bottom[1]->cpu_data();   //标签数组  64for(int i=0;i<batch_size;++i){int realLabel=static_cast<int>(label[i]);  //图片i的真实标签Dtype tmpProb=prob_[i][realLabel];         //属于真实标签的置信度        loss -= log(max<Dtype>(tmpProb,Dtype(FLT_MIN)));   //防止数据溢出问题}    top[0]->mutable_cpu_data()[0] = loss / batch_size;}//反向传播，计算梯度template <typename Dtype>void MyLossLayer<Dtype>::Backward_cpu(const vector<Blob<Dtype>*>& top,    const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom) {  if (propagate_down[0]) {    Dtype* bottom_diff = bottom[0]->mutable_cpu_diff();    const Dtype* label = bottom[1]->cpu_data();   //标签 for(int 
i=0;i<batch_size;++i){int realLabel=static_cast<int>(label[i]);  //图片i的真实标签for(int j=0;j<label_num;++j){int offset=bottom[0]->offset(i,j);if(j==realLabel)                       //按照公式，如果分量就是真实标签，直接在置信度上减去1，就得到该分量的梯度bottom_diff[offset]=prob_[i][j]-1;else                                  //否则，梯度等于置信度bottom_diff[offset]=prob_[i][j]; }}for(int i=0;i<bottom[0]->count();++i)   //梯度归一化，除以batch大小bottom_diff[i]/=batch_size;  }}INSTANTIATE_CLASS(MyLossLayer);REGISTER_LAYER_CLASS(MyLoss);}  // namespace caffe

编译好后，用mnist的数据跑一下试试：

# Attach the custom loss layer to the last inner-product output and the labels.
layer {
  name: "my_loss"
  type: "MyLoss"
  bottom: "ip2"    # class scores
  bottom: "label"  # ground-truth labels
  top: "my_loss"
}

最后结果：

阅读全文

2 0

caffe之SoftmaxWithLoss层 自定义实现

caffe之SoftmaxWithLoss层自定义实现