Caffe SigmoidCrossEntropyLossLayer: Theory and Code Walkthrough


The Cross-Entropy Loss Function

For a brief introduction to the cross-entropy loss function, see the linked write-up.
Below we walk through how Caffe implements the cross-entropy loss.
First, the cross-entropy loss is given by the following expression:

E = -\frac{1}{N} \sum_{n=1}^{N} \left[ p_n \log \hat{p}_n + (1 - p_n) \log (1 - \hat{p}_n) \right]

SigmoidCrossEntropyLossLayer takes two bottom blobs: bottom[0] holds the predicted scores and bottom[1] holds the target labels, both of shape (N × C × H × W). Denote the predictions by x, with x_n ∈ (−∞, +∞); the sigmoid maps each score to a probability \hat{p}_n = \sigma(x_n) ∈ [0, 1], while the targets in bottom[1] satisfy p_n ∈ [0, 1]. The output loss blob has shape (1 × 1 × 1 × 1).
\sigma(x_n) = \frac{1}{1 + e^{-x_n}}
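As a quick side note, the mapping applied element-wise by the internal SigmoidLayer is easy to reproduce in standalone C++. The sketch below is my own minimal illustration (the function and variable names are made up, not taken from Caffe):

#include <cmath>
#include <cstdio>

// Plain element-wise logistic sigmoid: maps any real score x_n into (0, 1).
double sigmoid(double x) {
  return 1.0 / (1.0 + std::exp(-x));
}

int main() {
  const double logits[] = {-3.0, 0.0, 3.0};  // example scores x_n
  for (double x : logits) {
    std::printf("x = %+.1f  ->  sigmoid(x) = %.4f\n", x, sigmoid(x));
  }
  return 0;
}

Large positive scores map to values near 1 and large negative scores to values near 0; this saturation is exactly why the loss itself has to be computed carefully, as discussed below.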

The derivative used in back-propagation:
\frac{\partial E}{\partial x_n} = \frac{\partial E}{\partial \hat{p}_n} \cdot \frac{\partial \hat{p}_n}{\partial x_n} = -\frac{1}{N} \left( \frac{p_n}{\hat{p}_n} - \frac{1 - p_n}{1 - \hat{p}_n} \right) \cdot \hat{p}_n (1 - \hat{p}_n) = \frac{1}{N} \left( \hat{p}_n - p_n \right)

The loss-computation code in Caffe looks somewhat different from the expression above; for reference, the relevant snippet is:

  Dtype loss = 0;
  for (int i = 0; i < count; ++i) {
    loss -= input_data[i] * (target[i] - (input_data[i] >= 0)) -
        log(1 + exp(input_data[i] - 2 * input_data[i] * (input_data[i] >= 0)));
  }
  top[0]->mutable_cpu_data()[0] = loss / num;
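To see why this loop agrees with the cross-entropy expression, rewrite the per-element loss directly in terms of the raw score x_n. The following short derivation is my own addition for clarity; it is implicit in the code rather than stated in the original:

\ell_n = -\left[ p_n \log \sigma(x_n) + (1 - p_n) \log\bigl(1 - \sigma(x_n)\bigr) \right]
       = \log\bigl(1 + e^{x_n}\bigr) - p_n x_n
       = -x_n \bigl( p_n - \mathbb{1}[x_n \ge 0] \bigr) + \log\Bigl( 1 + e^{\,x_n - 2 x_n \mathbb{1}[x_n \ge 0]} \Bigr)

Here \mathbb{1}[x_n \ge 0] is the indicator corresponding to (input_data[i] >= 0) in the code. The last form keeps the argument of exp non-positive, so it cannot overflow for large |x_n|; the loop accumulates exactly these \ell_n, and the final division by num averages over the batch.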

Understanding the Caffe loss

[Figure: illustration of the Caffe loss computation, cited from an external link; the image is not reproduced here.]

Having covered the theory and the points to watch out for, we now turn to the code with some comments.

Definition of SigmoidCrossEntropyLossLayer:

template <typename Dtype>
class SigmoidCrossEntropyLossLayer : public LossLayer<Dtype> {
 public:
  explicit SigmoidCrossEntropyLossLayer(const LayerParameter& param)
      : LossLayer<Dtype>(param),
        sigmoid_layer_(new SigmoidLayer<Dtype>(param)),
        sigmoid_output_(new Blob<Dtype>()) {}
  virtual void LayerSetUp(const vector<Blob<Dtype>*>& bottom,
      const vector<Blob<Dtype>*>& top);
  virtual void Reshape(const vector<Blob<Dtype>*>& bottom,
      const vector<Blob<Dtype>*>& top);
  virtual inline const char* type() const { return "SigmoidCrossEntropyLoss"; }

 protected:
  virtual void Forward_cpu(const vector<Blob<Dtype>*>& bottom,
      const vector<Blob<Dtype>*>& top);
  virtual void Backward_cpu(const vector<Blob<Dtype>*>& top,
      const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom);
  virtual void Backward_gpu(const vector<Blob<Dtype>*>& top,
      const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom);
  /// The internal SigmoidLayer used to map predictions to probabilities.
  shared_ptr<SigmoidLayer<Dtype> > sigmoid_layer_;   // produces the sigmoid output
  /// sigmoid_output stores the output of the SigmoidLayer.
  shared_ptr<Blob<Dtype> > sigmoid_output_;          // holds the sigmoid output blob
  /// bottom vector holder to call the underlying SigmoidLayer::Forward
  vector<Blob<Dtype>*> sigmoid_bottom_vec_;          // input to the internal sigmoid
  /// top vector holder to call the underlying SigmoidLayer::Forward
  vector<Blob<Dtype>*> sigmoid_top_vec_;             // output of the internal sigmoid
};
Implementation of the SigmoidCrossEntropyLossLayer member functions:

template <typename Dtype>
void SigmoidCrossEntropyLossLayer<Dtype>::LayerSetUp(
    const vector<Blob<Dtype>*>& bottom, const vector<Blob<Dtype>*>& top) {
  LossLayer<Dtype>::LayerSetUp(bottom, top);
  sigmoid_bottom_vec_.clear();
  sigmoid_bottom_vec_.push_back(bottom[0]);
  sigmoid_top_vec_.clear();
  sigmoid_top_vec_.push_back(sigmoid_output_.get());
  sigmoid_layer_->SetUp(sigmoid_bottom_vec_, sigmoid_top_vec_);
}

template <typename Dtype>
void SigmoidCrossEntropyLossLayer<Dtype>::Reshape(
    const vector<Blob<Dtype>*>& bottom, const vector<Blob<Dtype>*>& top) {
  LossLayer<Dtype>::Reshape(bottom, top);
  CHECK_EQ(bottom[0]->count(), bottom[1]->count()) <<
      "SIGMOID_CROSS_ENTROPY_LOSS layer inputs must have the same count.";
  sigmoid_layer_->Reshape(sigmoid_bottom_vec_, sigmoid_top_vec_);
}

template <typename Dtype>
void SigmoidCrossEntropyLossLayer<Dtype>::Forward_cpu(
    const vector<Blob<Dtype>*>& bottom, const vector<Blob<Dtype>*>& top) {
  // The forward pass computes the sigmoid outputs.
  sigmoid_bottom_vec_[0] = bottom[0];
  sigmoid_layer_->Forward(sigmoid_bottom_vec_, sigmoid_top_vec_);
  // Compute the loss (negative log likelihood)
  const int count = bottom[0]->count();
  const int num = bottom[0]->num();
  // Stable version of loss computation from input data
  const Dtype* input_data = bottom[0]->cpu_data();
  const Dtype* target = bottom[1]->cpu_data();
  Dtype loss = 0;
  for (int i = 0; i < count; ++i) {
    loss -= input_data[i] * (target[i] - (input_data[i] >= 0)) -
        log(1 + exp(input_data[i] - 2 * input_data[i] * (input_data[i] >= 0)));
  }
  top[0]->mutable_cpu_data()[0] = loss / num;
}

template <typename Dtype>
void SigmoidCrossEntropyLossLayer<Dtype>::Backward_cpu(
    const vector<Blob<Dtype>*>& top, const vector<bool>& propagate_down,
    const vector<Blob<Dtype>*>& bottom) {
  if (propagate_down[1]) {
    LOG(FATAL) << this->type()
               << " Layer cannot backpropagate to label inputs.";
  }
  if (propagate_down[0]) {
    // First, compute the diff
    const int count = bottom[0]->count();
    const int num = bottom[0]->num();
    const Dtype* sigmoid_output_data = sigmoid_output_->cpu_data();
    const Dtype* target = bottom[1]->cpu_data();
    Dtype* bottom_diff = bottom[0]->mutable_cpu_diff();
    caffe_sub(count, sigmoid_output_data, target, bottom_diff);
    // Scale down gradient
    const Dtype loss_weight = top[0]->cpu_diff()[0];
    caffe_scal(count, loss_weight / num, bottom_diff);
  }
}
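To sanity-check the forward and backward formulas outside of Caffe, here is a small self-contained C++ sketch. It is my own addition (not Caffe code, and the variable names are invented): it evaluates the stable per-element loss used in Forward_cpu, compares it with the naive expression -[p log p̂ + (1 - p) log(1 - p̂)], and prints the per-element gradient p̂ - p:

#include <cmath>
#include <cstdio>

int main() {
  // Toy "bottom" blobs: raw scores x_n and targets p_n in [0, 1].
  const double input_data[] = {-4.0, -0.5, 0.0, 2.5, 40.0};
  const double target[]     = { 0.0,  1.0, 0.5, 1.0,  0.0};
  const int count = 5;

  for (int i = 0; i < count; ++i) {
    const double x = input_data[i];
    const double p = target[i];
    const double p_hat = 1.0 / (1.0 + std::exp(-x));  // sigmoid(x)

    // Stable form: the per-element term accumulated by the Forward_cpu loop.
    const double stable =
        -(x * (p - (x >= 0)) -
          std::log(1 + std::exp(x - 2 * x * (x >= 0))));

    // Naive cross-entropy; blows up to inf once p_hat rounds to exactly 1
    // and log(1 - p_hat) becomes log(0).
    const double naive =
        -(p * std::log(p_hat) + (1 - p) * std::log(1 - p_hat));

    // Per-element gradient (before the loss_weight / num scaling): p_hat - p.
    std::printf("x = %6.2f  stable = %10.6f  naive = %10.6f  grad = %9.6f\n",
                x, stable, naive, p_hat - p);
  }
  return 0;
}

On the saturated example x = 40, p = 0, the naive form evaluates log(1 - p̂) with 1 - p̂ rounded to zero and returns inf, while the stable form returns about 40; the gradient column matches the (p̂_n - p_n) factor derived above, up to the loss_weight / num scaling applied in Backward_cpu.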