Caffe—SigmoidCrossEntropyLossLayer


First, have a look at a post from CaffeCN (which, it turns out, is a good place to discuss Caffe): http://caffecn.cn/?/question/25. The screenshot from that post is not reproduced here; it derives the numerically stable form of the sigmoid cross-entropy loss, which can be reconstructed as follows. With input $x_n$, target $t_n \in \{0,1\}$, and $\sigma(x) = 1/(1+e^{-x})$, the per-element loss

$$\ell_n = -\left[t_n \log \sigma(x_n) + (1-t_n)\log\big(1-\sigma(x_n)\big)\right]$$

simplifies to

$$\ell_n = \begin{cases} x_n - x_n t_n + \log\big(1+e^{-x_n}\big), & x_n \ge 0,\\ -x_n t_n + \log\big(1+e^{x_n}\big), & x_n < 0.\end{cases}$$

The case split on the sign of $x_n$ above is there to keep the exponent of $e$ non-positive, so that exp() cannot overflow. The combined expression corresponds to the loss -= ... statement inside the loop of Forward_cpu below.
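To see the overflow concretely first, here is a minimal standalone sketch (plain C++, not Caffe code; the test values are arbitrary) comparing a naive evaluation of log(1 + e^x) with the stable one that always keeps the exponent non-positive:

#include <algorithm>
#include <cmath>
#include <cstdio>

// Naive evaluation: exp(x) overflows double for large positive x,
// so the result becomes inf.
double softplus_naive(double x) {
  return std::log(1.0 + std::exp(x));
}

// Stable evaluation, the same trick as in Caffe's forward pass:
// log(1 + e^x) = max(x, 0) + log(1 + e^{-|x|}),
// so the exponent passed to exp() is never positive.
double softplus_stable(double x) {
  return std::max(x, 0.0) + std::log(1.0 + std::exp(-std::fabs(x)));
}

int main() {
  for (double x : {-1000.0, -1.0, 1.0, 1000.0}) {
    std::printf("x = %8.1f  naive = %12g  stable = %12g\n",
                x, softplus_naive(x), softplus_stable(x));
  }
  return 0;
}

For x = 1000 the naive version prints inf while the stable one prints 1000, which is correct to double precision.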

template <typename Dtype>
void SigmoidCrossEntropyLossLayer<Dtype>::Forward_cpu(
    const vector<Blob<Dtype>*>& bottom, const vector<Blob<Dtype>*>& top) {
  // The forward pass computes the sigmoid outputs.
  sigmoid_bottom_vec_[0] = bottom[0];
  sigmoid_layer_->Forward(sigmoid_bottom_vec_, sigmoid_top_vec_);
  // Compute the loss (negative log likelihood)
  // Stable version of loss computation from input data
  const Dtype* input_data = bottom[0]->cpu_data();
  const Dtype* target = bottom[1]->cpu_data();
  int valid_count = 0;
  Dtype loss = 0;
  for (int i = 0; i < bottom[0]->count(); ++i) {
    const int target_value = static_cast<int>(target[i]);
    if (has_ignore_label_ && target_value == ignore_label_) {
      continue;
    }
    loss -= input_data[i] * (target[i] - (input_data[i] >= 0)) -
        log(1 + exp(input_data[i] - 2 * input_data[i] * (input_data[i] >= 0)));
    ++valid_count;
  }
  normalizer_ = get_normalizer(normalization_, valid_count);
  top[0]->mutable_cpu_data()[0] = loss / normalizer_;
}
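As a sanity check (my own sketch, not part of Caffe), the stable per-element expression from the loop above can be compared against the textbook cross-entropy computed through the sigmoid, on moderate inputs where the naive form is still representable:

#include <cmath>
#include <cstdio>

double sigmoid(double x) { return 1.0 / (1.0 + std::exp(-x)); }

// Textbook cross-entropy, computed naively through the sigmoid.
double ce_naive(double x, double t) {
  const double p = sigmoid(x);
  return -(t * std::log(p) + (1.0 - t) * std::log(1.0 - p));
}

// The stable per-element expression from Forward_cpu;
// ind plays the role of the indicator (x >= 0) in the code.
double ce_stable(double x, double t) {
  const double ind = (x >= 0) ? 1.0 : 0.0;
  return -(x * (t - ind) - std::log(1.0 + std::exp(x - 2.0 * x * ind)));
}

int main() {
  for (double x : {-5.0, -0.5, 0.5, 5.0}) {
    for (double t : {0.0, 1.0}) {
      std::printf("x = %5.1f  t = %.0f  naive = %.6f  stable = %.6f\n",
                  x, t, ce_naive(x, t), ce_stable(x, t));
    }
  }
  return 0;
}

The two columns agree, confirming that the expression in the loop is just an overflow-safe rearrangement of the usual loss.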

The above is only the forward computation; now for the backward pass (CPU version). First we need the gradient of the cross-entropy loss with respect to the input. The figure from the original post is not reproduced here, but the derivation is short: with $p_n = \sigma(x_n)$,

$$\frac{\partial \ell_n}{\partial x_n} = \frac{\partial \ell_n}{\partial p_n}\,\frac{\partial p_n}{\partial x_n} = \left(\frac{1-t_n}{1-p_n} - \frac{t_n}{p_n}\right) p_n (1-p_n) = p_n - t_n = \sigma(x_n) - t_n.$$

So the corresponding code is:

template <typename Dtype>
void SigmoidCrossEntropyLossLayer<Dtype>::Backward_cpu(
    const vector<Blob<Dtype>*>& top, const vector<bool>& propagate_down,
    const vector<Blob<Dtype>*>& bottom) {
  if (propagate_down[1]) {
    LOG(FATAL) << this->type()
               << " Layer cannot backpropagate to label inputs.";
  }
  if (propagate_down[0]) {
    // First, compute the diff
    const int count = bottom[0]->count();
    const Dtype* sigmoid_output_data = sigmoid_output_->cpu_data();
    const Dtype* target = bottom[1]->cpu_data();
    Dtype* bottom_diff = bottom[0]->mutable_cpu_diff();
    caffe_sub(count, sigmoid_output_data, target, bottom_diff);
    // Zero out gradient of ignored targets.
    if (has_ignore_label_) {
      for (int i = 0; i < count; ++i) {
        const int target_value = static_cast<int>(target[i]);
        if (target_value == ignore_label_) {
          bottom_diff[i] = 0;
        }
      }
    }
    // Scale down gradient
    Dtype loss_weight = top[0]->cpu_diff()[0] / normalizer_;
    caffe_scal(count, loss_weight, bottom_diff);
  }
}
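To confirm that $\sigma(x_n) - t_n$ really is the gradient of the stable loss used in the forward pass, a quick finite-difference check (again a standalone sketch, not Caffe code) can be run:

#include <cmath>
#include <cstdio>

// Stable per-element loss, matching Forward_cpu.
double loss(double x, double t) {
  const double ind = (x >= 0) ? 1.0 : 0.0;
  return -(x * (t - ind) - std::log(1.0 + std::exp(x - 2.0 * x * ind)));
}

int main() {
  const double eps = 1e-6;
  for (double x : {-2.0, -0.3, 0.7, 3.0}) {
    for (double t : {0.0, 1.0}) {
      // Analytic gradient, as computed by Backward_cpu via caffe_sub.
      const double analytic = 1.0 / (1.0 + std::exp(-x)) - t;
      // Central finite difference of the stable loss.
      const double numeric =
          (loss(x + eps, t) - loss(x - eps, t)) / (2.0 * eps);
      std::printf("x = %5.1f  t = %.0f  analytic = %9.6f  numeric = %9.6f\n",
                  x, t, analytic, numeric);
    }
  }
  return 0;
}

The two values match to several decimal places, so caffe_sub(count, sigmoid_output_data, target, bottom_diff) indeed writes the per-element gradient, which is then scaled by the loss weight divided by the normalizer.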