Caffe source code: Softmax_loss_layer.cpp
@brief: Back-propagation through Caffe's loss layer

SoftmaxWithLossLayer is the last layer of the forward pass in LeNet and the first layer of the backward pass. Its job is to map the 10 output neurons of ip2, the network's final inner-product layer, to a probability distribution over the 10 classes (via an internal SoftmaxLayer) and then compute the cross-entropy loss against the ground-truth labels.
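Concretely, writing $z_1,\dots,z_{10}$ for the ip2 outputs (logits) of one sample and $y$ for its ground-truth label (this notation is only for this note, not variable names in the code), Forward_cpu below implements the standard softmax / cross-entropy formulas:

$$p_k = \frac{e^{z_k}}{\sum_{j=1}^{10} e^{z_j}}, \qquad E = -\log p_y, \qquad \text{loss} = \frac{1}{\text{normalizer}} \sum_{i \in \text{batch}} E_i ,$$

where the normalizer is returned by get_normalizer() (the number of valid samples under the default VALID mode). The annotated source follows.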
#include <algorithm>
#include <cfloat>
#include <vector>

#include "caffe/layers/softmax_loss_layer.hpp"
#include "caffe/util/math_functions.hpp"

namespace caffe {

template <typename Dtype>
void SoftmaxWithLossLayer<Dtype>::LayerSetUp(
    const vector<Blob<Dtype>*>& bottom, const vector<Blob<Dtype>*>& top) {
  LossLayer<Dtype>::LayerSetUp(bottom, top);
  LayerParameter softmax_param(this->layer_param_);  // initialize from this layer's parameters
  softmax_param.set_type("Softmax");
  softmax_layer_ = LayerRegistry<Dtype>::CreateLayer(softmax_param);  // create the internal softmax layer
  softmax_bottom_vec_.clear();
  softmax_bottom_vec_.push_back(bottom[0]);  // bottom[0] holds the raw output neurons (logits)
  softmax_top_vec_.clear();
  softmax_top_vec_.push_back(&prob_);  // prob_ will hold the predicted probabilities
  softmax_layer_->SetUp(softmax_bottom_vec_, softmax_top_vec_);  // set up the layer softmax_layer_ points to; the softmax output has the same shape as its input
  has_ignore_label_ =  // whether samples with a particular label should be ignored
      this->layer_param_.loss_param().has_ignore_label();  // read from the base class's layer parameter
  if (has_ignore_label_) {
    ignore_label_ = this->layer_param_.loss_param().ignore_label();
  }
  if (!this->layer_param_.loss_param().has_normalization() &&
      this->layer_param_.loss_param().has_normalize()) {
    normalization_ = this->layer_param_.loss_param().normalize() ?
                     LossParameter_NormalizationMode_VALID :
                     LossParameter_NormalizationMode_BATCH_SIZE;
  } else {
    normalization_ = this->layer_param_.loss_param().normalization();  // the default normalization mode is VALID
  }
}

template <typename Dtype>
void SoftmaxWithLossLayer<Dtype>::Reshape(
    const vector<Blob<Dtype>*>& bottom, const vector<Blob<Dtype>*>& top) {
  // this->Reshape(bottom, top);  // would call this very function again, so it is not what we want here
  LossLayer<Dtype>::Reshape(bottom, top);  // shapes top[0] as a single scalar
  softmax_layer_->Reshape(softmax_bottom_vec_, softmax_top_vec_);  // not redundant: the call above shapes top, this one shapes prob_ to match bottom[0]
  softmax_axis_ =
      bottom[0]->CanonicalAxisIndex(this->layer_param_.softmax_param().axis());
  outer_num_ = bottom[0]->count(0, softmax_axis_);
  inner_num_ = bottom[0]->count(softmax_axis_ + 1);
  CHECK_EQ(outer_num_ * inner_num_, bottom[1]->count())
      << "Number of labels must match number of predictions; "
      << "e.g., if softmax axis == 1 and prediction shape is (N, C, H, W), "
      << "label count (number of labels) must be N*H*W, "
      << "with integer values in {0, 1, ..., C-1}.";
  if (top.size() >= 2) {
    // softmax output
    top[1]->ReshapeLike(*bottom[0]);
  }
}

template <typename Dtype>
Dtype SoftmaxWithLossLayer<Dtype>::get_normalizer(
    LossParameter_NormalizationMode normalization_mode, int valid_count) {
  Dtype normalizer;
  switch (normalization_mode) {
    case LossParameter_NormalizationMode_FULL:
      normalizer = Dtype(outer_num_ * inner_num_);
      break;
    case LossParameter_NormalizationMode_VALID:
      if (valid_count == -1) {
        normalizer = Dtype(outer_num_ * inner_num_);
      } else {
        normalizer = Dtype(valid_count);
      }
      break;
    case LossParameter_NormalizationMode_BATCH_SIZE:
      normalizer = Dtype(outer_num_);
      break;
    case LossParameter_NormalizationMode_NONE:
      normalizer = Dtype(1);
      break;
    default:
      LOG(FATAL) << "Unknown normalization mode: "
          << LossParameter_NormalizationMode_Name(normalization_mode);
  }
  // Some users will have no labels for some examples in order to 'turn off' a
  // particular loss in a multi-task setup. The max prevents NaNs in that case.
  return std::max(Dtype(1.0), normalizer);
}

template <typename Dtype>
void SoftmaxWithLossLayer<Dtype>::Forward_cpu(
    const vector<Blob<Dtype>*>& bottom, const vector<Blob<Dtype>*>& top) {
  // The forward pass computes the softmax prob values.
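  // What this forward pass computes: the SoftmaxLayer call fills prob_ with
  // p_k = exp(z_k) / sum_j exp(z_j) for every sample, and the loop below then
  // accumulates loss = -sum_i log(p_{y_i}) before dividing by
  // get_normalizer(normalization_, count).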
  softmax_layer_->Forward(softmax_bottom_vec_, softmax_top_vec_);  // map the logits to probabilities via the internal SoftmaxLayer
  /*const Dtype* data_pointer = softmax_top_vec_[0]->cpu_data();
  for (int i = 0; i < 10; i++)
    std::cout << *(data_pointer++) << " ";*/
  const Dtype* prob_data = prob_.cpu_data();  // the address of prob_ was already pushed into softmax_top_vec_, so this is the same data
  /*const Dtype* data_pointerto_prob = prob_data;
  for (int i = 0; i < 10; i++)
    std::cout << *(data_pointerto_prob++) << " ";*/  // they are the same
  const Dtype* label = bottom[1]->cpu_data();
  int dim = prob_.count() / outer_num_;
  int count = 0;
  Dtype loss = 0;
  for (int i = 0; i < outer_num_; ++i) {
    for (int j = 0; j < inner_num_; j++) {
      const int label_value = static_cast<int>(label[i * inner_num_ + j]);
      if (has_ignore_label_ && label_value == ignore_label_) {
        continue;
      }
      DCHECK_GE(label_value, 0);
      DCHECK_LT(label_value, prob_.shape(softmax_axis_));
      loss -= log(std::max(prob_data[i * dim + label_value * inner_num_ + j],
                           Dtype(FLT_MIN)));  // accumulate the loss over all samples in the batch
      ++count;
    }
  }
  top[0]->mutable_cpu_data()[0] = loss / get_normalizer(normalization_, count);
  if (top.size() == 2) {
    top[1]->ShareData(prob_);
  }
}

template <typename Dtype>
void SoftmaxWithLossLayer<Dtype>::Backward_cpu(const vector<Blob<Dtype>*>& top,
    const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom) {
  // bottom[0] is N x K x 1 x 1 (N = batch size, K = number of classes);
  // bottom[1] is N x 1 x 1 x 1 and holds the ground-truth labels;
  // top[0] is the 1 x 1 x 1 x 1 cross-entropy classification loss E.
  // Print the loss value (debug output added for this post).
  std::cout << "Loss is: " << *(top[0]->mutable_cpu_data()) << std::endl;
  if (propagate_down[1]) {
    LOG(FATAL) << this->type()
               << " Layer cannot backpropagate to label inputs.";
  }
  if (propagate_down[0]) {
    Dtype* bottom_diff = bottom[0]->mutable_cpu_diff();
    const Dtype* prob_data = prob_.cpu_data();
    caffe_copy(prob_.count(), prob_data, bottom_diff);  // copy the probabilities of the whole batch into bottom_diff
    const Dtype* label = bottom[1]->cpu_data();  // pointer to all labels in the batch
    // print all labels from one batch
    // for (int i = 0; i < bottom[1]->count(); i++)
    //   std::cout << *(label++) << " ";  // buggy: this advances the label pointer, so the label_value reads below would then be wrong
    int dim = prob_.count() / outer_num_;
    int count = 0;
    for (int i = 0; i < outer_num_; ++i) {  // for each image in the batch
      for (int j = 0; j < inner_num_; ++j) {  // for each spatial position (inner_num_ == 1 for LeNet)
        const int label_value = static_cast<int>(label[i * inner_num_ + j]);  // ground-truth label
        // std::cout << label_value;
        if (has_ignore_label_ && label_value == ignore_label_) {
          for (int c = 0; c < bottom[0]->shape(softmax_axis_); ++c) {
            bottom_diff[i * dim + c * inner_num_ + j] = 0;
          }
        } else {
          bottom_diff[i * dim + label_value * inner_num_ + j] -= 1;  // by the derived formula, only the neuron at the ground-truth label gets the extra -1; the other gradients are just the copied probabilities
          ++count;  // count this valid sample; ideally the probability at the true label is 1 and 0 elsewhere, but optimization never reaches 1 exactly, so minimizing the loss pushes that probability as high as possible
        }
      }
    }
    // Scale gradient
    // std::cout << *(top[0]->cpu_diff()) << " " << top[0]->cpu_diff()[0];
    Dtype loss_weight = top[0]->cpu_diff()[0] /
                        get_normalizer(normalization_, count);
    std::cout << "Original value of bottom_diff[0] is: " << bottom_diff[0] << std::endl;
    caffe_scal(prob_.count(), loss_weight, bottom_diff);  // bottom_diff = loss_weight * bottom_diff; prob_.count() is the number of elements in bottom_diff
    std::cout << "loss_weight is: " << loss_weight
              << "; after the scaling X = alpha * X, bottom_diff[0] is: " << bottom_diff[0] << std::endl;
  }
}

#ifdef CPU_ONLY
STUB_GPU(SoftmaxWithLossLayer);
#endif

INSTANTIATE_CLASS(SoftmaxWithLossLayer);
REGISTER_LAYER_CLASS(SoftmaxWithLoss);

}  // namespace caffe
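Why does Backward_cpu only copy prob_ into bottom_diff and then subtract 1 at the label position? For one sample with logits $z$, probabilities $p=\mathrm{softmax}(z)$ and label $y$ (same notation as above), the standard derivation of the gradient of $E=-\log p_y$ is

$$\frac{\partial E}{\partial z_c}
  = -\frac{1}{p_y}\frac{\partial p_y}{\partial z_c}
  = -\frac{1}{p_y}\, p_y\,(\delta_{cy} - p_c)
  = p_c - \delta_{cy},$$

which is exactly the caffe_copy followed by the "-= 1" at the label index; caffe_scal then multiplies every element by loss_weight = top[0]->cpu_diff()[0] / get_normalizer(...), so the value finally written to bottom[0]'s diff is loss_weight * (p_c - δ_{cy}).

Below is a minimal standalone sketch (toy logits and a toy label, not Caffe code) that reproduces these per-sample formulas and can be compiled on its own:

// Standalone illustration: softmax, cross-entropy loss and its gradient for a
// single sample with K classes, mirroring what Forward_cpu / Backward_cpu do
// per sample before the normalizer scaling.
#include <algorithm>
#include <cmath>
#include <iostream>
#include <vector>

int main() {
  const std::vector<double> z = {1.0, 2.0, 0.5, -1.0};  // toy logits ("ip2 outputs")
  const int y = 1;                                       // toy ground-truth label

  // Softmax with the usual max shift for numerical stability.
  const double z_max = *std::max_element(z.begin(), z.end());
  std::vector<double> p(z.size());
  double denom = 0.0;
  for (std::size_t k = 0; k < z.size(); ++k) {
    p[k] = std::exp(z[k] - z_max);
    denom += p[k];
  }
  for (std::size_t k = 0; k < z.size(); ++k) p[k] /= denom;

  // Cross-entropy loss E = -log(p_y), clamped like the FLT_MIN guard in Forward_cpu.
  const double loss = -std::log(std::max(p[y], 1e-300));

  // Gradient dE/dz_k = p_k - 1{k == y}: copy the probabilities and subtract 1
  // at the label position, the same pattern used in Backward_cpu.
  std::vector<double> grad = p;
  grad[y] -= 1.0;

  std::cout << "loss = " << loss << "\ngrad =";
  for (double g : grad) std::cout << ' ' << g;
  std::cout << std::endl;
  return 0;
}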
http://blog.csdn.net/mounty_fsc/article/details/51379395
http://blog.csdn.net/mounty_fsc/article/details/51092906#t10