centerloss 理解

来源：互联网　发布：java web start　编辑：程序博客网　时间：2024/06/01 09:35

今天看了一下centerloss 源码，觉得还是挺值得借鉴的，所以分享一下。
centerloss主要在caffe的基础上加了三项。

一. caffe.proto

1.加proto 格式的 ID号147

这里写图片描述

2.加proto格式的定义

这里写图片描述
这里面的参数都是层的参数，也是超参，在prototxt中赋值。
axis是默认为1的。

二.加入hpp文件到 include/caffe/layers/

这里写图片描述

主要包含cpp文件的方法和参数的定义。

三.加入cpp文件到src/caffe/layers/

这里写图片描述

#include <vector>#include "caffe/filler.hpp"#include "caffe/layers/center_loss_layer.hpp"#include "caffe/util/math_functions.hpp"namespace caffe {template <typename Dtype>void CenterLossLayer<Dtype>::LayerSetUp(const vector<Blob<Dtype>*>& bottom,      const vector<Blob<Dtype>*>& top) {  const int num_output = this->layer_param_.center_loss_param().num_output();    N_ = num_output;  const int axis = bottom[0]->CanonicalAxisIndex( //一个常见的blob数据包括（N，C，H，W）四个维度，可以通过设定axis来选取哪一个维度      this->layer_param_.center_loss_param().axis());//把axis后面的都拉为一个向量，最后是chw变成k维的  // Dimensions starting from "axis" are "flattened" into a single  // length K_ vector. For example, if bottom[0]'s shape is (N, C, H, W),  // and axis == 1, N inner products with dimension CHW are performed.  K_ = bottom[0]->count(axis);  // Check if we need to set up the weights  if (this->blobs_.size() > 0) {    LOG(INFO) << "Skipping parameter initialization";  } else {    this->blobs_.resize(1);    // Intialize the weight    vector<int> center_shape(2);    center_shape[0] = N_;    center_shape[1] = K_;    this->blobs_[0].reset(new Blob<Dtype>(center_shape));//尺寸设成centershape的大小    // fill the weights    shared_ptr<Filler<Dtype> > center_filler(GetFiller<Dtype>(        this->layer_param_.center_loss_param().center_filler()));// Fillers are random number generators that fills a blob using the specified algorithm. 
//这个很重要，就是如何初始化，这里是使用prototxt中定义的xavier方法初始化，可以看到        //center_loss_param {      // num_output: 10572     //center_filler {    // type: "xavier"   // }  // }    center_filler->Fill(this->blobs_[0].get());  }  // parameter initialization  this->param_propagate_down_.resize(this->blobs_.size(), true);}template <typename Dtype>void CenterLossLayer<Dtype>::Reshape(const vector<Blob<Dtype>*>& bottom,      const vector<Blob<Dtype>*>& top) {  CHECK_EQ(bottom[1]->channels(), 1);  CHECK_EQ(bottom[1]->height(), 1);  CHECK_EQ(bottom[1]->width(), 1);  M_ = bottom[0]->num();  // The top shape will be the bottom shape with the flattened axes dropped,  // and replaced by a single axis with dimension num_output (N_).  LossLayer<Dtype>::Reshape(bottom, top);  distance_.ReshapeLike(*bottom[0]);  variation_sum_.ReshapeLike(*this->blobs_[0]);}template <typename Dtype>void CenterLossLayer<Dtype>::Forward_cpu(const vector<Blob<Dtype>*>& bottom,    const vector<Blob<Dtype>*>& top) {  const Dtype* bottom_data = bottom[0]->cpu_data();//输入的倒数第二个卷积层的feature  const Dtype* label = bottom[1]->cpu_data();//输入的label  const Dtype* center = this->blobs_[0]->cpu_data();//指针指向，后面会赋值给这个地方  Dtype* distance_data = distance_.mutable_cpu_data();//存储每一类xi-cyi的地方  // the i-th distance_data  for (int i = 0; i < M_; i++) {    const int label_value = static_cast<int>(label[i]);//第i类    // D(i,:) = X(i,:) - C(y(i),:)    caffe_sub(K_, bottom_data + i * K_, center + label_value * K_, distance_data + i * K_);//算好减法，这里这个center很有意思，因为只在前向传播中出现，让人怀疑为什么它存储了center的值。其实是这样，它指向的是blob_[0]中的cpu_data，然后在更新参数时，会自动加上反向传播时计算好的center_diff，即blobs_[0]->mutable_cpu_diff()。 }  Dtype dot = caffe_cpu_dot(M_ * K_, distance_.cpu_data(), distance_.cpu_data());//这是vector的乘积，而非matrix，算好l2范数  Dtype loss = dot / M_ / Dtype(2);//算好前面的项  top[0]->mutable_cpu_data()[0] = loss;//存好loss}template <typename Dtype>void CenterLossLayer<Dtype>::Backward_cpu(const vector<Blob<Dtype>*>& top,    const vector<bool>& propagate_down,    const 
vector<Blob<Dtype>*>& bottom) {  // Gradient with respect to centers  if (this->param_propagate_down_[0]) {    const Dtype* label = bottom[1]->cpu_data();//标签    Dtype* center_diff = this->blobs_[0]->mutable_cpu_diff();／／存center更新值的地方。    Dtype* variation_sum_data = variation_sum_.mutable_cpu_data();    const Dtype* distance_data = distance_.cpu_data();／／上面这两个都是存中间的值。    // \sum_{y_i==j}    caffe_set(N_ * K_, (Dtype)0., variation_sum_.mutable_cpu_data());／／设置为0    for (int n = 0; n < N_; n++) {      int count = 0;      for (int m = 0; m < M_; m++) {        const int label_value = static_cast<int>(label[m]);        if (label_value == n) {          count++;          caffe_sub(K_, variation_sum_data + n * K_, distance_data + m * K_, variation_sum_data + n * K_);        }   //distance_data里面存的是前向时存的D(i,:) = X(i,:) - C(y(i),:)      }      caffe_axpy(K_, (Dtype)1./(count + (Dtype)1.), variation_sum_data + n * K_, center_diff + n * K_);前面第二第三个相乘存到最后一个    }  }  // Gradient with respect to bottom data //就是Lc对xi求偏导数  if (propagate_down[0]) {    caffe_copy(M_ * K_, distance_.cpu_data(), bottom[0]->mutable_cpu_diff());    caffe_scal(M_ * K_, top[0]->cpu_diff()[0] / M_, bottom[0]->mutable_cpu_diff());//这里求l2范数的导数 ,bottom[0]->mutable_cpu_diff()只是D(i,:) = X(i,:) - C(y(i),:)。  }  if (propagate_down[1]) {    LOG(FATAL) << this->type()               << " Layer cannot backpropagate to label inputs.";  }}#ifdef CPU_ONLYSTUB_GPU(CenterLossLayer);#endifINSTANTIATE_CLASS(CenterLossLayer);REGISTER_LAYER_CLASS(CenterLoss);}  // namespace caffe

最后附上我随手写的非常潦草的论文阅读笔记，上面一些备注有利于加深对centerloss的理解
这里写图片描述

阅读全文

1 0