SSD: Single Shot MultiBox Detector Code Walkthrough (Part 4)


SSD: Single Shot MultiBox Detector Code Walkthrough (Part 1): http://blog.csdn.net/u011956147/article/details/73028773
SSD: Single Shot MultiBox Detector Code Walkthrough (Part 2): http://blog.csdn.net/u011956147/article/details/73030116
SSD: Single Shot MultiBox Detector Code Walkthrough (Part 3): http://blog.csdn.net/u011956147/article/details/73032867
SSD: Single Shot MultiBox Detector Code Walkthrough (Part 4): http://blog.csdn.net/u011956147/article/details/73033170
SSD: Single Shot MultiBox Detector Code Walkthrough (Part 5): http://blog.csdn.net/u011956147/article/details/73033282


This post covers multibox_loss_layer, another key piece of SSD. The layer internally builds two sub-layers to compute the localization and confidence losses, and it relies on several important helper functions such as FindMatches, MineHardExamples, and EncodeLocPrediction / EncodeConfPrediction (some of these live in bbox_util and will be covered in a later post).
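
For reference, the objective this layer implements is the one from the SSD paper:

L(x, c, l, g) = \frac{1}{N} \left( L_{conf}(x, c) + \alpha \, L_{loc}(x, l, g) \right)

Here N is the number of matched default boxes (the loss is set to 0 when N = 0), L_{loc} is the Smooth L1 loss between the predicted boxes l and the encoded ground truth g, L_{conf} is the softmax loss over the class confidences c, and \alpha corresponds to loc_weight_ below (1.0 by default).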
Code:

#include <algorithm>
#include <map>
#include <utility>
#include <vector>

#include "caffe/layers/multibox_loss_layer.hpp"
#include "caffe/util/math_functions.hpp"

namespace caffe {

// LayerSetUp. Besides the usual setup, this function creates two internal
// layers: one for the location regression and one for the confidence loss.
template <typename Dtype>
void MultiBoxLossLayer<Dtype>::LayerSetUp(const vector<Blob<Dtype>*>& bottom,
      const vector<Blob<Dtype>*>& top) {
  LossLayer<Dtype>::LayerSetUp(bottom, top);
  if (this->layer_param_.propagate_down_size() == 0) {
    this->layer_param_.add_propagate_down(true);   // location predictions
    this->layer_param_.add_propagate_down(true);   // confidence scores
    this->layer_param_.add_propagate_down(false);  // prior boxes
    this->layer_param_.add_propagate_down(false);  // ground truth
  }
  const MultiBoxLossParameter& multibox_loss_param =
      this->layer_param_.multibox_loss_param();
  multibox_loss_param_ = this->layer_param_.multibox_loss_param();  // isn't this redundant, given the reference above?
  num_ = bottom[0]->num();                // batch size
  num_priors_ = bottom[2]->height() / 4;  // number of priors; each prior stores its top-left and bottom-right corner coordinates
  // Get other parameters.
  CHECK(multibox_loss_param.has_num_classes()) << "Must provide num_classes.";
  num_classes_ = multibox_loss_param.num_classes();  // number of classes
  CHECK_GE(num_classes_, 1) << "num_classes should not be less than 1.";
  share_location_ = multibox_loss_param.share_location();  // share location predictions across classes, default = true
  loc_classes_ = share_location_ ? 1 : num_classes_;  // if shared, all classes use a single location prediction; otherwise each class regresses its own (the reason for the per-class variant is still not entirely clear to me)
  background_label_id_ = multibox_loss_param.background_label_id();  // label id of the background class
  use_difficult_gt_ = multibox_loss_param.use_difficult_gt();  // whether to use ground truth marked "difficult" (the VOC annotation flag)
  mining_type_ = multibox_loss_param.mining_type();
  // This part differs slightly from the older SSD code:
  if (multibox_loss_param.has_do_neg_mining()) {
    LOG(WARNING) << "do_neg_mining is deprecated, use mining_type instead.";
    do_neg_mining_ = multibox_loss_param.do_neg_mining();  // hard negative mining, true
    CHECK_EQ(do_neg_mining_,
             mining_type_ != MultiBoxLossParameter_MiningType_NONE);  // MultiBoxLossParameter_MiningType_NONE is the protoc-generated name of the NONE value of the MiningType enum
  }
  do_neg_mining_ = mining_type_ != MultiBoxLossParameter_MiningType_NONE;
  if (!this->layer_param_.loss_param().has_normalization() &&  // loss normalization, from LossParameter; default VALID
      this->layer_param_.loss_param().has_normalize()) {
    normalization_ = this->layer_param_.loss_param().normalize() ?
                     LossParameter_NormalizationMode_VALID :
                     LossParameter_NormalizationMode_BATCH_SIZE;
  } else {
    normalization_ = this->layer_param_.loss_param().normalization();
  }
  if (do_neg_mining_) {
    CHECK(share_location_)
        << "Currently only support negative mining if share_location is true.";
  }
  vector<int> loss_shape(1, 1);
  // Set up localization loss layer.
  loc_weight_ = multibox_loss_param.loc_weight();        // loc weight, default 1.0
  loc_loss_type_ = multibox_loss_param.loc_loss_type();  // loss type, SMOOTH_L1
  // fake shape.
  vector<int> loc_shape(1, 1);            // shape [1]
  loc_shape.push_back(4);                 // shape [1, 4]
  loc_pred_.Reshape(loc_shape);           // [1, 4]
  loc_gt_.Reshape(loc_shape);             // [1, 4]
  loc_bottom_vec_.push_back(&loc_pred_);  // pointer to the prediction blob
  loc_bottom_vec_.push_back(&loc_gt_);    // pointer to the gt blob
  loc_loss_.Reshape(loss_shape);          // scalar location loss, shape [1]
  loc_top_vec_.push_back(&loc_loss_);     // pointer to the top blob
  if (loc_loss_type_ == MultiBoxLossParameter_LocLossType_L2) {  // create an internal layer that computes the location loss
    LayerParameter layer_param;
    layer_param.set_name(this->layer_param_.name() + "_l2_loc");
    layer_param.set_type("EuclideanLoss");
    layer_param.add_loss_weight(loc_weight_);
    loc_loss_layer_ = LayerRegistry<Dtype>::CreateLayer(layer_param);
    loc_loss_layer_->SetUp(loc_bottom_vec_, loc_top_vec_);
  } else if (loc_loss_type_ == MultiBoxLossParameter_LocLossType_SMOOTH_L1) {  // SMOOTH_L1 is the one SSD uses
    LayerParameter layer_param;
    layer_param.set_name(this->layer_param_.name() + "_smooth_L1_loc");  // mbox_loss_smooth_L1_loc
    layer_param.set_type("SmoothL1Loss");
    layer_param.add_loss_weight(loc_weight_);  // 1.0
    loc_loss_layer_ = LayerRegistry<Dtype>::CreateLayer(layer_param);  // create the layer
    loc_loss_layer_->SetUp(loc_bottom_vec_, loc_top_vec_);  // takes predictions and gt, outputs loc_loss_; the two blobs in loc_bottom_vec_ are only placeholders here and get filled in Forward
  } else {
    LOG(FATAL) << "Unknown localization loss type.";
  }
  // Set up confidence loss layer: another internal layer, for the confidence loss.
  conf_loss_type_ = multibox_loss_param.conf_loss_type();  // SOFTMAX
  conf_bottom_vec_.push_back(&conf_pred_);  // conf_pred_ is a Blob
  conf_bottom_vec_.push_back(&conf_gt_);    // conf_gt_ is a Blob
  conf_loss_.Reshape(loss_shape);           // scalar confidence loss, shape [1]
  conf_top_vec_.push_back(&conf_loss_);     // pointer to the top blob
  if (conf_loss_type_ == MultiBoxLossParameter_ConfLossType_SOFTMAX) {
    CHECK_GE(background_label_id_, 0)
        << "background_label_id should be within [0, num_classes) for Softmax.";
    CHECK_LT(background_label_id_, num_classes_)
        << "background_label_id should be within [0, num_classes) for Softmax.";
    LayerParameter layer_param;
    layer_param.set_name(this->layer_param_.name() + "_softmax_conf");  // mbox_loss_softmax_conf
    layer_param.set_type("SoftmaxWithLoss");
    layer_param.add_loss_weight(Dtype(1.));  // 1.0
    layer_param.mutable_loss_param()->set_normalization(
        LossParameter_NormalizationMode_NONE);
    SoftmaxParameter* softmax_param = layer_param.mutable_softmax_param();
    softmax_param->set_axis(1);
    // Fake reshape; the real shapes are set in Forward once num_conf_ is known.
    vector<int> conf_shape(1, 1);
    conf_gt_.Reshape(conf_shape);        // [1]
    conf_shape.push_back(num_classes_);
    conf_pred_.Reshape(conf_shape);      // [1, num_classes]
    conf_loss_layer_ = LayerRegistry<Dtype>::CreateLayer(layer_param);
    conf_loss_layer_->SetUp(conf_bottom_vec_, conf_top_vec_);
  } else if (conf_loss_type_ == MultiBoxLossParameter_ConfLossType_LOGISTIC) {
    LayerParameter layer_param;
    layer_param.set_name(this->layer_param_.name() + "_logistic_conf");
    layer_param.set_type("SigmoidCrossEntropyLoss");
    layer_param.add_loss_weight(Dtype(1.));
    // Fake reshape.
    vector<int> conf_shape(1, 1);
    conf_shape.push_back(num_classes_);
    conf_gt_.Reshape(conf_shape);
    conf_pred_.Reshape(conf_shape);
    conf_loss_layer_ = LayerRegistry<Dtype>::CreateLayer(layer_param);
    conf_loss_layer_->SetUp(conf_bottom_vec_, conf_top_vec_);
  } else {
    LOG(FATAL) << "Unknown confidence loss type.";
  }
}  // end of LayerSetUp
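
The SmoothL1Loss layer created above is what actually regresses the box offsets. As a quick reference, here is a minimal sketch (my own illustration, not the Caffe layer, which of course also produces the diff for backward) of the element-wise function it sums over the differences loc_pred_ - loc_gt_:

template <typename Dtype>
Dtype SmoothL1(Dtype x) {
  // Quadratic near zero, linear in the tails, so a few badly localized
  // boxes do not dominate the gradient the way they would under plain L2.
  const Dtype abs_x = x < 0 ? -x : x;
  if (abs_x < Dtype(1)) {
    return Dtype(0.5) * x * x;
  }
  return abs_x - Dtype(0.5);
}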
template <typename Dtype>
void MultiBoxLossLayer<Dtype>::Reshape(const vector<Blob<Dtype>*>& bottom,
      const vector<Blob<Dtype>*>& top) {
  LossLayer<Dtype>::Reshape(bottom, top);
  num_ = bottom[0]->num();                // batch size
  num_priors_ = bottom[2]->height() / 4;  // see the note on the prior blob layout below
  num_gt_ = bottom[3]->height();          // number of ground truth boxes
  CHECK_EQ(bottom[0]->num(), bottom[1]->num());
  CHECK_EQ(num_priors_ * loc_classes_ * 4, bottom[0]->channels())  // loc_classes_ is 1 when shared, num_classes_ otherwise
      << "Number of priors must match number of location predictions.";
  CHECK_EQ(num_priors_ * num_classes_, bottom[1]->channels())
      << "Number of priors must match number of confidence predictions.";
}
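
One note on num_priors_ = bottom[2]->height() / 4: the prior blob produced by PriorBoxLayer has shape [1, 2, num_priors * 4], where channel 0 holds the (xmin, ymin, xmax, ymax) of every prior and channel 1 holds the four encoding variances of every prior. The sketch below shows roughly how GetPriorBBoxes (in bbox_util, covered in a later post) walks that layout; it is a condensed illustration, not the verbatim function:

template <typename Dtype>
void GetPriorBBoxesSketch(const Dtype* prior_data, const int num_priors,
      vector<NormalizedBBox>* prior_bboxes,
      vector<vector<float> >* prior_variances) {
  for (int i = 0; i < num_priors; ++i) {
    // First channel: the corner coordinates of prior i.
    NormalizedBBox bbox;
    bbox.set_xmin(prior_data[i * 4]);
    bbox.set_ymin(prior_data[i * 4 + 1]);
    bbox.set_xmax(prior_data[i * 4 + 2]);
    bbox.set_ymax(prior_data[i * 4 + 3]);
    prior_bboxes->push_back(bbox);
    // Second channel: the four variances of prior i, offset by num_priors * 4.
    vector<float> var;
    for (int j = 0; j < 4; ++j) {
      var.push_back(prior_data[(num_priors + i) * 4 + j]);
    }
    prior_variances->push_back(var);
  }
}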
// Bottom shapes: location predictions bottom[0] are [N, num_priors*4, 1, 1],
// confidences bottom[1] are [N, num_priors*num_classes, 1, 1], priors
// bottom[2] are [1, 2, num_priors*4], ground truth bottom[3] is
// [1, 1, num_gt, 8].
template <typename Dtype>
void MultiBoxLossLayer<Dtype>::Forward_cpu(const vector<Blob<Dtype>*>& bottom,
    const vector<Blob<Dtype>*>& top) {
  const Dtype* loc_data = bottom[0]->cpu_data();
  const Dtype* conf_data = bottom[1]->cpu_data();
  const Dtype* prior_data = bottom[2]->cpu_data();
  const Dtype* gt_data = bottom[3]->cpu_data();
  /*
  message NormalizedBBox {
    optional float xmin = 1;
    optional float ymin = 2;
    optional float xmax = 3;
    optional float ymax = 4;
    optional int32 label = 5;
    optional bool difficult = 6;
    optional float score = 7;
    optional float size = 8;
  }
  */
  // Retrieve all ground truth: decode the boxes into all_gt_bboxes,
  // keyed by image index within the batch.
  map<int, vector<NormalizedBBox> > all_gt_bboxes;
  GetGroundTruth(gt_data, num_gt_, background_label_id_, use_difficult_gt_,  // background_label_id_ = 0, use_difficult_gt_ = true
                 &all_gt_bboxes);
  // Retrieve all prior bboxes. It is same within a batch since we assume all
  // images in a batch are of same dimension.
  // Priors go into prior_bboxes, their variances into prior_variances.
  vector<NormalizedBBox> prior_bboxes;
  vector<vector<float> > prior_variances;
  GetPriorBBoxes(prior_data, num_priors_, &prior_bboxes, &prior_variances);
  // Retrieve all predictions.
  vector<LabelBBox> all_loc_preds;  // LabelBBox is map<int, vector<NormalizedBBox> >
  GetLocPredictions(loc_data, num_, num_priors_, loc_classes_, share_location_,
                    &all_loc_preds);  // writes all predicted boxes, decoded from bottom[0] (loc_data), into all_loc_preds
  // Find matches between source bboxes and ground truth bboxes.
  vector<map<int, vector<float> > > all_match_overlaps;
  FindMatches(all_loc_preds, all_gt_bboxes, prior_bboxes, prior_variances,
              multibox_loss_param_, &all_match_overlaps, &all_match_indices_);
  num_matches_ = 0;
  int num_negs = 0;
  // Sample hard negative (and positive) examples based on mining type.
  MineHardExamples(*bottom[1], all_loc_preds, all_gt_bboxes, prior_bboxes,
                   prior_variances, all_match_overlaps, multibox_loss_param_,
                   &num_matches_, &num_negs, &all_match_indices_,
                   &all_neg_indices_);
  if (num_matches_ >= 1) {
    // Form data to pass on to loc_loss_layer_.
    vector<int> loc_shape(2);
    loc_shape[0] = 1;
    loc_shape[1] = num_matches_ * 4;
    loc_pred_.Reshape(loc_shape);  // the blob addresses are already in loc_bottom_vec_
    loc_gt_.Reshape(loc_shape);
    Dtype* loc_pred_data = loc_pred_.mutable_cpu_data();
    Dtype* loc_gt_data = loc_gt_.mutable_cpu_data();
    EncodeLocPrediction(all_loc_preds, all_gt_bboxes, all_match_indices_,
                        prior_bboxes, prior_variances, multibox_loss_param_,
                        loc_pred_data, loc_gt_data);
    loc_loss_layer_->Reshape(loc_bottom_vec_, loc_top_vec_);
    loc_loss_layer_->Forward(loc_bottom_vec_, loc_top_vec_);  // forward pass of the internal loc loss layer
  } else {
    loc_loss_.mutable_cpu_data()[0] = 0;
  }  // loc loss forward done
  // Form data to pass on to conf_loss_layer_.
  if (do_neg_mining_) {  // use only the mined positive and negative samples
    num_conf_ = num_matches_ + num_negs;
  } else {
    num_conf_ = num_ * num_priors_;
  }
  if (num_conf_ >= 1) {
    // Reshape the confidence data.
    vector<int> conf_shape;
    if (conf_loss_type_ == MultiBoxLossParameter_ConfLossType_SOFTMAX) {  // SSD uses softmax
      conf_shape.push_back(num_conf_);
      conf_gt_.Reshape(conf_shape);
      conf_shape.push_back(num_classes_);
      conf_pred_.Reshape(conf_shape);
    } else if (conf_loss_type_ == MultiBoxLossParameter_ConfLossType_LOGISTIC) {
      conf_shape.push_back(1);
      conf_shape.push_back(num_conf_);
      conf_shape.push_back(num_classes_);
      conf_gt_.Reshape(conf_shape);
      conf_pred_.Reshape(conf_shape);
    } else {
      LOG(FATAL) << "Unknown confidence loss type.";
    }
    if (!do_neg_mining_) {
      // Consider all scores.
      // Share data and diff with bottom[1].
      CHECK_EQ(conf_pred_.count(), bottom[1]->count());
      conf_pred_.ShareData(*(bottom[1]));
    }
    Dtype* conf_pred_data = conf_pred_.mutable_cpu_data();
    Dtype* conf_gt_data = conf_gt_.mutable_cpu_data();
    caffe_set(conf_gt_.count(), Dtype(background_label_id_), conf_gt_data);  // initialize all labels to background; EncodeConfPrediction overwrites the matched priors
    EncodeConfPrediction(conf_data, num_, num_priors_, multibox_loss_param_,
                         all_match_indices_, all_neg_indices_, all_gt_bboxes,
                         conf_pred_data, conf_gt_data);
    conf_loss_layer_->Reshape(conf_bottom_vec_, conf_top_vec_);
    conf_loss_layer_->Forward(conf_bottom_vec_, conf_top_vec_);
  } else {
    conf_loss_.mutable_cpu_data()[0] = 0;
  }  // conf loss forward done
  top[0]->mutable_cpu_data()[0] = 0;
  if (this->layer_param_.propagate_down(0)) {  // true: add the loc loss, scaled by loc_weight_ and the normalizer
    Dtype normalizer = LossLayer<Dtype>::GetNormalizer(
        normalization_, num_, num_priors_, num_matches_);
    top[0]->mutable_cpu_data()[0] +=
        loc_weight_ * loc_loss_.cpu_data()[0] / normalizer;
  }
  if (this->layer_param_.propagate_down(1)) {  // true: add the normalized conf loss
    Dtype normalizer = LossLayer<Dtype>::GetNormalizer(
        normalization_, num_, num_priors_, num_matches_);
    top[0]->mutable_cpu_data()[0] += conf_loss_.cpu_data()[0] / normalizer;
  }
}  // end of Forward_cpu
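
Two things are worth spelling out here. First, with the default VALID normalization, GetNormalizer essentially returns num_matches_, so the final loss matches the 1/N weighting of the paper's objective. Second, EncodeLocPrediction builds the regression targets in the paper's CENTER_SIZE encoding. The helper below is my own illustration of that encoding, assuming the default setting where the variances divide the target rather than being encoded into it (the real work happens in bbox_util):

#include <cmath>

// Illustrative only: prior and gt are (xmin, ymin, xmax, ymax), var holds
// the four per-prior variances, encoded receives the 4 regression targets.
void EncodeCenterSizeTarget(const float* prior, const float* var,
                            const float* gt, float* encoded) {
  const float pw  = prior[2] - prior[0];
  const float ph  = prior[3] - prior[1];
  const float pcx = (prior[0] + prior[2]) / 2.f;
  const float pcy = (prior[1] + prior[3]) / 2.f;
  const float gw  = gt[2] - gt[0];
  const float gh  = gt[3] - gt[1];
  const float gcx = (gt[0] + gt[2]) / 2.f;
  const float gcy = (gt[1] + gt[3]) / 2.f;
  encoded[0] = (gcx - pcx) / pw / var[0];   // center x offset, scaled
  encoded[1] = (gcy - pcy) / ph / var[1];   // center y offset, scaled
  encoded[2] = std::log(gw / pw) / var[2];  // log width ratio
  encoded[3] = std::log(gh / ph) / var[3];  // log height ratio
}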
template <typename Dtype>
void MultiBoxLossLayer<Dtype>::Backward_cpu(const vector<Blob<Dtype>*>& top,
    const vector<bool>& propagate_down,
    const vector<Blob<Dtype>*>& bottom) {
  if (propagate_down[2]) {
    LOG(FATAL) << this->type()
        << " Layer cannot backpropagate to prior inputs.";
  }
  if (propagate_down[3]) {
    LOG(FATAL) << this->type()
        << " Layer cannot backpropagate to label inputs.";
  }
  // Back propagate on location prediction.
  if (propagate_down[0]) {  // the loc loss goes back first
    Dtype* loc_bottom_diff = bottom[0]->mutable_cpu_diff();
    caffe_set(bottom[0]->count(), Dtype(0), loc_bottom_diff);
    if (num_matches_ >= 1) {
      vector<bool> loc_propagate_down;
      // Only back propagate on prediction, not ground truth.
      loc_propagate_down.push_back(true);
      loc_propagate_down.push_back(false);
      loc_loss_layer_->Backward(loc_top_vec_, loc_propagate_down,
                                loc_bottom_vec_);
      // Scale gradient.
      Dtype normalizer = LossLayer<Dtype>::GetNormalizer(
          normalization_, num_, num_priors_, num_matches_);
      Dtype loss_weight = top[0]->cpu_diff()[0] / normalizer;
      caffe_scal(loc_pred_.count(), loss_weight, loc_pred_.mutable_cpu_diff());
      // Copy gradient back to bottom[0].
      const Dtype* loc_pred_diff = loc_pred_.cpu_diff();
      int count = 0;
      for (int i = 0; i < num_; ++i) {
        for (map<int, vector<int> >::iterator it =
             all_match_indices_[i].begin();
             it != all_match_indices_[i].end(); ++it) {
          const int label = share_location_ ? 0 : it->first;
          const vector<int>& match_index = it->second;
          for (int j = 0; j < match_index.size(); ++j) {
            if (match_index[j] <= -1) {
              continue;  // unmatched priors receive no loc gradient
            }
            // Copy the diff to the right place.
            int start_idx = loc_classes_ * 4 * j + label * 4;
            caffe_copy<Dtype>(4, loc_pred_diff + count * 4,
                              loc_bottom_diff + start_idx);
            ++count;
          }
        }
        loc_bottom_diff += bottom[0]->offset(1);
      }
    }
  }
  // Back propagate on confidence prediction.
  if (propagate_down[1]) {
    Dtype* conf_bottom_diff = bottom[1]->mutable_cpu_diff();
    caffe_set(bottom[1]->count(), Dtype(0), conf_bottom_diff);
    if (num_conf_ >= 1) {
      vector<bool> conf_propagate_down;
      // Only back propagate on prediction, not ground truth.
      conf_propagate_down.push_back(true);
      conf_propagate_down.push_back(false);
      conf_loss_layer_->Backward(conf_top_vec_, conf_propagate_down,
                                 conf_bottom_vec_);
      // Scale gradient.
      Dtype normalizer = LossLayer<Dtype>::GetNormalizer(
          normalization_, num_, num_priors_, num_matches_);
      Dtype loss_weight = top[0]->cpu_diff()[0] / normalizer;
      caffe_scal(conf_pred_.count(), loss_weight,
                 conf_pred_.mutable_cpu_diff());
      // Copy gradient back to bottom[1].
      const Dtype* conf_pred_diff = conf_pred_.cpu_diff();
      if (do_neg_mining_) {
        int count = 0;
        for (int i = 0; i < num_; ++i) {
          // Copy matched (positive) bboxes scores' diff.
          const map<int, vector<int> >& match_indices = all_match_indices_[i];
          for (map<int, vector<int> >::const_iterator it =
               match_indices.begin(); it != match_indices.end(); ++it) {
            const vector<int>& match_index = it->second;
            CHECK_EQ(match_index.size(), num_priors_);
            for (int j = 0; j < num_priors_; ++j) {
              if (match_index[j] <= -1) {
                continue;
              }
              // Copy the diff to the right place.
              caffe_copy<Dtype>(num_classes_,
                                conf_pred_diff + count * num_classes_,
                                conf_bottom_diff + j * num_classes_);
              ++count;
            }
          }
          // Copy negative bboxes scores' diff.
          for (int n = 0; n < all_neg_indices_[i].size(); ++n) {
            int j = all_neg_indices_[i][n];
            CHECK_LT(j, num_priors_);
            caffe_copy<Dtype>(num_classes_,
                              conf_pred_diff + count * num_classes_,
                              conf_bottom_diff + j * num_classes_);
            ++count;
          }
          conf_bottom_diff += bottom[1]->offset(1);
        }
      } else {
        // The diff is already computed and stored.
        bottom[1]->ShareDiff(conf_pred_);
      }
    }
  }
  // After backward, remove match statistics.
  all_match_indices_.clear();
  all_neg_indices_.clear();
}

INSTANTIATE_CLASS(MultiBoxLossLayer);
REGISTER_LAYER_CLASS(MultiBoxLoss);

}  // namespace caffe
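
Finally, a word on MineHardExamples, since it decides what the confidence loss and the backward pass above actually see: with the default MAX_NEGATIVE mining type it sorts each image's unmatched priors by their confidence loss and keeps only the hardest ones, up to neg_pos_ratio (default 3) negatives per positive. The chosen indices end up in all_neg_indices_, which the backward pass uses to route the gradients. Below is a simplified per-image sketch under those assumptions (the function name and signature are mine, not bbox_util's):

#include <algorithm>
#include <functional>
#include <utility>
#include <vector>
using std::pair;
using std::vector;

// conf_loss[j] is the confidence loss of prior j; match_index[j] > -1 means
// prior j was matched to a ground truth box (a positive).
vector<int> PickHardNegatives(const vector<float>& conf_loss,
                              const vector<int>& match_index,
                              float neg_pos_ratio) {
  int num_pos = 0;
  vector<pair<float, int> > candidates;  // (loss, prior index) of negatives
  for (int j = 0; j < static_cast<int>(match_index.size()); ++j) {
    if (match_index[j] > -1) {
      ++num_pos;
    } else {
      candidates.push_back(std::make_pair(conf_loss[j], j));
    }
  }
  const int num_neg = std::min(static_cast<int>(neg_pos_ratio * num_pos),
                               static_cast<int>(candidates.size()));
  // Hardest (highest-loss) negatives first.
  std::sort(candidates.begin(), candidates.end(),
            std::greater<pair<float, int> >());
  vector<int> neg_indices;
  for (int n = 0; n < num_neg; ++n) {
    neg_indices.push_back(candidates[n].second);
  }
  return neg_indices;
}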

Original post: http://blog.csdn.net/u011956147/article/details/73033170
