R-FCN源代码解读

来源：互联网发布：斯太尔wt数据俯仰角编辑：程序博客网时间：2024/05/22 15:32
本文对目标检测算法 R-FCN 源代码中 psroi_pooling_layer.cu 的 forward 过程进行详细解读，示意图的理解请参考 http://blog.csdn.net/shadow_guo/article/details/51767036 。下面给出带注释的代码：
// ------------------------------------------------------------------
// R-FCN
// Copyright (c) 2016 Microsoft
// Licensed under The MIT License [see r-fcn/LICENSE for details]
// Written by Yi Li
// ------------------------------------------------------------------

#include <cfloat>

#include "caffe/rfcn_layers.hpp"
#include "caffe/util/gpu_util.cuh"

using std::max;
using std::min;

namespace caffe {

  // Forward pass of position-sensitive ROI pooling.
  //
  // Each value of `index` produces one output element. `index` is decomposed
  // as (n, ctop, ph, pw): ROI number, output channel, bin row, bin column.
  // For that element the kernel average-pools one rectangular bin of one
  // input channel; which channel is read depends on both ctop and the bin
  // position (ph, pw).
  //
  // Parameters:
  //   nthreads       number of output elements, i.e.
  //                  num_rois * output_dim * pooled_height * pooled_width
  //   bottom_data    input feature map, indexed below as
  //                  ((batch * channels + c) * height + h) * width + w
  //   spatial_scale  factor ROI coordinates are multiplied by to obtain
  //                  feature-map coordinates (presumably 1 / total stride,
  //                  e.g. 1/16 -- confirm against the layer configuration)
  //   channels       number of input channels (presumably
  //                  output_dim * group_size * group_size; not checked here)
  //   height         feature-map height (number of rows)
  //   width          feature-map width (number of columns)
  //   pooled_height  number of bin rows per ROI
  //   pooled_width   number of bin columns per ROI
  //   bottom_rois    5 values per ROI: [batch_index, x1, y1, x2, y2]
  //   output_dim     number of output channels per ROI
  //   group_size     side length of the position-sensitive channel grid
  //                  (the bin position is used directly as the group
  //                  position, so this presumably equals pooled_height and
  //                  pooled_width -- confirm against the layer setup)
  //   top_data       output, one value per index
  //   mapping_channel  output, the input channel that was read for each index
  template <typename Dtype>
  __global__ void PSROIPoolingForward(
    const int nthreads,
    const Dtype* bottom_data,
    const Dtype spatial_scale,
    const int channels,
    const int height,
    const int width,
    const int pooled_height,
    const int pooled_width,
    const Dtype* bottom_rois,
    const int output_dim,
    const int group_size,
    Dtype* top_data,
    int* mapping_channel) {
    CUDA_KERNEL_LOOP(index, nthreads) {
      // The output is in order (n, ctop, ph, pw).
      int pw = index % pooled_width;                     // bin column
      int ph = (index / pooled_width) % pooled_height;   // bin row
      int ctop = (index / pooled_width / pooled_height) % output_dim;  // output channel
      int n = index / pooled_width / pooled_height / output_dim;       // ROI number

      // Locate this ROI's 5-value record through a local pointer. The
      // parameter itself must NOT be advanced in place: this body is a loop
      // and may run for more than one index on the same thread, in which
      // case an in-place `+=` would accumulate offsets across iterations.
      const Dtype* offset_bottom_rois = bottom_rois + n * 5;
      int roi_batch_ind = offset_bottom_rois[0];  // slot 0 holds the batch index

      // ROI extent in feature-map coordinates: [start, end) interval for
      // spatial sampling (the end corner is made exclusive by the +1).
      Dtype roi_start_w = static_cast<Dtype>(round(offset_bottom_rois[1])) * spatial_scale;
      Dtype roi_start_h = static_cast<Dtype>(round(offset_bottom_rois[2])) * spatial_scale;
      Dtype roi_end_w = static_cast<Dtype>(round(offset_bottom_rois[3]) + 1.) * spatial_scale;
      Dtype roi_end_h = static_cast<Dtype>(round(offset_bottom_rois[4]) + 1.) * spatial_scale;

      // Clamp the ROI size to at least 0.1 so the bin size is never zero.
      Dtype roi_width = max(roi_end_w - roi_start_w, 0.1);
      Dtype roi_height = max(roi_end_h - roi_start_h, 0.1);

      // Size of a single bin in feature-map coordinates.
      Dtype bin_size_h = roi_height / static_cast<Dtype>(pooled_height);
      Dtype bin_size_w = roi_width / static_cast<Dtype>(pooled_width);

      // Integer extent of bin (ph, pw): floor the start and ceil the end.
      int hstart = floor(static_cast<Dtype>(ph) * bin_size_h
                          + roi_start_h);
      int wstart = floor(static_cast<Dtype>(pw) * bin_size_w
                          + roi_start_w);
      int hend = ceil(static_cast<Dtype>(ph + 1) * bin_size_h
                        + roi_start_h);
      int wend = ceil(static_cast<Dtype>(pw + 1) * bin_size_w
                        + roi_start_w);

      // Clip the bin to the feature-map boundaries.
      hstart = min(max(hstart, 0), height);
      hend = min(max(hend, 0), height);
      wstart = min(max(wstart, 0), width);
      wend = min(max(wend, 0), width);
      bool is_empty = (hend <= hstart) || (wend <= wstart);

      // The bin position is used directly as the position inside the group.
      int gw = pw;
      int gh = ph;
      // Input channel for output channel ctop at group position (gh, gw):
      //   c = ctop * group_size * group_size + gh * group_size + gw
      // e.g. group_size = 7, ctop = 1, (gh, gw) = (1, 1) -> c = 49 + 7 + 1 = 57.
      int c = (ctop * group_size + gh) * group_size + gw;

      // Point at the start of plane c of image roi_batch_ind. This is pointer
      // arithmetic, and it uses a local pointer for the same reason as
      // offset_bottom_rois above.
      const Dtype* offset_bottom_data =
          bottom_data + (roi_batch_ind * channels + c) * height * width;

      // Sum every feature-map value inside the bin.
      Dtype out_sum = 0;
      for (int h = hstart; h < hend; ++h) {
        for (int w = wstart; w < wend; ++w) {
          int bottom_index = h * width + w;
          out_sum += offset_bottom_data[bottom_index];
        }
      }

      // Average over the bin area; an empty bin yields 0 instead of
      // dividing by zero.
      Dtype bin_area = (hend - hstart) * (wend - wstart);
      top_data[index] = is_empty ? 0. : out_sum / bin_area;
      // Record which input channel this output element was pooled from.
      mapping_channel[index] = c;
    }
  }

  // GPU forward pass of the layer.
  //
  // Reads the feature map from bottom[0] and the ROI records from bottom[1],
  // and writes the pooled result to top[0]. The output is pre-filled with 0
  // and the channel mapping with -1 before the kernel is launched with one
  // loop index per output element.
  template <typename Dtype>
  void PSROIPoolingLayer<Dtype>::Forward_gpu(const vector<Blob<Dtype>*>& bottom,
    const vector<Blob<Dtype>*>& top) {
    const Dtype* bottom_data = bottom[0]->gpu_data();
    const Dtype* bottom_rois = bottom[1]->gpu_data();
    Dtype* top_data = top[0]->mutable_gpu_data();
    int* mapping_channel_ptr = mapping_channel_.mutable_gpu_data();
    int count = top[0]->count();
    caffe_gpu_set(count, Dtype(0), top_data);
    caffe_gpu_set(count, -1, mapping_channel_ptr);
    // Launch the PSROIPoolingForward kernel, passing the element count and
    // all data it needs.
    // NOLINT_NEXT_LINE(whitespace/operators)
    PSROIPoolingForward<Dtype><<<CAFFE_GET_BLOCKS(count), CAFFE_CUDA_NUM_THREADS>>>(
      count, bottom_data, spatial_scale_, channels_, height_, width_, pooled_height_,
      pooled_width_, bottom_rois, output_dim_, group_size_, top_data, mapping_channel_ptr);
    CUDA_POST_KERNEL_CHECK;
  }

}  // namespace caffe
好几年没写C++了，CUDA也没研究过，所以有些解读只是yy一下，望各位大虾勿喷~
0 0