R-FCN源代码解读
来源:互联网 发布:斯太尔wt数据俯仰角 编辑:程序博客网 时间:2024/05/22 15:32
本文针对目标检测算法 R-FCN 源代码中 psroi_pooling_layer.cu 的 forward(前向)过程进行详细解读,示意图的理解参考了 http://blog.csdn.net/shadow_guo/article/details/51767036 。下面给出带注释的代码解读:
// ------------------------------------------------------------------// R-FCN// Copyright (c) 2016 Microsoft// Licensed under The MIT License [see r-fcn/LICENSE for details]// Written by Yi Li// ------------------------------------------------------------------#include <cfloat>#include "caffe/rfcn_layers.hpp"#include "caffe/util/gpu_util.cuh"using std::max;using std::min;namespace caffe { template <typename Dtype> __global__ void PSROIPoolingForward( const int nthreads, const Dtype* bottom_data, //输入的feature map的像素值 const Dtype spatial_scale, //由之前所有卷积层的strides决定,此处为16 const int channels, //feature map的channels=k*k*(C+1) const int height, //feature map的宽度 const int width, //feature map的高度 const int pooled_height, //==k=7 const int pooled_width, //==k=7 const Dtype* bottom_rois, //输入的roi,包括[batch_ind,x1,y1,x2,y2] const int output_dim, //输出score map的dim,psroipooled_cls_rois时为21,psroipooled_loc_rois时为8 const int group_size, //==k=7 Dtype* top_data, //socre map的输出 int* mapping_channel) { //使用CUDA多线程计算 CUDA_KERNEL_LOOP(index, nthreads) { //index为最终score map上所有,共有(C+1)*k*k个值 // The output is in order (n, ctop, ph, pw),类似于图像的BIL逐行扫描 int pw = index % pooled_width; //score map上第i=[0,k-1]列 int ph = (index / pooled_width) % pooled_height; //score map上第j=[0,k-1]行 int ctop = (index / pooled_width / pooled_height) % output_dim; //score map上第ctop个层(class) int n = index / pooled_width / pooled_height / output_dim; //第n个roi // [start, end) interval for spatial sampling bottom_rois += n * 5; //bottom_rois每次移动5 int roi_batch_ind = bottom_rois[0]; //bottom_rois第0个位置存放的是batch_ind //此处计算的是roi在feature_map上的坐标范围 Dtype roi_start_w = static_cast<Dtype>(round(bottom_rois[1])) * spatial_scale; Dtype roi_start_h = static_cast<Dtype>(round(bottom_rois[2])) * spatial_scale; Dtype roi_end_w = static_cast<Dtype>(round(bottom_rois[3]) + 1.) * spatial_scale; Dtype roi_end_h = static_cast<Dtype>(round(bottom_rois[4]) + 1.) 
* spatial_scale; // Force too small ROIs to be 1x1 Dtype roi_width = max(roi_end_w - roi_start_w, 0.1); //avoid 0 Dtype roi_height = max(roi_end_h - roi_start_h, 0.1); // Compute w and h at bottom //计算bin桶的大小 Dtype bin_size_h = roi_height / static_cast<Dtype>(pooled_height); Dtype bin_size_w = roi_width / static_cast<Dtype>(pooled_width); //计算第(i,j)个bin桶在feature map上的坐标范围 int hstart = floor(static_cast<Dtype>(ph) * bin_size_h + roi_start_h); int wstart = floor(static_cast<Dtype>(pw)* bin_size_w + roi_start_w); int hend = ceil(static_cast<Dtype>(ph + 1) * bin_size_h + roi_start_h); int wend = ceil(static_cast<Dtype>(pw + 1) * bin_size_w + roi_start_w); // Add roi offsets and clip to input boundaries // 超出feature map尺寸的范围舍弃 hstart = min(max(hstart, 0), height); hend = min(max(hend, 0), height); wstart = min(max(wstart, 0),width); wend = min(max(wend, 0), width); bool is_empty = (hend <= hstart) || (wend <= wstart); int gw = pw; int gh = ph; //ctop*group_size*group_size+gh*gh*group_size+gw,计算得到的是第ctop类的(ph,pw)位置索引 //例如,score map上第ctop=1类的第(i,j)=(1,1)位置,c=1*49+1*7+1,对于feature map上第c个颜色层中(实际包含C=21层)的第2(ctop+1)层 int c = (ctop*group_size + gh)*group_size + gw; //每次只计算一层的avg pooling得分 //第roi_batch_ind个roi的时候,bottom_data需要移动roi_batch_ind*channels层(每层需移动height * width) //score map上第(i,j,class)=(ph,pw,ctop)位置(索引为c),移动c层 //bottom_data为数据指针,此处是在移动指针,而不是代数求和 bottom_data += (roi_batch_ind * channels + c) * height * width; Dtype out_sum = 0; for (int h = hstart; h < hend; ++h){ for (int w = wstart; w < wend; ++w){ int bottom_index = h*width + w; out_sum += bottom_data[bottom_index]; } } Dtype bin_area = (hend - hstart)*(wend - wstart); //计算第(i,j)个bin桶在feature map上的面积 top_data[index] = is_empty? 0. 
: out_sum/bin_area; //如果第(i,j)个bin桶宽高不合乎逻辑,则输出为0,否则为平均池化值 mapping_channel[index] = c; //记录第index次迭代计算socre map上索引位置 } } template <typename Dtype> void PSROIPoolingLayer<Dtype>::Forward_gpu(const vector<Blob<Dtype>*>& bottom, const vector<Blob<Dtype>*>& top) { const Dtype* bottom_data = bottom[0]->gpu_data(); const Dtype* bottom_rois = bottom[1]->gpu_data(); Dtype* top_data = top[0]->mutable_gpu_data(); int* mapping_channel_ptr = mapping_channel_.mutable_gpu_data(); int count = top[0]->count(); caffe_gpu_set(count, Dtype(0), top_data); caffe_gpu_set(count, -1, mapping_channel_ptr); // NOLINT_NEXT_LINE(whitespace/operators) PSROIPoolingForward<Dtype> << <CAFFE_GET_BLOCKS(count), CAFFE_CUDA_NUM_THREADS >> >( count, bottom_data, spatial_scale_, channels_, height_, width_, pooled_height_, pooled_width_, bottom_rois, output_dim_, group_size_, top_data, mapping_channel_ptr); //调研上面的PSROIPoolingForward函数,传入线程数量及其他需要的数据 CUDA_POST_KERNEL_CHECK; }}
好几年没写C++了,CUDA也没研究过,所以有些解读只是个人的猜测和理解,未必准确,如有错误还望各位指正~
0 0
- R-FCN源代码解读
- R-FCN解读
- r-fcn
- R-FCN
- R-FCN
- R-FCN
- R-FCN
- R-FCN:基于区域的全卷积网络来检测物体 论文解读
- 我读R-FCN
- R-FCN网络
- R-FCN阅读笔记
- r-fcn论文
- R-FCN论文翻译
- 代码阅读:R-FCN
- 【转】R-FCN
- Windows下配置R-FCN
- Windows下配置R-FCN
- py-R-FCN源码分析
- js定时器的简单使用
- 高精度乘法
- for,foreach,iterator的用法和区别
- java 多线程 死锁 哲学家就餐问题
- Python调用Java--Jpype_demo
- R-FCN源代码解读
- 一些CSS的奇淫技巧
- 有点意思
- 分享两个BPM配置小技巧
- 用Python编写MapReduce代码与调用-某一天之前的所有活跃用户统计(1)
- mysql中触发器性能问题
- Golang 接收参数和返回参数的问题
- 使用ViewDragHelper带你一步步实现仿照qq的左滑删除事件
- laravel excel 导入excel