faster-rcnn cpu实现
来源:互联网 发布:数据库安全测试工具 编辑:程序博客网 时间:2024/06/07 17:27
py-faster-rcnn-cpu
py-faster-rcnn在测试模型的时候,可以选择使用cpu mode或者gpu mode,但是如果使用该框架训练自己的模型,就只能使用gpu了。应该是作者考虑训练速度的原因,对roi_pooling_layer和smooth_L1_loss_layer只使用和提供了gpu版本的代码.
这两个文件位于py-faster-rcnn/caffe-fast-rcnn/src/caffe/layers目录下。
打开这两个文件,可以看到smooth_L1_loss_layer.cpp中Forward_cpu和Backward_cpu处都是NOT_IMPLEMENTED,roi_pooling_layer.cpp中的Backward_cpu处也是NOT_IMPLEMENTED。
所以如果没有一块满足性能的GPU就做不了训练了。
下边是我对这两个文件的修改,实现了CPU版本的函数,如有错误,欢迎指正交流。另外,在我的github上也可以找到这两个文件。使用时,直接替换原文件,重新make即可。
roi_pooling_layer.cpp
// ------------------------------------------------------------------// Fast R-CNN// Copyright (c) 2015 Microsoft// Licensed under The MIT License [see fast-rcnn/LICENSE for details]// Written by Ross Girshick// ------------------------------------------------------------------#include <cfloat>#include "caffe/fast_rcnn_layers.hpp"using std::max;using std::min;using std::floor;using std::ceil;namespace caffe {template <typename Dtype>void ROIPoolingLayer<Dtype>::LayerSetUp(const vector<Blob<Dtype>*>& bottom, const vector<Blob<Dtype>*>& top) { ROIPoolingParameter roi_pool_param = this->layer_param_.roi_pooling_param(); CHECK_GT(roi_pool_param.pooled_h(), 0) << "pooled_h must be > 0"; CHECK_GT(roi_pool_param.pooled_w(), 0) << "pooled_w must be > 0"; pooled_height_ = roi_pool_param.pooled_h(); pooled_width_ = roi_pool_param.pooled_w(); spatial_scale_ = roi_pool_param.spatial_scale(); LOG(INFO) << "Spatial scale: " << spatial_scale_;}template <typename Dtype>void ROIPoolingLayer<Dtype>::Reshape(const vector<Blob<Dtype>*>& bottom, const vector<Blob<Dtype>*>& top) { channels_ = bottom[0]->channels(); height_ = bottom[0]->height(); width_ = bottom[0]->width(); top[0]->Reshape(bottom[1]->num(), channels_, pooled_height_, pooled_width_); max_idx_.Reshape(bottom[1]->num(), channels_, pooled_height_, pooled_width_);}template <typename Dtype>void ROIPoolingLayer<Dtype>::Forward_cpu(const vector<Blob<Dtype>*>& bottom, const vector<Blob<Dtype>*>& top) { const Dtype* bottom_data = bottom[0]->cpu_data(); const Dtype* bottom_rois = bottom[1]->cpu_data(); // Number of ROIs int num_rois = bottom[1]->num(); int batch_size = bottom[0]->num(); int top_count = top[0]->count(); Dtype* top_data = top[0]->mutable_cpu_data(); // Init top_data to -∞ caffe_set(top_count, Dtype(-FLT_MAX), top_data); int* argmax_data = max_idx_.mutable_cpu_data(); // Init argmax_data t0 -1 caffe_set(top_count, -1, argmax_data); // For each ROI R = [batch_index x1 y1 x2 y2]: max pool over R for (int n = 0; n < 
num_rois; ++n) { int roi_batch_ind = bottom_rois[0]; int roi_start_w = round(bottom_rois[1] * spatial_scale_); int roi_start_h = round(bottom_rois[2] * spatial_scale_); int roi_end_w = round(bottom_rois[3] * spatial_scale_); int roi_end_h = round(bottom_rois[4] * spatial_scale_); CHECK_GE(roi_batch_ind, 0); CHECK_LT(roi_batch_ind, batch_size); int roi_height = max(roi_end_h - roi_start_h + 1, 1); int roi_width = max(roi_end_w - roi_start_w + 1, 1); const Dtype bin_size_h = static_cast<Dtype>(roi_height) / static_cast<Dtype>(pooled_height_); const Dtype bin_size_w = static_cast<Dtype>(roi_width) / static_cast<Dtype>(pooled_width_); const Dtype* batch_data = bottom_data + bottom[0]->offset(roi_batch_ind); for (int c = 0; c < channels_; ++c) { for (int ph = 0; ph < pooled_height_; ++ph) { for (int pw = 0; pw < pooled_width_; ++pw) { // Compute pooling region for this output unit: // start (included) = floor(ph * roi_height / pooled_height_) // end (excluded) = ceil((ph + 1) * roi_height / pooled_height_) int hstart = static_cast<int>(floor(static_cast<Dtype>(ph) * bin_size_h)); int wstart = static_cast<int>(floor(static_cast<Dtype>(pw) * bin_size_w)); int hend = static_cast<int>(ceil(static_cast<Dtype>(ph + 1) * bin_size_h)); int wend = static_cast<int>(ceil(static_cast<Dtype>(pw + 1) * bin_size_w)); hstart = min(max(hstart + roi_start_h, 0), height_); hend = min(max(hend + roi_start_h, 0), height_); wstart = min(max(wstart + roi_start_w, 0), width_); wend = min(max(wend + roi_start_w, 0), width_); bool is_empty = (hend <= hstart) || (wend <= wstart); const int pool_index = ph * pooled_width_ + pw; if (is_empty) { top_data[pool_index] = 0; argmax_data[pool_index] = -1; continue; } for (int h = hstart; h < hend; ++h) { for (int w = wstart; w < wend; ++w) { const int index = h * width_ + w; if (batch_data[index] > top_data[pool_index]) { top_data[pool_index] = batch_data[index]; argmax_data[pool_index] = index; } } } } } // Increment all data pointers by one channel 
batch_data += bottom[0]->offset(0, 1); top_data += top[0]->offset(0, 1); argmax_data += max_idx_.offset(0, 1); } // Increment ROI data pointer bottom_rois += bottom[1]->offset(1); }}template <typename Dtype>void ROIPoolingLayer<Dtype>::Backward_cpu(const vector<Blob<Dtype>*>& top, const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom) { // NOT_IMPLEMENTED; //*** cpu implementation *** if(!propagate_down[0]){ return; } const Dtype* bottom_rois = bottom[1]->cpu_data(); const Dtype* top_diff = top[0]->cpu_diff(); Dtype* bottom_diff = bottom[0]->mutable_cpu_diff(); const int nums = bottom[0]->num(); const int count = bottom[0]->count(); const int batch_size = bottom[0]->num(); caffe_set(count, Dtype(0), bottom_diff); const int* argmax_data = max_idx_.cpu_data(); CHECK_EQ(top[0]->num(),bottom[1]->num())<<"top and bottom num not equal!"; for (int n = 0; n < nums; ++n){ int roi_batch_ind = bottom_rois[0]; CHECK_GE(roi_batch_ind,0); CHECK_LT(roi_batch_ind, batch_size); int roi_start_w = round(bottom_rois[1] * spatial_scale_); int roi_start_h = round(bottom_rois[2] * spatial_scale_); int roi_end_w = round(bottom_rois[3] * spatial_scale_); int roi_end_h = round(bottom_rois[4] * spatial_scale_); int roi_height = max(roi_end_h - roi_start_h + 1, 1); int roi_width = max(roi_end_w - roi_start_w + 1, 1); Dtype bin_size_h = static_cast<Dtype>(roi_height) / static_cast<Dtype>(pooled_height_); Dtype bin_size_w = static_cast<Dtype>(roi_width) / static_cast<Dtype>(pooled_width_); Dtype* batch_bottom_diff = bottom_diff + bottom[0]->offset(roi_batch_ind); for(int c = 0; c < channels_; ++c){ for(int h = 0; h < height_; ++h){ for(int w =0; w< width_; ++w){ // skip if ROI doesn't include (h,w) const bool in_roi = (w >= roi_start_w && w <= roi_end_w && h >= roi_start_h && h <= roi_end_h); if(!in_roi) continue; // output index int index = h * width_ + w;// check if width_ // compute outputs' size, phstart, pwstart, phend, pwend** int phstart = floor(static_cast<Dtype>(h - 
roi_start_h) / bin_size_h); int phend = ceil(static_cast<Dtype>(h - roi_start_h + 1) / bin_size_h); int pwstart = floor(static_cast<Dtype>(w - roi_start_w) / bin_size_w); int pwend = ceil(static_cast<Dtype>(w - roi_start_w + 1) / bin_size_w); phstart = min(max(phstart, 0), pooled_height_); phend = min(max(phend, 0), pooled_height_); pwstart = min(max(pwstart, 0), pooled_width_); pwend = min(max(pwend, 0), pooled_width_); for(int ph = phstart; ph < phend; ++ph){ for( int pw = pwstart; pw < pwend; ++ pw){ if(argmax_data[ph * pooled_width_ + pw] == (h *width_ + w)){ batch_bottom_diff[index] += top_diff[ph * pooled_width_ + pw]; } } } } } batch_bottom_diff += bottom[0]->offset(0, 1); top_diff += top[0]->offset(0, 1); argmax_data += max_idx_.offset(0, 1); } bottom_rois += bottom[1]->offset(1); } // ***end cpu implementation ***}#ifdef CPU_ONLYSTUB_GPU(ROIPoolingLayer);#endifINSTANTIATE_CLASS(ROIPoolingLayer);REGISTER_LAYER_CLASS(ROIPooling);} // namespace caffe
smooth_L1_loss_layer.cpp
// ------------------------------------------------------------------// Fast R-CNN// Copyright (c) 2015 Microsoft// Licensed under The MIT License [see fast-rcnn/LICENSE for details]// Written by Ross Girshick// ------------------------------------------------------------------#include "caffe/fast_rcnn_layers.hpp"namespace caffe {template <typename Dtype>void SmoothL1LossLayer<Dtype>::LayerSetUp( const vector<Blob<Dtype>*>& bottom, const vector<Blob<Dtype>*>& top) { SmoothL1LossParameter loss_param = this->layer_param_.smooth_l1_loss_param(); sigma2_ = loss_param.sigma() * loss_param.sigma(); has_weights_ = (bottom.size() >= 3); if (has_weights_) { CHECK_EQ(bottom.size(), 4) << "If weights are used, must specify both " "inside and outside weights"; }}template <typename Dtype>void SmoothL1LossLayer<Dtype>::Reshape( const vector<Blob<Dtype>*>& bottom, const vector<Blob<Dtype>*>& top) { LossLayer<Dtype>::Reshape(bottom, top); CHECK_EQ(bottom[0]->channels(), bottom[1]->channels()); CHECK_EQ(bottom[0]->height(), bottom[1]->height()); CHECK_EQ(bottom[0]->width(), bottom[1]->width()); if (has_weights_) { CHECK_EQ(bottom[0]->channels(), bottom[2]->channels()); CHECK_EQ(bottom[0]->height(), bottom[2]->height()); CHECK_EQ(bottom[0]->width(), bottom[2]->width()); CHECK_EQ(bottom[0]->channels(), bottom[3]->channels()); CHECK_EQ(bottom[0]->height(), bottom[3]->height()); CHECK_EQ(bottom[0]->width(), bottom[3]->width()); } diff_.Reshape(bottom[0]->num(), bottom[0]->channels(), bottom[0]->height(), bottom[0]->width()); errors_.Reshape(bottom[0]->num(), bottom[0]->channels(), bottom[0]->height(), bottom[0]->width()); // vector of ones used to sum ones_.Reshape(bottom[0]->num(), bottom[0]->channels(), bottom[0]->height(), bottom[0]->width()); for (int i = 0; i < bottom[0]->count(); ++i) { ones_.mutable_cpu_data()[i] = Dtype(1); }}template <typename Dtype>void SmoothL1LossLayer<Dtype>::Forward_cpu(const vector<Blob<Dtype>*>& bottom, const vector<Blob<Dtype>*>& top) { 
//NOT_IMPLEMENTED; // cpu implementation CHECK_EQ(bottom[0]->count(1), bottom[1]->count(1)) << "Inputs must have the same dimension."; int count = bottom[0]->count(); caffe_sub(count, bottom[0]->cpu_data(), bottom[1]->cpu_data(), diff_.mutable_cpu_data()); if(has_weights_){ caffe_mul(count, bottom[2]->cpu_data(), diff_.cpu_data(), diff_.mutable_cpu_data()); } // f(x) = 0.5 * (sigma * x)^2 if |x| < 1 / sigma / sigma // |x| - 0.5 / sigma / sigma otherwise const Dtype* in = diff_.cpu_data(); Dtype* out = errors_.mutable_cpu_data(); for(int index=0; index<count; ++index){ Dtype val = in[index]; Dtype abs_val = abs(val); if(abs_val < 1.0 / sigma2_){ out[index] = 0.5 * val * val * sigma2_; } else{ out[index] = abs_val - 0.5 / sigma2_; } } if(has_weights_){ caffe_mul(count, bottom[3]->cpu_data(), out, errors_.mutable_cpu_data()); } // compute loss Dtype loss = caffe_cpu_dot(count, ones_.cpu_data(), errors_.cpu_data()); top[0]->mutable_cpu_data()[0] = loss / bottom[0]->num(); // end cpu implementation}template <typename Dtype>void SmoothL1LossLayer<Dtype>::Backward_cpu(const vector<Blob<Dtype>*>& top, const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom) { //NOT_IMPLEMENTED; // cpu implementation int count = diff_.count(); const Dtype* in = diff_.cpu_data(); Dtype* out = diff_.mutable_cpu_data(); for(int index=0; index < count; index++){ Dtype val = in[index]; Dtype abs_val = abs(val); if(abs_val < 1.0 / sigma2_){ out[index] = sigma2_ * val; } else{ out[index] = (Dtype(0) < val) - (val < Dtype(0)); } } for(int i=0; i<2; ++i){ if(propagate_down[i]){ const Dtype sign = (i == 0) ? 
1 : -1; const Dtype alpha = sign * top[0]->cpu_diff()[0] / bottom[i]->num(); caffe_cpu_axpby( count, alpha, out,//diff_.cpu_data(), Dtype(0), bottom[i]->mutable_cpu_diff()); if(has_weights_){ caffe_mul( count, bottom[2]->cpu_data(), bottom[i]->cpu_diff(), bottom[i]->mutable_cpu_data()); caffe_mul( count, bottom[3]->cpu_data(), bottom[i]->cpu_diff(), bottom[i]->mutable_cpu_data()); } } } // end cpu implementation}#ifdef CPU_ONLYSTUB_GPU(SmoothL1LossLayer);#endifINSTANTIATE_CLASS(SmoothL1LossLayer);REGISTER_LAYER_CLASS(SmoothL1Loss);} // namespace caffe
0 0
- faster-rcnn cpu实现
- faster rcnn CPU mode
- Faster RCNN CPU安装记录
- Faster RCNN CPU模式下进行训练
- faster-rcnn+ubuntu14.04+matlab2014a+cpu配置
- faster rcnn winodws cpu配置笔记
- windows matlab cpu 测试faster rcnn
- Ubuntu 17.04 py-faster-rcnn only CPU
- Faster-RCNN实现物体检测
- faster rcnn在Ubuntu系统下cpu模式的算法实现
- Faster rcnn
- Faster-rcnn
- Faster rcnn
- Faster-Rcnn
- faster-rcnn
- faster-RCNN
- Faster-RCNN
- Faster-rcnn
- 函数式编程初探
- “领导喊你去办公室”电信诈骗现沪 专骗公务员
- OC 字符串
- Execution failed for task 'app:mergeDebugResources' Crunching Cruncher…png failed
- mac下查看端口使用的程序并强制关闭
- faster-rcnn cpu实现
- Android socket 编程 实现消息推送(二)
- Python中matplotlib pyplot常用的一些使用方法[Python 3.x]
- 计算机视觉
- 实验六课后作业
- Windows Driver Samples源码剖析(一) 综述
- 多线程语句添加
- 安卓的通知组件
- commons-fileupload下载文件