SSD MyDetect记录

来源:互联网 发布:大麦盒子安装软件 编辑:程序博客网 时间:2024/06/01 10:49

最近学习了SSD,想自己实现SSD后面检测的过程(虽然SSD已经有提供代码,但是自己练练手)。

SSD如何由先验框(prior box)和网络的位置预测解码得到bounding box:

//prior_data_mean表示的是先验框的位置的均值,prior_data_std表示先验框的位置的方差,loc_pre表示预测
prior_w = prior_data_mean[i*4+2] - prior_data_mean[0+i*4];//先验的宽
prior_h = prior_data_mean[i*4+3] - prior_data_mean[1+i*4];//先验的高
prior_x = (prior_data_mean[i*4+2] + prior_data_mean[0+i*4])/2;//先验的中心位置x坐标
prior_y = (prior_data_mean[i*4+1] + prior_data_mean[3+i*4])/2;//先验的中心位置y坐标
bb_x = prior_data_std[i*4+0]*loc_pre[i*4+0]*prior_w + prior_x;
bb_y = prior_data_std[i*4+1]*loc_pre[i*4+1]*prior_h + prior_y;
bb_w = exp(prior_data_std[i*4+2]*loc_pre[i*4+2])*prior_w;
bb_h = exp(prior_data_std[i*4+3]*loc_pre[i*4+3])*prior_h;

自己添加的层MyDetect:功能是对输入的prior bounding box、location predict、confidence predict进行处理,输出top[0]的维度是1×1×num×6。其中num表示输出目标框的个数,会在层进行forward的时候进行调整;最后一维的6个值分别表示一个框的xmin、ymin、xmax、ymax、class、confidence。

#ifndef CAFFE_MY_DETECT_LAYER_HPP_#define CAFFE_MY_DETECT_LAYER_HPP_#include "caffe/blob.hpp"#include "caffe/layer.hpp"#include "caffe/proto/caffe.pb.h"#include <vector>#include <string>namespace caffe{template <typename Dtype>class MyDetectLayer : public Layer<Dtype>{public:    explicit MyDetectLayer(const LayerParameter& param)        :Layer<Dtype>(param){}    virtual void LayerSetUp(const vector<Blob<Dtype>*>& bottom,            const vector<Blob<Dtype>*>& top);    virtual void Reshape(const vector<Blob<Dtype>*>& bottom,            const vector<Blob<Dtype>*>& top);    virtual inline const char* type() const { return "MyDetect";}    virtual inline int ExactNumBottomBlobs() const { return 3; }    virtual inline int ExactNumTopBlobs() const { return 1; }protected:    virtual void Forward_cpu(const vector<Blob<Dtype>*>& bottom,            const vector<Blob<Dtype>*>& top);    virtual void Backward_cpu(const vector<Blob<Dtype>*>& bottom,            const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& top);    int prior_num;};}#endif#include "caffe/layers/mydetect.hpp"#include <vector>namespace caffe{template<typename Dtype>Dtype lap(Dtype x1_min,Dtype x1_max,Dtype x2_min,Dtype x2_max){    if(x1_min < x2_min){        if(x1_max < x2_min){            return 0;        }else{            if(x1_max > x2_min){                if(x1_max < x2_max){                    return x1_max - x2_min;                }else{                    return x2_max - x2_min;                }            }else{                return 0;            }        }    }else{        if(x1_min < x2_max){            if(x1_max < x2_max)                return x1_max-x1_min;            else                return x2_max-x1_min;        }else{            return 0;        }    }}template<typename Dtype>Dtype jaccard(vector<Dtype> x1, vector<Dtype>x2){    Dtype lap1 = lap<Dtype>(x1[0],x1[2],x2[0],x2[2]);    Dtype lap2 = lap<Dtype>(x1[1],x1[3],x2[1],x2[3]);    if(lap1 < Dtype(0.00000001) || lap2 < 
Dtype(0.00000001))        return Dtype(0);    else        return lap1*lap2/((x1[2]-x1[0])*(x1[3]-x1[1])+(x2[2]-x2[0])*(x2[3]-x2[1])-lap1*lap2);}template<typename Dtype>void MyDetectLayer<Dtype>::LayerSetUp(const vector<Blob<Dtype>*>& bottom,        const vector<Blob<Dtype>*>& top){    CHECK_EQ(bottom.size(), 3);    CHECK_EQ(top.size(), 1);    prior_num = bottom[2]->height()/4;    CHECK_EQ(prior_num*4, bottom[0]->channels())        << "Number of priors must match number of location predictions.";    CHECK_EQ(prior_num*21, bottom[1]->channels())    << "Number of priors must match number of confidence predictions.";}template<typename Dtype>void MyDetectLayer<Dtype>::Reshape(const vector<Blob<Dtype>*>& bottom,        const vector<Blob<Dtype>*>& top){    vector<int> shape;    shape.push_back(1);    shape.push_back(1);    shape.push_back(1);    shape.push_back(6);    top[0]->Reshape(shape);}template<typename Dtype>void MyDetectLayer<Dtype>::Forward_cpu(const vector<Blob<Dtype>*>& bottom,        const vector<Blob<Dtype>*>& top){    std::vector<std::pair<std::pair<int,int>,Dtype> > idx_class_conf;    const Dtype* conf_data = bottom[1]->cpu_data();    const Dtype* prior_data_mean = bottom[2]->cpu_data();    const Dtype* prior_data_std = bottom[2]->cpu_data()+prior_num*4;    const Dtype* loc_pre = bottom[0]->cpu_data();    for(int prior_idx = 0; prior_idx < prior_num; ++prior_idx){        int idx = prior_idx*21;        Dtype max = 0;        int max_idx = -1;        for(int class_idx = 1; class_idx < 21; ++class_idx){//class_idx = 0 is background            if(conf_data[idx+class_idx] > max){                max = conf_data[idx+class_idx];                max_idx = class_idx;            }        }        if(max > 0.5){//threshold            idx_class_conf.push_back(make_pair(make_pair(prior_idx,max_idx),conf_data[idx+max_idx]));        }    }    vector<vector<Dtype> > bboxes;    for(int i = 0; i < prior_num; ++i){        vector<Dtype> temp;        Dtype 
prior_x,prior_y,prior_w,prior_h;        prior_w = prior_data_mean[i*4+2] - prior_data_mean[0+i*4];        prior_h = prior_data_mean[i*4+3] - prior_data_mean[1+i*4];        prior_x = (prior_data_mean[i*4+2] + prior_data_mean[0+i*4])/2;        prior_y = (prior_data_mean[i*4+1] + prior_data_mean[3+i*4])/2;        Dtype bb_x,bb_y,bb_w,bb_h;        bb_x = prior_data_std[i*4+0]*loc_pre[i*4+0]*prior_w + prior_x;        bb_y = prior_data_std[i*4+1]*loc_pre[i*4+1]*prior_h + prior_y;        bb_w = exp(prior_data_std[i*4+2]*loc_pre[i*4+2])*prior_w;        bb_h = exp(prior_data_std[i*4+3]*loc_pre[i*4+3])*prior_h;        temp.push_back(bb_x-bb_w/2);        temp.push_back(bb_y-bb_h/2);        temp.push_back(bb_x+bb_w/2);        temp.push_back(bb_y+bb_h/2);        temp[0] = temp[0] < 0 ? 0:temp[0];        temp[1] = temp[1] < 0 ? 0:temp[1];        temp[2] = temp[2] < 0 ? 0:temp[2];        temp[3] = temp[3] < 0 ? 0:temp[3];        temp[0] = temp[0] > 1 ? 1:temp[0];        temp[1] = temp[1] > 1 ? 1:temp[1];        temp[2] = temp[2] > 1 ? 1:temp[2];        temp[3] = temp[3] > 1 ? 
1:temp[3];        bboxes.push_back(temp);    }    for(int i = 0; i < idx_class_conf.size(); ++i){        for(int j = idx_class_conf.size()-1; j > i; --j){            if(idx_class_conf[i].first.second == idx_class_conf[j].first.second){                //如果iou大于0.5,并且confidence小的删除,如果是第i个小,并且要break,不用进行后面的比较,--i,否这不用。                if(jaccard(bboxes[idx_class_conf[i].first.first],bboxes[idx_class_conf[j].first.first]) > 0.5){                    if(idx_class_conf[i].second < idx_class_conf[j].second){                        idx_class_conf.erase(idx_class_conf.begin()+i);                        --i;                        break;                    }else{                        idx_class_conf.erase(idx_class_conf.begin()+j);                    }                }            }        }    }    vector<int> top_shape;    top_shape.push_back(1);    top_shape.push_back(1);    top_shape.push_back(idx_class_conf.size());    top_shape.push_back(6);    top[0]->Reshape(top_shape);    Dtype* top_data = top[0]->mutable_cpu_data();    int top_idx = 0;    for(int i = 0 ; i < idx_class_conf.size(); ++i){        vector<Dtype> bbox = bboxes[idx_class_conf[i].first.first];        top_data[top_idx++] = bbox[0];        top_data[top_idx++] = bbox[1];        top_data[top_idx++] = bbox[2];        top_data[top_idx++] = bbox[3];        top_data[top_idx++] = idx_class_conf[i].first.second;        top_data[top_idx++] = idx_class_conf[i].second;    }}template<typename Dtype>void MyDetectLayer<Dtype>::Backward_cpu(const vector<Blob<Dtype>*>& bottom,        const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& top){    NOT_IMPLEMENTED;}template float jaccard(vector<float> x1, vector<float> x2);template double jaccard(vector<double> x1, vector<double> x2);template double lap(double x1_min, double x1_max, double x2_min, double x2_max);template float lap(float x1_min, float x1_max, float x2_min, float x2_max);#ifdef CPU_ONLYSTUB_GPU_FORWARD(MyDetect, 
Forward);#endifINSTANTIATE_CLASS(MyDetectLayer);REGISTER_LAYER_CLASS(MyDetect);}

检测与画框程序

#include <string>#include <vector>#include "boost/algorithm/string.hpp"#include "google/protobuf/text_format.h"#include <opencv2/opencv.hpp>#include "caffe/blob.hpp"#include "caffe/layer.hpp"#include "caffe/common.hpp"#include "caffe/net.hpp"#include "caffe/proto/caffe.pb.h"#include "caffe/util/db.hpp"#include "caffe/util/format.hpp"#include "caffe/util/io.hpp"#include <stdio.h>#include <malloc.h>#include <fstream>#include <boost/progress.hpp>#include <boost/math/special_functions/next.hpp>#include <boost/random.hpp>#include <limits>#include "caffe/common.hpp"#include "caffe/util/math_functions.hpp"#include "caffe/util/rng.hpp"//#include "caffe/util/math_functions.hpp"using caffe::Blob;using caffe::Caffe;using caffe::Datum;using caffe::Net;using caffe::Layer;using std::string;namespace db = caffe::db;void loaddata(boost::shared_ptr<Net<float> >& net, std::string image_path);//int lap(int x1_min,int x1_max,int x2_min,int x2_max);int main(int argc, char** argv){  char *labelname[] = {"aeroplane", "bicycle", "bird", "boat", "bottle", "bus", "car", "cat", "chair", "cow", "diningtable", "dog", "horse", "motorbike", "person", "pottedplant", "sheep", "sofa", "train", "tvmonitor"};  Caffe::set_mode(Caffe::GPU);  boost::shared_ptr<Net<float> > net(new Net<float>(argv[1], caffe::TEST));  net->CopyTrainedLayersFromBinaryProto(argv[2]);  loaddata(net,std::string(argv[3]));  net->Forward();  Blob<float>* output_layer = net->output_blobs()[0];  const float* begin = output_layer->cpu_data();  const float* end = begin + output_layer->channels()*output_layer->height()*output_layer->width();  std::vector<float> result(begin, end);  cv::Mat image = cv::imread(argv[3]);  for(int i = 0; i < output_layer->height(); ++i){      cv::Point point1(result[i*6+0]*image.cols, result[i*6+1]*image.rows);      cv::Point point2(result[i*6+2]*image.cols, result[i*6+3]*image.rows);      cv::rectangle(image, cv::Rect(point1,point2),cv::Scalar(0,result[i*6+4]/20.0*225,255),result[i*6+5]*50/8);      
char ch[100];      sprintf(ch,"%s %.2f",labelname[int(result[i*6+4]-1)], result[i*6+5]*1.0);      std::string temp(ch);      cv::putText(image,temp,point1,CV_FONT_HERSHEY_COMPLEX,0.4,cv::Scalar(255,255,255));  }  cv::imshow("SSD",image);  cv::waitKey(0);  return 1;}void loaddata(boost::shared_ptr<Net<float> >& net, std::string image_path){  Blob<float>* input_layer = net->input_blobs()[0];  int width, height;  width = input_layer->width();  height = input_layer->height();  int size = width*height;  cv::Mat image = cv::imread(image_path,-1);  cv::Mat image_resized;  cv::resize(image, image_resized, cv::Size(height, width));  float* input_data = input_layer->mutable_cpu_data();  int temp,idx;  for(int i = 0; i < height; ++i){    uchar* pdata = image_resized.ptr<uchar>(i);    for(int j = 0; j < width; ++j){      temp = 3*j;      idx = i*width+j;      input_data[idx] = (pdata[temp+2]);      input_data[idx+size] = (pdata[temp+1]);      input_data[idx+2*size] = (pdata[temp+0]);    }  }  //cv::imshow("image",image_resized);}

结果展示:
这里写图片描述
这里写图片描述
这里写图片描述

1 2
原创粉丝点击