Implementing Multi-Label Input in Caffe


I recently needed to train a regression network with Caffe whose labels are multi-dimensional float data. After digging through a lot of material, I put together a fairly concise procedure, recorded here.

This post largely follows the post below, with some small modifications and refinements:

http://blog.csdn.net/qq295456059/article/details/53142574

First, the training list file (txt) we use for regression has this format:


cat_1.jpg 0.03 0.45 0.55 0.66
cat_2.jpg 0.44 0.31 0.05 0.34
dog_1.jpg 0.67 0.25 0.79 0.56
dog_2.jpg 0.89 0.46 0.91 0.38

In total, the following files need to be modified:

1. tools/convert_imageset.cpp

I recommend copying convert_imageset.cpp to a new file named convert_imageset_regression.cpp, still under the tools folder (Caffe's build compiles each .cpp under tools/ into its own binary).

First, add a few headers:

#include <boost/tokenizer.hpp>  //### To use tokenizer
#include <iostream>  //###
using namespace std;  //###

Next, change the label from a single int to a vector of floats, as follows:

//std::vector<std::pair<std::string, int> > lines;  //###
std::vector<std::pair<std::string, std::vector<float> > > lines;
std::string line;
//size_t pos;
//int label;  //###
std::vector<float> labels;

In the while loop, parse the float labels from each line and push them into lines:

while (std::getline(infile, line)) {
  // pos = line.find_last_of(' ');
  // label = atoi(line.substr(pos + 1).c_str());
  // lines.push_back(std::make_pair(line.substr(0, pos), label));
  //###
  std::vector<std::string> tokens;
  boost::char_separator<char> sep(" ");
  boost::tokenizer<boost::char_separator<char> > tok(line, sep);
  tokens.clear();
  std::copy(tok.begin(), tok.end(), std::back_inserter(tokens));
  for (int i = 1; i < tokens.size(); ++i) {
    labels.push_back(atof(tokens.at(i).c_str()));
  }
  lines.push_back(std::make_pair(tokens.at(0), labels));
  //### To clear the vector labels
  labels.clear();
}
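If you want to convince yourself of what the tokenizer produces for one line of the training list, here is a minimal standalone sketch (independent of Caffe; the sample line is made up):

#include <boost/tokenizer.hpp>
#include <cstdlib>
#include <iostream>
#include <string>
#include <vector>

int main() {
  std::string line = "cat_1.jpg 0.03 0.45 0.55 0.66";
  boost::char_separator<char> sep(" ");
  boost::tokenizer<boost::char_separator<char> > tok(line, sep);
  std::vector<std::string> tokens(tok.begin(), tok.end());
  // tokens[0] is the file name; the remaining tokens are the float labels.
  std::cout << "image: " << tokens[0] << std::endl;
  for (size_t i = 1; i < tokens.size(); ++i)
    std::cout << "label " << i - 1 << ": " << atof(tokens[i].c_str()) << std::endl;
  return 0;
}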

The fully modified file looks like this:

// This program converts a set of images to a lmdb/leveldb by storing them
// as Datum proto buffers.
// Usage:
//   convert_imageset [FLAGS] ROOTFOLDER/ LISTFILE DB_NAME
//
// where ROOTFOLDER is the root folder that holds all the images, and LISTFILE
// should be a list of files as well as their labels, in the format as
//   subfolder1/file1.JPEG 7
//   ....

#include <algorithm>
#include <fstream>  // NOLINT(readability/streams)
#include <string>
#include <utility>
#include <vector>

#include "boost/scoped_ptr.hpp"
#include "gflags/gflags.h"
#include "glog/logging.h"

#include "caffe/proto/caffe.pb.h"
#include "caffe/util/db.hpp"
#include "caffe/util/format.hpp"
#include "caffe/util/io.hpp"
#include "caffe/util/rng.hpp"

#include <boost/tokenizer.hpp>  //### To use tokenizer
#include <iostream>

using namespace caffe;  // NOLINT(build/namespaces)
using std::pair;
using boost::scoped_ptr;

DEFINE_bool(gray, false,
    "When this option is on, treat images as grayscale ones");
DEFINE_bool(shuffle, false,
    "Randomly shuffle the order of images and their labels");
DEFINE_string(backend, "lmdb",
    "The backend {lmdb, leveldb} for storing the result");
DEFINE_int32(resize_width, 0, "Width images are resized to");
DEFINE_int32(resize_height, 0, "Height images are resized to");
DEFINE_bool(check_size, false,
    "When this option is on, check that all the datum have the same size");
DEFINE_bool(encoded, false,
    "When this option is on, the encoded image will be save in datum");
DEFINE_string(encode_type, "",
    "Optional: What type should we encode the image as ('png','jpg',...).");

int main(int argc, char** argv) {
#ifdef USE_OPENCV
  ::google::InitGoogleLogging(argv[0]);
  // Print output to stderr (while still logging)
  FLAGS_alsologtostderr = 1;

#ifndef GFLAGS_GFLAGS_H_
  namespace gflags = google;
#endif

  gflags::SetUsageMessage("Convert a set of images to the leveldb/lmdb\n"
        "format used as input for Caffe.\n"
        "Usage:\n"
        "    convert_imageset [FLAGS] ROOTFOLDER/ LISTFILE DB_NAME\n"
        "The ImageNet dataset for the training demo is at\n"
        "    http://www.image-net.org/download-images\n");
  gflags::ParseCommandLineFlags(&argc, &argv, true);

  if (argc < 4) {
    gflags::ShowUsageWithFlagsRestrict(argv[0], "tools/convert_imageset");
    return 1;
  }

  const bool is_color = !FLAGS_gray;
  const bool check_size = FLAGS_check_size;
  const bool encoded = FLAGS_encoded;
  const string encode_type = FLAGS_encode_type;

  std::ifstream infile(argv[2]);
  //std::vector<std::pair<std::string, int> > lines;
  std::vector<std::pair<std::string, std::vector<float> > > lines;
  std::string line;
  //size_t pos;
  //int label;
  std::vector<float> labels;

  while (std::getline(infile, line)) {
    // pos = line.find_last_of(' ');
    // label = atoi(line.substr(pos + 1).c_str());
    // lines.push_back(std::make_pair(line.substr(0, pos), label));
    //###
    std::vector<std::string> tokens;
    boost::char_separator<char> sep(" ");
    boost::tokenizer<boost::char_separator<char> > tok(line, sep);
    tokens.clear();
    std::copy(tok.begin(), tok.end(), std::back_inserter(tokens));
    for (int i = 1; i < tokens.size(); ++i) {
      labels.push_back(atof(tokens.at(i).c_str()));
    }
    lines.push_back(std::make_pair(tokens.at(0), labels));
    //### To clear the vector labels
    labels.clear();
  }
  if (FLAGS_shuffle) {
    // randomly shuffle data
    LOG(INFO) << "Shuffling data";
    shuffle(lines.begin(), lines.end());
  }
  LOG(INFO) << "A total of " << lines.size() << " images.";

  if (encode_type.size() && !encoded)
    LOG(INFO) << "encode_type specified, assuming encoded=true.";

  int resize_height = std::max<int>(0, FLAGS_resize_height);
  int resize_width = std::max<int>(0, FLAGS_resize_width);

  // Create new DB
  scoped_ptr<db::DB> db(db::GetDB(FLAGS_backend));
  db->Open(argv[3], db::NEW);
  scoped_ptr<db::Transaction> txn(db->NewTransaction());

  // Storing to db
  std::string root_folder(argv[1]);
  Datum datum;
  int count = 0;
  int data_size = 0;
  bool data_size_initialized = false;

  for (int line_id = 0; line_id < lines.size(); ++line_id) {
    bool status;
    std::string enc = encode_type;
    if (encoded && !enc.size()) {
      // Guess the encoding type from the file name
      string fn = lines[line_id].first;
      size_t p = fn.rfind('.');
      if ( p == fn.npos )
        LOG(WARNING) << "Failed to guess the encoding of '" << fn << "'";
      enc = fn.substr(p);
      std::transform(enc.begin(), enc.end(), enc.begin(), ::tolower);
    }
    status = ReadImageToDatum(root_folder + lines[line_id].first,
        lines[line_id].second, resize_height, resize_width, is_color,
        enc, &datum);
    if (status == false) continue;
    if (check_size) {
      if (!data_size_initialized) {
        data_size = datum.channels() * datum.height() * datum.width();
        data_size_initialized = true;
      } else {
        const std::string& data = datum.data();
        CHECK_EQ(data.size(), data_size) << "Incorrect data field size "
            << data.size();
      }
    }
    // sequential
    string key_str = caffe::format_int(line_id, 8) + "_" + lines[line_id].first;

    // Put in db
    string out;
    CHECK(datum.SerializeToString(&out));
    txn->Put(key_str, out);

    if (++count % 1000 == 0) {
      // Commit db
      txn->Commit();
      txn.reset(db->NewTransaction());
      LOG(INFO) << "Processed " << count << " files.";
    }
  }
  // write the last batch
  if (count % 1000 != 0) {
    txn->Commit();
    LOG(INFO) << "Processed " << count << " files.";
  }
#else
  LOG(FATAL) << "This tool requires OpenCV; compile with USE_OPENCV.";
#endif  // USE_OPENCV
  return 0;
}

2. include/caffe/util/io.hpp

In io.hpp, add a declaration for an overload of ReadImageToDatum that takes a vector of float labels:

bool ReadImageToDatum(const string& filename, const vector<float> labels,
    const int height, const int width, const bool is_color,
    const std::string & encoding, Datum* datum);

3. src/caffe/util/io.cpp

In io.cpp, add the implementation of the function declared in io.hpp:

bool ReadImageToDatum(const string& filename, const vector<float> labels,
    const int height, const int width, const bool is_color,
    const std::string & encoding, Datum* datum) {
  cv::Mat cv_img = ReadImageToCVMat(filename, height, width, is_color);
  if (cv_img.data) {
    // if (encoding.size()) {
    //   if ( (cv_img.channels() == 3) == is_color && !height && !width &&
    //       matchExt(filename, encoding) )
    //     return ReadFileToDatum(filename, label, datum);
    //   std::vector<uchar> buf;
    //   cv::imencode("."+encoding, cv_img, buf);
    //   datum->set_data(std::string(reinterpret_cast<char*>(&buf[0]),
    //                   buf.size()));
    //   datum->set_label(label);
    //   datum->set_encoded(true);
    //   return true;
    // }
    CVMatToDatum(cv_img, datum);
    //datum->set_label(label);
    //###
    for (int i = 0; i < labels.size(); ++i) {
      datum->add_float_data(labels.at(i));
    }
    return true;
  } else {
    return false;
  }
}
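To sanity-check a converted database, something like the following can be used. This is a minimal sketch of a hypothetical extra tool (not part of the original post) that reads the first record back out of an LMDB and prints its float labels, assuming the rebuilt Caffe headers:

#include <cstdio>
#include <string>

#include "boost/scoped_ptr.hpp"
#include "caffe/proto/caffe.pb.h"
#include "caffe/util/db.hpp"

using namespace caffe;  // NOLINT(build/namespaces)

int main(int argc, char** argv) {
  if (argc < 2) { printf("usage: %s DB_PATH\n", argv[0]); return 1; }
  boost::scoped_ptr<db::DB> db(db::GetDB("lmdb"));
  db->Open(argv[1], db::READ);
  boost::scoped_ptr<db::Cursor> cursor(db->NewCursor());
  if (!cursor->valid()) { printf("empty database\n"); return 1; }
  Datum datum;
  datum.ParseFromString(cursor->value());  // first record
  printf("key: %s, %dx%dx%d, %d float labels:\n", cursor->key().c_str(),
         datum.channels(), datum.height(), datum.width(),
         datum.float_data_size());
  for (int i = 0; i < datum.float_data_size(); ++i)
    printf("  %f\n", datum.float_data(i));
  return 0;
}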

4. src/caffe/proto/caffe.proto

In caffe.proto, inside message Datum, change

optional uint32 label = 5;

to

optional float label = 5;



and change

optional float float_data = 6;

to

repeated float float_data = 6;

The modified message Datum looks like this:

message Datum {
  optional int32 channels = 1;
  optional int32 height = 2;
  optional int32 width = 3;
  // the actual image data, in bytes
  optional bytes data = 4;
  optional float label = 5;
  // Optionally, the datum could also hold float data.
  repeated float float_data = 6;
  // If true data contains an encoded image that need to be decoded
  optional bool encoded = 7 [default = false];
}
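With float_data now a repeated field, protobuf generates the usual repeated-field accessors, which is what both io.cpp and data_layer.cpp rely on. A minimal sketch, assuming Caffe has been rebuilt so that caffe.pb.h reflects the change:

#include <cstdio>
#include "caffe/proto/caffe.pb.h"

int main() {
  caffe::Datum datum;
  datum.add_float_data(0.03f);  // append one float label
  datum.add_float_data(0.45f);
  // float_data_size() and float_data(i) are how the data layer reads them back.
  for (int i = 0; i < datum.float_data_size(); ++i)
    printf("label %d = %f\n", i, datum.float_data(i));
  return 0;
}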

In DataParameter, append one line at the bottom:

  optional uint32 label_num = 11 [default = 4];

The modified DataParameter looks like this:

message DataParameter {
  enum DB {
    LEVELDB = 0;
    LMDB = 1;
  }
  // Specify the data source.
  optional string source = 1;
  // Specify the batch size.
  optional uint32 batch_size = 4;
  // The rand_skip variable is for the data layer to skip a few data points
  // to avoid all asynchronous sgd clients to start at the same point. The skip
  // point would be set as rand_skip * rand(0,1). Note that rand_skip should not
  // be larger than the number of keys in the database.
  // DEPRECATED. Each solver accesses a different subset of the database.
  optional uint32 rand_skip = 7 [default = 0];
  optional DB backend = 8 [default = LEVELDB];
  // DEPRECATED. See TransformationParameter. For data pre-processing, we can do
  // simple scaling and subtracting the data mean, if provided. Note that the
  // mean subtraction is always carried out before scaling.
  optional float scale = 2 [default = 1];
  optional string mean_file = 3;
  // DEPRECATED. See TransformationParameter. Specify if we would like to randomly
  // crop an image.
  optional uint32 crop_size = 5 [default = 0];
  // DEPRECATED. See TransformationParameter. Specify if we want to randomly mirror
  // data.
  optional bool mirror = 6 [default = false];
  // Force the encoded image to have 3 color channels
  optional bool force_encoded_color = 9 [default = false];
  // Prefetch queue (Increase if data feeding bandwidth varies, within the
  // limit of device memory for GPU training)
  optional uint32 prefetch = 10 [default = 4];
  optional uint32 label_num = 11 [default = 4];
}


5. src/caffe/layers/data_layer.cpp

The first change is in the DataLayerSetUp function: instead of a 1-D label vector of length batch_size, the label blob is reshaped to batch_size × labelNum × 1 × 1, with labelNum read from the new label_num parameter.

  // label
  //###
  // if (this->output_labels_) {
  //   vector<int> label_shape(1, batch_size);
  //   top[1]->Reshape(label_shape);
  //   for (int i = 0; i < this->PREFETCH_COUNT; ++i) {
  //     this->prefetch_[i].label_.Reshape(label_shape);
  //   }
  // }

  //###
  int labelNum = this->layer_param_.data_param().label_num();  //#####
  if (this->output_labels_) {
    vector<int> label_shape;
    label_shape.push_back(batch_size);
    label_shape.push_back(labelNum);
    label_shape.push_back(1);
    label_shape.push_back(1);
    top[1]->Reshape(label_shape);
    for (int i = 0; i < this->prefetch_.size(); ++i) {
      this->prefetch_[i]->label_.Reshape(label_shape);
    }
  }

The second change is in the load_batch function, which copies each sample's labelNum floats into the label blob:

    // Copy label.
    // ###
    // if (this->output_labels_) {
    //   Dtype* top_label = batch->label_.mutable_cpu_data();
    //   top_label[item_id] = datum.label();
    // }

    // ###
    if (this->output_labels_) {
      Dtype* top_label = batch->label_.mutable_cpu_data();
      for (int i = 0; i < labelNum; i++) {
        top_label[item_id*labelNum + i] = datum.float_data(i);  // read float labels
      }
    }
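The indexing works because the batch_size × labelNum × 1 × 1 label blob is a flat row-major array, so label i of sample item_id sits at top_label[item_id * labelNum + i]. A tiny standalone check of that offset arithmetic (the values are made up):

#include <cstdio>

int main() {
  const int batch_size = 2, labelNum = 4;
  float top_label[batch_size * labelNum];
  // Fill the blob the same way load_batch does.
  for (int item_id = 0; item_id < batch_size; ++item_id)
    for (int i = 0; i < labelNum; ++i)
      top_label[item_id * labelNum + i] = item_id + i * 0.1f;
  // Print the two 4-float label rows back out.
  for (int item_id = 0; item_id < batch_size; ++item_id) {
    for (int i = 0; i < labelNum; ++i)
      printf("%.1f ", top_label[item_id * labelNum + i]);
    printf("\n");
  }
  return 0;
}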

The complete modified data_layer.cpp looks like this:

#ifdef USE_OPENCV
#include <opencv2/core/core.hpp>
#endif  // USE_OPENCV
#include <stdint.h>

#include <vector>

#include "caffe/data_transformer.hpp"
#include "caffe/layers/data_layer.hpp"
#include "caffe/util/benchmark.hpp"

namespace caffe {

template <typename Dtype>
DataLayer<Dtype>::DataLayer(const LayerParameter& param)
  : BasePrefetchingDataLayer<Dtype>(param),
    offset_() {
  db_.reset(db::GetDB(param.data_param().backend()));
  db_->Open(param.data_param().source(), db::READ);
  cursor_.reset(db_->NewCursor());
}

template <typename Dtype>
DataLayer<Dtype>::~DataLayer() {
  this->StopInternalThread();
}

template <typename Dtype>
void DataLayer<Dtype>::DataLayerSetUp(const vector<Blob<Dtype>*>& bottom,
      const vector<Blob<Dtype>*>& top) {
  const int batch_size = this->layer_param_.data_param().batch_size();
  // Read a data point, and use it to initialize the top blob.
  Datum datum;
  datum.ParseFromString(cursor_->value());

  // Use data_transformer to infer the expected blob shape from datum.
  vector<int> top_shape = this->data_transformer_->InferBlobShape(datum);
  this->transformed_data_.Reshape(top_shape);
  // Reshape top[0] and prefetch_data according to the batch_size.
  top_shape[0] = batch_size;
  top[0]->Reshape(top_shape);
  for (int i = 0; i < this->prefetch_.size(); ++i) {
    this->prefetch_[i]->data_.Reshape(top_shape);
  }
  LOG_IF(INFO, Caffe::root_solver())
      << "output data size: " << top[0]->num() << ","
      << top[0]->channels() << "," << top[0]->height() << ","
      << top[0]->width();
  // label
  //###
  // if (this->output_labels_) {
  //   vector<int> label_shape(1, batch_size);
  //   top[1]->Reshape(label_shape);
  //   for (int i = 0; i < this->PREFETCH_COUNT; ++i) {
  //     this->prefetch_[i].label_.Reshape(label_shape);
  //   }
  // }

  //###
  int labelNum = this->layer_param_.data_param().label_num();  //#####
  if (this->output_labels_) {
    vector<int> label_shape;
    label_shape.push_back(batch_size);
    label_shape.push_back(labelNum);
    label_shape.push_back(1);
    label_shape.push_back(1);
    top[1]->Reshape(label_shape);
    for (int i = 0; i < this->prefetch_.size(); ++i) {
      this->prefetch_[i]->label_.Reshape(label_shape);
    }
  }
}

template <typename Dtype>
bool DataLayer<Dtype>::Skip() {
  int size = Caffe::solver_count();
  int rank = Caffe::solver_rank();
  bool keep = (offset_ % size) == rank ||
              // In test mode, only rank 0 runs, so avoid skipping
              this->layer_param_.phase() == TEST;
  return !keep;
}

template<typename Dtype>
void DataLayer<Dtype>::Next() {
  cursor_->Next();
  if (!cursor_->valid()) {
    LOG_IF(INFO, Caffe::root_solver())
        << "Restarting data prefetching from start.";
    cursor_->SeekToFirst();
  }
  offset_++;
}

// This function is called on prefetch thread
template<typename Dtype>
void DataLayer<Dtype>::load_batch(Batch<Dtype>* batch) {
  CPUTimer batch_timer;
  batch_timer.Start();
  double read_time = 0;
  double trans_time = 0;
  CPUTimer timer;
  CHECK(batch->data_.count());
  CHECK(this->transformed_data_.count());
  const int batch_size = this->layer_param_.data_param().batch_size();

  Datum datum;
  for (int item_id = 0; item_id < batch_size; ++item_id) {
    timer.Start();
    while (Skip()) {
      Next();
    }
    datum.ParseFromString(cursor_->value());
    read_time += timer.MicroSeconds();

    if (item_id == 0) {
      // Reshape according to the first datum of each batch
      // on single input batches allows for inputs of varying dimension.
      // Use data_transformer to infer the expected blob shape from datum.
      vector<int> top_shape = this->data_transformer_->InferBlobShape(datum);
      this->transformed_data_.Reshape(top_shape);
      // Reshape batch according to the batch_size.
      top_shape[0] = batch_size;
      batch->data_.Reshape(top_shape);
    }

    // Apply data transformations (mirror, scale, crop...)
    timer.Start();
    int offset = batch->data_.offset(item_id);
    Dtype* top_data = batch->data_.mutable_cpu_data();
    this->transformed_data_.set_cpu_data(top_data + offset);
    this->data_transformer_->Transform(datum, &(this->transformed_data_));

    // Copy label.
    // ###
    // if (this->output_labels_) {
    //   Dtype* top_label = batch->label_.mutable_cpu_data();
    //   top_label[item_id] = datum.label();
    // }

    // ###
    int labelNum = this->layer_param_.data_param().label_num();  //####
    if (this->output_labels_) {
      Dtype* top_label = batch->label_.mutable_cpu_data();
      for (int i = 0; i < labelNum; i++) {
        top_label[item_id*labelNum + i] = datum.float_data(i);  // read float labels
      }
    }
    trans_time += timer.MicroSeconds();
    Next();
  }
  timer.Stop();
  batch_timer.Stop();
  DLOG(INFO) << "Prefetch batch: " << batch_timer.MilliSeconds() << " ms.";
  DLOG(INFO) << "     Read time: " << read_time / 1000 << " ms.";
  DLOG(INFO) << "Transform time: " << trans_time / 1000 << " ms.";
}

INSTANTIATE_CLASS(DataLayer);
REGISTER_LAYER_CLASS(Data);

}  // namespace caffe

That completes the source changes. Rebuilding Caffe regenerates the protobuf code and produces a convert_imageset_regression executable under build/tools; this is the tool we use to generate the LMDB. This version of Caffe can then be used for multi-label training. A few points to note:

1. The number of labels must be declared in the network definition by adding a label_num parameter to the data layer: label_num: 4 for four labels, label_num: 8 for eight, and so on.

2. Regression tasks generally do not use an accuracy layer.

3. If the data is cropped, resized, etc. for a regression task, the labels must be mapped through the corresponding transformation as well (see the sketch below).
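As an illustration of point 3, here is a hypothetical helper, assuming the four labels are box corners (x1, y1, x2, y2) normalized to [0, 1] of the original image. It remaps them after resizing the image to (new_w, new_h) and taking a centered crop × crop window, mirroring what the data layer does to the pixels:

#include <algorithm>

// Hypothetical example: remap a normalized box label after resize + center crop.
void RemapBoxLabel(float label[4], int new_w, int new_h, int crop) {
  const float off_x = (new_w - crop) / 2.0f;  // crop offset in pixels
  const float off_y = (new_h - crop) / 2.0f;
  for (int i = 0; i < 4; ++i) {
    const bool is_x = (i % 2 == 0);            // labels ordered x1, y1, x2, y2
    const float scale = is_x ? new_w : new_h;  // to pixels of the resized image
    const float off = is_x ? off_x : off_y;
    float v = label[i] * scale - off;          // into crop coordinates
    v = std::min(std::max(v, 0.0f), static_cast<float>(crop));
    label[i] = v / crop;                       // renormalize to [0, 1] of the crop
  }
}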


We can now use convert_imageset_regression to generate the LMDB files for training and testing:

convert_imageset_regression IMGROOT train.txt train_lmdb

Once the LMDB files are ready, together with our network definition we can train with Caffe:

./caffe train --solver=regression_solver.prototxt

Finally, here is the network definition of the regression network I trained based on VGG16:

name: "VGG16"  layer {    name: "data"    type: "Data"    top: "data"    top: "label"    include {      phase: TRAIN    }    transform_param {      mirror: true      crop_size: 224      mean_value: 103.939      mean_value: 116.779      mean_value: 123.68    }    data_param {      source: "data/ilsvrc12_shrt_256/ilsvrc12_train_leveldb"      batch_size: 64      label_num: 4    backend: LEVELDB    }  }  layer {    name: "data"    type: "Data"    top: "data"    top: "label"    include {      phase: TEST    }    transform_param {      mirror: false      crop_size: 224      mean_value: 103.939      mean_value: 116.779      mean_value: 123.68    }    data_param {      source: "data/ilsvrc12_shrt_256/ilsvrc12_val_leveldb"      batch_size: 50      label_num: 4    backend: LEVELDB    }  }  layer {    bottom: "data"    top: "conv1_1"    name: "conv1_1"    type: "Convolution"    param {      lr_mult: 1      decay_mult: 1    }    param {      lr_mult: 2      decay_mult: 0    }    convolution_param {      num_output: 64      pad: 1      kernel_size: 3      weight_filler {        type: "gaussian"        std: 0.01      }      bias_filler {        type: "constant"        value: 0      }    }  }  layer {    bottom: "conv1_1"    top: "conv1_1"    name: "relu1_1"    type: "ReLU"  }  layer {    bottom: "conv1_1"    top: "conv1_2"    name: "conv1_2"    type: "Convolution"    param {      lr_mult: 1      decay_mult: 1    }    param {      lr_mult: 2      decay_mult: 0    }    convolution_param {      num_output: 64      pad: 1      kernel_size: 3      weight_filler {        type: "gaussian"        std: 0.01      }      bias_filler {        type: "constant"        value: 0      }    }  }  layer {    bottom: "conv1_2"    top: "conv1_2"    name: "relu1_2"    type: "ReLU"  }  layer {    bottom: "conv1_2"    top: "pool1"    name: "pool1"    type: "Pooling"    pooling_param {      pool: MAX      kernel_size: 2      stride: 2    }  }  layer {    bottom: "pool1"    top: "conv2_1"    name: "conv2_1"    type: "Convolution"    param {      lr_mult: 1      decay_mult: 1    }    param {      lr_mult: 2      decay_mult: 0    }    convolution_param {      num_output: 128      pad: 1      kernel_size: 3      weight_filler {        type: "gaussian"        std: 0.01      }      bias_filler {        type: "constant"        value: 0      }    }  }  layer {    bottom: "conv2_1"    top: "conv2_1"    name: "relu2_1"    type: "ReLU"  }  layer {    bottom: "conv2_1"    top: "conv2_2"    name: "conv2_2"    type: "Convolution"    param {      lr_mult: 1      decay_mult: 1    }    param {      lr_mult: 2      decay_mult: 0    }    convolution_param {      num_output: 128      pad: 1      kernel_size: 3      weight_filler {        type: "gaussian"        std: 0.01      }      bias_filler {        type: "constant"        value: 0      }    }  }  layer {    bottom: "conv2_2"    top: "conv2_2"    name: "relu2_2"    type: "ReLU"  }  layer {    bottom: "conv2_2"    top: "pool2"    name: "pool2"    type: "Pooling"    pooling_param {      pool: MAX      kernel_size: 2      stride: 2    }  }  layer {    bottom: "pool2"    top: "conv3_1"    name: "conv3_1"    type: "Convolution"    param {      lr_mult: 1      decay_mult: 1    }    param {      lr_mult: 2      decay_mult: 0    }    convolution_param {      num_output: 256      pad: 1      kernel_size: 3      weight_filler {        type: "gaussian"        std: 0.01      }      bias_filler {        type: "constant"        value: 0      }    }  }  layer {    bottom: "conv3_1"    top: "conv3_1"    name: 
"relu3_1"    type: "ReLU"  }  layer {    bottom: "conv3_1"    top: "conv3_2"    name: "conv3_2"    type: "Convolution"    param {      lr_mult: 1      decay_mult: 1    }    param {      lr_mult: 2      decay_mult: 0    }    convolution_param {      num_output: 256      pad: 1      kernel_size: 3      weight_filler {        type: "gaussian"        std: 0.01      }      bias_filler {        type: "constant"        value: 0      }    }  }  layer {    bottom: "conv3_2"    top: "conv3_2"    name: "relu3_2"    type: "ReLU"  }  layer {    bottom: "conv3_2"    top: "conv3_3"    name: "conv3_3"    type: "Convolution"    param {      lr_mult: 1      decay_mult: 1    }    param {      lr_mult: 2      decay_mult: 0    }    convolution_param {      num_output: 256      pad: 1      kernel_size: 3      weight_filler {        type: "gaussian"        std: 0.01      }      bias_filler {        type: "constant"        value: 0      }    }  }  layer {    bottom: "conv3_3"    top: "conv3_3"    name: "relu3_3"    type: "ReLU"  }  layer {    bottom: "conv3_3"    top: "pool3"    name: "pool3"    type: "Pooling"    pooling_param {      pool: MAX      kernel_size: 2      stride: 2    }  }  layer {    bottom: "pool3"    top: "conv4_1"    name: "conv4_1"    type: "Convolution"    param {      lr_mult: 1      decay_mult: 1    }    param {      lr_mult: 2      decay_mult: 0    }    convolution_param {      num_output: 512      pad: 1      kernel_size: 3      weight_filler {        type: "gaussian"        std: 0.01      }      bias_filler {        type: "constant"        value: 0      }    }  }  layer {    bottom: "conv4_1"    top: "conv4_1"    name: "relu4_1"    type: "ReLU"  }  layer {    bottom: "conv4_1"    top: "conv4_2"    name: "conv4_2"    type: "Convolution"    param {      lr_mult: 1      decay_mult: 1    }    param {      lr_mult: 2      decay_mult: 0    }    convolution_param {      num_output: 512      pad: 1      kernel_size: 3      weight_filler {        type: "gaussian"        std: 0.01      }      bias_filler {        type: "constant"        value: 0      }    }  }  layer {    bottom: "conv4_2"    top: "conv4_2"    name: "relu4_2"    type: "ReLU"  }  layer {    bottom: "conv4_2"    top: "conv4_3"    name: "conv4_3"    type: "Convolution"    param {      lr_mult: 1      decay_mult: 1    }    param {      lr_mult: 2      decay_mult: 0    }    convolution_param {      num_output: 512      pad: 1      kernel_size: 3      weight_filler {        type: "gaussian"        std: 0.01      }      bias_filler {        type: "constant"        value: 0      }    }  }  layer {    bottom: "conv4_3"    top: "conv4_3"    name: "relu4_3"    type: "ReLU"  }  layer {    bottom: "conv4_3"    top: "pool4"    name: "pool4"    type: "Pooling"    pooling_param {      pool: MAX      kernel_size: 2      stride: 2    }  }  layer {    bottom: "pool4"    top: "conv5_1"    name: "conv5_1"    type: "Convolution"    param {      lr_mult: 1      decay_mult: 1    }    param {      lr_mult: 2      decay_mult: 0    }    convolution_param {      num_output: 512      pad: 1      kernel_size: 3      weight_filler {        type: "gaussian"        std: 0.01      }      bias_filler {        type: "constant"        value: 0      }    }  }  layer {    bottom: "conv5_1"    top: "conv5_1"    name: "relu5_1"    type: "ReLU"  }  layer {    bottom: "conv5_1"    top: "conv5_2"    name: "conv5_2"    type: "Convolution"    param {      lr_mult: 1      decay_mult: 1    }    param {      lr_mult: 2      decay_mult: 0    }    convolution_param {      
num_output: 512      pad: 1      kernel_size: 3      weight_filler {        type: "gaussian"        std: 0.01      }      bias_filler {        type: "constant"        value: 0      }    }  }  layer {    bottom: "conv5_2"    top: "conv5_2"    name: "relu5_2"    type: "ReLU"  }  layer {    bottom: "conv5_2"    top: "conv5_3"    name: "conv5_3"    type: "Convolution"    param {      lr_mult: 1      decay_mult: 1    }    param {      lr_mult: 2      decay_mult: 0    }    convolution_param {      num_output: 512      pad: 1      kernel_size: 3      weight_filler {        type: "gaussian"        std: 0.01      }      bias_filler {        type: "constant"        value: 0      }    }  }  layer {    bottom: "conv5_3"    top: "conv5_3"    name: "relu5_3"    type: "ReLU"  }  layer {    bottom: "conv5_3"    top: "pool5"    name: "pool5"    type: "Pooling"    pooling_param {      pool: MAX      kernel_size: 2      stride: 2    }  }  layer {    bottom: "pool5"    top: "fc6"    name: "fc6"    type: "InnerProduct"    param {      lr_mult: 1      decay_mult: 1    }    param {      lr_mult: 2      decay_mult: 0    }    inner_product_param {      num_output: 4096      weight_filler {        type: "gaussian"        std: 0.005      }      bias_filler {        type: "constant"        value: 0.1      }    }  }  layer {    bottom: "fc6"    top: "fc6"    name: "relu6"    type: "ReLU"  }  layer {    bottom: "fc6"    top: "fc6"    name: "drop6"    type: "Dropout"    dropout_param {      dropout_ratio: 0.5    }  }  layer {    bottom: "fc6"    top: "fc7"    name: "fc7"    type: "InnerProduct"    param {      lr_mult: 1      decay_mult: 1    }    param {      lr_mult: 2      decay_mult: 0    }    inner_product_param {      num_output: 4096      weight_filler {        type: "gaussian"        std: 0.005      }      bias_filler {        type: "constant"        value: 0.1      }    }  }  layer {    bottom: "fc7"    top: "fc7"    name: "relu7"    type: "ReLU"  }  layer {    bottom: "fc7"    top: "fc7"    name: "drop7"    type: "Dropout"    dropout_param {      dropout_ratio: 0.5    }  }  layer {    bottom: "fc7"    top: "fc8"    name: "fc8"    type: "InnerProduct"    param {      lr_mult: 1      decay_mult: 1    }    param {      lr_mult: 2      decay_mult: 0    }    inner_product_param {      num_output: 4      weight_filler {        type: "gaussian"        std: 0.005      }      bias_filler {        type: "constant"        value: 0.1      }    }  }    layer {  name: "EuclideanLoss"  type: "EuclideanLoss"  bottom: "fc8"  bottom: "label"   top: "loss"} 
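One note on the loss, for reference (this is the standard behavior of Caffe's EuclideanLoss, not something added here): for a batch of N samples it computes

    loss = 1/(2N) * Σ_n || fc8_n − label_n ||²

over its two bottom blobs, which is why fc8's num_output must equal label_num (4 here).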

------------------------------------------------------------

After training the network, we need to test how it performs. Here is the inference wrapper I wrote:

CRegression.h

#include <caffe/caffe.hpp>
#include <opencv2/core/core.hpp>
#include <opencv2/highgui/highgui.hpp>
#include <opencv2/imgproc/imgproc.hpp>
#include <algorithm>
#include <iosfwd>
#include <memory>
#include <string>
#include <utility>
#include <vector>

using namespace caffe;
using namespace cv;
using std::string;

class Regression {
 public:
  Regression();
  ~Regression();
  int Initial(const string &model_file, const string &trained_file);
  Rect FillBuf(Mat Img);
  int FillBuf(Mat Img, int tag);

 private:
  shared_ptr<Net<float> > net_;
};

CRegression.cpp

#include "CRegression.h"Regression::Regression(){}Regression::~Regression(){}int Regression::Initial(const string &model_file, const string &trained_file){Caffe::set_mode(Caffe::GPU);/* Load the network. */net_.reset(new Net<float>(model_file, TEST));net_->CopyTrainedLayersFrom(trained_file);return 0;}Rect Regression::FillBuf(Mat Img){Mat sample = Img.clone();sample.convertTo(sample, CV_32FC3);Blob<float>* inputBlob = net_->input_blobs()[0];int width = inputBlob->width();int height = inputBlob->height();Size inputSize = Size(width, height);CHECK(sample.size() == inputSize)<< "sample size is not equal to inputSize";Mat mean(sample.size(), CV_32FC3, Scalar(103.939, 116.779, 123.68));subtract(sample, mean, sample);float* data = inputBlob->mutable_cpu_data();   //将图片的像素值,复制进网络的输入Blob  for (int k = 0; k < 3; ++k){for (int i = 0; i < height; ++i){for (int j = 0; j < width; ++j){int index = (k*height + i)*width + j;  //获取偏移量  data[index] = sample.at<Vec3f>(i, j)[k];}}}vector<Blob<float>* > inputs(1, inputBlob);const vector<Blob<float>* >& outputBlobs = net_->Forward(inputs);   //进行前向传播,并返回最后一层的blob  Blob<float>* outputBlob = outputBlobs[0];      //输出blob  const float* value = outputBlob->cpu_data();std::cout << value[0] << "  " << value[1] << "  " << value[2] << " " << value[3] << std::endl;return Rect();}






