How to Solve Regression Problems with Caffe


I have recently been working on object detection with Caffe and needed to train a regression network that predicts an object's position in the image (x1, y1, width, height). However, the Caffe version I use (happynear's Windows branch) only supports dataset conversion for single-integer classification labels, so the Caffe source has to be modified before it can also convert datasets for regression problems.

The changes mainly follow http://blog.csdn.net/baobei0112/article/details/47606559. That post, however, is based on a different Caffe branch than happynear's, so the source changes required differ considerably. Below I record the changes I made.

I. Source Code Modifications

1. Modify caffe.proto, located in /src/caffe/proto

On line 36, change the label field of the Datum message to repeated float label = 5; and then run extract_proto.bat to regenerate the protobuf sources.
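For reference, after the change the Datum message should look roughly like this; the surrounding fields are quoted from upstream Caffe and may differ slightly in your branch. The point is that label goes from a single optional int32 to a repeated float, so one Datum can carry all four box coordinates:

message Datum {
  optional int32 channels = 1;
  optional int32 height = 2;
  optional int32 width = 3;
  // the actual image data, in bytes
  optional bytes data = 4;
  // Changed for regression: was "optional int32 label = 5;"
  repeated float label = 5;
  // Optionally, the datum could also hold float data.
  repeated float float_data = 6;
  optional bool encoded = 7 [default = false];
}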

2. Modify data_layers.hpp

#ifndef CAFFE_DATA_LAYERS_HPP_
#define CAFFE_DATA_LAYERS_HPP_

#include <string>
#include <utility>
#include <vector>

#include "hdf5/hdf5.h"

#include "caffe/blob.hpp"
#include "caffe/common.hpp"
#include "caffe/data_reader.hpp"
#include "caffe/data_transformer.hpp"
#include "caffe/filler.hpp"
#include "caffe/internal_thread.hpp"
#include "caffe/layer.hpp"
#include "caffe/proto/caffe.pb.h"
#include "caffe/util/blocking_queue.hpp"
#include "caffe/util/db.hpp"

#define HDF5_DATA_DATASET_NAME "data"
#define HDF5_DATA_LABEL_NAME "label"

namespace caffe {

/**
 * @brief Provides base for data layers that feed blobs to the Net.
 *
 * TODO(dox): thorough documentation for Forward and proto params.
 */
template <typename Dtype>
class BaseDataLayer : public Layer<Dtype> {
 public:
  explicit BaseDataLayer(const LayerParameter& param);
  // LayerSetUp: implements common data layer setup functionality, and calls
  // DataLayerSetUp to do special data layer setup for individual layer types.
  // This method may not be overridden except by the BasePrefetchingDataLayer.
  virtual void LayerSetUp(const vector<Blob<Dtype>*>& bottom,
      const vector<Blob<Dtype>*>& top);
  // Data layers should be shared by multiple solvers in parallel
  virtual inline bool ShareInParallel() const { return true; }
  virtual void DataLayerSetUp(const vector<Blob<Dtype>*>& bottom,
      const vector<Blob<Dtype>*>& top) {}
  // Data layers have no bottoms, so reshaping is trivial.
  virtual void Reshape(const vector<Blob<Dtype>*>& bottom,
      const vector<Blob<Dtype>*>& top) {}

  virtual void Backward_cpu(const vector<Blob<Dtype>*>& top,
      const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom) {}
  virtual void Backward_gpu(const vector<Blob<Dtype>*>& top,
      const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom) {}

 protected:
  TransformationParameter transform_param_;
  shared_ptr<DataTransformer<Dtype> > data_transformer_;
  bool output_labels_;
};

template <typename Dtype>
class Batch {
 public:
  Blob<Dtype> data_, label_;
};

template <typename Dtype>
class BasePrefetchingDataLayer :
    public BaseDataLayer<Dtype>, public InternalThread {
 public:
  explicit BasePrefetchingDataLayer(const LayerParameter& param);
  // LayerSetUp: implements common data layer setup functionality, and calls
  // DataLayerSetUp to do special data layer setup for individual layer types.
  // This method may not be overridden.
  void LayerSetUp(const vector<Blob<Dtype>*>& bottom,
      const vector<Blob<Dtype>*>& top);

  virtual void Forward_cpu(const vector<Blob<Dtype>*>& bottom,
      const vector<Blob<Dtype>*>& top);
  virtual void Forward_gpu(const vector<Blob<Dtype>*>& bottom,
      const vector<Blob<Dtype>*>& top);

  // Prefetches batches (asynchronously if to GPU memory)
  static const int PREFETCH_COUNT = 3;

 protected:
  virtual void InternalThreadEntry();
  virtual void load_batch(Batch<Dtype>* batch) = 0;

  Batch<Dtype> prefetch_[PREFETCH_COUNT];
  BlockingQueue<Batch<Dtype>*> prefetch_free_;
  BlockingQueue<Batch<Dtype>*> prefetch_full_;

  Blob<Dtype> transformed_data_;
};

template <typename Dtype>
class DataLayer : public BasePrefetchingDataLayer<Dtype> {
 public:
  explicit DataLayer(const LayerParameter& param);
  virtual ~DataLayer();
  virtual void DataLayerSetUp(const vector<Blob<Dtype>*>& bottom,
      const vector<Blob<Dtype>*>& top);
  // DataLayer uses DataReader instead for sharing for parallelism
  virtual inline bool ShareInParallel() const { return false; }
  virtual inline const char* type() const { return "Data"; }
  virtual inline int ExactNumBottomBlobs() const { return 0; }
  virtual inline int MinTopBlobs() const { return 1; }
  virtual inline int MaxTopBlobs() const { return 2; }

 protected:
  virtual void load_batch(Batch<Dtype>* batch);

  DataReader reader_;
};

/**
 * @brief Provides data to the Net generated by a Filler.
 *
 * TODO(dox): thorough documentation for Forward and proto params.
 */
template <typename Dtype>
class DummyDataLayer : public Layer<Dtype> {
 public:
  explicit DummyDataLayer(const LayerParameter& param)
      : Layer<Dtype>(param) {}
  virtual void LayerSetUp(const vector<Blob<Dtype>*>& bottom,
      const vector<Blob<Dtype>*>& top);
  // Data layers should be shared by multiple solvers in parallel
  virtual inline bool ShareInParallel() const { return true; }
  // Data layers have no bottoms, so reshaping is trivial.
  virtual void Reshape(const vector<Blob<Dtype>*>& bottom,
      const vector<Blob<Dtype>*>& top) {}

  virtual inline const char* type() const { return "DummyData"; }
  virtual inline int ExactNumBottomBlobs() const { return 0; }
  virtual inline int MinTopBlobs() const { return 1; }

 protected:
  virtual void Forward_cpu(const vector<Blob<Dtype>*>& bottom,
      const vector<Blob<Dtype>*>& top);
  virtual void Backward_cpu(const vector<Blob<Dtype>*>& top,
      const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom) {}
  virtual void Backward_gpu(const vector<Blob<Dtype>*>& top,
      const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom) {}

  vector<shared_ptr<Filler<Dtype> > > fillers_;
  vector<bool> refill_;
};

/**
 * @brief Provides data to the Net from HDF5 files.
 *
 * TODO(dox): thorough documentation for Forward and proto params.
 */
template <typename Dtype>
class HDF5DataLayer : public Layer<Dtype> {
 public:
  explicit HDF5DataLayer(const LayerParameter& param)
      : Layer<Dtype>(param) {}
  virtual ~HDF5DataLayer();
  virtual void LayerSetUp(const vector<Blob<Dtype>*>& bottom,
      const vector<Blob<Dtype>*>& top);
  // Data layers should be shared by multiple solvers in parallel
  virtual inline bool ShareInParallel() const { return true; }
  // Data layers have no bottoms, so reshaping is trivial.
  virtual void Reshape(const vector<Blob<Dtype>*>& bottom,
      const vector<Blob<Dtype>*>& top) {}

  virtual inline const char* type() const { return "HDF5Data"; }
  virtual inline int ExactNumBottomBlobs() const { return 0; }
  virtual inline int MinTopBlobs() const { return 1; }

 protected:
  virtual void Forward_cpu(const vector<Blob<Dtype>*>& bottom,
      const vector<Blob<Dtype>*>& top);
  virtual void Forward_gpu(const vector<Blob<Dtype>*>& bottom,
      const vector<Blob<Dtype>*>& top);
  virtual void Backward_cpu(const vector<Blob<Dtype>*>& top,
      const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom) {}
  virtual void Backward_gpu(const vector<Blob<Dtype>*>& top,
      const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom) {}
  virtual void LoadHDF5FileData(const char* filename);

  std::vector<std::string> hdf_filenames_;
  unsigned int num_files_;
  unsigned int current_file_;
  hsize_t current_row_;
  std::vector<shared_ptr<Blob<Dtype> > > hdf_blobs_;
  std::vector<unsigned int> data_permutation_;
  std::vector<unsigned int> file_permutation_;
};

/**
 * @brief Write blobs to disk as HDF5 files.
 *
 * TODO(dox): thorough documentation for Forward and proto params.
 */
template <typename Dtype>
class HDF5OutputLayer : public Layer<Dtype> {
 public:
  explicit HDF5OutputLayer(const LayerParameter& param)
      : Layer<Dtype>(param), file_opened_(false) {}
  virtual ~HDF5OutputLayer();
  virtual void LayerSetUp(const vector<Blob<Dtype>*>& bottom,
      const vector<Blob<Dtype>*>& top);
  // Data layers should be shared by multiple solvers in parallel
  virtual inline bool ShareInParallel() const { return true; }
  // Data layers have no bottoms, so reshaping is trivial.
  virtual void Reshape(const vector<Blob<Dtype>*>& bottom,
      const vector<Blob<Dtype>*>& top) {}

  virtual inline const char* type() const { return "HDF5Output"; }
  // TODO: no limit on the number of blobs
  virtual inline int ExactNumBottomBlobs() const { return 2; }
  virtual inline int ExactNumTopBlobs() const { return 0; }

  inline std::string file_name() const { return file_name_; }

 protected:
  virtual void Forward_cpu(const vector<Blob<Dtype>*>& bottom,
      const vector<Blob<Dtype>*>& top);
  virtual void Forward_gpu(const vector<Blob<Dtype>*>& bottom,
      const vector<Blob<Dtype>*>& top);
  virtual void Backward_cpu(const vector<Blob<Dtype>*>& top,
      const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom);
  virtual void Backward_gpu(const vector<Blob<Dtype>*>& top,
      const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom);
  virtual void SaveBlobs();

  bool file_opened_;
  std::string file_name_;
  hid_t file_id_;
  Blob<Dtype> data_blob_;
  Blob<Dtype> label_blob_;
};

/**
 * @brief Provides data to the Net from image files.
 *
 * TODO(dox): thorough documentation for Forward and proto params.
 */
template <typename Dtype>
class ImageDataLayer : public BasePrefetchingDataLayer<Dtype> {
 public:
  explicit ImageDataLayer(const LayerParameter& param)
      : BasePrefetchingDataLayer<Dtype>(param) {}
  virtual ~ImageDataLayer();
  virtual void DataLayerSetUp(const vector<Blob<Dtype>*>& bottom,
      const vector<Blob<Dtype>*>& top);

  virtual inline const char* type() const { return "ImageData"; }
  virtual inline int ExactNumBottomBlobs() const { return 0; }
  virtual inline int ExactNumTopBlobs() const { return 2; }

 protected:
  // Changed for regression: each image is paired with a vector of float
  // labels instead of a single int label.
  vector<std::pair<std::string, std::vector<float> > > lines_;
  shared_ptr<Caffe::RNG> prefetch_rng_;
  virtual void ShuffleImages();
  virtual void load_batch(Batch<Dtype>* batch);
  int lines_id_;
};

/**
 * @brief Provides data to the Net from memory.
 *
 * TODO(dox): thorough documentation for Forward and proto params.
 */
template <typename Dtype>
class MemoryDataLayer : public BaseDataLayer<Dtype> {
 public:
  explicit MemoryDataLayer(const LayerParameter& param)
      : BaseDataLayer<Dtype>(param), has_new_data_(false) {}
  virtual void DataLayerSetUp(const vector<Blob<Dtype>*>& bottom,
      const vector<Blob<Dtype>*>& top);

  virtual inline const char* type() const { return "MemoryData"; }
  virtual inline int ExactNumBottomBlobs() const { return 0; }
  virtual inline int ExactNumTopBlobs() const { return 2; }

  virtual void AddDatumVector(const vector<Datum>& datum_vector);
#ifdef USE_OPENCV
  virtual void AddMatVector(const vector<cv::Mat>& mat_vector,
      const vector<int>& labels);
#endif  // USE_OPENCV

  // Reset should accept const pointers, but can't, because the memory
  // will be given to Blob, which is mutable
  void Reset(Dtype* data, Dtype* label, int n);
  void set_batch_size(int new_size);

  int batch_size() { return batch_size_; }
  int channels() { return channels_; }
  int height() { return height_; }
  int width() { return width_; }

 protected:
  virtual void Forward_cpu(const vector<Blob<Dtype>*>& bottom,
      const vector<Blob<Dtype>*>& top);

  int batch_size_, channels_, height_, width_, size_;
  Dtype* data_;
  Dtype* labels_;
  int n_;
  size_t pos_;
  Blob<Dtype> added_data_;
  Blob<Dtype> added_label_;
  bool has_new_data_;
};

/**
 * @brief Provides data to the Net from windows of images files, specified
 *        by a window data file.
 *
 * TODO(dox): thorough documentation for Forward and proto params.
 */
template <typename Dtype>
class WindowDataLayer : public BasePrefetchingDataLayer<Dtype> {
 public:
  explicit WindowDataLayer(const LayerParameter& param)
      : BasePrefetchingDataLayer<Dtype>(param) {}
  virtual ~WindowDataLayer();
  virtual void DataLayerSetUp(const vector<Blob<Dtype>*>& bottom,
      const vector<Blob<Dtype>*>& top);

  virtual inline const char* type() const { return "WindowData"; }
  virtual inline int ExactNumBottomBlobs() const { return 0; }
  virtual inline int ExactNumTopBlobs() const { return 2; }

 protected:
  virtual unsigned int PrefetchRand();
  virtual void load_batch(Batch<Dtype>* batch);

  shared_ptr<Caffe::RNG> prefetch_rng_;
  vector<std::pair<std::string, vector<int> > > image_database_;
  enum WindowField { IMAGE_INDEX, LABEL, OVERLAP, X1, Y1, X2, Y2, NUM };
  vector<vector<float> > fg_windows_;
  vector<vector<float> > bg_windows_;
  Blob<Dtype> data_mean_;
  vector<Dtype> mean_values_;
  bool has_mean_file_;
  bool has_mean_values_;
  bool cache_images_;
  vector<std::pair<std::string, Datum > > image_database_cache_;
};

/**
 * @brief Provides data to the Net from image files.
 *
 * TODO(dox): thorough documentation for Forward and proto params.
 */
template <typename Dtype>
class MultiLabelImageDataLayer : public BasePrefetchingDataLayer<Dtype> {
 public:
  explicit MultiLabelImageDataLayer(const LayerParameter& param)
      : BasePrefetchingDataLayer<Dtype>(param) {}
  virtual ~MultiLabelImageDataLayer();
  virtual void DataLayerSetUp(const vector<Blob<Dtype>*>& bottom,
      const vector<Blob<Dtype>*>& top);

  virtual inline const char* type() const { return "MultiLabelImageData"; }
  virtual inline int ExactNumBottomBlobs() const { return 0; }
  virtual inline int ExactNumTopBlobs() const { return 2; }

 protected:
  shared_ptr<Caffe::RNG> prefetch_rng_;
  virtual void ShuffleImages();
  virtual void load_batch(Batch<Dtype>* batch);

  vector<std::pair<std::string, shared_ptr<vector<Dtype> > > > lines_;
  int label_count;
  int lines_id_;
};

}  // namespace caffe

#endif  // CAFFE_DATA_LAYERS_HPP_

3. Modify data_layer.cpp

#ifdef USE_OPENCV
#include <opencv2/core/core.hpp>
#endif  // USE_OPENCV
#include <stdint.h>

#include <vector>

#include "caffe/data_layers.hpp"
#include "caffe/proto/caffe.pb.h"
#include "caffe/util/benchmark.hpp"

namespace caffe {

template <typename Dtype>
DataLayer<Dtype>::DataLayer(const LayerParameter& param)
  : BasePrefetchingDataLayer<Dtype>(param),
    reader_(param) {
}

template <typename Dtype>
DataLayer<Dtype>::~DataLayer() {
  this->StopInternalThread();
}

template <typename Dtype>
void DataLayer<Dtype>::DataLayerSetUp(const vector<Blob<Dtype>*>& bottom,
      const vector<Blob<Dtype>*>& top) {
  const int batch_size = this->layer_param_.data_param().batch_size();
  // Read a data point, and use it to initialize the top blob.
  Datum& datum = *(reader_.full().peek());

  // Use data_transformer to infer the expected blob shape from datum.
  vector<int> top_shape = this->data_transformer_->InferBlobShape(datum);
  this->transformed_data_.Reshape(top_shape);
  // Reshape top[0] and prefetch_data according to the batch_size.
  top_shape[0] = batch_size;
  top[0]->Reshape(top_shape);
  for (int i = 0; i < this->PREFETCH_COUNT; ++i) {
    this->prefetch_[i].data_.Reshape(top_shape);
  }
  LOG(INFO) << "output data size: " << top[0]->num() << ","
      << top[0]->channels() << "," << top[0]->height() << ","
      << top[0]->width();
  // label
  if (this->output_labels_) {
    // Original single-label shape, kept for reference:
    // vector<int> label_shape(1, batch_size);
    // top[1]->Reshape(label_shape);
    // for (int i = 0; i < this->PREFETCH_COUNT; ++i) {
    //   this->prefetch_[i].label_.Reshape(label_shape);
    // }
    // Changed for regression: the label blob holds 4 floats per item.
    top[1]->Reshape(batch_size, 4, 1, 1);
    for (int i = 0; i < this->PREFETCH_COUNT; ++i) {
      this->prefetch_[i].label_.Reshape(batch_size, 4, 1, 1);
    }
  }
}

// This function is called on prefetch thread
template<typename Dtype>
void DataLayer<Dtype>::load_batch(Batch<Dtype>* batch) {
  CPUTimer batch_timer;
  batch_timer.Start();
  double read_time = 0;
  double trans_time = 0;
  CPUTimer timer;
  CHECK(batch->data_.count());
  CHECK(this->transformed_data_.count());

  // Reshape according to the first datum of each batch
  // on single input batches allows for inputs of varying dimension.
  const int batch_size = this->layer_param_.data_param().batch_size();
  Datum& datum = *(reader_.full().peek());
  // Use data_transformer to infer the expected blob shape from datum.
  vector<int> top_shape = this->data_transformer_->InferBlobShape(datum);
  this->transformed_data_.Reshape(top_shape);
  // Reshape batch according to the batch_size.
  top_shape[0] = batch_size;
  batch->data_.Reshape(top_shape);

  Dtype* top_data = batch->data_.mutable_cpu_data();
  Dtype* top_label = NULL;  // suppress warnings about uninitialized variables
  if (this->output_labels_) {
    top_label = batch->label_.mutable_cpu_data();
  }
  for (int item_id = 0; item_id < batch_size; ++item_id) {
    timer.Start();
    // get a datum
    Datum& datum = *(reader_.full().pop("Waiting for data"));
    read_time += timer.MicroSeconds();
    timer.Start();
    // Apply data transformations (mirror, scale, crop...)
    int offset = batch->data_.offset(item_id);
    this->transformed_data_.set_cpu_data(top_data + offset);
    this->data_transformer_->Transform(datum, &(this->transformed_data_));
    // Copy label. Changed for regression: copy every component of the
    // (now repeated) label field instead of a single scalar.
    if (this->output_labels_) {
      // top_label[item_id] = datum.label();
      for (int label_i = 0; label_i < datum.label_size(); ++label_i) {
        top_label[item_id * datum.label_size() + label_i] = datum.label(label_i);
      }
    }
    trans_time += timer.MicroSeconds();
    reader_.free().push(const_cast<Datum*>(&datum));
  }
  timer.Stop();
  batch_timer.Stop();
  DLOG(INFO) << "Prefetch batch: " << batch_timer.MilliSeconds() << " ms.";
  DLOG(INFO) << "     Read time: " << read_time / 1000 << " ms.";
  DLOG(INFO) << "Transform time: " << trans_time / 1000 << " ms.";
}

INSTANTIATE_CLASS(DataLayer);
REGISTER_LAYER_CLASS(Data);

}  // namespace caffe

4. Modify the label handling in image_data_layer.cpp

#ifdef USE_OPENCV
#include <opencv2/core/core.hpp>

#include <fstream>  // NOLINT(readability/streams)
#include <iostream>  // NOLINT(readability/streams)
#include <string>
#include <utility>
#include <vector>

#include "caffe/data_layers.hpp"
#include "caffe/util/benchmark.hpp"
#include "caffe/util/io.hpp"
#include "caffe/util/math_functions.hpp"
#include "caffe/util/rng.hpp"

namespace caffe {

template <typename Dtype>
ImageDataLayer<Dtype>::~ImageDataLayer<Dtype>() {
  this->StopInternalThread();
}

template <typename Dtype>
void ImageDataLayer<Dtype>::DataLayerSetUp(const vector<Blob<Dtype>*>& bottom,
      const vector<Blob<Dtype>*>& top) {
  const int new_height = this->layer_param_.image_data_param().new_height();
  const int new_width  = this->layer_param_.image_data_param().new_width();
  const bool is_color  = this->layer_param_.image_data_param().is_color();
  string root_folder = this->layer_param_.image_data_param().root_folder();

  CHECK((new_height == 0 && new_width == 0) ||
      (new_height > 0 && new_width > 0)) << "Current implementation requires "
      "new_height and new_width to be set at the same time.";
  // Read the file with filenames and labels.
  // Changed for regression: each line now carries four float targets
  // instead of a single integer class label.
  const string& source = this->layer_param_.image_data_param().source();
  LOG(INFO) << "Opening file " << source;
  std::ifstream infile(source.c_str());
  string filename;
  // int label;
  float x1, y1, x2, y2;
  while (infile >> filename >> x1 >> y1 >> x2 >> y2) {
    std::vector<float> vec_label;
    vec_label.push_back(x1);
    vec_label.push_back(y1);
    vec_label.push_back(x2);
    vec_label.push_back(y2);
    lines_.push_back(std::make_pair(filename, vec_label));
  }

  if (this->layer_param_.image_data_param().shuffle()) {
    // randomly shuffle data
    LOG(INFO) << "Shuffling data";
    const unsigned int prefetch_rng_seed = caffe_rng_rand();
    prefetch_rng_.reset(new Caffe::RNG(prefetch_rng_seed));
    ShuffleImages();
  }
  LOG(INFO) << "A total of " << lines_.size() << " images.";

  lines_id_ = 0;
  // Check if we would need to randomly skip a few data points
  if (this->layer_param_.image_data_param().rand_skip()) {
    unsigned int skip = caffe_rng_rand() %
        this->layer_param_.image_data_param().rand_skip();
    LOG(INFO) << "Skipping first " << skip << " data points.";
    CHECK_GT(lines_.size(), skip) << "Not enough points to skip";
    lines_id_ = skip;
  }
  // Read an image, and use it to initialize the top blob.
  cv::Mat cv_img = ReadImageToCVMat(root_folder + lines_[lines_id_].first,
                                    new_height, new_width, is_color);
  CHECK(cv_img.data) << "Could not load " << lines_[lines_id_].first;
  // Use data_transformer to infer the expected blob shape from a cv_image.
  vector<int> top_shape = this->data_transformer_->InferBlobShape(cv_img);
  this->transformed_data_.Reshape(top_shape);
  // Reshape prefetch_data and top[0] according to the batch_size.
  const int batch_size = this->layer_param_.image_data_param().batch_size();
  CHECK_GT(batch_size, 0) << "Positive batch size required";
  top_shape[0] = batch_size;
  for (int i = 0; i < this->PREFETCH_COUNT; ++i) {
    this->prefetch_[i].data_.Reshape(top_shape);
  }
  top[0]->Reshape(top_shape);

  LOG(INFO) << "output data size: " << top[0]->num() << ","
      << top[0]->channels() << "," << top[0]->height() << ","
      << top[0]->width();
  // label
  // Changed for regression: the label blob is batch_size x 4 x 1 x 1.
  top[1]->Reshape(batch_size, 4, 1, 1);
  for (int i = 0; i < this->PREFETCH_COUNT; ++i) {
    this->prefetch_[i].label_.Reshape(batch_size, 4, 1, 1);
  }
}

template <typename Dtype>
void ImageDataLayer<Dtype>::ShuffleImages() {
  caffe::rng_t* prefetch_rng =
      static_cast<caffe::rng_t*>(prefetch_rng_->generator());
  shuffle(lines_.begin(), lines_.end(), prefetch_rng);
}

// This function is called on prefetch thread
template <typename Dtype>
void ImageDataLayer<Dtype>::load_batch(Batch<Dtype>* batch) {
  CPUTimer batch_timer;
  batch_timer.Start();
  double read_time = 0;
  double trans_time = 0;
  CPUTimer timer;
  CHECK(batch->data_.count());
  CHECK(this->transformed_data_.count());
  ImageDataParameter image_data_param = this->layer_param_.image_data_param();
  const int batch_size = image_data_param.batch_size();
  const int new_height = image_data_param.new_height();
  const int new_width = image_data_param.new_width();
  const bool is_color = image_data_param.is_color();
  string root_folder = image_data_param.root_folder();

  // Reshape according to the first image of each batch
  // on single input batches allows for inputs of varying dimension.
  cv::Mat cv_img = ReadImageToCVMat(root_folder + lines_[lines_id_].first,
      new_height, new_width, is_color);
  CHECK(cv_img.data) << "Could not load " << lines_[lines_id_].first;
  // Use data_transformer to infer the expected blob shape from a cv_img.
  vector<int> top_shape = this->data_transformer_->InferBlobShape(cv_img);
  this->transformed_data_.Reshape(top_shape);
  // Reshape batch according to the batch_size.
  top_shape[0] = batch_size;
  batch->data_.Reshape(top_shape);

  Dtype* prefetch_data = batch->data_.mutable_cpu_data();
  // Bug fix: the label pointer must point at the label blob. The code as
  // originally posted left it as NULL, which would crash when the labels
  // are written below.
  Dtype* prefetch_label = batch->label_.mutable_cpu_data();

  // datum scales
  const int lines_size = lines_.size();
  for (int item_id = 0; item_id < batch_size; ++item_id) {
    // get a blob
    timer.Start();
    CHECK_GT(lines_size, lines_id_);
    cv::Mat cv_img = ReadImageToCVMat(root_folder + lines_[lines_id_].first,
        new_height, new_width, is_color);
    CHECK(cv_img.data) << "Could not load " << lines_[lines_id_].first;
    read_time += timer.MicroSeconds();
    timer.Start();
    // Apply transformations (mirror, crop...) to the image
    int offset = batch->data_.offset(item_id);
    this->transformed_data_.set_cpu_data(prefetch_data + offset);
    this->data_transformer_->Transform(cv_img, &(this->transformed_data_));
    trans_time += timer.MicroSeconds();

    // Copy the label vector. Changed for regression:
    // prefetch_label[item_id] = lines_[lines_id_].second;
    for (int label_i = 0; label_i < (lines_[lines_id_].second).size(); ++label_i) {
      prefetch_label[item_id * (lines_[lines_id_].second).size() + label_i] =
          (lines_[lines_id_].second)[label_i];
    }
    // go to the next iter
    lines_id_++;
    if (lines_id_ >= lines_size) {
      // We have reached the end. Restart from the first.
      DLOG(INFO) << "Restarting data prefetching from start.";
      lines_id_ = 0;
      if (this->layer_param_.image_data_param().shuffle()) {
        ShuffleImages();
      }
    }
  }
  batch_timer.Stop();
  DLOG(INFO) << "Prefetch batch: " << batch_timer.MilliSeconds() << " ms.";
  DLOG(INFO) << "     Read time: " << read_time / 1000 << " ms.";
  DLOG(INFO) << "Transform time: " << trans_time / 1000 << " ms.";
}

INSTANTIATE_CLASS(ImageDataLayer);
REGISTER_LAYER_CLASS(ImageData);

}  // namespace caffe
#endif  // USE_OPENCV
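With these changes, the ImageData layer can read the 4-value list file directly, without converting to LMDB/LevelDB first. As a minimal sketch (layer name, file paths, and sizes below are placeholders, not values from the post), the corresponding data layer definition in the train prototxt could look like:

layer {
  name: "data"
  type: "ImageData"
  top: "data"
  top: "label"   # now a batch_size x 4 x 1 x 1 blob
  image_data_param {
    source: "train_list.txt"   # hypothetical path: image path + 4 floats per line
    root_folder: "data/"       # hypothetical path
    batch_size: 32
    shuffle: true
    new_height: 227
    new_width: 227
  }
}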

5. Modify memory_data_layer.cpp

#ifdef USE_OPENCV
#include <opencv2/core/core.hpp>
#endif  // USE_OPENCV

#include <vector>

#include "caffe/data_layers.hpp"

namespace caffe {

template <typename Dtype>
void MemoryDataLayer<Dtype>::DataLayerSetUp(const vector<Blob<Dtype>*>& bottom,
     const vector<Blob<Dtype>*>& top) {
  batch_size_ = this->layer_param_.memory_data_param().batch_size();
  channels_ = this->layer_param_.memory_data_param().channels();
  height_ = this->layer_param_.memory_data_param().height();
  width_ = this->layer_param_.memory_data_param().width();
  size_ = channels_ * height_ * width_;
  CHECK_GT(batch_size_ * size_, 0) <<
      "batch_size, channels, height, and width must be specified and"
      " positive in memory_data_param";
  vector<int> label_shape(1, batch_size_);
  top[0]->Reshape(batch_size_, channels_, height_, width_);
  top[1]->Reshape(label_shape);
  added_data_.Reshape(batch_size_, channels_, height_, width_);
  added_label_.Reshape(label_shape);
  data_ = NULL;
  labels_ = NULL;
  added_data_.cpu_data();
  added_label_.cpu_data();
}

template <typename Dtype>
void MemoryDataLayer<Dtype>::AddDatumVector(const vector<Datum>& datum_vector) {
  CHECK(!has_new_data_) <<
      "Can't add data until current data has been consumed.";
  size_t num = datum_vector.size();
  CHECK_GT(num, 0) << "There is no datum to add.";
  CHECK_EQ(num % batch_size_, 0) <<
      "The added data must be a multiple of the batch size.";
  added_data_.Reshape(num, channels_, height_, width_);
  // Changed for regression (and a fix over the originally posted code):
  // size the label blob by the number of label components per datum, so
  // the multi-label copy below does not write past the end of the blob.
  const int label_num = datum_vector[0].label_size();
  added_label_.Reshape(num, label_num, 1, 1);
  // Apply data transformations (mirror, scale, crop...)
  this->data_transformer_->Transform(datum_vector, &added_data_);
  // Copy Labels
  Dtype* top_label = added_label_.mutable_cpu_data();
  for (int item_id = 0; item_id < num; ++item_id) {
    // top_label[item_id] = datum_vector[item_id].label();
    for (int label_i = 0; label_i < label_num; ++label_i) {
      top_label[item_id * label_num + label_i] =
          datum_vector[item_id].label(label_i);
    }
  }
  // num_images == batch_size_
  Dtype* top_data = added_data_.mutable_cpu_data();
  Reset(top_data, top_label, num);
  has_new_data_ = true;
}

#ifdef USE_OPENCV
template <typename Dtype>
void MemoryDataLayer<Dtype>::AddMatVector(const vector<cv::Mat>& mat_vector,
    const vector<int>& labels) {
  size_t num = mat_vector.size();
  CHECK(!has_new_data_) <<
      "Can't add mat until current data has been consumed.";
  CHECK_GT(num, 0) << "There is no mat to add";
  CHECK_EQ(num % batch_size_, 0) <<
      "The added data must be a multiple of the batch size.";
  added_data_.Reshape(num, channels_, height_, width_);
  added_label_.Reshape(num, 1, 1, 1);
  // Apply data transformations (mirror, scale, crop...)
  this->data_transformer_->Transform(mat_vector, &added_data_);
  // Copy Labels
  Dtype* top_label = added_label_.mutable_cpu_data();
  for (int item_id = 0; item_id < num; ++item_id) {
    top_label[item_id] = labels[item_id];
  }
  // num_images == batch_size_
  Dtype* top_data = added_data_.mutable_cpu_data();
  Reset(top_data, top_label, num);
  has_new_data_ = true;
}
#endif  // USE_OPENCV

template <typename Dtype>
void MemoryDataLayer<Dtype>::Reset(Dtype* data, Dtype* labels, int n) {
  CHECK(data);
  CHECK(labels);
  CHECK_EQ(n % batch_size_, 0) << "n must be a multiple of batch size";
  // Warn with transformation parameters since a memory array is meant to
  // be generic and no transformations are done with Reset().
  if (this->layer_param_.has_transform_param()) {
    LOG(WARNING) << this->type() << " does not transform array data on Reset()";
  }
  data_ = data;
  labels_ = labels;
  n_ = n;
  pos_ = 0;
}

template <typename Dtype>
void MemoryDataLayer<Dtype>::set_batch_size(int new_size) {
  CHECK(!has_new_data_) <<
      "Can't change batch_size until current data has been consumed.";
  batch_size_ = new_size;
  added_data_.Reshape(batch_size_, channels_, height_, width_);
  added_label_.Reshape(batch_size_, 1, 1, 1);
}

template <typename Dtype>
void MemoryDataLayer<Dtype>::Forward_cpu(const vector<Blob<Dtype>*>& bottom,
      const vector<Blob<Dtype>*>& top) {
  CHECK(data_) << "MemoryDataLayer needs to be initalized by calling Reset";
  top[0]->Reshape(batch_size_, channels_, height_, width_);
  // Note: this forward pass still assumes one label value per item; if you
  // feed 4-dimensional labels through this layer, the label shape and the
  // labels_ stride below must be adjusted accordingly.
  top[1]->Reshape(batch_size_, 1, 1, 1);
  top[0]->set_cpu_data(data_ + pos_ * size_);
  top[1]->set_cpu_data(labels_ + pos_);
  pos_ = (pos_ + batch_size_) % n_;
  if (pos_ == 0)
    has_new_data_ = false;
}

INSTANTIATE_CLASS(MemoryDataLayer);
REGISTER_LAYER_CLASS(MemoryData);

}  // namespace caffe

6. Modify convert_imageset.cpp

// This program converts a set of images to a lmdb/leveldb by storing them
// as Datum proto buffers.
// Usage:
//   convert_imageset [FLAGS] ROOTFOLDER/ LISTFILE DB_NAME
//
// where ROOTFOLDER is the root folder that holds all the images, and LISTFILE
// should be a list of files as well as their labels, in the format as
//   subfolder1/file1.JPEG 7
//   ....

#include <algorithm>
#include <fstream>  // NOLINT(readability/streams)
#include <string>
#include <utility>
#include <vector>

#include "boost/scoped_ptr.hpp"
#include "gflags/gflags.h"
#include "glog/logging.h"

#include "caffe/proto/caffe.pb.h"
#include "caffe/util/db.hpp"
#include "caffe/util/io.hpp"
#include "caffe/util/rng.hpp"

using namespace caffe;  // NOLINT(build/namespaces)
using std::pair;
using boost::scoped_ptr;

DEFINE_bool(gray, false,
    "When this option is on, treat images as grayscale ones");
DEFINE_bool(shuffle, false,
    "Randomly shuffle the order of images and their labels");
DEFINE_string(backend, "lmdb",
    "The backend {lmdb, leveldb} for storing the result");
DEFINE_int32(resize_width, 0, "Width images are resized to");
DEFINE_int32(resize_height, 0, "Height images are resized to");
DEFINE_bool(check_size, false,
    "When this option is on, check that all the datum have the same size");
DEFINE_bool(encoded, false,
    "When this option is on, the encoded image will be save in datum");
DEFINE_string(encode_type, "",
    "Optional: What type should we encode the image as ('png','jpg',...).");

int main(int argc, char** argv) {
#ifdef USE_OPENCV
  // ::google::InitGoogleLogging(argv[0]);
  // Print output to stderr (while still logging)
  FLAGS_alsologtostderr = 1;

#ifndef GFLAGS_GFLAGS_H_
  namespace gflags = google;
#endif

  gflags::SetUsageMessage("Convert a set of images to the leveldb/lmdb\n"
        "format used as input for Caffe.\n"
        "Usage:\n"
        "    convert_imageset [FLAGS] ROOTFOLDER/ LISTFILE DB_NAME\n"
        "The ImageNet dataset for the training demo is at\n"
        "    http://www.image-net.org/download-images\n");
  caffe::GlobalInit(&argc, &argv);

  if (argc < 4) {
    gflags::ShowUsageWithFlagsRestrict(argv[0], "tools/convert_imageset");
    return 1;
  }

  const bool is_color = !FLAGS_gray;
  const bool check_size = FLAGS_check_size;
  const bool encoded = FLAGS_encoded;
  const string encode_type = FLAGS_encode_type;

  std::ifstream infile(argv[2]);
  std::vector<std::pair<std::string, vector<float> > > lines;
  std::string filename;
  // Original single-label parsing, kept for reference:
  // int label;
  // while (infile >> filename >> label) {
  //   lines.push_back(std::make_pair(filename, label));
  // }
  // Changed for regression: read four float targets per line.
  float x1, y1, x2, y2;
  while (infile >> filename >> x1 >> y1 >> x2 >> y2) {
    std::vector<float> vec_label;
    vec_label.push_back(x1);
    vec_label.push_back(y1);
    vec_label.push_back(x2);
    vec_label.push_back(y2);
    lines.push_back(std::make_pair(filename, vec_label));
  }
  if (FLAGS_shuffle) {
    // randomly shuffle data
    LOG(INFO) << "Shuffling data";
    shuffle(lines.begin(), lines.end());
  }
  LOG(INFO) << "A total of " << lines.size() << " images.";

  if (encode_type.size() && !encoded)
    LOG(INFO) << "encode_type specified, assuming encoded=true.";

  int resize_height = std::max<int>(0, FLAGS_resize_height);
  int resize_width = std::max<int>(0, FLAGS_resize_width);

  // Create new DB
  scoped_ptr<db::DB> db(db::GetDB(FLAGS_backend));
  db->Open(argv[3], db::NEW);
  scoped_ptr<db::Transaction> txn(db->NewTransaction());

  // Storing to db
  std::string root_folder(argv[1]);
  Datum datum;
  int count = 0;
  const int kMaxKeyLength = 256;
  char key_cstr[kMaxKeyLength];
  int data_size = 0;
  bool data_size_initialized = false;

  for (int line_id = 0; line_id < lines.size(); ++line_id) {
    bool status;
    std::string enc = encode_type;
    if (encoded && !enc.size()) {
      // Guess the encoding type from the file name
      string fn = lines[line_id].first;
      size_t p = fn.rfind('.');
      if ( p == fn.npos )
        LOG(WARNING) << "Failed to guess the encoding of '" << fn << "'";
      enc = fn.substr(p);
      std::transform(enc.begin(), enc.end(), enc.begin(), ::tolower);
    }
    status = ReadImageToDatum(root_folder + lines[line_id].first,
        lines[line_id].second, resize_height, resize_width, is_color,
        enc, &datum);
    if (status == false) continue;
    if (check_size) {
      if (!data_size_initialized) {
        data_size = datum.channels() * datum.height() * datum.width();
        data_size_initialized = true;
      } else {
        const std::string& data = datum.data();
        CHECK_EQ(data.size(), data_size) << "Incorrect data field size "
            << data.size();
      }
    }
    // sequential
    int length = sprintf_s(key_cstr, kMaxKeyLength, "%08d_%s", line_id,
        lines[line_id].first.c_str());

    // Put in db
    string out;
    CHECK(datum.SerializeToString(&out));
    txn->Put(string(key_cstr, length), out);

    if (++count % 1000 == 0) {
      // Commit db
      txn->Commit();
      txn.reset(db->NewTransaction());
      LOG(INFO) << "Processed " << count << " files.";
    }
  }
  // write the last batch
  if (count % 1000 != 0) {
    txn->Commit();
    LOG(INFO) << "Processed " << count << " files.";
  }
#else
  LOG(FATAL) << "This tool requires OpenCV; compile with USE_OPENCV.";
#endif  // USE_OPENCV
  return 0;
}
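After rebuilding, the tool is invoked like the stock convert_imageset, except that LISTFILE now carries four floats after each image path. A hypothetical invocation, using only the flags defined above (all paths are placeholders):

convert_imageset --backend=lmdb --resize_height=227 --resize_width=227 data/ train_list.txt train_lmdb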

7. Modify io.cpp (only the parts that need changes are shown)

bool ReadImageToDatum(const string& filename, const std::vector<float> labels,
    const int height, const int width, const bool is_color,
    const std::string & encoding, Datum* datum) {
  cv::Mat cv_img = ReadImageToCVMat(filename, height, width, is_color);
  if (cv_img.data) {
    if (encoding.size()) {
      if ( (cv_img.channels() == 3) == is_color && !height && !width &&
          matchExt(filename, encoding) )
        return ReadFileToDatum(filename, labels, datum);
      std::vector<uchar> buf;
      cv::imencode("."+encoding, cv_img, buf);
      datum->set_data(std::string(reinterpret_cast<char*>(&buf[0]),
                      buf.size()));
      // Changed for regression: store every component of the label vector
      // in the (now repeated) label field.
      // datum->set_label(label);
      datum->mutable_label()->Clear();
      for (int label_i = 0; label_i < labels.size(); ++label_i) {
        datum->add_label(labels[label_i]);
      }
      datum->set_encoded(true);
      return true;
    }
    CVMatToDatum(cv_img, datum);
    // datum->set_label(label);
    datum->mutable_label()->Clear();
    for (int label_i = 0; label_i < labels.size(); ++label_i) {
      datum->add_label(labels[label_i]);
    }
    return true;
  } else {
    return false;
  }
}
#endif  // USE_OPENCV

bool ReadFileToDatum(const string& filename, const std::vector<float> labels,
    Datum* datum) {
  std::streampos size;

  fstream file(filename.c_str(), ios::in|ios::binary|ios::ate);
  if (file.is_open()) {
    size = file.tellg();
    std::string buffer(size, ' ');
    file.seekg(0, ios::beg);
    file.read(&buffer[0], size);
    file.close();
    datum->set_data(buffer);
    // datum->set_label(label);
    datum->mutable_label()->Clear();
    for (int label_i = 0; label_i < labels.size(); ++label_i) {
      datum->add_label(labels[label_i]);
    }
    datum->set_encoded(true);
    return true;
  } else {
    return false;
  }
}

8. Modify io.hpp (only the parts that need changes are shown)

bool ReadFileToDatum(const string& filename, const std::vector<float> labels,
    Datum* datum);

inline bool ReadFileToDatum(const string& filename, Datum* datum) {
  // The label-less overload is no longer meaningful with vector labels,
  // so it is stubbed out.
  // return ReadFileToDatum(filename, -1, datum);
  return false;
}

bool ReadImageToDatum(const string& filename, const std::vector<float> labels,
    const int height, const int width, const bool is_color,
    const std::string & encoding, Datum* datum);

inline bool ReadImageToDatum(const string& filename, const std::vector<float> labels,
    const int height, const int width, const bool is_color, Datum* datum) {
  return ReadImageToDatum(filename, labels, height, width, is_color,
                          "", datum);
}

inline bool ReadImageToDatum(const string& filename, const std::vector<float> labels,
    const int height, const int width, Datum* datum) {
  return ReadImageToDatum(filename, labels, height, width, true, datum);
}

inline bool ReadImageToDatum(const string& filename, const std::vector<float> labels,
    const bool is_color, Datum* datum) {
  return ReadImageToDatum(filename, labels, 0, 0, is_color, datum);
}

inline bool ReadImageToDatum(const string& filename, const std::vector<float> labels,
    Datum* datum) {
  return ReadImageToDatum(filename, labels, 0, 0, true, datum);
}

inline bool ReadImageToDatum(const string& filename, const std::vector<float> labels,
    const std::string & encoding, Datum* datum) {
  return ReadImageToDatum(filename, labels, 0, 0, true, encoding, datum);
}

After making the above changes, rebuild Caffe to obtain the new convert_imageset and the other executables.

II. Converting Your Own Dataset to LevelDB Format
The procedure is basically the same as in http://blog.csdn.net/messiran10/article/details/49159559; the main differences are the following two points:

1. The sample list file
Each line now carries a 4-dimensional label after the image path instead of a single class label, e.g.: train_samples/10007.jpg 0.491667 0.529412 0.450000 0.352941

2. The model definition file
Replace the softmax loss layer with a squared (Euclidean) loss layer, and remove the accuracy layer (otherwise training will fail); see the sketch below.
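As a minimal sketch, if the net ends in a 4-output inner product layer, the loss could be declared as follows (the bottom name fc_out is a placeholder for whatever your prediction layer produces; EuclideanLoss is Caffe's squared-loss layer type):

layer {
  name: "loss"
  type: "EuclideanLoss"
  bottom: "fc_out"   # 4-dim prediction (placeholder name)
  bottom: "label"    # 4-dim regression target from the data layer
  top: "loss"
}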

A related reference implementation: https://github.com/olddocks/caffe-facialkp (facial keypoint regression with Caffe)
