如何用caffe解决回归问题
来源:互联网 发布:dg恢复数据 编辑:程序博客网 时间:2024/06/06 05:50
最近在基于caffe做目标检测的问题,需要利用caffe来训练一个回归网络,用来预测object在图像中的位置(x1,y1,width,height)。但是现有的caffe版本(happynear版本)在做数据集转换时,每张图像只能带一个整数标签,也就是只适用于分类问题,所以需要修改caffe源码,使之也可以转换带多个浮点标签的回归问题数据集。
主要是参照 http://blog.csdn.net/baobei0112/article/details/47606559 进行修改。但是这份博客使用的不是happynear的caffe版本,所以源码改动的地方差异较大。下面我会记录我改动的地方。
一.源码修改
1.修改caffe.proto,位于/src/caffe/proto
将第36行(Datum消息中的label字段,原为 optional int32 label = 5;)改成 repeated float label = 5;,然后运行extract_proto.bat重新生成protobuf代码
2.修改data_layers.hpp
#ifndef CAFFE_DATA_LAYERS_HPP_ #define CAFFE_DATA_LAYERS_HPP_ #include <string> #include <utility> #include <vector> #include "hdf5/hdf5.h" #include "caffe/blob.hpp" #include "caffe/common.hpp" #include "caffe/data_reader.hpp" #include "caffe/data_transformer.hpp" #include "caffe/filler.hpp" #include "caffe/internal_thread.hpp" #include "caffe/layer.hpp" #include "caffe/proto/caffe.pb.h" #include "caffe/util/blocking_queue.hpp" #include "caffe/util/db.hpp" #define HDF5_DATA_DATASET_NAME "data" #define HDF5_DATA_LABEL_NAME "label" namespace caffe { /** * @brief Provides base for data layers that feed blobs to the Net. * * TODO(dox): thorough documentation for Forward and proto params. */ template <typename Dtype> class BaseDataLayer : public Layer<Dtype> { public: explicit BaseDataLayer(const LayerParameter& param); // LayerSetUp: implements common data layer setup functionality, and calls // DataLayerSetUp to do special data layer setup for individual layer types. // This method may not be overridden except by the BasePrefetchingDataLayer. virtual void LayerSetUp(const vector<Blob<Dtype>*>& bottom, const vector<Blob<Dtype>*>& top); // Data layers should be shared by multiple solvers in parallel virtual inline bool ShareInParallel() const { return true; } virtual void DataLayerSetUp(const vector<Blob<Dtype>*>& bottom, const vector<Blob<Dtype>*>& top) { } // Data layers have no bottoms, so reshaping is trivial. 
virtual void Reshape(const vector<Blob<Dtype>*>& bottom, const vector<Blob<Dtype>*>& top) { } virtual void Backward_cpu(const vector<Blob<Dtype>*>& top, const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom) { } virtual void Backward_gpu(const vector<Blob<Dtype>*>& top, const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom) { } protected: TransformationParameter transform_param_; shared_ptr<DataTransformer<Dtype> > data_transformer_; bool output_labels_; }; template <typename Dtype> class Batch { public: Blob<Dtype> data_, label_; }; template <typename Dtype> class BasePrefetchingDataLayer : public BaseDataLayer<Dtype>, public InternalThread { public: explicit BasePrefetchingDataLayer(const LayerParameter& param); // LayerSetUp: implements common data layer setup functionality, and calls // DataLayerSetUp to do special data layer setup for individual layer types. // This method may not be overridden. void LayerSetUp(const vector<Blob<Dtype>*>& bottom, const vector<Blob<Dtype>*>& top); virtual void Forward_cpu(const vector<Blob<Dtype>*>& bottom, const vector<Blob<Dtype>*>& top); virtual void Forward_gpu(const vector<Blob<Dtype>*>& bottom, const vector<Blob<Dtype>*>& top); // Prefetches batches (asynchronously if to GPU memory) static const int PREFETCH_COUNT = 3; protected: virtual void InternalThreadEntry(); virtual void load_batch(Batch<Dtype>* batch) = 0; Batch<Dtype> prefetch_[PREFETCH_COUNT]; BlockingQueue<Batch<Dtype>*> prefetch_free_; BlockingQueue<Batch<Dtype>*> prefetch_full_; Blob<Dtype> transformed_data_; }; template <typename Dtype> class DataLayer : public BasePrefetchingDataLayer<Dtype> { public: explicit DataLayer(const LayerParameter& param); virtual ~DataLayer(); virtual void DataLayerSetUp(const vector<Blob<Dtype>*>& bottom, const vector<Blob<Dtype>*>& top); // DataLayer uses DataReader instead for sharing for parallelism virtual inline bool ShareInParallel() const { return false; } virtual inline const char* 
type() const { return "Data"; } virtual inline int ExactNumBottomBlobs() const { return 0; } virtual inline int MinTopBlobs() const { return 1; } virtual inline int MaxTopBlobs() const { return 2; } protected: virtual void load_batch(Batch<Dtype>* batch); DataReader reader_; }; /** * @brief Provides data to the Net generated by a Filler. * * TODO(dox): thorough documentation for Forward and proto params. */ template <typename Dtype> class DummyDataLayer : public Layer<Dtype> { public: explicit DummyDataLayer(const LayerParameter& param) : Layer<Dtype>(param) {} virtual void LayerSetUp(const vector<Blob<Dtype>*>& bottom, const vector<Blob<Dtype>*>& top); // Data layers should be shared by multiple solvers in parallel virtual inline bool ShareInParallel() const { return true; } // Data layers have no bottoms, so reshaping is trivial. virtual void Reshape(const vector<Blob<Dtype>*>& bottom, const vector<Blob<Dtype>*>& top) { } virtual inline const char* type() const { return "DummyData"; } virtual inline int ExactNumBottomBlobs() const { return 0; } virtual inline int MinTopBlobs() const { return 1; } protected: virtual void Forward_cpu(const vector<Blob<Dtype>*>& bottom, const vector<Blob<Dtype>*>& top); virtual void Backward_cpu(const vector<Blob<Dtype>*>& top, const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom) { } virtual void Backward_gpu(const vector<Blob<Dtype>*>& top, const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom) { } vector<shared_ptr<Filler<Dtype> > > fillers_; vector<bool> refill_; }; /** * @brief Provides data to the Net from HDF5 files. * * TODO(dox): thorough documentation for Forward and proto params. 
*/ template <typename Dtype> class HDF5DataLayer : public Layer<Dtype> { public: explicit HDF5DataLayer(const LayerParameter& param) : Layer<Dtype>(param) {} virtual ~HDF5DataLayer(); virtual void LayerSetUp(const vector<Blob<Dtype>*>& bottom, const vector<Blob<Dtype>*>& top); // Data layers should be shared by multiple solvers in parallel virtual inline bool ShareInParallel() const { return true; } // Data layers have no bottoms, so reshaping is trivial. virtual void Reshape(const vector<Blob<Dtype>*>& bottom, const vector<Blob<Dtype>*>& top) { } virtual inline const char* type() const { return "HDF5Data"; } virtual inline int ExactNumBottomBlobs() const { return 0; } virtual inline int MinTopBlobs() const { return 1; } protected: virtual void Forward_cpu(const vector<Blob<Dtype>*>& bottom, const vector<Blob<Dtype>*>& top); virtual void Forward_gpu(const vector<Blob<Dtype>*>& bottom, const vector<Blob<Dtype>*>& top); virtual void Backward_cpu(const vector<Blob<Dtype>*>& top, const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom) { } virtual void Backward_gpu(const vector<Blob<Dtype>*>& top, const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom) { } virtual void LoadHDF5FileData(const char* filename); std::vector<std::string> hdf_filenames_; unsigned int num_files_; unsigned int current_file_; hsize_t current_row_; std::vector<shared_ptr<Blob<Dtype> > > hdf_blobs_; std::vector<unsigned int> data_permutation_; std::vector<unsigned int> file_permutation_; }; /** * @brief Write blobs to disk as HDF5 files. * * TODO(dox): thorough documentation for Forward and proto params. 
*/ template <typename Dtype> class HDF5OutputLayer : public Layer<Dtype> { public: explicit HDF5OutputLayer(const LayerParameter& param) : Layer<Dtype>(param), file_opened_(false) {} virtual ~HDF5OutputLayer(); virtual void LayerSetUp(const vector<Blob<Dtype>*>& bottom, const vector<Blob<Dtype>*>& top); // Data layers should be shared by multiple solvers in parallel virtual inline bool ShareInParallel() const { return true; } // Data layers have no bottoms, so reshaping is trivial. virtual void Reshape(const vector<Blob<Dtype>*>& bottom, const vector<Blob<Dtype>*>& top) { } virtual inline const char* type() const { return "HDF5Output"; } // TODO: no limit on the number of blobs virtual inline int ExactNumBottomBlobs() const { return 2; } virtual inline int ExactNumTopBlobs() const { return 0; } inline std::string file_name() const { return file_name_; } protected: virtual void Forward_cpu(const vector<Blob<Dtype>*>& bottom, const vector<Blob<Dtype>*>& top); virtual void Forward_gpu(const vector<Blob<Dtype>*>& bottom, const vector<Blob<Dtype>*>& top); virtual void Backward_cpu(const vector<Blob<Dtype>*>& top, const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom); virtual void Backward_gpu(const vector<Blob<Dtype>*>& top, const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom); virtual void SaveBlobs(); bool file_opened_; std::string file_name_; hid_t file_id_; Blob<Dtype> data_blob_; Blob<Dtype> label_blob_; }; /** * @brief Provides data to the Net from image files. * * TODO(dox): thorough documentation for Forward and proto params. 
*/ template <typename Dtype> class ImageDataLayer : public BasePrefetchingDataLayer<Dtype> { public: explicit ImageDataLayer(const LayerParameter& param) : BasePrefetchingDataLayer<Dtype>(param) {} virtual ~ImageDataLayer(); virtual void DataLayerSetUp(const vector<Blob<Dtype>*>& bottom, const vector<Blob<Dtype>*>& top); virtual inline const char* type() const { return "ImageData"; } virtual inline int ExactNumBottomBlobs() const { return 0; } virtual inline int ExactNumTopBlobs() const { return 2; } vector<std::pair<std::string, std:: vector<float>> > lines_; shared_ptr<Caffe::RNG> prefetch_rng_; virtual void ShuffleImages(); virtual void load_batch(Batch<Dtype>* batch); int lines_id_; }; /** * @brief Provides data to the Net from memory. * * TODO(dox): thorough documentation for Forward and proto params. */ template <typename Dtype> class MemoryDataLayer : public BaseDataLayer<Dtype> { public: explicit MemoryDataLayer(const LayerParameter& param) : BaseDataLayer<Dtype>(param), has_new_data_(false) {} virtual void DataLayerSetUp(const vector<Blob<Dtype>*>& bottom, const vector<Blob<Dtype>*>& top); virtual inline const char* type() const { return "MemoryData"; } virtual inline int ExactNumBottomBlobs() const { return 0; } virtual inline int ExactNumTopBlobs() const { return 2; } virtual void AddDatumVector(const vector<Datum>& datum_vector); #ifdef USE_OPENCV virtual void AddMatVector(const vector<cv::Mat>& mat_vector, const vector<int>& labels); #endif // USE_OPENCV // Reset should accept const pointers, but can't, because the memory // will be given to Blob, which is mutable void Reset(Dtype* data, Dtype* label, int n); void set_batch_size(int new_size); int batch_size() { return batch_size_; } int channels() { return channels_; } int height() { return height_; } int width() { return width_; } protected: virtual void Forward_cpu(const vector<Blob<Dtype>*>& bottom, const vector<Blob<Dtype>*>& top); int batch_size_, channels_, height_, width_, size_; Dtype* data_; 
Dtype* labels_; int n_; size_t pos_; Blob<Dtype> added_data_; Blob<Dtype> added_label_; bool has_new_data_; }; /** * @brief Provides data to the Net from windows of images files, specified * by a window data file. * * TODO(dox): thorough documentation for Forward and proto params. */ template <typename Dtype> class WindowDataLayer : public BasePrefetchingDataLayer<Dtype> { public: explicit WindowDataLayer(const LayerParameter& param) : BasePrefetchingDataLayer<Dtype>(param) {} virtual ~WindowDataLayer(); virtual void DataLayerSetUp(const vector<Blob<Dtype>*>& bottom, const vector<Blob<Dtype>*>& top); virtual inline const char* type() const { return "WindowData"; } virtual inline int ExactNumBottomBlobs() const { return 0; } virtual inline int ExactNumTopBlobs() const { return 2; } protected: virtual unsigned int PrefetchRand(); virtual void load_batch(Batch<Dtype>* batch); shared_ptr<Caffe::RNG> prefetch_rng_; vector<std::pair<std::string, vector<int> > > image_database_; enum WindowField { IMAGE_INDEX, LABEL, OVERLAP, X1, Y1, X2, Y2, NUM }; vector<vector<float> > fg_windows_; vector<vector<float> > bg_windows_; Blob<Dtype> data_mean_; vector<Dtype> mean_values_; bool has_mean_file_; bool has_mean_values_; bool cache_images_; vector<std::pair<std::string, Datum > > image_database_cache_; }; /** * @brief Provides data to the Net from image files. * * TODO(dox): thorough documentation for Forward and proto params. 
*/ template <typename Dtype> class MultiLabelImageDataLayer : public BasePrefetchingDataLayer<Dtype> { public: explicit MultiLabelImageDataLayer(const LayerParameter& param) : BasePrefetchingDataLayer<Dtype>(param) {} virtual ~MultiLabelImageDataLayer(); virtual void DataLayerSetUp(const vector<Blob<Dtype>*>& bottom, const vector<Blob<Dtype>*>& top); virtual inline const char* type() const { return "MultiLabelImageData"; } virtual inline int ExactNumBottomBlobs() const { return 0; } virtual inline int ExactNumTopBlobs() const { return 2; } protected: shared_ptr<Caffe::RNG> prefetch_rng_; virtual void ShuffleImages(); virtual void load_batch(Batch<Dtype>* batch); vector<std::pair<std::string, shared_ptr<vector<Dtype> > > > lines_; int label_count; int lines_id_; }; } // namespace caffe #endif // CAFFE_DATA_LAYERS_HPP_
3.改动data_layer.cpp
#ifdef USE_OPENCV #include <opencv2/core/core.hpp> #endif // USE_OPENCV #include <stdint.h> #include <vector> #include "caffe/data_layers.hpp" #include "caffe/proto/caffe.pb.h" #include "caffe/util/benchmark.hpp" namespace caffe { template <typename Dtype> DataLayer<Dtype>::DataLayer(const LayerParameter& param) : BasePrefetchingDataLayer<Dtype>(param), reader_(param) { } template <typename Dtype> DataLayer<Dtype>::~DataLayer() { this->StopInternalThread(); } template <typename Dtype> void DataLayer<Dtype>::DataLayerSetUp(const vector<Blob<Dtype>*>& bottom, const vector<Blob<Dtype>*>& top) { const int batch_size = this->layer_param_.data_param().batch_size(); // Read a data point, and use it to initialize the top blob. Datum& datum = *(reader_.full().peek()); // Use data_transformer to infer the expected blob shape from datum. vector<int> top_shape = this->data_transformer_->InferBlobShape(datum); this->transformed_data_.Reshape(top_shape); // Reshape top[0] and prefetch_data according to the batch_size. 
top_shape[0] = batch_size; top[0]->Reshape(top_shape); for (int i = 0; i < this->PREFETCH_COUNT; ++i) { this->prefetch_[i].data_.Reshape(top_shape); } LOG(INFO) << "output data size: " << top[0]->num() << "," << top[0]->channels() << "," << top[0]->height() << "," << top[0]->width(); // label if (this->output_labels_) { /* vector<int> label_shape(1, batch_size); top[1]->Reshape(label_shape); for (int i = 0; i < this->PREFETCH_COUNT; ++i) { this->prefetch_[i].label_.Reshape(label_shape); } */ top[1]->Reshape(batch_size,4,1,1); for (int i = 0; i < this->PREFETCH_COUNT; ++i) { this->prefetch_[i].label_.Reshape(batch_size, 4, 1, 1); } } } // This function is called on prefetch thread template<typename Dtype> void DataLayer<Dtype>::load_batch(Batch<Dtype>* batch) { CPUTimer batch_timer; batch_timer.Start(); double read_time = 0; double trans_time = 0; CPUTimer timer; CHECK(batch->data_.count()); CHECK(this->transformed_data_.count()); // Reshape according to the first datum of each batch // on single input batches allows for inputs of varying dimension. const int batch_size = this->layer_param_.data_param().batch_size(); Datum& datum = *(reader_.full().peek()); // Use data_transformer to infer the expected blob shape from datum. vector<int> top_shape = this->data_transformer_->InferBlobShape(datum); this->transformed_data_.Reshape(top_shape); // Reshape batch according to the batch_size. 
top_shape[0] = batch_size; batch->data_.Reshape(top_shape); Dtype* top_data = batch->data_.mutable_cpu_data(); Dtype* top_label = NULL; // suppress warnings about uninitialized variables if (this->output_labels_) { top_label = batch->label_.mutable_cpu_data(); } /* if (this->output_labels_) { for (int label_i = 0; label_i < datum.label_size(); label_i++){ top_label[item_id*datum.label_size() + label_i] = datum.label(label_i); } } */ for (int item_id = 0; item_id < batch_size; ++item_id) { timer.Start(); // get a datum Datum& datum = *(reader_.full().pop("Waiting for data")); read_time += timer.MicroSeconds(); timer.Start(); // Apply data transformations (mirror, scale, crop...) int offset = batch->data_.offset(item_id); this->transformed_data_.set_cpu_data(top_data + offset); this->data_transformer_->Transform(datum, &(this->transformed_data_)); // Copy label. if (this->output_labels_) { // top_label[item_id] = datum.label(); for (int label_i = 0; label_i < datum.label_size(); label_i++){ top_label[item_id*datum.label_size()+label_i] = datum.label(label_i); } } trans_time += timer.MicroSeconds(); reader_.free().push(const_cast<Datum*>(&datum)); } timer.Stop(); batch_timer.Stop(); DLOG(INFO) << "Prefetch batch: " << batch_timer.MilliSeconds() << " ms."; DLOG(INFO) << " Read time: " << read_time / 1000 << " ms."; DLOG(INFO) << "Transform time: " << trans_time / 1000 << " ms."; } INSTANTIATE_CLASS(DataLayer); REGISTER_LAYER_CLASS(Data); } // namespace caffe
4.修改image_data_layer.cpp中label部分
#ifdef USE_OPENCV
#include <opencv2/core/core.hpp>

#include <fstream>  // NOLINT(readability/streams)
#include <iostream>  // NOLINT(readability/streams)
#include <string>
#include <utility>
#include <vector>

#include "caffe/data_layers.hpp"
#include "caffe/util/benchmark.hpp"
#include "caffe/util/io.hpp"
#include "caffe/util/math_functions.hpp"
#include "caffe/util/rng.hpp"

namespace caffe {

template <typename Dtype>
ImageDataLayer<Dtype>::~ImageDataLayer() {
  this->StopInternalThread();
}

/**
 * @brief Reads the image list and shapes the top blobs and prefetch buffers.
 *
 * Each line of the list file is "<filename> <x1> <y1> <x2> <y2>": an image
 * path followed by four float regression targets. The label blob is shaped
 * (batch_size, 4, 1, 1) accordingly.
 */
template <typename Dtype>
void ImageDataLayer<Dtype>::DataLayerSetUp(const vector<Blob<Dtype>*>& bottom,
      const vector<Blob<Dtype>*>& top) {
  const int new_height = this->layer_param_.image_data_param().new_height();
  const int new_width  = this->layer_param_.image_data_param().new_width();
  const bool is_color  = this->layer_param_.image_data_param().is_color();
  string root_folder = this->layer_param_.image_data_param().root_folder();

  CHECK((new_height == 0 && new_width == 0) ||
      (new_height > 0 && new_width > 0)) << "Current implementation requires "
      "new_height and new_width to be set at the same time.";
  // Read the file with filenames and labels
  const string& source = this->layer_param_.image_data_param().source();
  LOG(INFO) << "Opening file " << source;
  std::ifstream infile(source.c_str());
  string filename;
  float x1, y1, x2, y2;
  while (infile >> filename >> x1 >> y1 >> x2 >> y2) {
    std::vector<float> vec_label;
    vec_label.push_back(x1);
    vec_label.push_back(y1);
    vec_label.push_back(x2);
    vec_label.push_back(y2);
    lines_.push_back(std::make_pair(filename, vec_label));
  }

  // lines_[lines_id_] is dereferenced below; an empty or unreadable list
  // would make that undefined behaviour, so fail with a clear message.
  CHECK(!lines_.empty()) << "File is empty";

  if (this->layer_param_.image_data_param().shuffle()) {
    // randomly shuffle data
    LOG(INFO) << "Shuffling data";
    const unsigned int prefetch_rng_seed = caffe_rng_rand();
    prefetch_rng_.reset(new Caffe::RNG(prefetch_rng_seed));
    ShuffleImages();
  }
  LOG(INFO) << "A total of " << lines_.size() << " images.";

  lines_id_ = 0;
  // Check if we would need to randomly skip a few data points
  if (this->layer_param_.image_data_param().rand_skip()) {
    unsigned int skip = caffe_rng_rand() %
        this->layer_param_.image_data_param().rand_skip();
    LOG(INFO) << "Skipping first " << skip << " data points.";
    CHECK_GT(lines_.size(), skip) << "Not enough points to skip";
    lines_id_ = skip;
  }
  // Read an image, and use it to initialize the top blob.
  cv::Mat cv_img = ReadImageToCVMat(root_folder + lines_[lines_id_].first,
                                    new_height, new_width, is_color);
  CHECK(cv_img.data) << "Could not load " << lines_[lines_id_].first;
  // Use data_transformer to infer the expected blob shape from a cv_image.
  vector<int> top_shape = this->data_transformer_->InferBlobShape(cv_img);
  this->transformed_data_.Reshape(top_shape);
  // Reshape prefetch_data and top[0] according to the batch_size.
  const int batch_size = this->layer_param_.image_data_param().batch_size();
  CHECK_GT(batch_size, 0) << "Positive batch size required";
  top_shape[0] = batch_size;
  for (int i = 0; i < this->PREFETCH_COUNT; ++i) {
    this->prefetch_[i].data_.Reshape(top_shape);
  }
  top[0]->Reshape(top_shape);

  LOG(INFO) << "output data size: " << top[0]->num() << ","
      << top[0]->channels() << "," << top[0]->height() << ","
      << top[0]->width();
  // label: one row of label values per image (four, as parsed above).
  const int label_dim = static_cast<int>(lines_[lines_id_].second.size());
  top[1]->Reshape(batch_size, label_dim, 1, 1);
  for (int i = 0; i < this->PREFETCH_COUNT; ++i) {
    this->prefetch_[i].label_.Reshape(batch_size, label_dim, 1, 1);
  }
}

/// Shuffles the (filename, labels) entries with the prefetch RNG.
template <typename Dtype>
void ImageDataLayer<Dtype>::ShuffleImages() {
  caffe::rng_t* prefetch_rng =
      static_cast<caffe::rng_t*>(prefetch_rng_->generator());
  shuffle(lines_.begin(), lines_.end(), prefetch_rng);
}

/**
 * @brief Loads, transforms and labels one batch of images.
 *
 * This function is called on prefetch thread.
 */
template <typename Dtype>
void ImageDataLayer<Dtype>::load_batch(Batch<Dtype>* batch) {
  CPUTimer batch_timer;
  batch_timer.Start();
  double read_time = 0;
  double trans_time = 0;
  CPUTimer timer;
  CHECK(batch->data_.count());
  CHECK(this->transformed_data_.count());
  ImageDataParameter image_data_param = this->layer_param_.image_data_param();
  const int batch_size = image_data_param.batch_size();
  const int new_height = image_data_param.new_height();
  const int new_width = image_data_param.new_width();
  const bool is_color = image_data_param.is_color();
  string root_folder = image_data_param.root_folder();

  // Reshape according to the first image of each batch
  // on single input batches allows for inputs of varying dimension.
  cv::Mat cv_img = ReadImageToCVMat(root_folder + lines_[lines_id_].first,
      new_height, new_width, is_color);
  CHECK(cv_img.data) << "Could not load " << lines_[lines_id_].first;
  // Use data_transformer to infer the expected blob shape from a cv_img.
  vector<int> top_shape = this->data_transformer_->InferBlobShape(cv_img);
  this->transformed_data_.Reshape(top_shape);
  // Reshape batch according to the batch_size.
  top_shape[0] = batch_size;
  batch->data_.Reshape(top_shape);

  Dtype* prefetch_data = batch->data_.mutable_cpu_data();
  // The label buffer must point at the batch's label blob. It was left as
  // NULL in the listing, so the copy loop below wrote through a null pointer.
  Dtype* prefetch_label = batch->label_.mutable_cpu_data();

  // datum scales
  const int lines_size = lines_.size();
  for (int item_id = 0; item_id < batch_size; ++item_id) {
    // get a blob
    timer.Start();
    CHECK_GT(lines_size, lines_id_);
    cv::Mat cv_img = ReadImageToCVMat(root_folder + lines_[lines_id_].first,
        new_height, new_width, is_color);
    CHECK(cv_img.data) << "Could not load " << lines_[lines_id_].first;
    read_time += timer.MicroSeconds();
    timer.Start();
    // Apply transformations (mirror, crop...) to the image
    int offset = batch->data_.offset(item_id);
    this->transformed_data_.set_cpu_data(prefetch_data + offset);
    this->data_transformer_->Transform(cv_img, &(this->transformed_data_));
    trans_time += timer.MicroSeconds();

    // Copy this image's label values into its row of the label blob.
    const std::vector<float>& labels = lines_[lines_id_].second;
    const int label_dim = static_cast<int>(labels.size());
    for (int label_i = 0; label_i < label_dim; ++label_i) {
      prefetch_label[item_id * label_dim + label_i] = labels[label_i];
    }

    // go to the next iter
    lines_id_++;
    if (lines_id_ >= lines_size) {
      // We have reached the end. Restart from the first.
      DLOG(INFO) << "Restarting data prefetching from start.";
      lines_id_ = 0;
      if (this->layer_param_.image_data_param().shuffle()) {
        ShuffleImages();
      }
    }
  }
  batch_timer.Stop();
  DLOG(INFO) << "Prefetch batch: " << batch_timer.MilliSeconds() << " ms.";
  DLOG(INFO) << " Read time: " << read_time / 1000 << " ms.";
  DLOG(INFO) << "Transform time: " << trans_time / 1000 << " ms.";
}

INSTANTIATE_CLASS(ImageDataLayer);
REGISTER_LAYER_CLASS(ImageData);

}  // namespace caffe
#endif  // USE_OPENCV
5.修改memory_data_layer.cpp
#ifdef USE_OPENCV #include <opencv2/core/core.hpp> #endif // USE_OPENCV #include <vector> #include "caffe/data_layers.hpp" namespace caffe { template <typename Dtype> void MemoryDataLayer<Dtype>::DataLayerSetUp(const vector<Blob<Dtype>*>& bottom, const vector<Blob<Dtype>*>& top) { batch_size_ = this->layer_param_.memory_data_param().batch_size(); channels_ = this->layer_param_.memory_data_param().channels(); height_ = this->layer_param_.memory_data_param().height(); width_ = this->layer_param_.memory_data_param().width(); size_ = channels_ * height_ * width_; CHECK_GT(batch_size_ * size_, 0) << "batch_size, channels, height, and width must be specified and" " positive in memory_data_param"; vector<int> label_shape(1, batch_size_); top[0]->Reshape(batch_size_, channels_, height_, width_); top[1]->Reshape(label_shape); added_data_.Reshape(batch_size_, channels_, height_, width_); added_label_.Reshape(label_shape); data_ = NULL; labels_ = NULL; added_data_.cpu_data(); added_label_.cpu_data(); } template <typename Dtype> void MemoryDataLayer<Dtype>::AddDatumVector(const vector<Datum>& datum_vector) { CHECK(!has_new_data_) << "Can't add data until current data has been consumed."; size_t num = datum_vector.size(); CHECK_GT(num, 0) << "There is no datum to add."; CHECK_EQ(num % batch_size_, 0) << "The added data must be a multiple of the batch size."; added_data_.Reshape(num, channels_, height_, width_); added_label_.Reshape(num, 1, 1, 1); // Apply data transformations (mirror, scale, crop...) 
this->data_transformer_->Transform(datum_vector, &added_data_); // Copy Labels Dtype* top_label = added_label_.mutable_cpu_data(); for (int item_id = 0; item_id < num; ++item_id) { //top_label[item_id] = datum_vector[item_id].label(); int label_num = datum_vector[item_id].label_size(); for (int label_i = 0; label_i < label_num; label_i++){ top_label[item_id*label_num + label_i] = datum_vector[item_id].label(label_i); } } // num_images == batch_size_ Dtype* top_data = added_data_.mutable_cpu_data(); Reset(top_data, top_label, num); has_new_data_ = true; } #ifdef USE_OPENCV template <typename Dtype> void MemoryDataLayer<Dtype>::AddMatVector(const vector<cv::Mat>& mat_vector, const vector<int>& labels) { size_t num = mat_vector.size(); CHECK(!has_new_data_) << "Can't add mat until current data has been consumed."; CHECK_GT(num, 0) << "There is no mat to add"; CHECK_EQ(num % batch_size_, 0) << "The added data must be a multiple of the batch size."; added_data_.Reshape(num, channels_, height_, width_); added_label_.Reshape(num, 1, 1, 1); // Apply data transformations (mirror, scale, crop...) this->data_transformer_->Transform(mat_vector, &added_data_); // Copy Labels Dtype* top_label = added_label_.mutable_cpu_data(); for (int item_id = 0; item_id < num; ++item_id) { top_label[item_id] = labels[item_id]; } // num_images == batch_size_ Dtype* top_data = added_data_.mutable_cpu_data(); Reset(top_data, top_label, num); has_new_data_ = true; } #endif // USE_OPENCV template <typename Dtype> void MemoryDataLayer<Dtype>::Reset(Dtype* data, Dtype* labels, int n) { CHECK(data); CHECK(labels); CHECK_EQ(n % batch_size_, 0) << "n must be a multiple of batch size"; // Warn with transformation parameters since a memory array is meant to // be generic and no transformations are done with Reset(). 
if (this->layer_param_.has_transform_param()) { LOG(WARNING) << this->type() << " does not transform array data on Reset()"; } data_ = data; labels_ = labels; n_ = n; pos_ = 0; } template <typename Dtype> void MemoryDataLayer<Dtype>::set_batch_size(int new_size) { CHECK(!has_new_data_) << "Can't change batch_size until current data has been consumed."; batch_size_ = new_size; added_data_.Reshape(batch_size_, channels_, height_, width_); added_label_.Reshape(batch_size_, 1, 1, 1); } template <typename Dtype> void MemoryDataLayer<Dtype>::Forward_cpu(const vector<Blob<Dtype>*>& bottom, const vector<Blob<Dtype>*>& top) { CHECK(data_) << "MemoryDataLayer needs to be initalized by calling Reset"; top[0]->Reshape(batch_size_, channels_, height_, width_); top[1]->Reshape(batch_size_, 1, 1, 1); top[0]->set_cpu_data(data_ + pos_ * size_); top[1]->set_cpu_data(labels_ + pos_); pos_ = (pos_ + batch_size_) % n_; if (pos_ == 0) has_new_data_ = false; } INSTANTIATE_CLASS(MemoryDataLayer); REGISTER_LAYER_CLASS(MemoryData); } // namespace caffe
6.修改convert_imageset.cpp
// This program converts a set of images to a lmdb/leveldb by storing them // as Datum proto buffers. // Usage: // convert_imageset [FLAGS] ROOTFOLDER/ LISTFILE DB_NAME // // where ROOTFOLDER is the root folder that holds all the images, and LISTFILE // should be a list of files as well as their labels, in the format as // subfolder1/file1.JPEG 7 // .... #include <algorithm> #include <fstream> // NOLINT(readability/streams) #include <string> #include <utility> #include <vector> #include "boost/scoped_ptr.hpp" #include "gflags/gflags.h" #include "glog/logging.h" #include "caffe/proto/caffe.pb.h" #include "caffe/util/db.hpp" #include "caffe/util/io.hpp" #include "caffe/util/rng.hpp" using namespace caffe; // NOLINT(build/namespaces) using std::pair; using boost::scoped_ptr; DEFINE_bool(gray, false, "When this option is on, treat images as grayscale ones"); DEFINE_bool(shuffle, false, "Randomly shuffle the order of images and their labels"); DEFINE_string(backend, "lmdb", "The backend {lmdb, leveldb} for storing the result"); DEFINE_int32(resize_width, 0, "Width images are resized to"); DEFINE_int32(resize_height, 0, "Height images are resized to"); DEFINE_bool(check_size, false, "When this option is on, check that all the datum have the same size"); DEFINE_bool(encoded, false, "When this option is on, the encoded image will be save in datum"); DEFINE_string(encode_type, "", "Optional: What type should we encode the image as ('png','jpg',...)."); int main(int argc, char** argv) { #ifdef USE_OPENCV //::google::InitGoogleLogging(argv[0]); // Print output to stderr (while still logging) FLAGS_alsologtostderr = 1; #ifndef GFLAGS_GFLAGS_H_ namespace gflags = google; #endif gflags::SetUsageMessage("Convert a set of images to the leveldb/lmdb\n" "format used as input for Caffe.\n" "Usage:\n" " convert_imageset [FLAGS] ROOTFOLDER/ LISTFILE DB_NAME\n" "The ImageNet dataset for the training demo is at\n" " http://www.image-net.org/download-images\n"); caffe::GlobalInit(&argc, 
&argv); if (argc < 4) { gflags::ShowUsageWithFlagsRestrict(argv[0], "tools/convert_imageset"); return 1; } const bool is_color = !FLAGS_gray; const bool check_size = FLAGS_check_size; const bool encoded = FLAGS_encoded; const string encode_type = FLAGS_encode_type; std::ifstream infile(argv[2]); std::vector<std::pair<std::string, vector<float>> > lines; std::string filename; /* int label; while (infile >> filename >> label) { lines.push_back(std::make_pair(filename, label)); } */ float x1, y1, x2, y2; while (infile >> filename >> x1 >> y1 >> x2 >> y2) { std::vector<float> vec_label; vec_label.push_back(x1); vec_label.push_back(y1); vec_label.push_back(x2); vec_label.push_back(y2); lines.push_back(std::make_pair(filename, vec_label)); } if (FLAGS_shuffle) { // randomly shuffle data LOG(INFO) << "Shuffling data"; shuffle(lines.begin(), lines.end()); } LOG(INFO) << "A total of " << lines.size() << " images."; if (encode_type.size() && !encoded) LOG(INFO) << "encode_type specified, assuming encoded=true."; int resize_height = std::max<int>(0, FLAGS_resize_height); int resize_width = std::max<int>(0, FLAGS_resize_width); // Create new DB scoped_ptr<db::DB> db(db::GetDB(FLAGS_backend)); db->Open(argv[3], db::NEW); scoped_ptr<db::Transaction> txn(db->NewTransaction()); // Storing to db std::string root_folder(argv[1]); Datum datum; int count = 0; const int kMaxKeyLength = 256; char key_cstr[kMaxKeyLength]; int data_size = 0; bool data_size_initialized = false; for (int line_id = 0; line_id < lines.size(); ++line_id) { bool status; std::string enc = encode_type; if (encoded && !enc.size()) { // Guess the encoding type from the file name string fn = lines[line_id].first; size_t p = fn.rfind('.'); if ( p == fn.npos ) LOG(WARNING) << "Failed to guess the encoding of '" << fn << "'"; enc = fn.substr(p); std::transform(enc.begin(), enc.end(), enc.begin(), ::tolower); } status = ReadImageToDatum(root_folder + lines[line_id].first, lines[line_id].second, resize_height, 
resize_width, is_color, enc, &datum); if (status == false) continue; if (check_size) { if (!data_size_initialized) { data_size = datum.channels() * datum.height() * datum.width(); data_size_initialized = true; } else { const std::string& data = datum.data(); CHECK_EQ(data.size(), data_size) << "Incorrect data field size " << data.size(); } } // sequential int length = sprintf_s(key_cstr, kMaxKeyLength, "%08d_%s", line_id, lines[line_id].first.c_str()); // Put in db string out; CHECK(datum.SerializeToString(&out)); txn->Put(string(key_cstr, length), out); if (++count % 1000 == 0) { // Commit db txn->Commit(); txn.reset(db->NewTransaction()); LOG(INFO) << "Processed " << count << " files."; } } // write the last batch if (count % 1000 != 0) { txn->Commit(); LOG(INFO) << "Processed " << count << " files."; } #else LOG(FATAL) << "This tool requires OpenCV; compile with USE_OPENCV."; #endif // USE_OPENCV return 0; }
7.修改io.cpp (只贴了部分需要修改的程序)
bool ReadImageToDatum(const string& filename, const std::vector<float> labels, const int height, const int width, const bool is_color, const std::string & encoding, Datum* datum) { cv::Mat cv_img = ReadImageToCVMat(filename, height, width, is_color); if (cv_img.data) { if (encoding.size()) { if ( (cv_img.channels() == 3) == is_color && !height && !width && matchExt(filename, encoding) ) return ReadFileToDatum(filename, labels, datum); std::vector<uchar> buf; cv::imencode("."+encoding, cv_img, buf); datum->set_data(std::string(reinterpret_cast<char*>(&buf[0]), buf.size())); // datum->set_label(label); datum->mutable_label()->Clear(); for (int label_i = 0; label_i < labels.size(); label_i++){ datum->add_label(labels[label_i]); } datum->set_encoded(true); return true; } CVMatToDatum(cv_img, datum); // datum->set_label(label); datum->mutable_label()->Clear(); for (int label_i = 0; label_i < labels.size(); label_i++){ datum->add_label(labels[label_i]); } return true; } else { return false; } } #endif // USE_OPENCV bool ReadFileToDatum(const string& filename, const std::vector<float> labels, Datum* datum) { std::streampos size; fstream file(filename.c_str(), ios::in|ios::binary|ios::ate); if (file.is_open()) { size = file.tellg(); std::string buffer(size, ' '); file.seekg(0, ios::beg); file.read(&buffer[0], size); file.close(); datum->set_data(buffer); // datum->set_label(label); datum->mutable_label()->Clear(); for (int label_i = 0; label_i < labels.size(); label_i++){ datum->add_label(labels[label_i]); } datum->set_encoded(true); return true; } else { return false; } }
8.修改io.hpp (只贴了部分需要修改的程序)
/// Reads the raw bytes of `filename` into `datum` and attaches `labels`
/// (one float per label dimension). Returns false on failure.
bool ReadFileToDatum(const string& filename, const std::vector<float> labels,
    Datum* datum);

/// Label-free convenience overload: forwards to the labelled overload with an
/// empty label vector. (Previously this was stubbed out and always returned
/// false without reading the file.)
inline bool ReadFileToDatum(const string& filename, Datum* datum) {
  return ReadFileToDatum(filename, std::vector<float>(), datum);
}

/// Reads an image into `datum` and attaches `labels`. `height`, `width`,
/// `is_color` and `encoding` are passed through unchanged to the
/// implementation; every overload below forwards to this one.
bool ReadImageToDatum(const string& filename, const std::vector<float> labels,
    const int height, const int width, const bool is_color,
    const std::string & encoding, Datum* datum);

/// Same as the full overload with an empty `encoding`.
inline bool ReadImageToDatum(const string& filename,
    const std::vector<float> labels, const int height, const int width,
    const bool is_color, Datum* datum) {
  return ReadImageToDatum(filename, labels, height, width, is_color,
                          "", datum);
}

/// Same as the overload above with `is_color` set to true.
inline bool ReadImageToDatum(const string& filename,
    const std::vector<float> labels, const int height, const int width,
    Datum* datum) {
  return ReadImageToDatum(filename, labels, height, width, true, datum);
}

/// Passes 0 for both `height` and `width`.
inline bool ReadImageToDatum(const string& filename,
    const std::vector<float> labels, const bool is_color, Datum* datum) {
  return ReadImageToDatum(filename, labels, 0, 0, is_color, datum);
}

/// Passes 0 for `height` and `width` and true for `is_color`.
inline bool ReadImageToDatum(const string& filename,
    const std::vector<float> labels, Datum* datum) {
  return ReadImageToDatum(filename, labels, 0, 0, true, datum);
}

/// Passes 0 for `height` and `width`, true for `is_color`, and the given
/// `encoding`.
inline bool ReadImageToDatum(const string& filename,
    const std::vector<float> labels, const std::string & encoding,
    Datum* datum) {
  return ReadImageToDatum(filename, labels, 0, 0, true, encoding, datum);
}
完成上述修改之后,重新编译即可得到新的 convert_imageset 等可执行程序。
二.将自己的数据集转成leveldb格式
基本跟http://blog.csdn.net/messiran10/article/details/49159559的流程一样,主要是以下两点需要变化:
1.样本说明文件
需要把一维的label转成4维的label:每行为图像路径,后面跟4个以空格分隔的浮点数。例如:train_samples/10007.jpg 0.491667 0.529412 0.450000 0.352941
2.模型配置文件
需要把 softmax loss 层换成平方损失层(即欧氏距离损失层 EuclideanLoss)
需要去掉accuracy层(否则会出错)
https://github.com/olddocks/caffe-facialkp
- 如何用caffe解决回归问题
- Caffe 用于解决预测(回归)问题
- 如何用gdb调试caffe
- 如何通过牛顿方法解决Logistic回归问题 By 机器之心2017年8月09日 16:30 本文介绍了牛顿方法(Newton's Method),以及如何用它来解决 logistic 回归。log
- 如何用汇编语言解决汉诺塔问题……
- 如何用索引器解决下面的问题啊?
- 【教你如何用驱动人生解决驱动问题】
- 如何用命令行解决可执行文件的权限问题
- kResourceShowAll模式黑边问题如何用图片填充解决
- 如何用报表解决销售工作衡量问题
- 如何用CORS来解决JS中跨域的问题
- 如何用Spark解决一些经典MapReduce问题
- 如何用c语言解决数字河问题
- 如何用c语言解决假身份证问题
- 如何用C语言解决逻辑问题幸运的数字
- 如何用excel做多元回归分析
- 如何用fsck解决系统故障?
- windows-caffe 回归问题浅析
- LOAD DATA FROM MASTER的说明
- CKEDitor支持WORD格式的缩进问题
- OS X 10.11 安装Cocoapods 出现问题的解决方法
- 菜鸟成长记-UIScrollView
- android 常见的补间动画
- 如何用caffe解决回归问题
- FFMPEG视音频编解码零基础学习方法
- Java环境变量的配置 (Win10环境下)
- leetcode 12. Integer to Roman
- whitespaceAndNewlineCharacterSet stringByTrimmingCharactersInSet
- C++如何复制文件、移动文件、删除文件、新建目录?
- Linux同步工具:rsync
- swift简单学习之“ ? !”
- 单元测试UnitTest