Caffe源码 - ImageDataLayer

ImageDataLayer 是 Caffe 官方提供的数据层, 可直接从图像文件读取图像数据及其对应的 label.

#include "caffe/layers/image_data_layer.hpp"


1. image_data_layer.hpp


ImageDataLayer (const LayerParameter &param)

DataLayerSetUp (const vector< Blob< Dtype > * > &bottom, const vector< Blob< Dtype > * > &top)

ExactNumBottomBlobs () const // 返回该层所需 bottom blobs 的确切数目; 若该层不要求确切数目, 则返回 -1. ImageDataLayer 不需要 bottom blob, 返回 0.

ExactNumTopBlobs () const // 返回该层所需 top blobs 的确切数目; 若该层不要求确切数目, 则返回 -1. ImageDataLayer 输出 data 和 label 两个 top blob, 返回 2.



ShuffleImages ()

load_batch (Batch< Dtype > *batch)

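Protected Attributes:

shared_ptr< Caffe::RNG > prefetch_rng_ // ShuffleImages() 打乱样本顺序时使用的随机数生成器

vector< std::pair< std::string, int > > lines_ // 从 source 文件读入的 (图像路径, label) 列表

int lines_id_ // 下一个待读取样本在 lines_ 中的下标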




2. image_data_layer.cpp

#ifdef USE_OPENCV
#include <opencv2/core/core.hpp>

#include <fstream>  // NOLINT(readability/streams)
#include <iostream>  // NOLINT(readability/streams)
#include <string>
#include <utility>
#include <vector>

#include "caffe/data_transformer.hpp"
#include "caffe/layers/base_data_layer.hpp"
#include "caffe/layers/image_data_layer.hpp"
#include "caffe/util/benchmark.hpp"
#include "caffe/util/io.hpp"
#include "caffe/util/math_functions.hpp"
#include "caffe/util/rng.hpp"

namespace caffe {

// Stops the layer's internal thread when the layer is destroyed.
// The destructor is named without a template-id (`~ImageDataLayer()`), since
// `~ImageDataLayer<Dtype>()` is rejected by compilers in C++20 mode.
template <typename Dtype>
ImageDataLayer<Dtype>::~ImageDataLayer() {
  this->StopInternalThread();
}

// One-time setup of the layer.
//
// Reads the list file named by image_data_param().source() into lines_
// (one "<image path> <integer label>" entry per line), optionally shuffles
// the list and/or skips a random number of leading entries, then loads one
// image to infer the data shape and reshapes the outputs accordingly.
//
// bottom: not used by this function.
// top:    top[0] is reshaped to hold a batch of images, top[1] to hold the
//         batch_size labels.
//
// Aborts through CHECK if only one of new_height/new_width is set, if no
// entry could be read from the list file, if rand_skip selects more entries
// than exist, if the probe image cannot be loaded, or if batch_size <= 0.
template <typename Dtype>
void ImageDataLayer<Dtype>::DataLayerSetUp(const vector<Blob<Dtype>*>& bottom,
      const vector<Blob<Dtype>*>& top) {
  const int new_height = this->layer_param_.image_data_param().new_height();
  const int new_width  = this->layer_param_.image_data_param().new_width();
  const bool is_color  = this->layer_param_.image_data_param().is_color();
  string root_folder = this->layer_param_.image_data_param().root_folder();

  // Either both resize dimensions are 0 or both are positive.
  CHECK((new_height == 0 && new_width == 0) ||
      (new_height > 0 && new_width > 0)) << "Current implementation requires "
      "new_height and new_width to be set at the same time.";
  // Read the file with filenames and labels
  const string& source = this->layer_param_.image_data_param().source();
  LOG(INFO) << "Opening file " << source;
  std::ifstream infile(source.c_str());
  string line;
  size_t pos;
  int label;
  while (std::getline(infile, line)) {
    // Split on the LAST space: everything before it is the image path,
    // everything after it is parsed as the integer label.
    pos = line.find_last_of(' ');
    label = atoi(line.substr(pos + 1).c_str());
    lines_.push_back(std::make_pair(line.substr(0, pos), label));
  }

  // Also triggers when the file could not be opened, since nothing is read.
  CHECK(!lines_.empty()) << "File is empty";

  if (this->layer_param_.image_data_param().shuffle()) {
    // Randomly shuffle the order of the entries.
    LOG(INFO) << "Shuffling data";
    const unsigned int prefetch_rng_seed = caffe_rng_rand();
    prefetch_rng_.reset(new Caffe::RNG(prefetch_rng_seed));
    ShuffleImages();
  } else {
    // No shuffling and no random skip while training on a solver whose rank
    // is > 0: only warn, do not fail.
    if (this->phase_ == TRAIN && Caffe::solver_rank() > 0 &&
        this->layer_param_.image_data_param().rand_skip() == 0) {
      LOG(WARNING) << "Shuffling or skipping recommended for multi-GPU";
    }
  }
  LOG(INFO) << "A total of " << lines_.size() << " images.";

  lines_id_ = 0;
  // Check if we would need to randomly skip a few data points:
  // start reading at a random offset in [0, rand_skip).
  if (this->layer_param_.image_data_param().rand_skip()) {
    unsigned int skip = caffe_rng_rand() %
        this->layer_param_.image_data_param().rand_skip();
    LOG(INFO) << "Skipping first " << skip << " data points.";
    CHECK_GT(lines_.size(), skip) << "Not enough points to skip";
    lines_id_ = skip;
  }
  // Read an image, and use it to initialize the top blob.
  cv::Mat cv_img = ReadImageToCVMat(root_folder + lines_[lines_id_].first,
                                    new_height, new_width, is_color);
  // A cv::Mat without data means the image could not be read.
  CHECK(cv_img.data) << "Could not load " << lines_[lines_id_].first;
  // Use data_transformer to infer the expected blob shape from a cv_image.
  vector<int> top_shape = this->data_transformer_->InferBlobShape(cv_img);
  this->transformed_data_.Reshape(top_shape);
  // Reshape prefetch_data and top[0] according to the batch_size.
  const int batch_size = this->layer_param_.image_data_param().batch_size();
  CHECK_GT(batch_size, 0) << "Positive batch size required";
  top_shape[0] = batch_size;
  for (int i = 0; i < this->prefetch_.size(); ++i) {
    this->prefetch_[i]->data_.Reshape(top_shape);
  }
  top[0]->Reshape(top_shape);

  LOG(INFO) << "output data size: " << top[0]->num() << ","
      << top[0]->channels() << "," << top[0]->height() << ","
      << top[0]->width();
  // Labels: one value per item in the batch.
  vector<int> label_shape(1, batch_size);
  top[1]->Reshape(label_shape);
  for (int i = 0; i < this->prefetch_.size(); ++i) {
    this->prefetch_[i]->label_.Reshape(label_shape);
  }
}

// Shuffles lines_ in place using the generator held by prefetch_rng_.
// prefetch_rng_ is only created in DataLayerSetUp when shuffle() is set, so
// this must not be called otherwise.
template <typename Dtype>
void ImageDataLayer<Dtype>::ShuffleImages() {
  caffe::rng_t* prefetch_rng =
      static_cast<caffe::rng_t*>(prefetch_rng_->generator());
  shuffle(lines_.begin(), lines_.end(), prefetch_rng);
}

// This function is called on prefetch thread.
//
// Fills `batch` with batch_size images and labels, reading entries from
// lines_ starting at lines_id_. When the end of the list is reached, reading
// wraps around to the first entry (reshuffling first if shuffle() is set),
// so a batch may span the end of one pass and the start of the next.
//
// Aborts through CHECK if the batch or transformed_data_ has a zero count,
// or if any image cannot be loaded.
template <typename Dtype>
void ImageDataLayer<Dtype>::load_batch(Batch<Dtype>* batch) {
  CPUTimer batch_timer;
  batch_timer.Start();
  double read_time = 0;
  double trans_time = 0;
  CPUTimer timer;
  CHECK(batch->data_.count());
  CHECK(this->transformed_data_.count());
  ImageDataParameter image_data_param = this->layer_param_.image_data_param();
  const int batch_size = image_data_param.batch_size();
  const int new_height = image_data_param.new_height();
  const int new_width = image_data_param.new_width();
  const bool is_color = image_data_param.is_color();
  string root_folder = image_data_param.root_folder();

  // Reshape according to the first image of each batch
  // on single input batches allows for inputs of varying dimension.
  // This first read is only used to infer the shape; the same image is read
  // again inside the loop below.
  cv::Mat cv_img = ReadImageToCVMat(root_folder + lines_[lines_id_].first,
      new_height, new_width, is_color);
  CHECK(cv_img.data) << "Could not load " << lines_[lines_id_].first;
  // Use data_transformer to infer the expected blob shape from a cv_img.
  vector<int> top_shape = this->data_transformer_->InferBlobShape(cv_img);
  this->transformed_data_.Reshape(top_shape);
  // Reshape batch according to the batch_size.
  top_shape[0] = batch_size;
  batch->data_.Reshape(top_shape);

  Dtype* prefetch_data = batch->data_.mutable_cpu_data();
  Dtype* prefetch_label = batch->label_.mutable_cpu_data();

  // datum scales
  const int lines_size = lines_.size();
  for (int item_id = 0; item_id < batch_size; ++item_id) {
    // get a blob
    timer.Start();
    CHECK_GT(lines_size, lines_id_);
    cv::Mat cv_img = ReadImageToCVMat(root_folder + lines_[lines_id_].first,
        new_height, new_width, is_color);
    CHECK(cv_img.data) << "Could not load " << lines_[lines_id_].first;
    read_time += timer.MicroSeconds();
    timer.Start();
    // Apply transformations (mirror, crop...) to the image.
    // transformed_data_ is pointed at this item's slot inside the batch
    // buffer, so Transform() writes its result directly into the batch.
    int offset = batch->data_.offset(item_id);
    this->transformed_data_.set_cpu_data(prefetch_data + offset);
    this->data_transformer_->Transform(cv_img, &(this->transformed_data_));
    trans_time += timer.MicroSeconds();

    prefetch_label[item_id] = lines_[lines_id_].second;
    // go to the next iter
    lines_id_++;
    if (lines_id_ >= lines_size) {
      // We have reached the end. Restart from the first.
      DLOG(INFO) << "Restarting data prefetching from start.";
      lines_id_ = 0;
      if (this->layer_param_.image_data_param().shuffle()) {
        ShuffleImages();
      }
    }
  }
  batch_timer.Stop();
  DLOG(INFO) << "Prefetch batch: " << batch_timer.MilliSeconds() << " ms.";
  DLOG(INFO) << "     Read time: " << read_time / 1000 << " ms.";
  DLOG(INFO) << "Transform time: " << trans_time / 1000 << " ms.";
}

INSTANTIATE_CLASS(ImageDataLayer);
REGISTER_LAYER_CLASS(ImageData);

}  // namespace caffe
#endif  // USE_OPENCV

3. 数据格式及网络层定义


# train.txt
001.jpg 1
002.jpg 2
003.jpg 3


# train_val.prototxt
layer {
  name: "demo"
  type: "ImageData"
  top: "data"
  top: "label"
  include {
    phase: TRAIN
  }
  transform_param {
    scale: 0.00390625
    mean_value: 128
  }
  image_data_param {
    source: "/path/to/train.txt"
    # root_folder 需以 "/" 结尾: 代码中直接以 root_folder + 文件名 拼接图像路径
    root_folder: "/path/to/images/"
    new_height: 224
    new_width: 224
    batch_size: 32
    shuffle: true
  }
}


[1] - caffe::ImageDataLayer