【caffe源代码的梳理之四】caffe数据I/O模块——数据读取层DataLayer

来源:互联网 发布:韦德公牛数据 编辑:程序博客网 时间:2024/05/01 10:00

作者:JackGao24 博客园
作者:JackGao16 CSDN
文章链接:http://blog.csdn.net/u013108511/article/details/76804540
邮箱:gshuai16@mail.ustc.edu.cn

数据读取层

  caffe的数据读取层(DataLayer)是Layer的一个派生类,除了读取LMDB和LEVELDB之外,也可以从原始图像直接读取(ImageDataLayer)。

1、数据结构的描述

// Parameters consumed by the DataLayer.
message DataParameter {
  // Backing database format for the input data.
  enum DB {
    LEVELDB = 0;
    LMDB = 1;
  }
  // Path of the source database.
  optional string source = 1;
  // Number of images contained in one mini-batch.
  optional uint32 batch_size = 4;
  // The following four parameters are deprecated here: they have been
  // moved to TransformationParameter.
  optional float scale = 2 [default = 1];
  optional string mean_file = 3;
  optional uint32 crop_size = 5 [default = 0];
  optional bool mirror = 6 [default = false];
  // Randomly skip some inputs at startup; the number skipped is
  // rand_skip * rand(0,1). Useful to desynchronize parallel workers.
  optional uint32 rand_skip = 7 [default = 0];
  // DB backend used for the input data; defaults to LEVELDB.
  // FIX: was `= 8[dafault=LEVELDB]` — typo "dafault" and missing ';'.
  optional DB backend = 8 [default = LEVELDB];
  // Force the encoded image to be decoded as a 3-channel color image.
  // FIX: added missing ';'.
  optional bool force_encoded_color = 9 [default = false];
  // Prefetch queue depth (number of batches staged in host memory,
  // 4 batches by default). FIX: added missing ';'.
  optional uint32 prefetch = 10 [default = 4];
}

2、数据读取层的实现

数据读取层的声明位于 include/caffe/data_layers.hpp 中(与下面代码中的包含保护宏 CAFFE_DATA_LAYERS_HPP_ 一致)

#ifndef CAFFE_DATA_LAYERS_HPP_#define CAFFE_DATA_LAYERS_HPP_#include <vector>#include "caffe/blob.hpp"#include "caffe/data_transformer.hpp"#include "caffe/internal_thread.hpp"#include "caffe/layer.hpp"#include "caffe/proto/caffe.pb.h"#include "caffe/util/blocking_queue.hpp"namespace caffe {/** * @brief Provides base for data layers that feed blobs to the Net. * * TODO(dox): thorough documentation for Forward and proto params. *///基本数据层,派生于Layertemplate <typename Dtype>class BaseDataLayer : public Layer<Dtype> { public:  //显式的构造函数  explicit BaseDataLayer(const LayerParameter& param);  // This method may not be overridden except by the BasePrefetchingDataLayer.  //通用层配置功能。之后调用DataLayerSetUp进行数据读取层的特殊配置  virtual void LayerSetUp(const vector<Blob<Dtype>*>& bottom,      const vector<Blob<Dtype>*>& top);  virtual void DataLayerSetUp(const vector<Blob<Dtype>*>& bottom,      const vector<Blob<Dtype>*>& top) {}  //数据读取层没有输入Bottom Blob,变形操作不是很重要  virtual void Reshape(const vector<Blob<Dtype>*>& bottom,      const vector<Blob<Dtype>*>& top) {}  //反向传播函数不需要做任何事情  virtual void Backward_cpu(const vector<Blob<Dtype>*>& top,      const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom) {}  virtual void Backward_gpu(const vector<Blob<Dtype>*>& top,      const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom) {} protected:  TransformationParameter transform_param_;//数据预处理变换器参数  shared_ptr<DataTransformer<Dtype> > data_transformer_;//数据预处理变换器  bool output_labels_;//是否输出标签数据};//批量数据,用于存放数据读取层输出template <typename Dtype>class Batch { public:  Blob<Dtype> data_, label_;//两个Blob分别用来存储图片数据和标签};//带预取功能的数据读取层,派生于BaseDataLayer和InternalThreadtemplate <typename Dtype>class BasePrefetchingDataLayer :    public BaseDataLayer<Dtype>, public InternalThread { public:  //显式的构造函数  explicit BasePrefetchingDataLayer(const LayerParameter& param);  //通用数据层层配置功能。之后调用DataLayerSetUp进行特定的数据读取层的特殊配置  void LayerSetUp(const vector<Blob<Dtype>*>& bottom,      const 
vector<Blob<Dtype>*>& top);  //前向传播  virtual void Forward_cpu(const vector<Blob<Dtype>*>& bottom,      const vector<Blob<Dtype>*>& top);//前向  virtual void Forward_gpu(const vector<Blob<Dtype>*>& bottom,      const vector<Blob<Dtype>*>& top);  //预取得数据批数量数目  static const int PREFETCH_COUNT = 3;  protected:   virtual void InternalThreadEntry();//内部线程入口   virtual void load_batch(Batch<Dtype>* batch) = 0;//载入批量数据,纯虚函数   vector<shared_ptr<Batch<Dtype> > > prefetch_ [PREFETCH_COUNT];//预取Buffer   BlockingQueue<Batch<Dtype>*> prefetch_free_;//空闲Batch队列   BlockingQueue<Batch<Dtype>*> prefetch_full_;//已加载Batch队列   Batch<Dtype>* prefetch_current_;   Blob<Dtype> transformed_data_;//变换后的数据};}  #endif  

数据读取层的具体实现位于src/caffe/layers/base_data_layer.cpp中:

#include <boost/thread.hpp>
#include <vector>

#include "caffe/blob.hpp"
#include "caffe/data_transformer.hpp"
#include "caffe/internal_thread.hpp"
#include "caffe/layer.hpp"
#include "caffe/layers/base_data_layer.hpp"
#include "caffe/proto/caffe.pb.h"
#include "caffe/util/blocking_queue.hpp"

namespace caffe {

// Constructor: forwards the layer parameter to Layer and captures the
// data-transformer (preprocessing) parameters.
template <typename Dtype>
BaseDataLayer<Dtype>::BaseDataLayer(const LayerParameter& param)
    : Layer<Dtype>(param),
      transform_param_(param.transform_param()) {
}

// BaseDataLayer setup: decide whether labels are produced, build the
// data transformer, then delegate top-blob shaping to the subclass.
template <typename Dtype>
void BaseDataLayer<Dtype>::LayerSetUp(const vector<Blob<Dtype>*>& bottom,
      const vector<Blob<Dtype>*>& top) {
  // One top blob -> data only; two -> data and label.
  if (top.size() == 1) {
    output_labels_ = false;
  } else {
    output_labels_ = true;
  }
  data_transformer_.reset(
      new DataTransformer<Dtype>(transform_param_, this->phase_));
  data_transformer_->InitRand();
  // The subclasses should setup the size of bottom and top.
  DataLayerSetUp(bottom, top);
}

// BasePrefetchingDataLayer constructor: allocate data_param().prefetch()
// Batch objects and park them all in the free queue.
template <typename Dtype>
BasePrefetchingDataLayer<Dtype>::BasePrefetchingDataLayer(
    const LayerParameter& param)
    : BaseDataLayer<Dtype>(param),
      prefetch_(param.data_param().prefetch()),
      prefetch_free_(), prefetch_full_(), prefetch_current_() {
  for (int i = 0; i < prefetch_.size(); ++i) {
    prefetch_[i].reset(new Batch<Dtype>());
    prefetch_free_.push(prefetch_[i].get());
  }
}

// BasePrefetchingDataLayer setup: touch every prefetch blob's memory on
// the main thread before starting the worker, then launch the worker.
template <typename Dtype>
void BasePrefetchingDataLayer<Dtype>::LayerSetUp(
    const vector<Blob<Dtype>*>& bottom, const vector<Blob<Dtype>*>& top) {
  BaseDataLayer<Dtype>::LayerSetUp(bottom, top);
  // Before starting the prefetch thread, call mutable_cpu_data() (and,
  // in GPU mode, mutable_gpu_data()) so cudaMalloc happens here rather
  // than concurrently from two threads, which can make CUDA API calls
  // fail.
  for (int i = 0; i < prefetch_.size(); ++i) {
    prefetch_[i]->data_.mutable_cpu_data();
    if (this->output_labels_) {
      prefetch_[i]->label_.mutable_cpu_data();
    }
  }
#ifndef CPU_ONLY
  if (Caffe::mode() == Caffe::GPU) {
    for (int i = 0; i < prefetch_.size(); ++i) {
      prefetch_[i]->data_.mutable_gpu_data();
      if (this->output_labels_) {
        prefetch_[i]->label_.mutable_gpu_data();
      }
    }
  }
#endif
  DLOG(INFO) << "Initializing prefetch";
  this->data_transformer_->InitRand();
  StartInternalThread();  // launch the prefetch worker thread
  DLOG(INFO) << "Prefetch initialized.";
}

// Prefetch-thread entry point: repeatedly take an empty batch, fill it
// via load_batch, optionally stage it on the GPU, and hand it to the
// full queue until interrupted.
template <typename Dtype>
void BasePrefetchingDataLayer<Dtype>::InternalThreadEntry() {
#ifndef CPU_ONLY
  // Non-blocking CUDA stream used to push batches to the GPU.
  cudaStream_t stream;
  if (Caffe::mode() == Caffe::GPU) {
    CUDA_CHECK(cudaStreamCreateWithFlags(&stream, cudaStreamNonBlocking));
  }
#endif
  try {
    while (!must_stop()) {
      Batch<Dtype>* batch = prefetch_free_.pop();  // blocks if none free
      load_batch(batch);
#ifndef CPU_ONLY
      if (Caffe::mode() == Caffe::GPU) {
        batch->data_.data().get()->async_gpu_push(stream);
        if (this->output_labels_) {
          batch->label_.data().get()->async_gpu_push(stream);
        }
        CUDA_CHECK(cudaStreamSynchronize(stream));  // wait for the copy
      }
#endif
      prefetch_full_.push(batch);  // hand the loaded batch to consumers
    }
  } catch (boost::thread_interrupted&) {
    // Interrupted exception is expected on shutdown.
  }
#ifndef CPU_ONLY
  if (Caffe::mode() == Caffe::GPU) {
    CUDA_CHECK(cudaStreamDestroy(stream));
  }
#endif
}

// Forward pass: recycle the previously consumed batch, pop the next
// loaded batch, and expose its data/label memory through the top blobs.
template <typename Dtype>
void BasePrefetchingDataLayer<Dtype>::Forward_cpu(
    const vector<Blob<Dtype>*>& bottom, const vector<Blob<Dtype>*>& top) {
  // Return the batch consumed by the previous call to the free queue.
  if (prefetch_current_) {
    prefetch_free_.push(prefetch_current_);
  }
  prefetch_current_ = prefetch_full_.pop("Waiting for data");
  // Reshape to loaded data.
  top[0]->ReshapeLike(prefetch_current_->data_);
  top[0]->set_cpu_data(prefetch_current_->data_.mutable_cpu_data());
  if (this->output_labels_) {
    // Reshape to loaded labels.
    top[1]->ReshapeLike(prefetch_current_->label_);
    top[1]->set_cpu_data(prefetch_current_->label_.mutable_cpu_data());
  }
  // FIX: removed the trailing `perfetch_free_.push(batch);` — both
  // identifiers are undeclared here ("perfetch_free_" is a misspelling
  // and `batch` is not in scope); the current batch is recycled at the
  // start of the next call instead.
}

#ifdef CPU_ONLY
STUB_GPU_FORWARD(BasePrefetchingDataLayer, Forward);
#endif

INSTANTIATE_CLASS(BaseDataLayer);
INSTANTIATE_CLASS(BasePrefetchingDataLayer);

}  // namespace caffe
原创粉丝点击