MXNet Tutorial Series — Code Reading 2: The Convolution (conv) Operator

Caffe expresses every computation as a layer; MXNet instead describes computations directly as operators.
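
Concretely, every MXNet operator derives from the Operator base class and overrides two virtual methods. A minimal sketch of that interface, simplified from mxnet/operator.h (optional overloads elided), looks like this:

  // Simplified sketch of mxnet/operator.h, not the full class.
  class Operator {
   public:
    virtual ~Operator() {}
    // Compute outputs from inputs.
    virtual void Forward(const OpContext &ctx,
                         const std::vector<TBlob> &in_data,
                         const std::vector<OpReqType> &req,
                         const std::vector<TBlob> &out_data,
                         const std::vector<TBlob> &aux_states) = 0;
    // Compute input gradients from output gradients.
    virtual void Backward(const OpContext &ctx,
                          const std::vector<TBlob> &out_grad,
                          const std::vector<TBlob> &in_data,
                          const std::vector<TBlob> &out_data,
                          const std::vector<OpReqType> &req,
                          const std::vector<TBlob> &in_grad,
                          const std::vector<TBlob> &aux_states);
  };

ConvolutionOp, read below, overrides exactly these two methods.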

The code lives in src/operator. The walkthrough covers three files (a sketch of how they fit together follows the list):

convolution-inl.h

convolution.cc

convolution.cu
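
The split follows a common MXNet pattern: convolution-inl.h holds the device-agnostic ConvolutionOp template, while convolution.cc and convolution.cu instantiate it for CPU and GPU through the CreateOp function declared near the end of the header. As a rough sketch (simplified; the real convolution.cc also attaches per-argument documentation to the registration), the CPU side looks something like:

  // Rough sketch of convolution.cc; simplified, not the verbatim file.
  #include "./convolution-inl.h"

  namespace mxnet {
  namespace op {
  template<>
  Operator* CreateOp<cpu>(ConvolutionParam param, int dtype,
                          std::vector<TShape> *in_shape,
                          std::vector<TShape> *out_shape,
                          Context ctx) {
    Operator *op = NULL;
    // Expand the body once per supported floating-point DType.
    MSHADOW_REAL_TYPE_SWITCH(dtype, DType, {
      op = new ConvolutionOp<cpu, DType>(param);
    })
    return op;
  }

  DMLC_REGISTER_PARAMETER(ConvolutionParam);

  MXNET_REGISTER_OP_PROPERTY(Convolution, ConvolutionProp)
  .describe("Apply convolution to input then add a bias.");
  }  // namespace op
  }  // namespace mxnet

Now the header itself.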

/*!
 * Copyright (c) 2015 by Contributors
 * \file convolution-inl.h
 * \brief
 * \author Bing Xu
 */
// 2. Include guard
#ifndef MXNET_OPERATOR_CONVOLUTION_INL_H_
#define MXNET_OPERATOR_CONVOLUTION_INL_H_

// 3. Header includes
// 3.1 MXNet / dmlc headers
#include <mxnet/io.h>
#include <mxnet/base.h>
#include <mxnet/ndarray.h>
#include <mxnet/operator.h>
#include <dmlc/logging.h>
#include <dmlc/optional.h>
// 3.2 Standard library headers
#include <algorithm>
#include <map>
#include <vector>
#include <string>
#include <utility>
#include "./operator_common.h"

namespace mxnet {  // fixed
namespace op {     // fixed

namespace conv {
enum ConvolutionOpInputs {kData, kWeight, kBias};
enum ConvolutionOpOutputs {kOut};
enum ConvolutionOpResource {kTempSpace};
enum ConvolutionOpCudnnTune {kOff, kLimited, kFastest};
}

struct ConvolutionParam : public dmlc::Parameter<ConvolutionParam> {
  // Layer parameters
  TShape kernel;                   // kernel size (Caffe: kernel_size)
  TShape stride;                   // stride
  TShape dilate;                   // dilation
  TShape pad;                      // padding (Caffe: pad)
  uint32_t num_filter;             // number of filters, i.e. output channels (Caffe: num_output)
  uint32_t num_group;              // number of groups (Caffe: group)
  uint64_t workspace;              // temporary workspace limit
  bool no_bias;                    // whether to disable the bias term
  dmlc::optional<int> cudnn_tune;  // cuDNN autotuning mode
  bool cudnn_off;                  // whether to turn off cuDNN acceleration
  dmlc::optional<int> layout;
  DMLC_DECLARE_PARAMETER(ConvolutionParam) {
    DMLC_DECLARE_FIELD(kernel).describe("convolution kernel size: (h, w) or (d, h, w)");
    DMLC_DECLARE_FIELD(stride).set_default(TShape())
    .describe("convolution stride: (h, w) or (d, h, w)");
    DMLC_DECLARE_FIELD(dilate).set_default(TShape())
    .describe("convolution dilate: (h, w) or (d, h, w)");
    DMLC_DECLARE_FIELD(pad).set_default(TShape())
    .describe("pad for convolution: (h, w) or (d, h, w)");
    DMLC_DECLARE_FIELD(num_filter).set_range(1, 100000)
    .describe("convolution filter(channel) number");
    DMLC_DECLARE_FIELD(num_group).set_default(1)
    .describe("Number of group partitions. Equivalent to slicing input into num_group\n    "
              "partitions, apply convolution on each, then concatenate the results");
    DMLC_DECLARE_FIELD(workspace).set_default(1024).set_range(0, 8192)
    .describe("Maximum tmp workspace allowed for convolution (MB).");
    DMLC_DECLARE_FIELD(no_bias).set_default(false)
    .describe("Whether to disable bias parameter.");
    DMLC_DECLARE_FIELD(cudnn_tune)
    .add_enum("off", conv::kOff)
    .add_enum("limited_workspace", conv::kLimited)
    .add_enum("fastest", conv::kFastest)
    .set_default(dmlc::optional<int>())
    .describe("Whether to pick convolution algo by running performance test.\n    "
              "Leads to higher startup time but may give faster speed. Options are:\n    "
              "'off': no tuning\n    "
              "'limited_workspace': run test and pick the fastest algorithm "
              "that doesn't exceed workspace limit.\n    "
              "'fastest': pick the fastest algorithm and ignore workspace limit.\n    "
              "If set to None (default), behavior is determined by environment\n    "
              "variable MXNET_CUDNN_AUTOTUNE_DEFAULT: 0 for off,\n    "
              "1 for limited workspace (default), 2 for fastest.");
    DMLC_DECLARE_FIELD(cudnn_off).set_default(false)
    .describe("Turn off cudnn for this layer.");
    DMLC_DECLARE_FIELD(layout)
    .add_enum("NCHW", mshadow::kNCHW)
    .add_enum("NHWC", mshadow::kNHWC)
    .add_enum("NCDHW", mshadow::kNCDHW)
    .add_enum("NDHWC", mshadow::kNDHWC)
    .set_default(dmlc::optional<int>())
    .describe("Set layout for input, output and weight. Empty for\n    "
              "default layout: NCHW for 2d and NCDHW for 3d.");
  }
};
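
The dmlc::Parameter base class supplies Init, which parses string key/value pairs into the typed fields above. A hypothetical call, written only for illustration (not part of the header):

  // Hypothetical illustration of dmlc::Parameter parsing.
  ConvolutionParam param;
  param.Init(std::vector<std::pair<std::string, std::string> >{
      {"kernel", "(3,3)"},    // parsed into the TShape field
      {"num_filter", "64"},   // parsed into uint32_t
      {"no_bias", "true"}});  // parsed into bool
  // Unspecified fields keep their set_default() values, e.g. workspace == 1024.

Next comes the operator itself.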
// The convolution operation, the counterpart of Caffe's conv_layer.cpp.
// The xpu template parameter abstracts over CPU and GPU (mixed xpu programming).
template<typename xpu, typename DType>
class ConvolutionOp : public Operator {
 public:
  // Constructor: store the parameters
  explicit ConvolutionOp(ConvolutionParam p) {
    this->param_ = p;
    // convert MBytes first to Bytes and then to elements.
    param_.workspace = (param_.workspace << 20) / sizeof(DType);
    CHECK(param_.layout.value() == mshadow::kNCHW ||
          param_.layout.value() == mshadow::kNCDHW)
      << "Only support NCHW and NCDHW layout";
  }

  // Forward pass
  virtual void Forward(const OpContext &ctx,
                       const std::vector<TBlob> &in_data,
                       const std::vector<OpReqType> &req,
                       const std::vector<TBlob> &out_data,
                       const std::vector<TBlob> &aux_args) {
    using namespace mshadow;
    using namespace mshadow::expr;
    CHECK_EQ(req[conv::kOut], kWriteTo);
    size_t expected = param_.no_bias ? 2 : 3;
    CHECK_EQ(in_data.size(), expected);
    CHECK_EQ(out_data.size(), 1);
    Stream<xpu> *s = ctx.get_stream<xpu>();
    if (param_.kernel.ndim() > 2) {
      LOG(FATAL) << "Volume convolution is not implmented in mshadow";
    }
    // Input: 4D image data; a Tensor plays the role of a Caffe Blob
    Tensor<xpu, 4, DType> data = in_data[conv::kData].get<xpu, 4, DType>(s);
    Shape<3> wmat_shape =
        Shape3(param_.num_group,
               param_.num_filter / param_.num_group,
               data.shape_[1] / param_.num_group * param_.kernel[0] * param_.kernel[1]);
    Tensor<xpu, 3, DType> wmat =
        in_data[conv::kWeight].get_with_shape<xpu, 3, DType>(wmat_shape, s);
    Tensor<xpu, 4, DType> out = out_data[conv::kOut].get<xpu, 4, DType>(s);
#if defined(__CUDACC__)
    CHECK_EQ(s->blas_handle_ownership_, Stream<xpu>::OwnHandle)
        << "Must init CuBLAS handle in stream";
#endif
    // Batch size, like Caffe's batch size; the first Tensor dimension
    // corresponds to a Blob's num
    const index_t nbatch = data.size(0);
    Tensor<xpu, 1, DType> workspace =
        ctx.requested[conv::kTempSpace].get_space_typed<xpu, 1, DType>(
            Shape1(this->InitTemp(data.shape_, out.shape_)), s);
    for (index_t i = 0; i < nbatch; i += nstep_) {  // process nstep_ images at a time
      const index_t step = std::min(nstep_, nbatch - i);
      Tensor<xpu, 2, DType> temp_col = Tensor<xpu, 2, DType>(workspace.dptr_,
                                               Shape2(shape_colunit_[0],
                                                      shape_colunit_[1] * step), s);
      Tensor<xpu, 3, DType> temp_dst = Tensor<xpu, 3, DType>(
                                               workspace.dptr_ + temp_col.shape_.Size(),
                                               Shape3(shape_dstunit_[0],
                                                      shape_dstunit_[1],
                                                      shape_dstunit_[2] * step), s);
      if (param_.pad[0] == 0 && param_.pad[1] == 0) {
        temp_col = unpack_patch2col(data.Slice(i, i + step),
                                    param_.kernel[0],
                                    param_.kernel[1],
                                    param_.stride[0],
                                    param_.stride[1],
                                    param_.dilate[0],
                                    param_.dilate[1]);
      } else {
        temp_col = unpack_patch2col(pad(data.Slice(i, i + step),
                                    param_.pad[0], param_.pad[1]),
                                    param_.kernel[0],
                                    param_.kernel[1],
                                    param_.stride[0],
                                    param_.stride[1],
                                    param_.dilate[0],
                                    param_.dilate[1]);
      }
      const index_t gstride = temp_col.size(0) / param_.num_group;
      for (uint32_t gid = 0; gid < param_.num_group; ++gid) {
        mshadow::Tensor<xpu, 2, DType> tmpc = temp_col.Slice(gstride * gid,
                                       gstride * (gid + 1));
        temp_dst[gid] = dot(wmat[gid], tmpc);
      }
      out.Slice(i, i + step) = swapaxis<1, 0>(reshape(temp_dst,
                                              mshadow::Shape4(param_.num_filter,
                                                  step,
                                                  out.size(2),
                                                  out.size(3))));
    }
    if (!param_.no_bias) {
      // add bias, broadcast bias to dim 1: channel
      Tensor<xpu, 1, DType> bias = in_data[conv::kBias].get<xpu, 1, DType>(s);
      out += broadcast<1>(bias, out.shape_);
    }
  }
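
This is the classic im2col formulation. A worked example of the shapes: with a single 3×32×32 input image, kernel=(3,3), stride=(1,1), pad=(1,1), num_filter=64 and num_group=1, the output spatial size stays 32×32. unpack_patch2col lays out temp_col as a (3·3·3) × (32·32) = 27 × 1024 matrix, wmat[0] has shape 64 × 27, and the single dot call produces temp_dst[0] of shape 64 × 1024, which reshape and swapaxis<1, 0> turn back into a 1×64×32×32 output. The whole convolution thus reduces to one GEMM per group per step.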
  // Backward pass
  virtual void Backward(const OpContext &ctx,
                        const std::vector<TBlob> &out_grad,
                        const std::vector<TBlob> &in_data,
                        const std::vector<TBlob> &out_data,
                        const std::vector<OpReqType> &req,
                        const std::vector<TBlob> &in_grad,
                        const std::vector<TBlob> &aux_args) {
    using namespace mshadow;
    using namespace mshadow::expr;
    // TODO(bing): check the BLAS Handle, be careful
    if (param_.kernel.ndim() > 2) {
      LOG(FATAL) << "Volume convolution is not implmented in mshadow";
    }
    CHECK_EQ(out_grad.size(), 1);
    size_t expected = param_.no_bias == 0 ? 3 : 2;
    CHECK(in_data.size() == expected && in_grad.size() == expected);
    CHECK_EQ(req.size(), expected);
    CHECK_EQ(in_data[conv::kWeight].CheckContiguous(), true);
    // get data
    Stream<xpu> *s = ctx.get_stream<xpu>();
    Tensor<xpu, 4, DType> data = in_data[conv::kData].get<xpu, 4, DType>(s);
    Shape<3> wmat_shape =
        Shape3(param_.num_group,
               param_.num_filter / param_.num_group,
               data.shape_[1] / param_.num_group * param_.kernel[0] * param_.kernel[1]);
    Tensor<xpu, 3, DType> wmat =
        in_data[conv::kWeight].get_with_shape<xpu, 3, DType>(wmat_shape, s);
    Tensor<xpu, 4, DType> grad = out_grad[conv::kOut].get<xpu, 4, DType>(s);
    Tensor<xpu, 4, DType> gdata = in_grad[conv::kData].get<xpu, 4, DType>(s);
    Tensor<xpu, 3, DType> gwmat =
        in_grad[conv::kWeight].get_with_shape<xpu, 3, DType>(wmat_shape, s);
#if defined(__CUDACC__)
    CHECK_EQ(s->blas_handle_ownership_, Stream<xpu>::OwnHandle)
        << "Must init CuBLAS handle in stream";
#endif
    const index_t nbatch = data.size(0);
    Tensor<xpu, 1, DType> workspace =
        ctx.requested[conv::kTempSpace].get_space_typed<xpu, 1, DType>(
            Shape1(this->InitTemp(data.shape_, grad.shape_)), s);
    for (index_t i = 0; i < nbatch; i += nstep_) {
      const index_t step = std::min(nstep_, nbatch - i);
      Tensor<xpu, 2, DType> temp_col = Tensor<xpu, 2, DType>(workspace.dptr_,
                                               Shape2(shape_colunit_[0],
                                                      shape_colunit_[1] * step), s);
      Tensor<xpu, 3, DType> temp_dst = Tensor<xpu, 3, DType>(
                                               workspace.dptr_ + temp_col.shape_.Size(),
                                               Shape3(shape_dstunit_[0],
                                                      shape_dstunit_[1],
                                                      shape_dstunit_[2] * step), s);
      temp_dst = reshape(swapaxis<1, 0>(grad.Slice(i, i + step)), temp_dst.shape_);
      if (param_.pad[0] == 0 && param_.pad[1] == 0) {
        temp_col = unpack_patch2col(data.Slice(i, i + step),
                                    param_.kernel[0],
                                    param_.kernel[1],
                                    param_.stride[0],
                                    param_.stride[1],
                                    param_.dilate[0],
                                    param_.dilate[1]);
      } else {
        temp_col = unpack_patch2col(pad(data.Slice(i, i + step), param_.pad[0], param_.pad[1]),
                                    param_.kernel[0],
                                    param_.kernel[1],
                                    param_.stride[0],
                                    param_.stride[1],
                                    param_.dilate[0],
                                    param_.dilate[1]);
      }
      const index_t gstride = temp_col.size(0) / param_.num_group;
      // Weight gradient: accumulate dY * col^T per group
      for (uint32_t gid = 0; gid < param_.num_group; ++gid) {
        Tensor<xpu, 2, DType> tmpc = temp_col.Slice(gstride * gid, gstride * (gid + 1));
        if (i == 0) {
          Tensor<xpu, 2, DType> tmp_gwmat = gwmat[gid];
          Assign(tmp_gwmat, req[conv::kWeight], dot(temp_dst[gid], tmpc.T()));
        } else {
          gwmat[gid] += dot(temp_dst[gid], tmpc.T());
        }
      }
      // Data gradient: W^T * dY, written back into the column buffer
      for (uint32_t gid = 0; gid < param_.num_group; ++gid) {
        Tensor<xpu, 2, DType> tmpc = temp_col.Slice(gstride * gid, gstride * (gid + 1));
        tmpc = dot(wmat[gid].T(), temp_dst[gid]);
      }
      if (param_.pad[0] == 0 && param_.pad[1] == 0) {
        Assign(gdata.Slice(i, i + step), req[conv::kData],
               pack_col2patch(temp_col,
                              data.Slice(i, i + step).shape_,
                              param_.kernel[0],
                              param_.kernel[1],
                              param_.stride[0],
                              param_.stride[1],
                              param_.dilate[0],
                              param_.dilate[1]));
      } else {
        Shape<4> pshape = data.Slice(i, i + step).shape_;
        pshape[2] += 2 * param_.pad[0];
        pshape[3] += 2 * param_.pad[1];
        Assign(gdata.Slice(i, i + step), req[conv::kData],
               crop(pack_col2patch(temp_col,
                                   pshape,
                                   param_.kernel[0],
                                   param_.kernel[1],
                                   param_.stride[0],
                                   param_.stride[1],
                                   param_.dilate[0],
                                   param_.dilate[1]),
                    gdata[i][0].shape_));
      }
    }
    if (!param_.no_bias) {
      Tensor<xpu, 1, DType> gbias = in_grad[conv::kBias].get<xpu, 1, DType>(s);
      Assign(gbias, req[conv::kBias], sumall_except_dim<1>(grad));
    }
  }
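
In matrix form this is the standard im2col backward. Per group, the weight gradient is dL/dW = dY · colᵀ (the dot(temp_dst[gid], tmpc.T()) call), and the data gradient is col = Wᵀ · dY folded back into image layout by pack_col2patch, the inverse of unpack_patch2col; overlapping patches are summed, which is exactly what the convolution gradient requires. Note the i == 0 special case: on the first step the weight gradient is written according to req[conv::kWeight], while later steps always accumulate with +=. The bias gradient is simply the output gradient summed over every dimension except the channel dimension.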

 private:
  // Compute the im2col workspace shapes and how many images fit in one step
  inline index_t InitTemp(const mshadow::Shape<4> &ishape,
                          const mshadow::Shape<4> &oshape) {
    const int ksize_y = param_.kernel[0];
    const int ksize_x = param_.kernel[1];
    shape_colunit_ = mshadow::Shape2(ishape[1] * ksize_y * ksize_x,
                                     oshape[2] * oshape[3]);
    shape_dstunit_ = mshadow::Shape3(param_.num_group,
                                     param_.num_filter / param_.num_group,
                                     oshape[2] * oshape[3]);
    // param_.workspace is in elements of sizeof(DType)
    // if param_.workspace is set to zero the nstep_ equals ishape[0] (batch)
    nstep_ = std::max(
        std::min(
            static_cast<index_t>(
                param_.workspace / (shape_colunit_.Size() + shape_dstunit_.Size())),
            ishape[0]),
        1U);
    mshadow::Shape<2> scol = mshadow::Shape2(shape_colunit_[0],
                                             shape_colunit_[1] * nstep_);
    mshadow::Shape<3> sdst = mshadow::Shape3(shape_dstunit_[0],
                                             shape_dstunit_[1],
                                             shape_dstunit_[2] * nstep_);
    index_t required_size = scol.Size() + sdst.Size();
    CHECK_GE(param_.workspace, required_size)
      << "\nMinimum workspace size: " << required_size * sizeof(DType) << " Bytes\n"
      << "Given: " << param_.workspace * sizeof(DType) << " Bytes";
    return required_size;
  }

  ConvolutionParam param_;
  mshadow::Shape<2> shape_colunit_;
  mshadow::Shape<3> shape_dstunit_;
  index_t nstep_;
};  // class ConvolutionOp

template<typename xpu>
Operator* CreateOp(ConvolutionParam param, int dtype,
                   std::vector<TShape> *in_shape,
                   std::vector<TShape> *out_shape,
                   Context ctx);
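
Plugging in numbers: with DType = float, the default 1024 MB workspace becomes (1024 << 20) / 4 = 268,435,456 elements. For the 3×32×32 input with 64 filters used above, one image costs shape_colunit_.Size() + shape_dstunit_.Size() = 27·1024 + 64·1024 = 93,184 elements, so nstep_ would allow about 2,880 images per step before being capped at the batch size.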

#if DMLC_USE_CXX11
// Static properties of the convolution operator: argument names, shape and
// type inference, resource requests. Roughly the setup half of a Caffe layer.
class ConvolutionProp : public OperatorProperty {
 public:
  std::vector<std::string> ListArguments() const override {
    if (!param_.no_bias) {
      return {"data", "weight", "bias"};
    } else {
      return {"data", "weight"};
    }
  }

  void Init(const std::vector<std::pair<std::string, std::string> >& kwargs) override {
    using namespace mshadow;
    param_.Init(kwargs);
    if (param_.kernel.ndim() == 2) {
      param_.layout = param_.layout ? param_.layout.value() : mshadow::kNCHW;
      if (param_.stride.ndim() == 0) param_.stride = Shape2(1, 1);
      if (param_.dilate.ndim() == 0) param_.dilate = Shape2(1, 1);
      if (param_.pad.ndim() == 0) param_.pad = Shape2(0, 0);
    } else {
      CHECK_EQ(param_.kernel.ndim(), 3) << param_.kernel.ndim() << "D convolution not supported";
      param_.layout = param_.layout ? param_.layout.value() : mshadow::kNCDHW;
      if (param_.stride.ndim() == 0) param_.stride = Shape3(1, 1, 1);
      if (param_.dilate.ndim() == 0) param_.dilate = Shape3(1, 1, 1);
      if (param_.pad.ndim() == 0) param_.pad = Shape3(0, 0, 0);
    }
  }

  std::map<std::string, std::string> GetParams() const override {
    return param_.__DICT__();
  }
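
So a 2-D convolution declared with only kernel=(3,3) ends up with stride=(1,1), dilate=(1,1), pad=(0,0) and layout NCHW; the 3-D branch fills the analogous Shape3 defaults and NCDHW.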
  bool InferShape(std::vector<TShape> *in_shape,
                  std::vector<TShape> *out_shape,
                  std::vector<TShape> *aux_shape) const override {
    using namespace mshadow;
    if (!param_.no_bias) {
      CHECK_EQ(in_shape->size(), 3) << "Input:[data, weight, bias]";
    } else {
      CHECK_EQ(in_shape->size(), 2) << "Input:[data, weight]";
    }
    // CHECK_EQ(out_shape->size(), 1) << "Output: [output]";
    out_shape->resize(1, TShape());
    const TShape &dshp = (*in_shape)[conv::kData];
    if (dshp.ndim() == 0) return false;
    if (param_.kernel.ndim() == 2) {
      // 2d conv
      CHECK_EQ(dshp.ndim(), 4)
          << "Input data should be 4D in batch-num_filter-y-x";
      Shape<4> dshape = ConvertLayout(dshp.get<4>(), param_.layout.value(), kNCHW);
      Shape<4> wshape = Shape4(param_.num_filter / param_.num_group, dshape[1] / param_.num_group,
                               param_.kernel[0], param_.kernel[1]);
      wshape = ConvertLayout(wshape, kNCHW, param_.layout.value());
      wshape[0] *= param_.num_group;
      SHAPE_ASSIGN_CHECK(*in_shape, conv::kWeight, wshape);
      if (!param_.no_bias) {
        SHAPE_ASSIGN_CHECK(*in_shape, conv::kBias, Shape1(param_.num_filter));
      }
      const index_t ksize_y = static_cast<index_t>(param_.kernel[0]);
      const index_t ksize_x = static_cast<index_t>(param_.kernel[1]);
      CHECK_EQ(dshape[1] % param_.num_group, 0)
          << "input num_filter must divide group size";
      CHECK_EQ(param_.num_filter % param_.num_group, 0)
          << "output num_filter must divide group size";
      CHECK_GT(param_.kernel.Size(), 0)
          << "incorrect kernel size: " << param_.kernel;
      CHECK_GT(param_.stride.Size(), 0)
          << "incorrect stride size: " << param_.stride;
      CHECK_GT(param_.dilate.Size(), 0)
          << "incorrect dilate size: " << param_.dilate;
      CHECK(ksize_y <= dshape[2] + 2 * param_.pad[0]
            && ksize_x <= dshape[3] + 2 * param_.pad[1])
          << "kernel size exceed input";
      Shape<4> oshape;
      oshape[0] = dshape[0];
      oshape[1] = param_.num_filter;
      oshape[2] = (dshape[2] + 2 * param_.pad[0] -
          (param_.dilate[0] * (ksize_y - 1) + 1)) / param_.stride[0] + 1;
      oshape[3] = (dshape[3] + 2 * param_.pad[1] -
          (param_.dilate[1] * (ksize_x - 1) + 1)) / param_.stride[1] + 1;
      SHAPE_ASSIGN_CHECK(*out_shape, 0, ConvertLayout(oshape, kNCHW, param_.layout.value()));
      return true;
    } else if (param_.kernel.ndim() == 3) {
      // 3d conv
      CHECK_EQ(dshp.ndim(), 5)
        << "Input data should be 5D in batch-num_filter-depth-y-x";
      Shape<5> dshape = ConvertLayout(dshp.get<5>(), param_.layout.value(), kNCDHW);
      Shape<5> wshape = Shape5(param_.num_filter / param_.num_group, dshape[1] / param_.num_group,
                               param_.kernel[0], param_.kernel[1], param_.kernel[2]);
      wshape = ConvertLayout(wshape, kNCDHW, param_.layout.value());
      wshape[0] *= param_.num_group;
      SHAPE_ASSIGN_CHECK(*in_shape, conv::kWeight, wshape);
      if (!param_.no_bias) {
        SHAPE_ASSIGN_CHECK(*in_shape, conv::kBias, Shape1(param_.num_filter));
      }
      const index_t ksize_d = static_cast<index_t>(param_.kernel[0]);
      const index_t ksize_y = static_cast<index_t>(param_.kernel[1]);
      const index_t ksize_x = static_cast<index_t>(param_.kernel[2]);
      CHECK_EQ(dshape[1] % param_.num_group, 0)
        << "input num_filter must divide group size";
      CHECK_EQ(param_.num_filter % param_.num_group, 0)
        << "output num_filter must divide group size";
      CHECK_GT(param_.kernel.Size(), 0)
        << "incorrect kernel size: " << param_.kernel;
      CHECK_GT(param_.stride.Size(), 0)
        << "incorrect stride size: " << param_.stride;
      CHECK_GT(param_.dilate.Size(), 0)
        << "incorrect dilate size: " << param_.dilate;
      CHECK(ksize_d < dshape[2] + 2 * param_.pad[0]
            && ksize_y <= dshape[3] + 2 * param_.pad[1]
            && ksize_x <= dshape[4] + 2 * param_.pad[2])
        << "kernel size exceed input";
      CHECK_EQ(param_.dilate.Size(), 1)
        << "Dilate is not supported in 3d convolution";
      Shape<5> oshape;
      oshape[0] = dshape[0];
      oshape[1] = param_.num_filter;
      oshape[2] = (dshape[2] + 2 * param_.pad[0] -
          (1 * (ksize_d - 1) + 1)) / param_.stride[0] + 1;
      oshape[3] = (dshape[3] + 2 * param_.pad[1] -
          (1 * (ksize_y - 1) + 1)) / param_.stride[1] + 1;
      oshape[4] = (dshape[4] + 2 * param_.pad[2] -
          (1 * (ksize_x - 1) + 1)) / param_.stride[2] + 1;
      SHAPE_ASSIGN_CHECK(*out_shape, 0, ConvertLayout(oshape, kNCDHW, param_.layout.value()));
      return true;
    } else {
      LOG(FATAL) << "Unknown convolution type";
      return false;
    }
  }
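
The output formula used here is, per spatial axis, out = (in + 2·pad − (dilate·(k − 1) + 1)) / stride + 1 with integer division. For example, a 224×224 input with kernel=(7,7), stride=(2,2), pad=(3,3), dilate=(1,1) gives (224 + 6 − 7)/2 + 1 = 112, i.e. a 112×112 output.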
  bool InferType(std::vector<int> *in_type,
                 std::vector<int> *out_type,
                 std::vector<int> *aux_type) const override {
    CHECK_GE(in_type->size(), 1);
    int dtype = (*in_type)[0];
    CHECK_NE(dtype, -1) << "First input must have specified type";
    for (index_t i = 0; i < in_type->size(); ++i) {
      if ((*in_type)[i] == -1) {
        (*in_type)[i] = dtype;
      } else {
        CHECK_EQ((*in_type)[i], dtype) << "This layer requires uniform type. "
                                       << "Expected " << dtype << " v.s. given "
                                       << (*in_type)[i] << " at " << ListArguments()[i];
      }
    }
    out_type->clear();
    out_type->push_back(dtype);
    return true;
  }

  OperatorProperty* Copy() const override {
    auto ptr = new ConvolutionProp();
    ptr->param_ = param_;
    return ptr;
  }

  std::string TypeString() const override {
    return "Convolution";
  }

  std::vector<int> DeclareBackwardDependency(
    const std::vector<int> &out_grad,
    const std::vector<int> &in_data,
    const std::vector<int> &out_data) const override {
    return {out_grad[conv::kOut], in_data[conv::kData], in_data[conv::kWeight]};
  }

  std::vector<ResourceRequest> ForwardResource(
      const std::vector<TShape> &in_shape) const override {
    return {ResourceRequest::kTempSpace};
  }

  std::vector<ResourceRequest> BackwardResource(
      const std::vector<TShape> &in_shape) const override {
    return {ResourceRequest::kTempSpace};
  }

  Operator* CreateOperator(Context ctx) const override {
    LOG(FATAL) << "Not Implemented.";
    return NULL;
  }

  Operator* CreateOperatorEx(Context ctx, std::vector<TShape> *in_shape,
                             std::vector<int> *in_type) const override;

 private:
  ConvolutionParam param_;
};  // class ConvolutionProp
#endif  // DMLC_USE_CXX11
}  // namespace op
}  // namespace mxnet
#endif  // MXNET_OPERATOR_CONVOLUTION_INL_H_
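
For completeness, convolution.cu provides the GPU instantiation of the same template. A rough sketch (simplified: the real file also dispatches to a cuDNN-backed implementation when MXNet is built with cuDNN and cudnn_off is false) would be:

  // Rough sketch of convolution.cu; simplified, not the verbatim file.
  #include "./convolution-inl.h"

  namespace mxnet {
  namespace op {
  template<>
  Operator* CreateOp<gpu>(ConvolutionParam param, int dtype,
                          std::vector<TShape> *in_shape,
                          std::vector<TShape> *out_shape,
                          Context ctx) {
    Operator *op = NULL;
    MSHADOW_REAL_TYPE_SWITCH(dtype, DType, {
      op = new ConvolutionOp<gpu, DType>(param);
    })
    return op;
  }
  }  // namespace op
  }  // namespace mxnet

Together, the three files give one device-agnostic implementation with thin per-device entry points, which is why most of the logic worth reading sits in convolution-inl.h.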