Dropout_layer.cpp(防止过拟合)
来源:互联网 发布:蜂云网络 编辑:程序博客网 时间:2024/06/06 05:15
dropout层的作用是防止训练的时候过拟合。在训练的时候,传统的训练方法是每次迭代经过某一层时,将所有的结点拿来做参与更新,训练整个网络。加入dropout层,我们只需要按一定的概率(retaining probability)p 来对weight layer 的参数进行随机采样,将被采样的结点拿来参与更新,将这个子网络作为此次更新的目标网络。这样做的好处是,由于随机的让一些节点不工作了,因此可以避免某些特征只在固定组合下才生效,有意识地让网络去学习一些普遍的共性(而不是某些训练样本的一些特性)这样能提高训练出的模型的鲁棒性!!!
下面记录下我在看dropout层时的注释,如有错误,请指出~~~
Dropout_layer.hpp::::
#ifndef CAFFE_DROPOUT_LAYER_HPP_#define CAFFE_DROPOUT_LAYER_HPP_#include <vector>#include "caffe/blob.hpp"#include "caffe/layer.hpp"#include "caffe/proto/caffe.pb.h"#include "caffe/layers/neuron_layer.hpp"namespace caffe { template <typename Dtype>class DropoutLayer : public NeuronLayer<Dtype> { public: explicit DropoutLayer(const LayerParameter& param) : NeuronLayer<Dtype>(param) {} virtual void LayerSetUp(const vector<Blob<Dtype>*>& bottom, const vector<Blob<Dtype>*>& top); virtual void Reshape(const vector<Blob<Dtype>*>& bottom, const vector<Blob<Dtype>*>& top); virtual inline const char* type() const { return "Dropout"; } protected: virtual void Forward_cpu(const vector<Blob<Dtype>*>& bottom, const vector<Blob<Dtype>*>& top); virtual void Forward_gpu(const vector<Blob<Dtype>*>& bottom, const vector<Blob<Dtype>*>& top); virtual void Backward_cpu(const vector<Blob<Dtype>*>& top, const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom); virtual void Backward_gpu(const vector<Blob<Dtype>*>& top, const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom); Blob<unsigned int> rand_vec_; Dtype threshold_; Dtype scale_; unsigned int uint_thres_;};} #endif
- 1
- 2
- 3
- 4
- 5
- 6
- 7
- 8
- 9
- 10
- 11
- 12
- 13
- 14
- 15
- 16
- 17
- 18
- 19
- 20
- 21
- 22
- 23
- 24
- 25
- 26
- 27
- 28
- 29
- 30
- 31
- 32
- 33
- 34
- 35
- 36
- 37
- 38
- 39
- 40
- 41
- 42
- 43
- 44
- 45
- 46
- 47
- 48
- 49
- 50
- 51
- 52
- 53
- 54
- 55
- 56
- 57
- 58
- 59
- 60
- 61
- 62
- 63
- 64
- 65
- 66
- 67
- 68
- 69
- 70
- 71
- 72
- 73
- 74
- 75
- 76
- 77
- 78
- 79
- 80
- 81
- 82
- 83
- 84
- 85
- 86
- 87
- 88
- 89
- 90
- 91
- 92
- 93
- 94
- 95
Dropout_layer.cpp:::
#include <vector>#include "caffe/layers/dropout_layer.hpp"#include "caffe/util/math_functions.hpp"namespace caffe {template <typename Dtype>void DropoutLayer<Dtype>::LayerSetUp(const vector<Blob<Dtype>*>& bottom, const vector<Blob<Dtype>*>& top) { NeuronLayer<Dtype>::LayerSetUp(bottom, top); threshold_ = this->layer_param_.dropout_param().dropout_ratio(); DCHECK(threshold_ > 0.); DCHECK(threshold_ < 1.); scale_ = 1. / (1. - threshold_); uint_thres_ = static_cast<unsigned int>(UINT_MAX * threshold_);}template <typename Dtype>void DropoutLayer<Dtype>::Reshape(const vector<Blob<Dtype>*>& bottom, const vector<Blob<Dtype>*>& top) { NeuronLayer<Dtype>::Reshape(bottom, top); rand_vec_.Reshape(bottom[0]->shape());}template <typename Dtype>void DropoutLayer<Dtype>::Forward_cpu(const vector<Blob<Dtype>*>& bottom, const vector<Blob<Dtype>*>& top) { const Dtype* bottom_data = bottom[0]->cpu_data(); Dtype* top_data = top[0]->mutable_cpu_data(); unsigned int* mask = rand_vec_.mutable_cpu_data(); const int count = bottom[0]->count(); if (this->phase_ == TRAIN) { caffe_rng_bernoulli(count, 1. - threshold_, mask); for (int i = 0; i < count; ++i) { top_data[i] = bottom_data[i] * mask[i] * scale_; } } else { caffe_copy(bottom[0]->count(), bottom_data, top_data); }}template <typename Dtype>void DropoutLayer<Dtype>::Backward_cpu(const vector<Blob<Dtype>*>& top, const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom) { if (propagate_down[0]) { const Dtype* top_diff = top[0]->cpu_diff(); Dtype* bottom_diff = bottom[0]->mutable_cpu_diff(); if (this->phase_ == TRAIN) { const unsigned int* mask = rand_vec_.cpu_data(); const int count = bottom[0]->count(); for (int i = 0; i < count; ++i) { bottom_diff[i] = top_diff[i] * mask[i] * scale_; } } else { caffe_copy(top[0]->count(), top_diff, bottom_diff); } }}#ifdef CPU_ONLYSTUB_GPU(DropoutLayer);#endifINSTANTIATE_CLASS(DropoutLayer);REGISTER_LAYER_CLASS(Dropout);}