Caffe's encapsulation of its six activation functions as layer classes: TanH


tanh_layer.hpp (layer declaration)

#ifndef CAFFE_TANH_LAYER_HPP_
#define CAFFE_TANH_LAYER_HPP_

#include <vector>

#include "caffe/blob.hpp"
#include "caffe/layer.hpp"
#include "caffe/proto/caffe.pb.h"

#include "caffe/layers/neuron_layer.hpp"

namespace caffe {

/**
 * @brief TanH hyperbolic tangent non-linearity @f$
 *         y = \frac{\exp(2x) - 1}{\exp(2x) + 1}
 *     @f$, popular in auto-encoders.
 *
 * Note that the gradient vanishes as the values move away from 0.
 * The ReLULayer is often a better choice for this reason.
 */
template <typename Dtype>
class TanHLayer : public NeuronLayer<Dtype> {
 public:
  explicit TanHLayer(const LayerParameter& param)
      : NeuronLayer<Dtype>(param) {}

  virtual inline const char* type() const { return "TanH"; }

 protected:
  /**
   * @param bottom input Blob vector (length 1)
   *   -# @f$ (N \times C \times H \times W) @f$
   *      the inputs @f$ x @f$
   * @param top output Blob vector (length 1)
   *   -# @f$ (N \times C \times H \times W) @f$
   *      the computed outputs @f$
   *        y = \frac{\exp(2x) - 1}{\exp(2x) + 1}
   *      @f$
   */
  virtual void Forward_cpu(const vector<Blob<Dtype>*>& bottom,
      const vector<Blob<Dtype>*>& top);
  virtual void Forward_gpu(const vector<Blob<Dtype>*>& bottom,
      const vector<Blob<Dtype>*>& top);

  /**
   * @brief Computes the error gradient w.r.t. the TanH inputs.
   *
   * @param top output Blob vector (length 1), providing the error gradient with
   *      respect to the outputs
   *   -# @f$ (N \times C \times H \times W) @f$
   *      containing error gradients @f$ \frac{\partial E}{\partial y} @f$
   *      with respect to computed outputs @f$ y @f$
   * @param propagate_down see Layer::Backward.
   * @param bottom input Blob vector (length 1)
   *   -# @f$ (N \times C \times H \times W) @f$
   *      the inputs @f$ x @f$; Backward fills their diff with
   *      gradients @f$
   *        \frac{\partial E}{\partial x}
   *            = \frac{\partial E}{\partial y}
   *              \left(1 - \left[\frac{\exp(2x) - 1}{\exp(2x) + 1} \right]^2 \right)
   *            = \frac{\partial E}{\partial y} (1 - y^2)
   *      @f$ if propagate_down[0]
   */
  virtual void Backward_cpu(const vector<Blob<Dtype>*>& top,
      const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom);
  virtual void Backward_gpu(const vector<Blob<Dtype>*>& top,
      const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom);
};

}  // namespace caffe

#endif  // CAFFE_TANH_LAYER_HPP_
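Two things to read off this header: the forward pass computes y = (exp(2x) - 1)/(exp(2x) + 1), which is exactly tanh(x), and the backward pass multiplies the incoming gradient dE/dy by 1 - y^2. The short standalone program below is not part of Caffe; it is only a sanity check that the formula in the doc comment agrees with std::tanh and that 1 - y^2 is the derivative used in Backward_cpu.

#include <cmath>
#include <cstdio>

int main() {
  for (double x = -2.0; x <= 2.0; x += 0.5) {
    // Formula quoted in the header's doc comment.
    double y = (std::exp(2.0 * x) - 1.0) / (std::exp(2.0 * x) + 1.0);
    // Derivative actually applied in Backward_cpu.
    double dydx = 1.0 - y * y;
    std::printf("x=% .2f  tanh(x)=% .6f  formula=% .6f  1-y^2=% .6f\n",
                x, std::tanh(x), y, dydx);
  }
  return 0;
}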

tanh_layer.cpp (CPU implementation)

// TanH neuron activation function layer.
// Adapted from ReLU layer code written by Yangqing Jia

#include <vector>

#include "caffe/layers/tanh_layer.hpp"

namespace caffe {

template <typename Dtype>
void TanHLayer<Dtype>::Forward_cpu(const vector<Blob<Dtype>*>& bottom,
    const vector<Blob<Dtype>*>& top) {
  const Dtype* bottom_data = bottom[0]->cpu_data();
  Dtype* top_data = top[0]->mutable_cpu_data();
  const int count = bottom[0]->count();
  for (int i = 0; i < count; ++i) {
    top_data[i] = tanh(bottom_data[i]);
  }
}

template <typename Dtype>
void TanHLayer<Dtype>::Backward_cpu(const vector<Blob<Dtype>*>& top,
    const vector<bool>& propagate_down,
    const vector<Blob<Dtype>*>& bottom) {
  if (propagate_down[0]) {
    const Dtype* top_data = top[0]->cpu_data();
    const Dtype* top_diff = top[0]->cpu_diff();
    Dtype* bottom_diff = bottom[0]->mutable_cpu_diff();
    const int count = bottom[0]->count();
    Dtype tanhx;
    for (int i = 0; i < count; ++i) {
      tanhx = top_data[i];
      bottom_diff[i] = top_diff[i] * (1 - tanhx * tanhx);
    }
  }
}

#ifdef CPU_ONLY
STUB_GPU(TanHLayer);
#endif

INSTANTIATE_CLASS(TanHLayer);

}  // namespace caffe
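Note that Backward_cpu never re-reads the input: it reuses the cached output y = tanh(x) from the top blob, since dE/dx = dE/dy * (1 - y^2). For completeness, here is a rough sketch of driving the layer directly through Caffe's Blob/Layer interface, in the style of Caffe's unit tests. It is not part of the original files, and the blob shape, input values, and dummy top diff are arbitrary choices for illustration.

#include <vector>

#include "caffe/blob.hpp"
#include "caffe/layers/tanh_layer.hpp"
#include "caffe/proto/caffe.pb.h"

int main() {
  using caffe::Blob;
  using caffe::LayerParameter;
  using caffe::TanHLayer;

  // A tiny 1x1x1x5 input blob with values spread around 0.
  Blob<float> bottom(1, 1, 1, 5);
  Blob<float> top;
  std::vector<Blob<float>*> bottom_vec(1, &bottom);
  std::vector<Blob<float>*> top_vec(1, &top);
  float* x = bottom.mutable_cpu_data();
  for (int i = 0; i < bottom.count(); ++i) {
    x[i] = 0.5f * (i - 2);  // -1.0, -0.5, 0.0, 0.5, 1.0
  }

  // TanH has no parameters, so an empty LayerParameter is enough.
  LayerParameter param;
  TanHLayer<float> layer(param);
  layer.SetUp(bottom_vec, top_vec);    // reshapes top to match bottom
  layer.Forward(bottom_vec, top_vec);  // top data becomes tanh(x), element-wise

  // Pretend dE/dy = 1 everywhere, then backprop to get dE/dx = 1 - tanh(x)^2.
  float* top_diff = top.mutable_cpu_diff();
  for (int i = 0; i < top.count(); ++i) {
    top_diff[i] = 1.0f;
  }
  std::vector<bool> propagate_down(1, true);
  layer.Backward(top_vec, propagate_down, bottom_vec);
  // bottom.cpu_diff() now holds the input gradients.
  return 0;
}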

