Eltwise层支持三种逐元素(element-wise)操作:product(对应元素相乘)、sum(对应元素相加减)和 max(对应元素取最大值),其中sum是默认操作。
假设输入bottom为A和B,如果要实现element_wise的A+B,即A和B的对应元素相加,prototxt文件如下:
layer { name: "eltwise_layer" bottom: "A" bottom: "B" top: "diff" type: "Eltwise" eltwise_param { operation: SUM }}
如果实现A-B,则prototxt为:
layer { name: "eltwise_layer" bottom: "A" bottom: "B" top: "diff" type: "Eltwise" eltwise_param { operation: SUM coeff: 1 coeff: -1 }}
注意:一旦指定coeff,就必须为每个bottom(这里是A和B)各给出一个系数(coefficient);PROD操作不允许指定系数。此外,所有输入blob的形状(N、C、H、W)必须完全相同。
1.LayerSetUp 函数:
/**
 * @brief Validates the eltwise parameters and caches them on the layer.
 *
 * Checks that either no coefficients were supplied or exactly one per bottom
 * blob, and that no coefficients are supplied together with the PROD
 * operation. Then stores the operation in op_, fills coeffs_ (1 for every
 * bottom unless explicit coefficients are given) and stores the
 * stable_prod_grad flag in stable_prod_grad_.
 *
 * @param bottom input blobs; only the number of blobs is used here.
 * @param top    output blobs; not used by this function.
 */
template <typename Dtype>
void EltwiseLayer<Dtype>::LayerSetUp(const vector<Blob<Dtype>*>& bottom,
    const vector<Blob<Dtype>*>& top) {
  const int num_coeffs = this->layer_param().eltwise_param().coeff_size();
  CHECK(num_coeffs == 0 || num_coeffs == bottom.size())
      << "Eltwise Layer takes one coefficient per bottom blob.";

  const bool is_prod = this->layer_param().eltwise_param().operation() ==
      EltwiseParameter_EltwiseOp_PROD;
  CHECK(!(is_prod && num_coeffs))
      << "Eltwise layer only takes coefficients for summation.";

  op_ = this->layer_param_.eltwise_param().operation();

  // Start from a coefficient of 1 for every bottom, then overwrite with the
  // user-supplied values when there are any.
  coeffs_ = vector<Dtype>(bottom.size(), 1);
  if (num_coeffs != 0) {
    for (int b = 0; b < bottom.size(); ++b) {
      coeffs_[b] = this->layer_param().eltwise_param().coeff(b);
    }
  }

  stable_prod_grad_ = this->layer_param_.eltwise_param().stable_prod_grad();
}
- 1
- 2
- 3
- 4
- 5
- 6
- 7
- 8
- 9
- 10
- 11
- 12
- 13
- 14
- 15
- 16
- 17
- 18
- 19
- 20
- 21
2.Reshape 函数:
/**
 * @brief Checks that all bottoms share one shape and sizes the outputs.
 *
 * Every bottom blob is required to have the same shape as bottom[0]; top[0]
 * is reshaped like bottom[0]. When the operation is MAX and there is exactly
 * one top blob, max_idx_ is reshaped to the same shape as well.
 *
 * @param bottom input blobs.
 * @param top    output blobs; top[0] is reshaped.
 */
template <typename Dtype>
void EltwiseLayer<Dtype>::Reshape(const vector<Blob<Dtype>*>& bottom,
    const vector<Blob<Dtype>*>& top) {
  Blob<Dtype>* const reference = bottom[0];

  for (int b = 1; b < bottom.size(); ++b) {
    CHECK(bottom[b]->shape() == reference->shape());
  }

  top[0]->ReshapeLike(*reference);

  const bool is_max = this->layer_param_.eltwise_param().operation() ==
      EltwiseParameter_EltwiseOp_MAX;
  if (is_max && top.size() == 1) {
    max_idx_.Reshape(reference->shape());
  }
}
3.Forward_cpu 函数:
/**
 * @brief CPU forward pass: combines all bottom blobs element by element.
 *
 * - PROD: top = bottom[0] * bottom[1] * ... (element-wise).
 * - SUM:  top is zeroed, then each bottom is accumulated via caffe_axpy with
 *         its coefficient coeffs_[i].
 * - MAX:  top holds the element-wise maximum; max_idx_ records, per element,
 *         the index of the bottom blob that supplied the value.
 *
 * @param bottom input blobs (PROD and MAX read bottom[0] and bottom[1]
 *               unconditionally).
 * @param top    output blobs; the result is written to top[0].
 */
template <typename Dtype>
void EltwiseLayer<Dtype>::Forward_cpu(const vector<Blob<Dtype>*>& bottom,
    const vector<Blob<Dtype>*>& top) {
  const int count = top[0]->count();
  Dtype* top_data = top[0]->mutable_cpu_data();

  switch (op_) {
  case EltwiseParameter_EltwiseOp_PROD: {
    // Multiply the first two bottoms, then fold the rest into the result.
    caffe_mul(count, bottom[0]->cpu_data(), bottom[1]->cpu_data(), top_data);
    for (int b = 2; b < bottom.size(); ++b) {
      caffe_mul(count, top_data, bottom[b]->cpu_data(), top_data);
    }
    break;
  }
  case EltwiseParameter_EltwiseOp_SUM: {
    caffe_set(count, Dtype(0), top_data);
    for (int b = 0; b < bottom.size(); ++b) {
      caffe_axpy(count, coeffs_[b], bottom[b]->cpu_data(), top_data);
    }
    break;
  }
  case EltwiseParameter_EltwiseOp_MAX: {
    int* mask = max_idx_.mutable_cpu_data();
    caffe_set(count, -1, mask);
    caffe_set(count, Dtype(-FLT_MAX), top_data);

    // First pass: compare bottom 0 against bottom 1. The comparison is
    // strict, so on a tie bottom 1 is recorded as the winner.
    const Dtype* first = bottom[0]->cpu_data();
    const Dtype* second = bottom[1]->cpu_data();
    for (int k = 0; k < count; ++k) {
      const bool first_wins = first[k] > second[k];
      top_data[k] = first_wins ? first[k] : second[k];
      mask[k] = first_wins ? 0 : 1;
    }

    // Remaining bottoms replace the running maximum only when strictly
    // greater.
    for (int b = 2; b < bottom.size(); ++b) {
      const Dtype* candidate = bottom[b]->cpu_data();
      for (int k = 0; k < count; ++k) {
        if (candidate[k] > top_data[k]) {
          top_data[k] = candidate[k];
          mask[k] = b;
        }
      }
    }
    break;
  }
  default:
    LOG(FATAL) << "Unknown elementwise operation.";
  }
}
- 1
- 2
- 3
- 4
- 5
- 6
- 7
- 8
- 9
- 10
- 11
- 12
- 13
- 14
- 15
- 16
- 17
- 18
- 19
- 20
- 21
- 22
- 23
- 24
- 25
- 26
- 27
- 28
- 29
- 30
- 31
- 32
- 33
- 34
- 35
- 36
- 37
- 38
- 39
- 40
- 41
- 42
- 43
- 44
- 45
- 46
- 47
- 48
- 49
- 50
- 51
- 52
- 53
- 54
- 55
- 56
4.Backward_cpu 函数:
/**
 * @brief CPU backward pass: writes the gradient for each bottom blob.
 *
 * For every bottom i with propagate_down[i] set, bottom[i]'s diff is
 * overwritten according to the operation:
 * - PROD: (product of the other bottoms) * top_diff. With stable_prod_grad_
 *         the product is formed by multiplying all other bottoms' data;
 *         otherwise it is obtained as top_data / bottom_data.
 * - SUM:  top_diff scaled by coeffs_[i] (plain copy when the coefficient
 *         is 1).
 * - MAX:  top_diff where max_idx_ names bottom i as the source of the
 *         element, 0 elsewhere.
 *
 * @param top            output blobs; top[0] supplies data and diff.
 * @param propagate_down per-bottom flag selecting which gradients to compute.
 * @param bottom         input blobs whose diffs are written.
 */
template <typename Dtype>
void EltwiseLayer<Dtype>::Backward_cpu(const vector<Blob<Dtype>*>& top,
    const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom) {
  const int* mask = NULL;
  const int count = top[0]->count();
  const Dtype* top_data = top[0]->cpu_data();
  const Dtype* top_diff = top[0]->cpu_diff();
  for (int i = 0; i < bottom.size(); ++i) {
    if (propagate_down[i]) {
      const Dtype* bottom_data = bottom[i]->cpu_data();
      Dtype* bottom_diff = bottom[i]->mutable_cpu_diff();
      switch (op_) {
      case EltwiseParameter_EltwiseOp_PROD:
        if (stable_prod_grad_) {
          // Build the product of every bottom except i directly in
          // bottom_diff: copy the first one, multiply in the rest.
          bool initialized = false;
          for (int j = 0; j < bottom.size(); ++j) {
            if (i == j) { continue; }
            if (!initialized) {
              caffe_copy(count, bottom[j]->cpu_data(), bottom_diff);
              initialized = true;
            } else {
              caffe_mul(count, bottom[j]->cpu_data(), bottom_diff,
                        bottom_diff);
            }
          }
        } else {
          // Shortcut: recover the product of the other bottoms by dividing
          // the forward output by this bottom's data.
          // NOTE(review): presumably ill-defined where bottom_data is zero;
          // confirm that is why stable_prod_grad exists.
          caffe_div(count, top_data, bottom_data, bottom_diff);
        }
        caffe_mul(count, bottom_diff, top_diff, bottom_diff);
        break;
      case EltwiseParameter_EltwiseOp_SUM:
        if (coeffs_[i] == Dtype(1)) {
          caffe_copy(count, top_diff, bottom_diff);
        } else {
          caffe_cpu_scale(count, coeffs_[i], top_diff, bottom_diff);
        }
        break;
      case EltwiseParameter_EltwiseOp_MAX:
        mask = max_idx_.cpu_data();
        // Route the gradient only to the bottom that won the max.
        for (int index = 0; index < count; ++index) {
          Dtype gradient = 0;
          if (mask[index] == i) {
            gradient += top_diff[index];
          }
          bottom_diff[index] = gradient;
        }
        break;
      default:
        LOG(FATAL) << "Unknown elementwise operation.";
      }
    }
  }
}