caffe forward_cpu

来源：互联网发布：软件脱壳工具编辑：程序博客网时间：2024/05/17 22:58

关联函数解析：Forward_cpu -> forward_cpu_gemm -> caffe_cpu_gemm

// CPU forward pass of the convolution layer.
//
// For every bottom/top blob pair, walks the num_ items of the blob one at a
// time, runs the GEMM-based convolution for that item and, when bias_term_ is
// set, adds the bias to the freshly written output.
//
//   bottom  input blobs, read through cpu_data().
//   top     output blobs, indexed in lockstep with bottom and written through
//           mutable_cpu_data().
void ConvolutionLayer<Dtype>::Forward_cpu(const vector<Blob<Dtype>*>& bottom,
    const vector<Blob<Dtype>*>& top) {
  // blobs_[0] is the weight blob; blobs_[1] (fetched below) is the bias blob.
  const Dtype* filters = this->blobs_[0]->cpu_data();
  for (int blob_idx = 0; blob_idx < bottom.size(); ++blob_idx) {
    const Dtype* in_base = bottom[blob_idx]->cpu_data();
    Dtype* out_base = top[blob_idx]->mutable_cpu_data();
    // NOTE(review): the original annotation describes num_ as the batch size
    // taken from the network configuration - confirm against the base class.
    for (int item = 0; item < this->num_; ++item) {
      // Each item occupies bottom_dim_ input elements and top_dim_ output
      // elements, so these pointers address the item-th slice of each blob.
      const Dtype* in_item = in_base + item * this->bottom_dim_;
      Dtype* out_item = out_base + item * this->top_dim_;
      // The matrix product itself is delegated to forward_cpu_gemm.
      this->forward_cpu_gemm(in_item, filters, out_item);
      if (!this->bias_term_) {
        continue;
      }
      const Dtype* bias = this->blobs_[1]->cpu_data();
      this->forward_cpu_bias(out_item, bias);
    }
  }
}

// Computes the convolution output for one input as a sequence of matrix
// products, one per group.
//
//   input        data for a single item; used directly as the right-hand
//                matrix when is_1x1_ is set.
//   weights      filter matrix; group g starts at weights + weight_offset_ * g.
//   output       destination; each group's slice is overwritten (the GEMM is
//                issued with beta = 0), starting at output + output_offset_ * g.
//   skip_im2col  when true, col_buffer_ is read as-is instead of being
//                refilled from input.
void BaseConvolutionLayer<Dtype>::forward_cpu_gemm(const Dtype* input,
    const Dtype* weights, Dtype* output, bool skip_im2col) {
  const bool uses_col_buffer = !is_1x1_;
  if (uses_col_buffer && !skip_im2col) {
    // NOTE(review): per the original annotation, conv_im2col_cpu unrolls every
    // kernel-sized patch visited by the sliding window into one column, giving
    // a matrix with kernel_dim_ rows and (output height * output width)
    // columns - confirm against conv_im2col_cpu itself.
    conv_im2col_cpu(input, col_buffer_.mutable_cpu_data());
  }
  const Dtype* columns = uses_col_buffer ? col_buffer_.cpu_data() : input;

  // Per group the product is
  //   [conv_out_channels_ / group_  x  kernel_dim_]      (weights slice)
  // * [kernel_dim_                  x  conv_out_spatial_dim_]  (columns slice)
  // = [conv_out_channels_ / group_  x  conv_out_spatial_dim_]  (output slice)
  // NOTE(review): the original annotation says conv_out_channels_ is the
  // configured num_output and kernel_dim_ is input channels * kernel height *
  // kernel width - confirm where those members are set.
  for (int g = 0; g < group_; ++g) {
    const int rows_per_group = conv_out_channels_ / group_;
    const Dtype* group_weights = weights + weight_offset_ * g;
    const Dtype* group_columns = columns + col_offset_ * g;
    Dtype* group_output = output + output_offset_ * g;
    caffe_cpu_gemm<Dtype>(CblasNoTrans, CblasNoTrans,
        rows_per_group, conv_out_spatial_dim_, kernel_dim_,
        static_cast<Dtype>(1.), group_weights, group_columns,
        static_cast<Dtype>(0.), group_output);
  }
}

/*
 * Single-precision matrix multiply: C = alpha * op(A) * op(B) + beta * C,
 * forwarded to cblas_sgemm with row-major storage (CblasRowMajor).
 *
 * A, B, C        matrices stored as flat one-dimensional arrays.
 * TransA, TransB whether A / B is transposed before multiplying
 *                (CblasTrans or CblasNoTrans).
 * M              number of rows of op(A) and of C.
 * N              number of columns of op(B) and of C.
 * K              number of columns of op(A) and rows of op(B).
 *
 * Leading dimensions passed to BLAS:
 *   lda = K when A is not transposed, M otherwise.
 *   ldb = N when B is not transposed, K otherwise.
 *   ldc = N.
 */
template<>
void caffe_cpu_gemm<float>(const CBLAS_TRANSPOSE TransA,
    const CBLAS_TRANSPOSE TransB, const int M, const int N, const int K,
    const float alpha, const float* A, const float* B, const float beta,
    float* C) {
  int lda;
  if (TransA == CblasNoTrans) {
    lda = K;
  } else {
    lda = M;
  }

  int ldb;
  if (TransB == CblasNoTrans) {
    ldb = N;
  } else {
    ldb = K;
  }

  const int ldc = N;
  cblas_sgemm(CblasRowMajor, TransA, TransB, M, N, K,
              alpha, A, lda, B, ldb, beta, C, ldc);
}

阅读全文

0 0