Openblas加速三维矩阵卷积操作-增加num_output计算

来源:互联网 发布:php大于符号 编辑:程序博客网 时间:2024/06/17 12:35

上个博客中(http://blog.csdn.net/samylee/article/details/73302547)提到了Openblas加速三维矩阵卷积操作,本博客增加了num_output计算。

代码如下(以下程序经博主测试准确无误):

头文件function.h

//function.h//cblas加速三维矩阵卷积操作-num_output//注意:stride=1//作者:samylee#ifndef FUNCTION_H#define FUNCTION_H#include <cblas.h>  #include <iostream>using namespace std;//A加pad的计算void comput_Apad(const int pad_w, const int Map, const int channel, float *A_pad, const float *A);//pad_A的转换,以适用于Openblasvoid convertA(float *A_convert, const int outM, const int convAw, const int pad_w, float *A_pad, const int channel);//kernel转换以适用于openblasvoid convertB(const int convAw, const int channel, const int num_output, float *B, float *B_convert);//Openblas矩阵乘积计算void Matrixmul_blas(const int convAh, const int convAw, float *A_convert, float *B_convert, float *C, const int channel, const int num_output);//转换C为常用矩阵排列void convertC(const int channel, const int convAh, const int num_output, float *C_convert, float *C);//验证结果是否正确void evaluate_blas(const int channel, const int Map, const int num_output, const float *A, const int Kernel, const float *B, const int outM, float *C_convert);#endif // !FUNCTION_H


函数文件function.cpp

//function.cpp//cblas加速三维矩阵卷积操作-num_output//注意:stride=1//作者:samylee#include "function.h"//A加pad的计算void comput_Apad(const int pad_w, const int Map, const int channel, float *A_pad, const float *A){int pad_one_channel = pad_w*pad_w;int org_one_channel = Map*Map;for (int c = 0; c < channel; c++){for (int i = 0; i < pad_w; i++){for (int j = 0; j < pad_w; j++){int col = c*pad_one_channel + i*pad_w + j;if (i == 0 || i == pad_w - 1){A_pad[col] = 0;}else{if (j == 0 || j == pad_w - 1){A_pad[col] = 0;}else{A_pad[col] = A[c*org_one_channel + (i - 1)*Map + j - 1];}}}}}}//pad_A的转换,以适用于Openblasvoid convertA(float *A_convert, const int outM, const int convAw, const int pad_w, float *A_pad, const int channel){int pad_one_channel = pad_w*pad_w;int seg = channel * convAw;for (int c = 0; c < channel; c++){for (int i = 0; i < outM; i++){for (int j = 0; j < outM; j++){int wh = c*convAw + i * outM * seg + j * seg;int col1 = c*pad_one_channel + i * pad_w + j;A_convert[wh] = A_pad[col1];A_convert[wh + 1] = A_pad[col1 + 1];A_convert[wh + 2] = A_pad[col1 + 2];int col2 = c*pad_one_channel + (i + 1) * pad_w + j;A_convert[wh + 3] = A_pad[col2];A_convert[wh + 4] = A_pad[col2 + 1];A_convert[wh + 5] = A_pad[col2 + 2];int col3 = c*pad_one_channel + (i + 2) * pad_w + j;A_convert[wh + 6] = A_pad[col3];A_convert[wh + 7] = A_pad[col3 + 1];A_convert[wh + 8] = A_pad[col3 + 2];}}}}//kernel转换以适用于openblasvoid convertB(const int convAw, const int channel, const int num_output, float *B, float *B_convert){int block1 = convAw * channel * num_output;int block2 = num_output*channel;int block3 = convAw*channel;for (int c = 0; c < channel; c++){for (int i = 0; i < convAw; i++){for (int n = 0; n < num_output; n++){for (int j = 0; j < channel; j++){if (c == j){B_convert[c*block1 + i*block2 + n*channel + j] = B[c*convAw + n*block3 + i];}else{B_convert[c*block1 + i*block2 + n*channel + j] = 0;}}}}}}//Openblas矩阵乘积计算void Matrixmul_blas(const int convAh, const int convAw, float *A_convert, float *B_convert, float *C, const int channel, const int num_output){const enum CBLAS_ORDER Order = CblasRowMajor;const enum CBLAS_TRANSPOSE TransA = CblasNoTrans;const enum CBLAS_TRANSPOSE TransB = CblasNoTrans;const int M = convAh;//A的行数,C的行数const int N = channel * num_output;//B的列数,C的列数const int K = convAw * channel;//A的列数,B的行数const float alpha = 1;const float beta = 0;const int lda = K;//A的列const int ldb = N;//B的列const int ldc = N;//C的列cblas_sgemm(Order, TransA, TransB, M, N, K, alpha, A_convert, lda, B_convert, ldb, beta, C, ldc);}//转换C为常用矩阵排列void convertC(const int channel, const int convAh, const int num_output, float *C_convert, float *C){int block1 = channel * convAh;int block2 = channel * num_output;for (int n = 0; n < num_output; n++){for (int c = 0; c < channel; c++){for (int i = 0; i < convAh; i++){C_convert[n * block1 + c*convAh + i] = C[i*block2 + n*channel + c];}}}}//验证结果是否正确void evaluate_blas(const int channel, const int Map, const int num_output, const float *A, const int Kernel, const float *B, const int outM, float *C_convert){cout << "A is:" << endl;for (int c = 0; c < channel; c++){for (int i = 0; i < Map; i++){for (int j = 0; j < Map; j++){cout << A[c*Map*Map + i*Map + j] << " ";}cout << endl;}cout << endl;}cout << "B is:" << endl;for (int n = 0; n < num_output; n++){for (int c = 0; c < channel; c++){for (int i = 0; i < Kernel; i++){for (int j = 0; j < Kernel; j++){cout << B[n* channel* Kernel * Kernel + c * Kernel * Kernel + i*Kernel + j] << " ";}cout << endl;}cout << endl;}cout << endl;}cout << "C is:" << endl;for (int n = 0; n < num_output; n++){for (int c = 0; c < channel; c++){for (int i = 0; i < outM; i++){for (int j = 0; j < outM; j++){cout << C_convert[n* channel* outM * outM + c * outM * outM + i*outM + j] << " ";}cout << endl;}cout << endl;}}}


主函数文件main.cpp

//main.cpp//cblas加速三维矩阵卷积操作-num_output//注意:stride=1//作者:samylee#include "function.h"int main(){//卷积参数初始化const int pad = 1;const int stride = 1;const int num_output = 3;//定义被卷积三维矩阵const int Map = 4;const int channel = 3;const float A[Map * Map * channel] = {1,2,3,4,1,2,3,4,1,2,3,4,1,2,3,4,1,2,3,4,1,2,3,4,1,2,3,4,1,2,3,4,1,2,3,4,1,2,3,4,1,2,3,4,1,2,3,4 };//定义三维卷积核const int Kernel = 3;float B[Kernel * Kernel * channel * num_output] = {1,1,1,1,1,1,1,1,1,2,2,2,2,2,2,2,2,2,       3,3,3,3,3,3,3,3,3,   ////////3,3,3,3,3,3,3,3,3,2,2,2,2,2,2,2,2,2, 1,1,1,1,1,1,1,1,1,    ////////1,1,1,1,1,1,1,1,1,2,2,2,2,2,2,2,2,2,3,3,3,3,3,3,3,3,3 };//计算卷积输出矩阵宽高const int outM = (Map - Kernel + 2 * pad) / stride + 1;//计算三维pad_Aconst int pad_w = Map + 2 * pad;float A_pad[pad_w*pad_w*channel];comput_Apad(pad_w, Map, channel, A_pad, A);//定义被卷积矩阵宽高const int convAw = Kernel*Kernel;const int convAh = outM*outM;//转换被卷积矩阵float A_convert[convAh*convAw*channel];convertA(A_convert, outM, convAw, pad_w, A_pad, channel);//转换卷积核以适用于cblasfloat B_convert[channel * Kernel * Kernel * channel * num_output];convertB(convAw, channel, num_output, B, B_convert);//定义卷积输出矩阵float C[convAh * channel * num_output];//cblas计算输出矩阵Matrixmul_blas(convAh, convAw, A_convert, B_convert, C, channel, num_output);//将输出转换为常用矩阵形式float C_convert[outM * outM * channel * num_output];convertC(channel, convAh, num_output, C_convert, C);//输出验证evaluate_blas(channel, Map, num_output, A, Kernel, B, outM, C_convert);system("pause");return EXIT_SUCCESS;}


效果如下图所示:

                      


任何问题请加唯一QQ2258205918(名称samylee)!