Openblas加速二维矩阵卷积操作-增加pad计算

来源:互联网 发布:怎样查看追加淘宝评价 编辑:程序博客网 时间:2024/06/06 03:55

上一篇博客(http://blog.csdn.net/samylee/article/details/73251042)介绍了如何用 OpenBLAS 加速二维矩阵的卷积计算,本文在此基础上增加了 pad(边缘补零)的计算。

下一篇博客将介绍 OpenBLAS 在三维矩阵卷积计算中的加速。

代码如下(以下程序经博主测试准确无误):

//cblas加速二维矩阵卷积操作//注意:stride=1//作者:samylee#include <cblas.h>  #include <iostream>using namespace std;//A加pad的计算void comput_Apad(const int pad_w, const int Map, float *A_pad, const float *A){for (int i = 0; i < pad_w; i++){for (int j = 0; j < pad_w; j++){int col = i*pad_w + j;if (i == 0 || i == pad_w - 1){A_pad[col] = 0;}else{if (j == 0 || j == pad_w - 1){A_pad[col] = 0;}else{A_pad[col] = A[(i - 1)*Map + j - 1];}}}}}//pad_A的转换,以适用于Openblasvoid convertA(float *A_convert, const int outM, const int convAw, const int pad_w, float *A_pad){for (int i = 0; i < outM; i++){for (int j = 0; j < outM; j++){int wh = i * outM * convAw + j * convAw;int col1 = i * pad_w + j;A_convert[wh] = A_pad[col1];A_convert[wh + 1] = A_pad[col1 + 1];A_convert[wh + 2] = A_pad[col1 + 2];int col2 = (i + 1) * pad_w + j;A_convert[wh + 3] = A_pad[col2];A_convert[wh + 4] = A_pad[col2 + 1];A_convert[wh + 5] = A_pad[col2 + 2];int col3 = (i + 2) * pad_w + j;A_convert[wh + 6] = A_pad[col3];A_convert[wh + 7] = A_pad[col3 + 1];A_convert[wh + 8] = A_pad[col3 + 2];}}}//Openblas矩阵乘积计算void Matrixmul_blas(const int convAh, const int convAw, float *A_convert, float *B, float *C){const enum CBLAS_ORDER Order = CblasRowMajor;const enum CBLAS_TRANSPOSE TransA = CblasNoTrans;const enum CBLAS_TRANSPOSE TransB = CblasNoTrans;const int M = convAh;//A的行数,C的行数const int N = 1;//B的列数,C的列数const int K = convAw;//A的列数,B的行数const float alpha = 1;const float beta = 0;const int lda = K;//A的列const int ldb = N;//B的列const int ldc = N;//C的列cblas_sgemm(Order, TransA, TransB, M, N, K, alpha, A_convert, lda, B, ldb, beta, C, ldc);}int main() {//卷积参数初始化const int pad = 1;const int stride = 1;//定义被卷积矩阵const int Map = 6;const float A[Map * Map] = { 1,2,3,4,5,6,1,2,3,4,5,6,1,2,3,4,5,6,1,2,3,4,5,6,1,2,3,4,5,6,1,2,3,4,5,6 };//定义卷积核const int Kernel = 3;float B[Kernel * Kernel] = {1,1,1,1,1,1,1,1,1 };//计算卷积输出矩阵宽高const int outM = (Map - Kernel + 2 * pad) / stride + 1;//计算pad_Aconst int pad_w = Map + 2 * pad;float 
A_pad[pad_w*pad_w];comput_Apad(pad_w, Map, A_pad, A);//定义被卷积矩阵宽高const int convAw = Kernel*Kernel;const int convAh = outM*outM;//转换被卷积矩阵float A_convert[convAh*convAw];convertA(A_convert, outM, convAw, pad_w, A_pad);//定义卷积输出矩阵float C[convAh*1];//cblas计算输出矩阵Matrixmul_blas(convAh, convAw, A_convert, B, C);//输出验证cout << "A_pad is:" << endl;for (int i = 0; i < pad_w; i++){for (int j = 0; j < pad_w; j++){cout << A_pad[i*pad_w + j] << " ";}cout << endl;}cout << endl;cout << "B is:" << endl;for (int i = 0; i < Kernel; i++){for (int j = 0; j < Kernel; j++){cout << B[i*Kernel + j] << " ";}cout << endl;}cout << endl;cout << "C is:" << endl;for (int i = 0; i < outM; i++){for (int j = 0; j < outM; j++){cout << C[i*outM + j] << " ";}cout << endl;}cout << endl;system("pause");return EXIT_SUCCESS;}


效果如下:



任何问题请加唯一QQ2258205918(名称samylee)!