矩阵运算的GPU加速!!求教各位大神帮忙!在此谢过

来源:互联网 发布:王者荣耀手办淘宝 编辑:程序博客网 时间:2024/05/22 06:35

最近在做计算机视觉的问题,里面涉及到矩阵的乘法,包括A*B, A'B 和AB', 我们稍微改了下内容,不知道如何用GPU加速,希望大神指导一下,谢谢啦!

/*
 * Grouped single-precision matrix-multiply kernels.
 *
 * All three routines accumulate into C (C += ALPHA * product); they never
 * clear C first, so the caller must initialise it.  Matrices are addressed
 * as flat arrays with a leading dimension: element (r, c) of a matrix X with
 * leading dimension ldx is X[c * ldx + r].  For every group index g in
 * [0, numGroup) a per-group element offset is added to each of A, B and C.
 *
 * Loop counters are ptrdiff_t so that they have the same width as the
 * extents they are compared against (an int counter would overflow for
 * extents larger than INT_MAX).
 */

/*
 * gemm_nn: for each group, C(i, j) += ALPHA * sum_k A(i, k) * B(k, j).
 *
 *   M, N, K             extents of the i, j and k loops
 *   ALPHA               scalar applied to every product term
 *   A, lda              read at A[M*filtersVolume_step*g + k*lda + i]
 *   B, ldb              read at B[K*N*g + j*ldb + k]
 *   C, ldc              updated at C[M*N*g + j*ldc + i]
 *   numGroup            number of groups processed
 *   filtersVolume_step  factor used in the per-group offset into A
 */
void gemm_nn(ptrdiff_t M, ptrdiff_t N, ptrdiff_t K,
             float ALPHA,
             float const *A, ptrdiff_t lda,
             float const *B, ptrdiff_t ldb,
             float *C, ptrdiff_t ldc,
             ptrdiff_t numGroup, ptrdiff_t filtersVolume_step)
{
    for (ptrdiff_t g = 0; g < numGroup; ++g) {
        const ptrdiff_t b_off = K * N * g;
        const ptrdiff_t a_off = M * filtersVolume_step * g;
        const ptrdiff_t c_off = M * N * g;

        for (ptrdiff_t j = 0; j < N; ++j) {
            for (ptrdiff_t k = 0; k < K; ++k) {
                /* B(k, j) is invariant over i, so scale it once. */
                const float scaled_b = ALPHA * B[b_off + j * ldb + k];

                for (ptrdiff_t i = 0; i < M; ++i) {
                    C[c_off + j * ldc + i] += scaled_b * A[a_off + k * lda + i];
                }
            }
        }
    }
}

/*
 * gemm_tn: for each group, C(i, j) += ALPHA * sum_k A(k, i) * B(k, j),
 * i.e. A is traversed transposed.
 *
 *   M, N, K             extents of the i, j and k loops
 *   ALPHA               scalar applied to each completed dot product
 *   A, lda              read at A[K*filtersVolume_step*g + i*lda + k]
 *   B, ldb              read at B[K*N*g + j*ldb + k]
 *   C, ldc              updated at C[M*N*g + j*ldc + i]
 *   numGroup            number of groups processed
 *   filtersVolume_step  factor used in the per-group offset into A
 */
void gemm_tn(ptrdiff_t M, ptrdiff_t N, ptrdiff_t K,
             float ALPHA,
             float const *A, ptrdiff_t lda,
             float const *B, ptrdiff_t ldb,
             float *C, ptrdiff_t ldc,
             ptrdiff_t numGroup, ptrdiff_t filtersVolume_step)
{
    for (ptrdiff_t g = 0; g < numGroup; ++g) {
        const ptrdiff_t c_off = M * N * g;
        const ptrdiff_t a_off = K * filtersVolume_step * g;
        const ptrdiff_t b_off = K * N * g;

        for (ptrdiff_t j = 0; j < N; ++j) {
            for (ptrdiff_t i = 0; i < M; ++i) {
                /* Dot product over k; ALPHA is applied once at the end. */
                float dot = 0;

                for (ptrdiff_t k = 0; k < K; ++k) {
                    dot += A[a_off + i * lda + k] * B[b_off + j * ldb + k];
                }
                C[c_off + j * ldc + i] += ALPHA * dot;
            }
        }
    }
}

/*
 * gemm_nt: for each group, C(i, j) += ALPHA * sum_k A(i, k) * B(j, k),
 * i.e. B is traversed transposed.
 *
 *   M, N, K             extents of the i, j and k loops
 *   ALPHA               scalar applied to every product term
 *   A, lda              read at A[M*K*g + k*lda + i]
 *   B, ldb              read at B[N*K*g + k*ldb + j]
 *   C, ldc              updated at C[M*filtersVolume_step*g + j*ldc + i]
 *   numGroup            number of groups processed
 *   filtersVolume_step  factor used in the per-group offset into C
 *
 * NOTE(review): unlike gemm_nn/gemm_tn, the group offset into C here is
 * M * filtersVolume_step * g rather than M * N * g.  Presumably callers pass
 * N == filtersVolume_step; confirm against the call sites.
 */
void gemm_nt(ptrdiff_t M, ptrdiff_t N, ptrdiff_t K,
             float ALPHA,
             float const *A, ptrdiff_t lda,
             float const *B, ptrdiff_t ldb,
             float *C, ptrdiff_t ldc,
             ptrdiff_t numGroup, ptrdiff_t filtersVolume_step)
{
    for (ptrdiff_t g = 0; g < numGroup; ++g) {
        const ptrdiff_t b_off = N * K * g;
        const ptrdiff_t c_off = M * filtersVolume_step * g;
        const ptrdiff_t a_off = M * K * g;

        for (ptrdiff_t j = 0; j < N; ++j) {
            for (ptrdiff_t k = 0; k < K; ++k) {
                /* B(j, k) is invariant over i, so scale it once. */
                const float scaled_b = ALPHA * B[b_off + k * ldb + j];

                for (ptrdiff_t i = 0; i < M; ++i) {
                    C[c_off + j * ldc + i] += scaled_b * A[a_off + k * lda + i];
                }
            }
        }
    }
}


0 0
原创粉丝点击