OpenCL 初实践(1)矩阵相乘

来源:互联网 发布:我是歌手4网络踢馆赛 编辑:程序博客网 时间:2024/05/19 15:24

对2000*2000的两个矩阵做矩阵相乘,

执行内核时设定的 global work size 是二维的 200×200。实验环境:CentOS,OpenCL 2.0 AMD-APP (1800.5),AMD Accelerated Parallel Processing。

MaxItemSize=256,256,256,

故选取的 global work size(globalSize)为 200×200。

/* Launch configuration for the mmult kernel: a 2-D NDRange of 200 x 200
 * work-items. */
size_t globalSize[2] = {200,200};
/* NOTE(review): localSize is not passed to the enqueue call below (the
 * local_work_size argument is NULL, so the OpenCL implementation chooses the
 * work-group size). It also has only one element, while a 2-D range would
 * need two entries if it were ever passed -- confirm whether it is used
 * elsewhere before relying on it. */
size_t localSize[1] = {200};
/* Enqueue the kernel over 2 dimensions with no global offset, no explicit
 * work-group size, no wait list and no completion event. The status is stored
 * in err; it is not checked in this fragment. */
err = clEnqueueNDRangeKernel(commands, kernel, 2, NULL, globalSize, NULL, 0, NULL, NULL);


核函数:

/*
 * mmult - integer matrix product: output = a * b.
 *
 * All three buffers are treated as square rank x rank matrices of int in
 * row-major order, with rank fixed at 2000 inside the kernel.
 *
 * a      : left operand, read only.
 * b      : right operand, read only.
 * output : result matrix; each element is written by exactly one work-item.
 *
 * The kernel expects a 2-D NDRange. Dimension 0 is split across the columns
 * of the result and dimension 1 across its rows. Each work-item computes a
 * rectangular tile whose size is derived from the global size, so the whole
 * matrix is covered for any launch size (e.g. a 200 x 200 launch gives every
 * work-item a 10 x 10 tile). Previously the row index was the raw
 * get_global_id(1), so a 200 x 200 launch only filled rows 0..199 and the
 * hard-coded 10 columns per item was only right for a global size of 200.
 *
 * Sums are accumulated in int, exactly as before; very large inputs can
 * overflow that accumulator.
 */
__kernel void mmult(__global const int* a, __global const int* b, __global int* output)
{
    /* Matrix dimension (rows == columns). */
    const int rank = 2000;

    /* Work-items available along each dimension of the launch. */
    const int items_x = (int)get_global_size(0);
    const int items_y = (int)get_global_size(1);

    /* Tile size per work-item; ceiling division so no row/column is dropped
     * when rank is not a multiple of the global size. */
    const int cols_per_item = (rank + items_x - 1) / items_x;
    const int rows_per_item = (rank + items_y - 1) / items_y;

    /* Half-open tile bounds, clamped so trailing work-items never index past
     * the end of the matrix. */
    const int col_begin = (int)get_global_id(0) * cols_per_item;
    const int row_begin = (int)get_global_id(1) * rows_per_item;
    const int col_end = min(col_begin + cols_per_item, rank);
    const int row_end = min(row_begin + rows_per_item, rank);

    for (int row = row_begin; row < row_end; row++) {
        for (int col = col_begin; col < col_end; col++) {
            /* Dot product of row `row` of a with column `col` of b. */
            int running = 0;
            for (int num = 0; num < rank; num++) {
                running += a[row * rank + num] * b[num * rank + col];
            }
            output[row * rank + col] = running;
        }
    }
}



0 0
原创粉丝点击