OpenCL - hello world

来源:互联网 发布:淘宝点结算没反应 编辑:程序博客网 时间:2024/05/29 18:59

忙了好久,期中考试考完了,要开始研究 OpenCL 了,这是自己的第一个 hello world 程序。它做的事情就是把两个数组的逐元素相加并行化。当然,这里的 kernel 函数没有单独写在文件里,而是以字符串的形式直接嵌在主机端代码中,主要是因为程序不大,姑且就这样吧,便于阅读。以下是源代码:

#include <iostream>#include <stdio.h>#include <stdlib.h>#include <CL/cl.h>const char* programSource="__kernel                                         \n""void vecadd(__global const float* A,             \n""__global const float* B,                         \n"" __global float* C)                              \n""{                                                \n""   int id = get_global_id(0);                    \n""   C[id] = A[id] + B[id];                        \n""}               \n";int main(){int *A = NULL; // 输入数组int *B = NULL; // 输入数组int *C = NULL; // 输出数组// 数组的大小const int  elements = 2048;// 计算内存大小size_t datasize = sizeof(int)*elements;// 分配内存空间A = (int*)malloc(datasize);B = (int*)malloc(datasize);C = (int*)malloc(datasize);// 初始化输入数组for(int i = 0;i < elements;i++){A[i] = std::rand();B[i] = std::rand();}// 获取并初始化平台cl_int status;cl_uint numPlatforms = 0;cl_platform_id *platforms = NULL;status = clGetPlatformIDs(0,NULL,&numPlatforms);platforms = (cl_platform_id*)malloc(numPlatforms*sizeof(cl_platform_id));//status = clGetPlatformIDs(numPlatforms,platforms,NULL);cl_uint numDevices = 0;cl_device_id *devices = NULL;status = clGetDeviceIDs(platforms[0],CL_DEVICE_TYPE_ALL,0,NULL,&numDevices);// 分配内存空间devices = (cl_device_id*)malloc(numDevices*sizeof(cl_device_id));status = clGetDeviceIDs(platforms[0],CL_DEVICE_TYPE_ALL,numDevices,devices,NULL);cl_context context = NULL;//创建上下文,管理设备之间的资料context = clCreateContext(NULL,numDevices,devices,NULL,NULL,&status);cl_command_queue cmdQueue;//创建命令队列cmdQueue = clCreateCommandQueue(context,devices[0],0,&status);//初始化数组内存cl_mem bufferA;cl_mem bufferB;cl_mem bufferC;bufferA = clCreateBuffer(context,CL_MEM_READ_ONLY,datasize,NULL,&status);bufferB = clCreateBuffer(context,CL_MEM_READ_ONLY,datasize,NULL,&status);bufferC = clCreateBuffer(context,CL_MEM_WRITE_ONLY,datasize,NULL,&status);//将主机端的数据写入设备status = clEnqueueWriteBuffer(cmdQueue,bufferA,CL_FALSE,0,datasize,A,0,NULL,NULL);status = 
clEnqueueWriteBuffer(cmdQueue,bufferB,CL_FALSE,0,datasize,B,0,NULL,NULL);//编译函数cl_program program = clCreateProgramWithSource(context,1,(const char**)&programSource,NULL,&status);status = clBuildProgram(program,numDevices,devices,NULL,NULL,NULL);//创建Kernel函数cl_kernel kernel = NULL;kernel = clCreateKernel(program,"vecadd",&status);//设置参数status = clSetKernelArg(kernel,0,sizeof(cl_mem),&bufferA);status = clSetKernelArg(kernel,1,sizeof(cl_mem),&bufferB);status = clSetKernelArg(kernel,2,sizeof(cl_mem),&bufferC);//初始化线程的映射size_t globalWorkSize[1];globalWorkSize[0] = elements;//运行kernelstatus = clEnqueueNDRangeKernel(cmdQueue,kernel,1,NULL,globalWorkSize,NULL,0,NULL,NULL);//从设备中读回数据结果clEnqueueReadBuffer(cmdQueue,bufferC,CL_TRUE,0,datasize,C,0,NULL,NULL);bool result = true;for(int i = 0;i < elements;i++){//std::cout<<C[i]<<std::endl;if(C[i]!=A[i]+B[i]){result = false;//break;}}if(result){printf("Output is correct\n");}else{printf("Output is incorrect\n");}//清理数据clReleaseKernel(kernel);clReleaseProgram(program);clReleaseCommandQueue(cmdQueue);clReleaseMemObject(bufferA);clReleaseMemObject(bufferB);clReleaseMemObject(bufferC);clReleaseContext(context);free(A);free(B);free(C);free(platforms);free(devices);return 0;}


 

原创粉丝点击