OpenCL实现计算自然对数的底e

来源:互联网 发布:2018经济危机 知乎 编辑:程序博客网 时间:2024/05/22 17:09


主要是之前老师让我们用不同的方法实现计算自然对数的底 e,以了解不同并行语言的特点。所以在用了多线程、OpenMP 之后,想用 OpenCL 也实现一下。先介绍一下算法。

 

 

方法一.

 主机端代码

/*   项目:openCL的矩阵相乘   作者:刘荣   时间:2012.11.20*/#include <iostream>#include<time.h>#include <string> #include<math.h>#include <vector>#include <CL/cl.h>#include <fstream>using namespace std;//kernel函数std::stringconvertToString(const char *filename)//将kernel源码,即自己写的并行化的函数,转化成字符串{    size_t size;    char*  str;    std::string s;    std::fstream f(filename, (std::fstream::in | std::fstream::binary));    if(f.is_open())    {        size_t fileSize;        f.seekg(0, std::fstream::end);        size = fileSize = (size_t)f.tellg();        f.seekg(0, std::fstream::beg);        str = new char[size+1];        if(!str)        {            f.close();            std::cout << "Memory allocation failed";            return NULL;        }        f.read(str, fileSize);        f.close();        str[size] = '\0';            s = str;        delete[] str;        return s;    }    else    {        std::cout << "\nFile containg the kernel code(\".cl\") not found. Please copy the required file in the folder containg the executable.\n";        exit(1);    }    return NULL;}int main(){//double start,end,time1,time2;//查询平台cl_int ciErrNum;cl_platform_id platform;ciErrNum = clGetPlatformIDs(1, &platform, NULL);if(ciErrNum != CL_SUCCESS){cout<<"获取设备失败"<<endl;return 0;}//获取设备信息cl_device_id device;cl_int   status;    cl_uint maxDims;    cl_event events[3];    size_t globalThreads[1];    size_t localThreads[1];    size_t maxWorkGroupSize;    size_t maxWorkItemSizes[3];  //创建设备ciErrNum = clGetDeviceIDs(platform, CL_DEVICE_TYPE_ALL, 1, &device, NULL);  //创建上下文cl_context_properties cps[3] = {CL_CONTEXT_PLATFORM, (cl_context_properties)platform, 0};cl_context ctx = clCreateContext(cps, 1, &device, NULL, NULL, &ciErrNum);if(ciErrNum != CL_SUCCESS){cout<<"创建上下文失败"<<endl;return 0;}cl_command_queue myqueue = clCreateCommandQueue(ctx,device,0,&ciErrNum);if(ciErrNum != CL_SUCCESS){cout<<"命令队列失败"<<endl;return 0;}//声明buffer,传输数据double *C = NULL; // 输出数组int MaxItem=1000;int StepNum = 100000000;size_t datasize = 
sizeof(double)*MaxItem*2;// 分配内存空间C = (double*)malloc(datasize);// 初始化输入数组cl_mem bufferC = clCreateBuffer(ctx,CL_MEM_WRITE_ONLY,datasize,NULL,&ciErrNum);//运行时kernel编译const char * filename  = "CaluE.cl";    std::string  sourceStr = convertToString(filename);    const char * source    = sourceStr.c_str();    size_t sourceSize[]    = { strlen(source) };//直接将CL文件读到记忆体    cl_program myprog = clCreateProgramWithSource(                  ctx,                   1,                   &source,                  sourceSize,                  &ciErrNum);//cl_program myprog = clCreateProgramWithSource(ctx,1,(const char**)&programSource,NULL,&ciErrNum);if(ciErrNum != 0){cout<<"createprogram failed"<<endl;}ciErrNum = clBuildProgram(myprog,0,NULL,NULL,NULL,NULL);if(ciErrNum != 0){cout<<"clBuildProgram failed"<<endl;}cl_kernel mykernel = clCreateKernel(myprog,"CaluE",&ciErrNum);if(ciErrNum != 0){cout<<"clCreateKernel failed"<<endl;}//运行程序,设置参数clSetKernelArg(mykernel,0,sizeof(cl_mem),(void*)&bufferC);clSetKernelArg(mykernel,1,sizeof(int),&StepNum);clSetKernelArg(mykernel,2,sizeof(int),&MaxItem);    size_t globalWorkSize[1];globalWorkSize[0] = MaxItem;//start = clock();ciErrNum = clEnqueueNDRangeKernel(myqueue,mykernel,1,NULL,globalWorkSize,NULL,0,NULL,&events[0]);if(ciErrNum != 0){cout<<"clEnqueueNDRangeKernel failed"<<endl;}//时间同步status = clWaitForEvents(1, &events[0]);    if(status != CL_SUCCESS)     {         std::cout <<            "Error: Waiting for kernel run to finish. \            (clWaitForEvents0)\n";        return 0;    }    cout<<"o"<<endl;   status = clReleaseEvent(events[0]);//将结果拷贝到主机端end = clock();time1=end-start;cout<<"shijian "<<time1<<endl;ciErrNum = clEnqueueReadBuffer(myqueue,bufferC,CL_TRUE,0,datasize,C,0,NULL,&events[1]);status = clWaitForEvents(1, &events[1]);    if(status != CL_SUCCESS)     {         std::cout <<            "Error: Waiting for read buffer call to finish. 
\            (clWaitForEvents1)n";        return 0;    }    status = clReleaseEvent(events[1]);    if(status != CL_SUCCESS)     {         std::cout <<            "Error: Release event object. \            (clReleaseEvent)\n";        return 0;    }double e=0;double result = 0;double temp = 1;//for(int i=0; i<MaxItem; i++){ result = C[i*2];e += (1/temp)*result;temp = C[i*2+1];}printf("e = %1.22f",e);return 0;}

kernel函数

// Enter your kernel in this window
#ifdef cl_khr_fp64
#pragma OPENCL EXTENSION cl_khr_fp64 : enable
#endif

/*
 * CaluE (method 1): each work-item handles one contiguous chunk of the series
 * sum_{n>=1} 1/n!.
 *
 * With offest = StepNum / MaxItem, work-item `id` covers the indices
 * id*offest+1 .. (id+1)*offest. (The original used id+1 .. id+offest-1, so the
 * chunks overlapped instead of partitioning 1..StepNum.) Indices above
 * MaxItem*offest are not covered when StepNum is not a multiple of MaxItem.
 *
 * result[2*id]   = sum over the chunk of 1 / (start * (start+1) * ... * n)
 * result[2*id+1] = product of all indices in the chunk
 *
 * The host is expected to divide result[2*id] by the product of all earlier
 * chunks' result[2*k+1] values to obtain the chunk's contribution.
 */
__kernel void CaluE(__global double* result,
                    int StepNum,
                    int MaxItem)
{
    int id = (int)get_global_id(0);
    int offest = StepNum / MaxItem;

    // Integer bounds of this work-item's chunk; `end` is exclusive.
    int start = id * offest + 1;
    int end = start + offest;

    double res = 0;
    double fact = 1;
    for (int i = start; i < end; i++)
    {
        fact *= i;            // overflows to +inf for long chunks; 1/inf then adds 0
        res += (1.0 / fact);
    }

    // Write back. No barrier is needed: nothing here is shared between work-items.
    result[id * 2] = res;
    result[id * 2 + 1] = fact;
}


 

方法二

主机端程序

/*   项目:openCL的矩阵相乘   作者:刘荣   时间:2012.11.20*/#include <iostream>#include<time.h>#include <string> #include<math.h>#include <vector>#include <CL/cl.h>#include <fstream>using namespace std;//kernel函数std::stringconvertToString(const char *filename)//将kernel源码,即自己写的并行化的函数,转化成字符串{    size_t size;    char*  str;    std::string s;    std::fstream f(filename, (std::fstream::in | std::fstream::binary));    if(f.is_open())    {        size_t fileSize;        f.seekg(0, std::fstream::end);        size = fileSize = (size_t)f.tellg();        f.seekg(0, std::fstream::beg);        str = new char[size+1];        if(!str)        {            f.close();            std::cout << "Memory allocation failed";            return NULL;        }        f.read(str, fileSize);        f.close();        str[size] = '\0';            s = str;        delete[] str;        return s;    }    else    {        std::cout << "\nFile containg the kernel code(\".cl\") not found. Please copy the required file in the folder containg the executable.\n";        exit(1);    }    return NULL;}int main(){//double start,end,time1,time2;//查询平台cl_int ciErrNum;cl_platform_id platform;ciErrNum = clGetPlatformIDs(1, &platform, NULL);if(ciErrNum != CL_SUCCESS){cout<<"获取设备失败"<<endl;return 0;}//获取设备信息cl_device_id device;cl_int   status;    cl_uint maxDims;    cl_event events[3];    size_t globalThreads[1];    size_t localThreads[1];    size_t maxWorkGroupSize;    size_t maxWorkItemSizes[3];  //创建设备ciErrNum = clGetDeviceIDs(platform, CL_DEVICE_TYPE_ALL, 1, &device, NULL);  //创建上下文cl_context_properties cps[3] = {CL_CONTEXT_PLATFORM, (cl_context_properties)platform, 0};cl_context ctx = clCreateContext(cps, 1, &device, NULL, NULL, &ciErrNum);if(ciErrNum != CL_SUCCESS){cout<<"创建上下文失败"<<endl;return 0;}cl_command_queue myqueue = clCreateCommandQueue(ctx,device,0,&ciErrNum);if(ciErrNum != CL_SUCCESS){cout<<"命令队列失败"<<endl;return 0;}//声明buffer,传输数据double *C = NULL; // 输出数组int MaxItem=10;int StepNum = 1000000000;size_t datasize = 
sizeof(double)*MaxItem;// 分配内存空间C = (double*)malloc(datasize);// 初始化输入数组cl_mem bufferC = clCreateBuffer(ctx,CL_MEM_WRITE_ONLY,datasize*sizeof(float),NULL,&ciErrNum);//运行时kernel编译const char * filename  = "CaluE.cl";    std::string  sourceStr = convertToString(filename);    const char * source    = sourceStr.c_str();    size_t sourceSize[]    = { strlen(source) };//直接将CL文件读到记忆体    cl_program myprog = clCreateProgramWithSource(                  ctx,                   1,                   &source,                  sourceSize,                  &ciErrNum);//cl_program myprog = clCreateProgramWithSource(ctx,1,(const char**)&programSource,NULL,&ciErrNum);if(ciErrNum != 0){cout<<"createprogram failed"<<endl;}ciErrNum = clBuildProgram(myprog,0,NULL,NULL,NULL,NULL);if(ciErrNum != 0){cout<<"clBuildProgram failed"<<endl;}cl_kernel mykernel = clCreateKernel(myprog,"CaluE",&ciErrNum);if(ciErrNum != 0){cout<<"clCreateKernel failed"<<endl;}//运行程序,设置参数clSetKernelArg(mykernel,0,sizeof(cl_mem),(void*)&bufferC);clSetKernelArg(mykernel,1,sizeof(int),&StepNum);clSetKernelArg(mykernel,2,sizeof(int),&MaxItem);    size_t globalWorkSize[1];globalWorkSize[0] = MaxItem;//start = clock();ciErrNum = clEnqueueNDRangeKernel(myqueue,mykernel,1,NULL,globalWorkSize,NULL,0,NULL,&events[0]);if(ciErrNum != 0){cout<<"clEnqueueNDRangeKernel failed"<<endl;}//时间同步status = clWaitForEvents(1, &events[0]);    if(status != CL_SUCCESS)     {         std::cout <<            "Error: Waiting for kernel run to finish. \            (clWaitForEvents0)\n";        return 0;    }    cout<<"o"<<endl;   status = clReleaseEvent(events[0]);//将结果拷贝到主机端end = clock();time1=end-start;cout<<"shijian "<<time1<<endl;ciErrNum = clEnqueueReadBuffer(myqueue,bufferC,CL_TRUE,0,datasize,C,0,NULL,&events[1]);status = clWaitForEvents(1, &events[1]);    if(status != CL_SUCCESS)     {         std::cout <<            "Error: Waiting for read buffer call to finish. 
\            (clWaitForEvents1)n";        return 0;    }    status = clReleaseEvent(events[1]);    if(status != CL_SUCCESS)     {         std::cout <<            "Error: Release event object. \            (clReleaseEvent)\n";        return 0;    }double e=0;//for(int i=0; i<MaxItem; i++){ cout<< C[i]<<endl;e += C[i];}printf("e = %1.22f",e);return 0;}


 

kernel函数

// Enter your kernel in this window
#ifdef cl_khr_fp64
#pragma OPENCL EXTENSION cl_khr_fp64 : enable
#endif

/*
 * CaluE (method 2): work-item `id` sums the terms 1/i! of the series for
 * i = id+1, id+1+MaxItem, id+1+2*MaxItem, ... up to StepNum, and stores the
 * partial sum in result[id].
 *
 * `fact` is carried across iterations: the inner loop multiplies in the (at
 * most MaxItem) factors i, i-1, ..., which advances it from (i-MaxItem)! to i!.
 * It is a double (the original used float, which limits every term to about
 * 7 significant digits although the result is stored as double).
 */
__kernel void CaluE(__global double* result,
                    int StepNum,
                    int MaxItem)
{
    int id = (int)get_global_id(0);
    double fact = 1;
    double e = 0;

    for (int i = id + 1; i <= StepNum; i += MaxItem)
    {
        // `j < i` stops the first iteration at the factor 1.
        for (int j = 0; j < MaxItem && j < i; j++)
        {
            fact *= (i - j);
        }
        e += (1.0 / fact);  // adds 0 once fact has overflowed to +inf
    }

    // No barrier is needed: nothing here is shared between work-items.
    result[id] = e;
}


 


// Enter your kernel in this window
#ifdef cl_khr_fp64
#pragma OPENCL EXTENSION cl_khr_fp64 : enable
#endif

/*
 * CaluE (chunked variant): each work-item handles one contiguous chunk of the
 * series sum_{n>=1} 1/n!.
 *
 * With offest = StepNum / MaxItem, work-item `id` covers the indices
 * id*offest+1 .. (id+1)*offest. (The original used id+1 .. id+offest-1, so the
 * chunks overlapped instead of partitioning 1..StepNum.) Indices above
 * MaxItem*offest are not covered when StepNum is not a multiple of MaxItem.
 *
 * result[2*id]   = sum over the chunk of 1 / (start * (start+1) * ... * n)
 * result[2*id+1] = product of all indices in the chunk
 */
__kernel void CaluE(__global double* result,
                    int StepNum,
                    int MaxItem)
{
    int id = (int)get_global_id(0);
    int offest = StepNum / MaxItem;

    // Integer bounds of this work-item's chunk; `end` is exclusive.
    int start = id * offest + 1;
    int end = start + offest;

    double res = 0;
    double fact = 1;
    for (int i = start; i < end; i++)
    {
        fact *= i;            // overflows to +inf for long chunks; 1/inf then adds 0
        res += (1.0 / fact);
    }

    // Write back. No barrier is needed: nothing here is shared between work-items.
    result[id * 2] = res;
    result[id * 2 + 1] = fact;
}