OpenCL 矩阵点乘运算及遇到的问题总结

来源:互联网 发布:认真对待权利 知乎 编辑:程序博客网 时间:2024/04/28 18:41

matvec

#define PROGRAM_FILE "matvec.cl"#define KERNEL_FUNC "matvec_mult"#include <stdio.h>#include<stdlib.h>#include <string.h>#include<sys/types.h>#include <CL/cl.h>int main(){cl_platform_id platform;cl_device_id device;cl_context context;cl_command_queue queue;cl_int i,err;cl_program program;FILE* program_handle;char* program_buffer,*program_log;    size_t program_size,log_size;cl_kernel kernel;size_t work_units_per_kernel;float mat[16],vec[4],result[4];//计算矩阵乘法的因子及最后的结果float correct[4] = {0.0f,0.0f,0.0f,0.0f};cl_mem mat_buff,vec_buff,res_buff;//内核参数for(i = 0;i<16;i++){mat[i]=i*2.0f;}for(i=0;i<4;i++){vec[i]=i*3.0f;correct[0]+=mat[i]*vec[i];correct[1]+=mat[i+4]*vec[i];correct[2]+=mat[i+8]*vec[i];correct[3]+=mat[i+12]*vec[i];}//配置平台,配置上下文clGetPlatformIDs(1,&platform,NULL);clGetDeviceIDs(platform,CL_DEVICE_TYPE_GPU,1,&device,NULL);context = clCreateContext(NULL,1,&device,NULL,NULL,&err);//读取程序文件program_handle = fopen(PROGRAM_FILE,"r");fseek(program_handle,0,SEEK_END);program_size = ftell(program_handle);rewind(program_handle);program_buffer = (char*)malloc(program_size+1);program_buffer[program_size] = '\0';fread(program_buffer,sizeof(char),program_size,program_handle);fclose(program_handle);//编译程序program = clCreateProgramWithSource(context,1,(const char**)&program_buffer,&program_size,&err);free(program_buffer);//创建内核队列clBuildProgram(program,0,NULL,NULL,NULL,NULL);//创建内核队列kernel = clCreateKernel(program,KERNEL_FUNC,&err);queue = clCreateCommandQueue(context,device,0,&err);//设置内核参数mat_buff = clCreateBuffer(context,CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR,sizeof(float)*16,mat,&err);vec_buff = clCreateBuffer(context,CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR,sizeof(float)*4,vec,&err);res_buff = clCreateBuffer(context,CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR,sizeof(float)*4,vec,&err);clSetKernelArg(kernel,0,sizeof(cl_mem),&mat_buff);clSetKernelArg(kernel,1,sizeof(cl_mem),&vec_buff);clSetKernelArg(kernel,2,sizeof(cl_mem),&res_buff);work_units_per_kernel = 
4;clEnqueueNDRangeKernel(queue,kernel,1,NULL,&work_units_per_kernel,NULL,0,NULL,NULL);clEnqueueReadBuffer(queue,res_buff,CL_TRUE,0,sizeof(float)*4,result,0,NULL,NULL);if(result[0]==correct[0]&&result[1]==correct[1]&&result[2]==correct[2]&&result[3]==correct[3]){printf("Matrix-vector multiplication successful.\n");}else{printf("Matrix-vector multiplication unsuccessful.\n");}clReleaseMemObject(res_buff);clReleaseMemObject(vec_buff);clReleaseMemObject(mat_buff);clReleaseKernel(kernel);clReleaseCommandQueue(queue);clReleaseProgram(program);clReleaseContext(context);system("pause");return 0;}

matvec.cl
/*
 * Matrix-vector product, one work-item per matrix row.
 *
 * matrix: rows of the matrix, each read as one float4.
 * vector: only element 0 (a single float4) is read.
 * result: element get_global_id(0) receives the dot product of that
 *         row with the vector.
 */
__kernel void matvec_mult(__global float4* matrix,
                          __global float4* vector,
                          __global float* result)
{
    int row = get_global_id(0);
    float4 row_values = matrix[row];

    result[row] = dot(row_values, vector[0]);
}
注意:在 VS 下保存 .cl 文件时,要在“高级保存选项”中把编码选为 Unicode (UTF-8)(建议选择“无签名”即不带 BOM 的那一项),行尾选为 Unix (LF);否则文件开头的 BOM 或 CRLF 换行可能导致内核源码编译失败。

0 0
原创粉丝点击