OpenCL实例
来源:互联网 发布:知世头像 编辑:程序博客网 时间:2024/06/03 18:52
OpenCL 培训实例,下面是主程序(main 函数)。
#include <math.h>#include "CL\opencl.h"#include "utility.h"static const size_t vectorSize = 4096; //must be evenly disible by workSizestatic const size_t workSize = 256;//#define EXERCISE1int main(void){cl_int err;//Setup Platformcl_uint myPlatformCount;////////////// Exercise 1 Step 2.2 err = clGetPlatformIDs(0, NULL, &myPlatformCount);//Get Platform IDcl_platform_id myPlatform;////////////// Exercise 1 Step 2.3 err = clGetPlatformIDs(1, &myPlatform, NULL);assert(err==CL_SUCCESS);print_platform_info(&myPlatform);//Setup Devicecl_uint myDeviceCount;////////////// Exercise 1 Step 2.4err = clGetDeviceIDs(myPlatform, CL_DEVICE_TYPE_ALL, 0, NULL, &myDeviceCount);//Get Device IDcl_device_id myDevice;////////////// Exercise 1 Step 2.5 err = clGetDeviceIDs(myPlatform, CL_DEVICE_TYPE_ALL, 1, &myDevice, NULL);assert(err==CL_SUCCESS);print_device_info(&myDevice);//Create Context////////////// Exercise 1 Step 2.6 cl_context context = clCreateContext(0, 1, &myDevice, NULL, NULL, &err);assert(err==CL_SUCCESS);//Create Command queue////////////// Exercise 1 Step 2.7cl_command_queue queue = clCreateCommandQueue(context, myDevice, 0, &err);assert(err==CL_SUCCESS);////////////// Exercise 1 Step 2.8cl_mem kernelIn = clCreateBuffer(context, CL_MEM_READ_ONLY, sizeof(cl_float) * vectorSize, NULL, &err);assert(err==CL_SUCCESS);cl_mem kernelIn2 = clCreateBuffer(context, CL_MEM_READ_ONLY, sizeof(cl_float) * vectorSize, NULL, &err);assert(err==CL_SUCCESS);cl_mem kernelOut = clCreateBuffer(context, CL_MEM_WRITE_ONLY, sizeof(cl_float) * vectorSize, NULL, &err);assert(err==CL_SUCCESS);//Inputs and Outputs to Kernel, X and Y are inputs, Z is outputvoid *X, *Y, *Z;//Allocates memory with value from 0 to 1000float LO= 0; float HI=1000;allocate_generate(&X, &Y, &Z, LO, HI, vectorSize);//Create Buffers for input and output//Write data to device////////////// Exercise 1 Step 2.9err = clEnqueueWriteBuffer(queue, kernelIn, CL_FALSE, 0, sizeof(cl_float) * vectorSize, X, 0, NULL, NULL);err = 
clEnqueueWriteBuffer(queue, kernelIn2, CL_FALSE, 0, sizeof(cl_float) * vectorSize, Y, 0, NULL, NULL);clFinish(queue);assert(err==CL_SUCCESS);#ifndef EXERCISE1// create the kernelconst char *kernel_name = "SimpleKernel";size_t lengths;unsigned char* binaries = get_binary("SimpleKernel.aocx", &lengths);cl_int kernel_status;// Create the Program from the AOCX file.////////////////////// Exercise 2 Step 2.3 ///////////////////cl_program program = clCreateProgramWithBinary(context, 1, &myDevice, &lengths, (const unsigned char**)&binaries, &kernel_status, &err);assert(err==CL_SUCCESS); // build the program////////////// Compile the Kernel.... For Altera, nothing is done here, but this comforms to the standard////////////// Exercise 2 Step 2.4 ///////////////////err = clBuildProgram(program, 1, &myDevice, "", NULL, NULL);assert(err==CL_SUCCESS);// create the kernel////////////// Find Kernel in Program////////////// Exercise 2 Step 2.5 ///////////////////cl_kernel kernel = clCreateKernel(program, kernel_name, &err);assert(err==CL_SUCCESS);////////////// Set Arguments to the Kernels////////////// Exercise 2 Step 2.6 ///////////////////err = clSetKernelArg(kernel, 0, sizeof(cl_mem), (void*)&kernelIn);assert(err==CL_SUCCESS);err = clSetKernelArg(kernel, 1, sizeof(cl_mem), (void*)&kernelIn2);assert(err==CL_SUCCESS);err = clSetKernelArg(kernel, 2, sizeof(cl_mem), (void*)&kernelOut);assert(err==CL_SUCCESS);printf("\nLaunching the kernel...\n");// launch kernel////////////// Exercise 2 Step 2.7 ///////////////////err = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, &vectorSize, &workSize, 0, NULL, NULL);clFinish(queue);assert(err==CL_SUCCESS);// read the output////////////// Exercise 2 Step 2.8 ///////////////////err = clEnqueueReadBuffer(queue, kernelOut, CL_TRUE, 0, sizeof(cl_float) * vectorSize, Z, 0, NULL, NULL);assert(err==CL_SUCCESS);void * CalcZ = malloc(sizeof(float)*vectorSize);for (int i=0; i<vectorSize; i++){////////////// Equivalent Code runnign on CPUs////////////// 
Exercise 2 Step 2.9 ///////////////////((float*) CalcZ)[i]=sin(((float*) X)[i] + ((float*) Y)[i]); }//Print Performance Resultsverification (X, Y, Z, CalcZ, vectorSize);// Clean up Stuffif(kernel) clReleaseKernel(kernel); if(program) clReleaseProgram(program);if(queue) clReleaseCommandQueue(queue);if(context) clReleaseContext(context);if(kernelIn) clReleaseMemObject(kernelIn);if(kernelOut) clReleaseMemObject(kernelOut);if(X) free (X);if(Y) free (Y);if(Z) free (Z);if(CalcZ) free (CalcZ);#endif return 1;}
下面是辅助函数所在的 utility.c 文件。
// This file#include "utility.h"#include <math.h>// unsigned char* get_binary(const char * name, size_t* length){FILE *fp = fopen(name, "rb");assert (fp != NULL);fseek (fp, 0, SEEK_END);*length = ftell (fp);unsigned char *binaries = (unsigned char*)malloc(sizeof(unsigned char) **length);rewind (fp);fread (binaries, *length, 1, fp);fclose (fp);return binaries;}void print_platform_info(cl_platform_id* myPlatform){cl_int err;//Grab Platform Infochar myPlatformName[128];char myPlatformProfile[128];char myPlatformVersion[128];char myPlatformVendor[128];err = clGetPlatformInfo(*myPlatform, CL_PLATFORM_NAME, 128 * sizeof(char), myPlatformName, NULL);err = clGetPlatformInfo(*myPlatform, CL_PLATFORM_PROFILE, 128 * sizeof(char), myPlatformProfile, NULL);err = clGetPlatformInfo(*myPlatform, CL_PLATFORM_VERSION, 128 * sizeof(char), myPlatformVersion, NULL);err = clGetPlatformInfo(*myPlatform, CL_PLATFORM_VENDOR, 128 * sizeof(char), myPlatformVendor, NULL);printf ("\nPlatform Name: %s\n", myPlatformName);printf ("Platform Profile: %s\n", myPlatformProfile);printf ("Platform Version: %s\n", myPlatformVersion);printf ("Platform Vendor: %s\n", myPlatformVendor);}void print_device_info(cl_device_id* myDevice){cl_int err;//Get Device Propertieschar myDeviceVendor[128];cl_uint myDeviceMaxCU;cl_uint myDeviceMaxWID;char myDeviceName[128];char myDeviceVersion[128];cl_bool myDeviceAvailable;err = clGetDeviceInfo(*myDevice, CL_DEVICE_NAME, 128 * sizeof(char), myDeviceName, NULL);err = clGetDeviceInfo(*myDevice, CL_DEVICE_VENDOR, 128 * sizeof(char), myDeviceVendor, NULL);err = clGetDeviceInfo(*myDevice, CL_DEVICE_MAX_COMPUTE_UNITS, sizeof(cl_uint), &myDeviceMaxCU, NULL);err = clGetDeviceInfo(*myDevice, CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS, sizeof(cl_uint), &myDeviceMaxWID, NULL);err = clGetDeviceInfo(*myDevice, CL_DEVICE_VERSION, 128 * sizeof(char), myDeviceVersion, NULL);err = clGetDeviceInfo(*myDevice, CL_DEVICE_AVAILABLE, sizeof(cl_bool), &myDeviceAvailable, NULL);printf ("\nDevice Name: 
%s\n", myDeviceName);printf ("Device Vendor: %s\n", myDeviceVendor);printf ("Device Version: %s\n", myDeviceVersion);printf ("Device Available: %d\n", myDeviceAvailable);printf ("Device Max Compute Units: %d\n", myDeviceMaxCU);printf ("Device Max Work Item Dimensions: %d\n", myDeviceMaxWID);}void allocate_generate(void** X, void** Y, void** Z, float LO, float HI, size_t vectorSize){// allocate and initialize the input and output vectors*X = malloc(sizeof(float)*vectorSize);*Y = malloc(sizeof(float)*vectorSize);*Z = malloc(sizeof(float)*vectorSize);//Assigns randome number from LO to HI to all locatoin of X and Yfor (int i = 0; i < vectorSize; ++i) {((float *) *X)[i] = LO + (float)rand()/((float)RAND_MAX/(HI-LO));((float *) *Y)[i] = LO + (float)rand()/((float)RAND_MAX/(HI-LO));}}bool verification (void * X, void * Y, void * Z, void * CalcZ, size_t vectorSize){//Verify if OpenCL Calculation is Same as C Resultfor(int i = 0; i < vectorSize-4; i++) {if(fabs(((float*)CalcZ)[i] - ((float*)Z)[i]) > EPSILON) { printf("\nVERIFICATION FAILED! index %d, X:%f, Y:%f, OpenCL Result:%f != Result %f)", i, ((float*)X)[i], ((float*)Y)[i], ((float*)Z)[i], ((float*)CalcZ)[i]); return false;} }// Print 10 Sample Data to Standard Outprintf("\n\nVERIFICATION PASSED!!!\n\nSome Sample of Results\n");printf("------------------------------------\n");for (int i = 0; i < (int)vectorSize; i=i+((int)vectorSize)/5) {printf("Index %d: Input 1 is %f, Input 2 is %f, Result is %f\n", i, ((float*)X)[i], ((float*)Y)[i], ((float*)Z)[i]);}return true;}
需要原工程可以留下邮箱。
阅读全文
0 0
- OpenCL实例
- OpenCL程序实例
- opencl fft实例整理
- opencl 实例源码
- intel opencl hello程序实例
- OpenCL编程实例: 向量计算
- GPGPU OpenCL编程步骤与简单实例
- GPGPU OpenCL编程步骤与简单实例
- GPGPU OpenCL编程步骤与简单实例
- GPGPU OpenCL编程步骤与简单实例
- OpenCL
- OpenCL
- OpenCL
- OpenCL
- opencl
- OpenCL
- OpenCL
- OpenCL
- linux常用指令
- input框focus时的美化效果
- Python2和Python3的区别
- 【OpenCV入门教程之四】 ROI区域图像叠加&初级图像混合 全剖析
- hadoop2任务提交过程
- OpenCL实例
- hibernate
- centos7 下安装curl 和 composer
- 一致性HASH算法详解
- 在Github和Git上fork之简单指南
- opencv 内存图像操作
- HashMap、HashSet、TreeMap、TreeSet判断元素是否存在的逻辑
- 复仇之路——我一定要学会linux系统
- 1057. 数零壹(20)