OpenCL实例

来源:互联网 发布:知世头像 编辑:程序博客网 时间:2024/06/03 18:52
OpenCL培训实例，下面是main函数。
#include <math.h>#include "CL\opencl.h"#include "utility.h"static const size_t vectorSize = 4096; //must be evenly disible by workSizestatic const size_t workSize = 256;//#define EXERCISE1int main(void){cl_int err;//Setup Platformcl_uint myPlatformCount;////////////// Exercise 1 Step 2.2 err = clGetPlatformIDs(0, NULL, &myPlatformCount);//Get Platform IDcl_platform_id myPlatform;////////////// Exercise 1 Step 2.3 err = clGetPlatformIDs(1, &myPlatform, NULL);assert(err==CL_SUCCESS);print_platform_info(&myPlatform);//Setup Devicecl_uint myDeviceCount;////////////// Exercise 1 Step 2.4err = clGetDeviceIDs(myPlatform, CL_DEVICE_TYPE_ALL, 0, NULL, &myDeviceCount);//Get Device IDcl_device_id myDevice;////////////// Exercise 1 Step 2.5 err = clGetDeviceIDs(myPlatform, CL_DEVICE_TYPE_ALL, 1, &myDevice, NULL);assert(err==CL_SUCCESS);print_device_info(&myDevice);//Create Context////////////// Exercise 1 Step 2.6 cl_context context = clCreateContext(0, 1, &myDevice, NULL, NULL, &err);assert(err==CL_SUCCESS);//Create Command queue////////////// Exercise 1 Step 2.7cl_command_queue queue = clCreateCommandQueue(context, myDevice, 0, &err);assert(err==CL_SUCCESS);////////////// Exercise 1 Step 2.8cl_mem kernelIn = clCreateBuffer(context, CL_MEM_READ_ONLY, sizeof(cl_float) * vectorSize, NULL, &err);assert(err==CL_SUCCESS);cl_mem kernelIn2 = clCreateBuffer(context, CL_MEM_READ_ONLY, sizeof(cl_float) * vectorSize, NULL, &err);assert(err==CL_SUCCESS);cl_mem kernelOut = clCreateBuffer(context, CL_MEM_WRITE_ONLY, sizeof(cl_float) * vectorSize, NULL, &err);assert(err==CL_SUCCESS);//Inputs and Outputs to Kernel, X and Y are inputs, Z is outputvoid *X, *Y, *Z;//Allocates memory with value from 0 to 1000float LO= 0;   float HI=1000;allocate_generate(&X, &Y, &Z, LO, HI, vectorSize);//Create Buffers for input and output//Write data to device////////////// Exercise 1 Step 2.9err = clEnqueueWriteBuffer(queue, kernelIn, CL_FALSE, 0, sizeof(cl_float) * vectorSize, X, 0, NULL, NULL);err = 
clEnqueueWriteBuffer(queue, kernelIn2, CL_FALSE, 0, sizeof(cl_float) * vectorSize, Y, 0, NULL, NULL);clFinish(queue);assert(err==CL_SUCCESS);#ifndef EXERCISE1// create the kernelconst char *kernel_name = "SimpleKernel";size_t lengths;unsigned char* binaries = get_binary("SimpleKernel.aocx", &lengths);cl_int kernel_status;// Create the Program from the AOCX file.////////////////////// Exercise 2 Step 2.3    ///////////////////cl_program program = clCreateProgramWithBinary(context, 1, &myDevice, &lengths, (const unsigned char**)&binaries, &kernel_status, &err);assert(err==CL_SUCCESS);  // build the program//////////////      Compile the Kernel.... For Altera, nothing is done here, but this comforms to the standard//////////////       Exercise 2   Step 2.4    ///////////////////err = clBuildProgram(program, 1, &myDevice, "", NULL, NULL);assert(err==CL_SUCCESS);// create the kernel//////////////       Find Kernel in Program//////////////       Exercise 2   Step 2.5    ///////////////////cl_kernel kernel = clCreateKernel(program, kernel_name, &err);assert(err==CL_SUCCESS);//////////////     Set Arguments to the Kernels//////////////       Exercise 2   Step 2.6    ///////////////////err = clSetKernelArg(kernel, 0, sizeof(cl_mem), (void*)&kernelIn);assert(err==CL_SUCCESS);err = clSetKernelArg(kernel, 1, sizeof(cl_mem), (void*)&kernelIn2);assert(err==CL_SUCCESS);err = clSetKernelArg(kernel, 2, sizeof(cl_mem), (void*)&kernelOut);assert(err==CL_SUCCESS);printf("\nLaunching the kernel...\n");// launch kernel//////////////       Exercise 2   Step 2.7    ///////////////////err = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, &vectorSize, &workSize, 0, NULL, NULL);clFinish(queue);assert(err==CL_SUCCESS);// read the output//////////////       Exercise 2   Step 2.8    ///////////////////err = clEnqueueReadBuffer(queue, kernelOut, CL_TRUE, 0, sizeof(cl_float) * vectorSize, Z, 0, NULL, NULL);assert(err==CL_SUCCESS);void * CalcZ = malloc(sizeof(float)*vectorSize);for (int i=0; 
i<vectorSize; i++){//////////////  Equivalent Code runnign on CPUs//////////////       Exercise 2   Step 2.9    ///////////////////((float*) CalcZ)[i]=sin(((float*) X)[i] + ((float*) Y)[i]); }//Print Performance Resultsverification (X, Y, Z, CalcZ, vectorSize);// Clean up Stuffif(kernel) clReleaseKernel(kernel);  if(program) clReleaseProgram(program);if(queue) clReleaseCommandQueue(queue);if(context) clReleaseContext(context);if(kernelIn) clReleaseMemObject(kernelIn);if(kernelOut) clReleaseMemObject(kernelOut);if(X) free (X);if(Y) free (Y);if(Z) free (Z);if(CalcZ) free (CalcZ);#endif    return 1;}


下面是子函数utility.c文件。
 

// This file#include "utility.h"#include <math.h>// unsigned char* get_binary(const char * name, size_t* length){FILE *fp = fopen(name, "rb");assert (fp != NULL);fseek (fp, 0, SEEK_END);*length = ftell (fp);unsigned char *binaries = (unsigned char*)malloc(sizeof(unsigned char) **length);rewind (fp);fread (binaries, *length, 1, fp);fclose (fp);return binaries;}void print_platform_info(cl_platform_id* myPlatform){cl_int err;//Grab Platform Infochar myPlatformName[128];char myPlatformProfile[128];char myPlatformVersion[128];char myPlatformVendor[128];err = clGetPlatformInfo(*myPlatform, CL_PLATFORM_NAME, 128 * sizeof(char), myPlatformName, NULL);err = clGetPlatformInfo(*myPlatform, CL_PLATFORM_PROFILE, 128 * sizeof(char), myPlatformProfile, NULL);err = clGetPlatformInfo(*myPlatform, CL_PLATFORM_VERSION, 128 * sizeof(char), myPlatformVersion, NULL);err = clGetPlatformInfo(*myPlatform, CL_PLATFORM_VENDOR, 128 * sizeof(char), myPlatformVendor, NULL);printf ("\nPlatform Name: %s\n", myPlatformName);printf ("Platform Profile: %s\n", myPlatformProfile);printf ("Platform Version: %s\n", myPlatformVersion);printf ("Platform Vendor: %s\n", myPlatformVendor);}void print_device_info(cl_device_id* myDevice){cl_int err;//Get Device Propertieschar myDeviceVendor[128];cl_uint myDeviceMaxCU;cl_uint myDeviceMaxWID;char myDeviceName[128];char myDeviceVersion[128];cl_bool myDeviceAvailable;err = clGetDeviceInfo(*myDevice, CL_DEVICE_NAME, 128 * sizeof(char), myDeviceName, NULL);err = clGetDeviceInfo(*myDevice, CL_DEVICE_VENDOR, 128 * sizeof(char), myDeviceVendor, NULL);err = clGetDeviceInfo(*myDevice, CL_DEVICE_MAX_COMPUTE_UNITS, sizeof(cl_uint), &myDeviceMaxCU, NULL);err = clGetDeviceInfo(*myDevice, CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS, sizeof(cl_uint), &myDeviceMaxWID, NULL);err = clGetDeviceInfo(*myDevice, CL_DEVICE_VERSION, 128 * sizeof(char), myDeviceVersion, NULL);err = clGetDeviceInfo(*myDevice, CL_DEVICE_AVAILABLE, sizeof(cl_bool), &myDeviceAvailable, NULL);printf ("\nDevice Name: 
%s\n", myDeviceName);printf ("Device Vendor: %s\n", myDeviceVendor);printf ("Device Version: %s\n", myDeviceVersion);printf ("Device Available: %d\n", myDeviceAvailable);printf ("Device Max Compute Units: %d\n", myDeviceMaxCU);printf ("Device Max Work Item Dimensions: %d\n", myDeviceMaxWID);}void allocate_generate(void** X, void** Y, void** Z, float LO, float HI, size_t vectorSize){// allocate and initialize the input and output vectors*X = malloc(sizeof(float)*vectorSize);*Y = malloc(sizeof(float)*vectorSize);*Z = malloc(sizeof(float)*vectorSize);//Assigns randome number from LO to HI to all locatoin of X and Yfor (int i = 0; i < vectorSize; ++i) {((float *) *X)[i] =  LO + (float)rand()/((float)RAND_MAX/(HI-LO));((float *) *Y)[i] =  LO + (float)rand()/((float)RAND_MAX/(HI-LO));}}bool verification (void * X, void * Y, void * Z, void * CalcZ, size_t vectorSize){//Verify if OpenCL Calculation is Same as C Resultfor(int i = 0; i < vectorSize-4; i++) {if(fabs(((float*)CalcZ)[i] - ((float*)Z)[i]) > EPSILON) { printf("\nVERIFICATION FAILED! index %d, X:%f, Y:%f, OpenCL Result:%f != Result %f)",  i, ((float*)X)[i], ((float*)Y)[i], ((float*)Z)[i], ((float*)CalcZ)[i]); return false;}    }// Print 10 Sample Data to Standard Outprintf("\n\nVERIFICATION PASSED!!!\n\nSome Sample of Results\n");printf("------------------------------------\n");for (int i = 0; i < (int)vectorSize; i=i+((int)vectorSize)/5) {printf("Index %d: Input 1 is %f, Input 2 is %f, Result is %f\n", i, ((float*)X)[i], ((float*)Y)[i], ((float*)Z)[i]);}return true;}
需要原工程可以留下邮箱。