OpenCL实现序列卷积
来源:互联网 发布:ubuntu 17.04 163源 编辑:程序博客网 时间:2024/06/06 08:34
上一篇博客讲解了卷积和滤波的区别,本文主要介绍如何利用OpenCL在GPU上实现序列卷积。
http://blog.csdn.net/u011028771/article/details/52733677
采用上文中的第一种方法实现:
host.c
#include<stdio.h>#include<CL/cl.h>#pragma warning( disable : 4996 )#define MATRIX_DIM 1*1024int main() { cl_int error; cl_platform_id platforms; cl_device_id devices; cl_context context; FILE *program_handle; size_t program_size; char *program_buffer; cl_program program; size_t log_size; char *program_log; char kernel_name[] = "createBuffer"; cl_kernel kernel; cl_command_queue queue; //获取平台和设备 error = clGetPlatformIDs(1, &platforms, NULL); error = clGetDeviceIDs(platforms, CL_DEVICE_TYPE_GPU, 1, &devices, NULL); if (error != 0) { printf("Get device failed!"); return -1; } //创建上下文 context = clCreateContext(NULL, 1, &devices, NULL, NULL, &error); if (error != 0) { printf("Creat context failed!"); return -1; } //创建程序 program_handle = fopen("kernel.cl", "rb"); if (program_handle == NULL) { printf("The kernle can not be opened!"); return -1; } fseek(program_handle, 0, SEEK_END); program_size = ftell(program_handle); rewind(program_handle); program_buffer = (char *)malloc(program_size + 1); program_buffer[program_size] = '\0'; error = fread(program_buffer, sizeof(char), program_size, program_handle); if (error == 0) { printf("Read kernel failed!"); return -1; } fclose(program_handle); program = clCreateProgramWithSource(context, 1, (const char **)&program_buffer, &program_size, &error); if (error < 0) { printf("Couldn't create the program!"); return -1; } free(program_buffer); //编译程序 error = clBuildProgram(program, 1, &devices, NULL, NULL, NULL); if (error < 0) { //确定日志文件的大小 clGetProgramBuildInfo(program, devices, CL_PROGRAM_BUILD_LOG, 0, NULL, &log_size); program_log = (char *)malloc(log_size + 1); program_log[log_size] = '\0'; //读取日志 clGetProgramBuildInfo(program, devices, CL_PROGRAM_BUILD_LOG, log_size + 1, program_log, NULL); printf("%s\n", program_log); getchar(); free(program_log); return -1; } //创建命令队列 queue = clCreateCommandQueue(context, devices, CL_QUEUE_PROFILING_ENABLE, &error); if (error < 0) { printf("Coudn't create the command queue"); return -1; } //创建内核 
kernel = clCreateKernel(program, kernel_name, &error); if (kernel == NULL) { printf("Couldn't create kernel!\n"); return -1; } //初始化参数 float result[MATRIX_DIM + 31]; float a_in[32]; float b_in[MATRIX_DIM]; float c_in[MATRIX_DIM + 31]; for (int i = 0; i < MATRIX_DIM; i++) { b_in[i] = i; } for (int i = 0; i < MATRIX_DIM + 31; i++) { c_in[i] = 0; result[i] = 0; } for (int i = 0; i < 32; i++) { a_in[i] = i*1.0; } for (int j = 0; j < MATRIX_DIM+31 ; j++) { for (int k = 0; k < 1024; k++) { if ( (j - k) >= 0 && (j - k) < 32) { c_in[j] += a_in[j - k] * b_in[k]; } } } printf("\n"); //创建缓存对象 cl_mem memObject1 = clCreateBuffer(context, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, sizeof(float) * MATRIX_DIM, a_in, &error); if (error < 0) { printf("Creat memObject1 failed!\n"); return -1; } cl_mem memObject2 = clCreateBuffer(context, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, sizeof(float) * MATRIX_DIM, b_in, &error); if (error < 0) { printf("Creat memObject2 failed!\n"); return -1; } cl_mem memObject3 = clCreateBuffer(context, CL_MEM_WRITE_ONLY, sizeof(float) * MATRIX_DIM, NULL, &error); if (error < 0) { printf("Creat memObject3 failed!\n"); return -1; } //设置内核参数 error = clSetKernelArg(kernel, 0, sizeof(cl_mem), &memObject1); error |= clSetKernelArg(kernel, 1, sizeof(cl_mem), &memObject2); error |= clSetKernelArg(kernel, 2, sizeof(cl_mem), &memObject3); if (error != CL_SUCCESS) { printf("Error setting kernel arguments!\n"); return -1; } //执行内核 size_t globalWorkSize[1] = { 32 }; size_t localWorkSize[1] = { 32 }; error = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, globalWorkSize, localWorkSize, 0, NULL, NULL); if (error != CL_SUCCESS) { printf("Error queuing kernel for execution!\n"); return -1; } //读取执行结果 error = clEnqueueReadBuffer(queue, memObject3, CL_TRUE, 0, MATRIX_DIM * sizeof(float), result, 0, NULL, NULL); if (error != CL_SUCCESS) { printf("Error reading result buffer!\n"); return -1; } //显示结果 int check = 1; for (int i = 0; i < MATRIX_DIM; i++) { // printf("%f ", 
result[i]); if (c_in[i] != result[i]) { check = 0; } } printf("\n"); if (check) printf("successed!\n"); else printf("failed!\n"); clReleaseProgram(program); clReleaseContext(context); clReleaseKernel(kernel); clReleaseCommandQueue(queue); clReleaseMemObject(memObject1); clReleaseMemObject(memObject2); clReleaseMemObject(memObject3); return 0;}
kernel.cl
/*
 * kernel.cl - full 1-D convolution of a 32-tap sequence with a 1024-sample
 * sequence.
 *
 *   a_in   : TAPS_LEN input values (read only)
 *   b_in   : SIGNAL_LEN input values (read only)
 *   result : RESULT_LEN output values; every element is overwritten, so the
 *            buffer does not need to be zeroed beforehand, but it must be
 *            able to hold RESULT_LEN floats.
 *
 * Each work-item owns the output elements j = gid, gid + G, gid + 2G, ...
 * (G = global size) and computes each one completely. No two work-items
 * ever write the same element, so no synchronization is required, and the
 * kernel is correct for any 1-D global size.
 */
#define SIGNAL_LEN 1024
#define TAPS_LEN   32
#define RESULT_LEN (SIGNAL_LEN + TAPS_LEN - 1)

__kernel void createBuffer(__global const float *a_in,
                           __global const float *b_in,
                           __global float *result)
{
    const int gid = (int)get_global_id(0);
    const int stride = (int)get_global_size(0);

    for (int j = gid; j < RESULT_LEN; j += stride) {
        /* Clamp the signal index so that both b_in[s] and a_in[j - s]
         * stay inside their arrays. */
        const int s_first = (j - (TAPS_LEN - 1) > 0) ? j - (TAPS_LEN - 1) : 0;
        const int s_last = (j < SIGNAL_LEN - 1) ? j : SIGNAL_LEN - 1;

        float acc = 0.0f;
        for (int s = s_first; s <= s_last; s++) {
            acc += a_in[j - s] * b_in[s];
        }
        result[j] = acc;
    }
}
其中c_in是在主机端用C语言直接计算得到的卷积结果,用于验证kernel的计算结果是否正确。
代码写得不够规范,但作为实验,它实现了基本的功能。欢迎大家指正!
0 0
- OpenCL实现序列卷积
- 使用OpenCL+OpenCV实现图像卷积(一)
- 使用OpenCL+OpenCV实现图像卷积(二)
- 使用OpenCL+OpenCV实现图像卷积(三)
- C语言实现序列卷积
- OpenCL编程:图像卷积
- opencl _高斯核卷积
- OpenCL 优化后的卷积代码
- 实验一 离散时间序列卷积和MATLAB实现
- 离散序列的卷积
- OpenCL实现MapReduce算法
- 卷积实现
- openCL实现计算自然对数
- OpenCL之矩阵乘法实现
- opencl+opencv实现sobel算法
- OpenCL实现矩阵转置
- OpenCL
- OpenCL
- libev的使用
- Handlebars.js 预编译
- linux基础第一课
- typeof 的用法和用处
- ansys工作平面平移转动
- OpenCL实现序列卷积
- C. Polycarp at the Radio
- JAVA基本语法注意事项总结
- OJ-上海交大-1021. 从前有座山
- 2.第一个小程序(微信公众号开发实战)
- codeforces281CRectangle Puzzle+矩形旋转+面积交
- 289. Game of Life
- LayoutInflater的inflate()函数解释
- String类探讨