使用OpenCL+OpenCV实现图像卷积(二)
来源:互联网 发布:番茄时间管理 知乎 编辑:程序博客网 时间:2024/06/05 11:41
[题外话]近期申请了一个微信公众号:平凡程式人生。有兴趣的朋友可以关注,那里将会涉及更多更新OpenCL+OpenCV以及图像处理方面的文章。
3、kernel程序代码
Kernel程序是每个workitem需要执行的,它需要存储在以cl为后缀的文件中。该程序中kernel文件为ImageConvolution.cl。
Kernel内程序定义如下:
const sampler_t mysampler = CLK_ADDRESS_CLAMP_TO_EDGE | CLK_FILTER_NEAREST;
__kernel void convolution(
__read_only image2d_t sourceImage,
__write_only image2d_t outputImage,
int cols,
int rows,
__constant float*filter,
int filterWidth);
变量sourceImage和outputImage为输入、输出图像的buffer;
变量cols和rows是所需处理图像的宽度和高度;
变量*filter指向存储卷积核的buffer;
变量filterWidth为卷积核的宽度;
这里在程序作用域(kernel函数之外)定义了一个const的sampler_t变量,用于从输入图像对象中读取图像数据。如下面的code:
pixel = read_imageui(sourceImage, mysampler, coords);
Kernel程序定义如下:
// Program-scope sampler used for every image read in this file:
// integer (unnormalized) pixel coordinates, out-of-range coordinates are
// clamped to the nearest edge pixel, and no interpolation is performed.
const sampler_t mysampler = CLK_NORMALIZED_COORDS_FALSE |
                            CLK_ADDRESS_CLAMP_TO_EDGE |
                            CLK_FILTER_NEAREST;

// Convolves a single-channel unsigned-integer image with a square filter.
//
// Each work-item produces one output pixel at (get_global_id(0), get_global_id(1)).
//
//   sourceImage  image to read; only the 'x' component of each texel is used
//   outputImage  image to write; only the 'x' component carries a result
//   cols, rows   width and height of the area to process, in pixels
//   filter       filter coefficients, read sequentially row by row
//   filterWidth  width (and height) of the square filter
//
// NOTE: the loops visit (2 * (filterWidth / 2) + 1)^2 coefficients, so
// filterWidth is assumed to be odd; an even width would read past the end of
// a filterWidth * filterWidth coefficient array.
__kernel void convolution(
    __read_only image2d_t sourceImage,
    __write_only image2d_t outputImage,
    int cols,
    int rows,
    __constant float *filter,
    int filterWidth)
{
    // Store each work-item's unique row and column
    const int column = get_global_id(0);
    const int row = get_global_id(1);

    // Work-items outside the image have nothing to write, so skip the reads too.
    if (column >= cols || row >= rows) {
        return;
    }

    // Half the width of the filter is needed for indexing memory later
    const int halfWidth = filterWidth / 2;

    // Accumulate in float: the coefficients may be negative, and converting a
    // negative intermediate value back to an unsigned integer on every step
    // (as an unsigned accumulator would) has an undefined result.
    float sum = 0.0f;

    // Iterator for the filter
    int filterIdx = 0;

    // Iterate the filter rows
    for (int i = -halfWidth; i <= halfWidth; i++) {
        // Iterate over the filter columns
        for (int j = -halfWidth; j <= halfWidth; j++) {
            // All image reads return a four-element vector (uint4 here); for a
            // single-channel image the pixel value is in the 'x' component.
            const uint4 pixel =
                read_imageui(sourceImage, mysampler, (int2)(column + j, row + i));
            sum += (float)pixel.x * filter[filterIdx++];
        }
    }

    // Saturating conversion maps negative sums to 0 instead of wrapping around.
    const uint result = convert_uint_sat(sum);
    write_imageui(outputImage, (int2)(column, row), (uint4)(result, 0, 0, 0));
}
4、host端程序代码
Host端程序处理流程就是按照前面“程序设计”一节编写的。除了调用OpenCL+OpenCV的API函数,其他的地方都是按照C/C++语法编写的。
具体代码如下:
1.// ImageConvolution.cpp : 定义控制台应用程序的入口点。 2.// 3. 4.#include "stdafx.h" 5.#include <iostream> 6.#include <fstream> 7.#include <sstream> 8. 9.#include <opencv2/opencv.hpp> 10. 11.#ifdef __APPLE__ 12.#include <OpenCL/cl.h> 13.#else 14.#include <CL/cl.h> 15.#endif 16. 17.using namespace cv; 18. 19.void DisplayPlatformInfo( 20. cl_platform_id id, 21. cl_platform_info name, 22. std::string str) 23.{ 24. cl_int errNum; 25. std::size_t paramValueSize; 26. 27. errNum = clGetPlatformInfo(id, name, 0, NULL, ¶mValueSize); 28. if (errNum != CL_SUCCESS) { 29. std::cerr << "Failed to find OpenCL platform" 30. << str << "." << std::endl; 31. return; 32. } 33. 34. char *info = (char *)alloca(sizeof(char) * paramValueSize); 35. errNum = clGetPlatformInfo(id, name, paramValueSize, info, NULL); 36. if (errNum != CL_SUCCESS) { 37. std::cerr << "Failed to find OpenCL platform " 38. << str << "." << std::endl; 39. return; 40. } 41. 42. std::cout << "\t" << str << ":\t" << info << std::endl; 43. 44. return; 45.} 46. 47.int _tmain(int argc, _TCHAR* argv[]) 48.{ 49. cl_int ciErrNum; 50. const char *fileName = "ImageConvolution.cl"; 51. int width = 0, height = 0; 52. const char* imageName = "F:\\code\\pic\\test01.jpg"; 53. char *bufInput = NULL, *bufOutput = NULL; 54. const float filter[] = {-1,0,-1,0,4,0,-1,0,-1}; 55. 56. //read one jpeg pic and store it in a Mat variable. 57. Mat img = imread(imageName); 58. if (!img.data) { 59. std::cout << "fail to open the file:" << imageName << std::endl; 60. return -1; 61. } 62. 63. //the type of img is RGB, convert to gray image. 64. Mat imgGray; 65. cvtColor(img, imgGray, CV_BGR2GRAY); 66. width = imgGray.cols; 67. height = imgGray.rows; 68. std::cout << "picture width: " << width << ", height: " << height << std::endl; 69. 70. //save the source data of original gray image. 71. FILE *yuvFileOrg = NULL; 72. fopen_s(&yuvFileOrg, "gray_org.yuv", "wb"); 73. fwrite(imgGray.data, width * height * sizeof(unsigned char), 1, yuvFileOrg); 74. 
fclose(yuvFileOrg); 75. yuvFileOrg = NULL; 76. 77. //display the original gray image in a window. 78. namedWindow( imageName, CV_WINDOW_AUTOSIZE ); 79. imshow(imageName, imgGray); 80. //waitKey(0); 81. 82. //allocate the input buffer to store the original gray image 83. if (NULL == (bufInput = (char *)malloc(width * height * sizeof(char)))) { 84. std::cerr << "Failed to malloc buffer for input image. " << std::endl; 85. return -1; 86. } 87. 88. //allocate the output buffer to store the image rotated. 89. if (NULL == (bufOutput = (char *)malloc(width * height * sizeof(char)))) { 90. std::cerr << "Failed to malloc buffer for output image. " << std::endl; 91. return -1; 92. } 93. 94. //copy the data of gray image to the input buffer. initialize the output buffer by zero. 95. memcpy(bufInput, imgGray.data, width * height * sizeof(unsigned char)); 96. memset(bufOutput, 0x0, width * height * sizeof(unsigned char)); 97. 98. //use the first platform 99. cl_platform_id platform; 100. cl_platform_id *platforms; 101. cl_uint numPlatform; 102. ciErrNum = clGetPlatformIDs(0, NULL, &numPlatform); //get the number of platform 103. if (ciErrNum != CL_SUCCESS) { 104. std::cerr << "Failed to get the number of platform." << std::endl; 105. return -1; 106. } 107. std::cout << "The number of the platform is: " << numPlatform << std::endl; 108. 109. platforms = (cl_platform_id *)malloc(numPlatform * sizeof(cl_platform_id)); 110. ciErrNum = clGetPlatformIDs(numPlatform, platforms, NULL); 111. if (ciErrNum != CL_SUCCESS) { 112. std::cerr << "Failed to get the ID of platform." << std::endl; 113. return -1; 114. } 115. for(cl_uint i = 0; i < numPlatform; i++) { 116. std::cout << "The platform " << i << ":" << std::endl; 117. DisplayPlatformInfo(platforms[i], CL_PLATFORM_PROFILE, "CL_PLATFORM_PROFILE"); 118. DisplayPlatformInfo(platforms[i], CL_PLATFORM_VERSION, "CL_PLATFORM_VERSION"); 119. DisplayPlatformInfo(platforms[i], CL_PLATFORM_VENDOR, "CL_PLATFORM_VENDOR"); 120. 
DisplayPlatformInfo(platforms[i], CL_PLATFORM_EXTENSIONS, "CL_PLATFORM_EXTENSIONS"); 121. } 122. 123. platform = platforms[1]; //choose the platform of NVIDIA, support the OpenCL 1.2 124. 125. //use the first device 126. cl_device_id device; 127. ciErrNum = clGetDeviceIDs( 128. platform, 129. CL_DEVICE_TYPE_ALL, 130. 1, 131. &device, 132. NULL); 133. if (ciErrNum != CL_SUCCESS) { 134. std::cerr << "Failed to get the device." << std::endl; 135. return -1; 136. } 137. 138. cl_context_properties cps[3] = { 139. CL_CONTEXT_PLATFORM, (cl_context_properties)platform, 0 140. }; 141. //create the context 142. cl_context ctx = clCreateContext( 143. cps, 144. 1, 145. &device, 146. NULL, 147. NULL, 148. &ciErrNum); 149. if (ciErrNum != CL_SUCCESS) { 150. std::cerr << "Failed to create the context." << std::endl; 151. return -1; 152. } 153. 154. //create the command queue 155. cl_command_queue myqueue = clCreateCommandQueue( 156. ctx, 157. device, 158. 0, 159. &ciErrNum); 160. if (ciErrNum != CL_SUCCESS) { 161. std::cerr << "Failed to create the queue." << std::endl; 162. return -1; 163. } 164. 165. //the convolution filter is 3x3 166. int filterWidth = 3; 167. int filterSize = filterWidth * filterWidth; //assume a square kernel 168. 169. //the image format describes how the data will be stored in memory 170. cl_image_format format; 171. format.image_channel_order = CL_R; //single channel 172. format.image_channel_data_type = CL_UNSIGNED_INT8; //float data type 173. cl_int status; 174. cl_image_desc desc; 175. desc.image_type = CL_MEM_OBJECT_IMAGE2D; 176. desc.image_width = width; 177. desc.image_height = height; 178. desc.image_depth = 0; 179. desc.image_array_size = 0; 180. desc.image_row_pitch = 0; 181. desc.image_slice_pitch = 0; 182. desc.num_mip_levels = 0; 183. desc.num_samples = 0; 184. desc.buffer = NULL; 185. 186. //create the image buffers for input and output data. 187. 
cl_mem bufferSourceImage = clCreateImage(ctx, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, &format, &desc, bufInput, &status); 188. cl_mem bufferOutputImage = clCreateImage(ctx, CL_MEM_WRITE_ONLY, &format, &desc, bufOutput, &status); 189. 190. //create space for the 3x3 filter on the device 191. cl_mem bufferFilter = clCreateBuffer( 192. ctx, 193. 0, 194. filterSize * sizeof(float), 195. NULL, 196. NULL); 197. 198. //copy the source image to the device 199. size_t origin[3] = {0, 0, 0}; //offset within the image to copy from 200. size_t region[3] = {width, height, 1}; //elements to per dimension 201. 202. ciErrNum = clEnqueueWriteImage( 203. myqueue, 204. bufferSourceImage, 205. CL_FALSE, 206. origin, 207. region, 208. 0, 209. 0, 210. bufInput, 211. 0, 212. NULL, 213. NULL); 214. if (ciErrNum != CL_SUCCESS) { 215. std::cerr << "Failed to write the image to the device." << std::endl; 216. return -1; 217. } 218. 219. //copy the 3x3 filter to the device 220. ciErrNum = clEnqueueWriteBuffer( 221. myqueue, 222. bufferFilter, 223. CL_FALSE, 224. 0, 225. filterSize * sizeof(float), 226. filter, 227. 0, 228. NULL, 229. NULL); 230. if (ciErrNum != CL_SUCCESS) { 231. std::cerr << "Failed to write the filter to the device." << std::endl; 232. return -1; 233. } 234. 235. //open kernel file and read the content to a string variable. 236. std::ifstream kernelFile("ImageConvolution.cl", std::ios::in); 237. //std::ifstream kernelFile("ImageRotate.cl", std::ios::in); 238. if (!kernelFile.is_open()) { 239. std::cerr << "Failed to open file for reading: " << fileName << std::endl; 240. return NULL; 241. } 242. std::ostringstream oss; 243. oss << kernelFile.rdbuf(); 244. std::string srcStdStr = oss.str(); 245. const char *srcStr = srcStdStr.c_str(); 246. kernelFile.close(); 247. 248. //create the program with source code of kernel. 249. cl_program myprog = clCreateProgramWithSource( 250. ctx, 251. 1, 252. (const char**)&srcStr, 253. NULL, 254. &ciErrNum); 255. 
if (ciErrNum != CL_SUCCESS) { 256. std::cerr << "Failed to create the program." << std::endl; 257. return -1; 258. } 259. 260. //compile the program. passing NULL for the 'device_list' argument targets all devices in the context 261. ciErrNum = clBuildProgram(myprog, 0, NULL, NULL, NULL, NULL); 262. if (ciErrNum != CL_SUCCESS) { 263. std::cerr << "Failed to build the program." << std::endl; 264. return -1; 265. } 266. 267. //create the kernel 268. cl_kernel mykernel = clCreateKernel( 269. myprog, 270. "convolution", 271. &ciErrNum); 272. if (ciErrNum != CL_SUCCESS) { 273. std::cerr << "Failed to create the kernel." << std::endl; 274. return -1; 275. } 276. 277. //set the kernel arguments 278. clSetKernelArg(mykernel, 0, sizeof(cl_mem), (void *)&bufferSourceImage); 279. clSetKernelArg(mykernel, 1, sizeof(cl_mem), (void *)&bufferOutputImage); 280. clSetKernelArg(mykernel, 2, sizeof(cl_int), (void *)&width); 281. clSetKernelArg(mykernel, 3, sizeof(cl_int), (void *)&height); 282. clSetKernelArg(mykernel, 4, sizeof(cl_mem), (void *)&bufferFilter); 283. clSetKernelArg(mykernel, 5, sizeof(cl_int), (void *)&filterWidth); 284. 285. //set local and global workgroup sizes 286. size_t localws[2] = {1, 1}; 287. size_t globalws[2] = {width, height}; 288. 289. //execute the kernel 290. ciErrNum = clEnqueueNDRangeKernel( 291. myqueue, 292. mykernel, 293. 2, 294. NULL, 295. globalws, 296. localws, 297. 0, 298. NULL, 299. NULL); 300. if (ciErrNum != CL_SUCCESS) { 301. std::cerr << "Failed to execute the kernel." << std::endl; 302. return -1; 303. } 304. 305. //read the output data back to the host 306. ciErrNum = clEnqueueReadImage( 307. myqueue, 308. bufferOutputImage, 309. CL_TRUE, 310. origin, 311. region, 312. 0, 313. 0, 314. bufOutput, 315. 0, 316. NULL, 317. NULL); 318. if (ciErrNum != CL_SUCCESS) { 319. std::cerr << "Failed to read the image from the device." << std::endl; 320. return -1; 321. } 322. 323. //copy the output data from output buffer to Mat variable. 324. 
memcpy(imgGray.data, bufOutput, width * height * sizeof(unsigned char)); 325. 326. //save the source data for gray image rotated 327. FILE *yuvFile = NULL; 328. fopen_s(&yuvFile, "gray.yuv", "wb"); 329. fwrite(imgGray.data, width * height * sizeof(unsigned char), 1, yuvFile); 330. fclose(yuvFile); 331. yuvFile = NULL; 332. 333. //save the gray image rotated. 334. imwrite("test_gray.jpg", imgGray); 335. 336. //show the gray image rotated. 337. const char *winName = "gray_image_convolution"; 338. namedWindow(winName, CV_WINDOW_AUTOSIZE ); 339. imshow(winName, imgGray); 340. waitKey(0); 341. destroyAllWindows(); 342. 343. //release all resource 344. if (bufInput != NULL) 345. free(bufInput); 346. 347. if (bufOutput != NULL) 348. free(bufOutput); 349. 350. if (bufferSourceImage != 0) 351. clReleaseMemObject(bufferSourceImage); 352. 353. if (bufferOutputImage != 0) 354. clReleaseMemObject(bufferOutputImage); 355. 356. if (bufferFilter != 0) 357. clReleaseMemObject(bufferFilter); 358. 359. if (myqueue != 0) 360. clReleaseCommandQueue(myqueue); 361. 362. if (mykernel != 0) 363. clReleaseKernel(mykernel); 364. 365. if (myprog != 0) 366. clReleaseProgram(myprog); 367. 368. if (ctx != 0) 369. clReleaseContext(ctx); 370. 371. return 0; 372.}
(未完待续)
- 使用OpenCL+OpenCV实现图像卷积(二)
- 使用OpenCL+OpenCV实现图像卷积(一)
- 使用OpenCL+OpenCV实现图像卷积(三)
- 使用OpenCL+OpenCV实现图像旋转(二)
- 使用OpenCL+OpenCV实现图像旋转(一)
- OpenCL编程:图像卷积
- OpenCL实现序列卷积
- opencv filter2D函数实现图像卷积
- OpenCV图像变换(一)卷积
- OPENCV图像卷积运算
- opencv图像处理-卷积
- AM5728/AM57XX openCV使用opencl案例二(Canny算法)
- Opencv 实现图像的离散傅里叶变换(DFT)、卷积运算(相关滤波)
- Opencv 实现图像的离散傅里叶变换(DFT)、卷积运算(相关滤波)
- Opencv 实现图像的离散傅里叶变换(DFT)、卷积运算(相关滤波)
- 通过傅里叶变换方法求图像卷积-OpenCV实现
- OpenCV官方文档研究(二)卷积
- opencv图像处理8-卷积
- codeforces 814B An express train to reveries
- 记第一次codeforces比赛经历
- DispatcherServlet 源码分析(三)
- MySQL 5.7 版本的安装及简单使用(图文教程)
- 计算机原码、反码、补码详解
- 使用OpenCL+OpenCV实现图像卷积(二)
- Unity说明文档翻译-Script Execution Order Settings
- MySQL各个版本的区别
- 发送有序广播
- codeforces 814D An overnight dance in discotheque
- 【正一专栏】我的高考作文——在广州的乡下生活
- Jquery使用
- BS与CS的联系与区别
- CDN 资源分发性能优越的网站