使用OpenCL+OpenCV实现图像卷积（二）

来源：互联网发布：番茄时间管理知乎编辑：程序博客网时间：2024/06/05 11:41

[题外话]近期申请了一个微信公众号：平凡程式人生。有兴趣的朋友可以关注，那里将会涉及更多更新OpenCL+OpenCV以及图像处理方面的文章。

3、kernel程序代码

Kernel程序是每个workitem需要执行的，它需要存储在以cl为后缀的文件中。该程序中kernel文件为ImageConvolution.cl。

Kernel内程序定义如下：

const sampler_t mysampler = CLK_ADDRESS_CLAMP_TO_EDGE | CLK_FILTER_NEAREST;

__kernel void convolution(

__read_only image2d_t sourceImage,

__write_only image2d_t outputImage,

int cols,

int rows,

__constant float*filter,

int filterWidth);

变量sourceImage和outputImage为输入、输出图像的buffer；

变量cols和rows是所需处理图像的宽度和高度；

变量*filter指向存储卷积核的buffer；

变量filterWidth为卷积核的宽度；

这里在程序作用域（kernel函数之外）定义了一个sampler_t类型的常量mysampler，用于从输入图像buffer中读取图像数据。如下面的code：

pixel = read_imageui(sourceImage, mysampler, coords);

Kernel程序定义如下：

// Sampler used for every image read in this program:
//  - unnormalized (integer pixel) coordinates,
//  - out-of-range coordinates are clamped to the nearest edge pixel,
//  - nearest-neighbour lookup (no interpolation).
const sampler_t mysampler = CLK_NORMALIZED_COORDS_FALSE |
                            CLK_ADDRESS_CLAMP_TO_EDGE |
                            CLK_FILTER_NEAREST;

// Convolves a single-channel unsigned-integer image with a square filter.
//
// One work-item produces one output pixel; get_global_id(0) is the column
// and get_global_id(1) is the row.
//
//   sourceImage  input image, read with read_imageui (only .x is used)
//   outputImage  output image, written with write_imageui
//   cols, rows   width and height of the image in pixels
//   filter       filter coefficients, read sequentially row by row
//   filterWidth  width of the square filter; the loops visit
//                (2 * (filterWidth / 2) + 1)^2 coefficients, so an even
//                width would read past filterWidth * filterWidth entries --
//                pass an odd width
//
// The weighted sum is accumulated as float because the coefficients may be
// negative; converting a negative float to uint is undefined in OpenCL C.
// The final value is clamped to [0, 255] before it is written.
__kernel void convolution(
    __read_only image2d_t sourceImage,
    __write_only image2d_t outputImage,
    int cols,
    int rows,
    __constant float *filter,
    int filterWidth)
{
    // Each work-item's unique column and row.
    const int column = get_global_id(0);
    const int row = get_global_id(1);

    // Work-items outside the image have nothing to write, so skip the reads.
    if (row >= rows || column >= cols) {
        return;
    }

    // Half the width of the filter gives the extent of the neighbourhood.
    const int halfWidth = filterWidth / 2;

    float sum = 0.0f;
    int filterIdx = 0;

    // Iterate over the filter rows.
    for (int i = -halfWidth; i <= halfWidth; i++) {
        // Iterate over the filter columns.
        for (int j = -halfWidth; j <= halfWidth; j++) {
            // Image reads always return a four-element vector (uint4 here);
            // a single-channel image stores its value in the 'x' component.
            // Coordinates outside the image are clamped by the sampler.
            const int2 coords = (int2)(column + j, row + i);
            const uint4 pixel = read_imageui(sourceImage, mysampler, coords);
            sum += (float)pixel.x * filter[filterIdx++];
        }
    }

    // Clamp to the 8-bit range, then truncate to an unsigned integer.
    const uint value = (uint)clamp(sum, 0.0f, 255.0f);
    write_imageui(outputImage, (int2)(column, row), (uint4)(value, 0, 0, 0));
}

4、host端程序代码

Host端程序处理流程就是按照前面“程序设计”一节编写的。除了调用OpenCL+OpenCV的API函数，其他的地方都是按照C/C++语法编写的。

具体代码如下：

1.// ImageConvolution.cpp : 定义控制台应用程序的入口点。  2.//  3.  4.#include "stdafx.h"  5.#include <iostream>  6.#include <fstream>  7.#include <sstream>  8.  9.#include <opencv2/opencv.hpp>  10.  11.#ifdef __APPLE__  12.#include <OpenCL/cl.h>  13.#else  14.#include <CL/cl.h>  15.#endif  16.  17.using namespace cv;  18.  19.void DisplayPlatformInfo(  20.    cl_platform_id id,  21.    cl_platform_info name,  22.    std::string str)  23.{  24.    cl_int errNum;  25.    std::size_t paramValueSize;  26.  27.    errNum = clGetPlatformInfo(id, name, 0, NULL, ¶mValueSize);  28.    if (errNum != CL_SUCCESS) {  29.        std::cerr << "Failed to find OpenCL platform"  30.            << str << "." << std::endl;  31.        return;  32.    }  33.  34.    char *info = (char *)alloca(sizeof(char) * paramValueSize);  35.    errNum = clGetPlatformInfo(id, name, paramValueSize, info, NULL);  36.    if (errNum != CL_SUCCESS) {  37.        std::cerr << "Failed to find OpenCL platform "  38.            << str << "." << std::endl;  39.        return;  40.    }  41.  42.    std::cout << "\t" << str << ":\t" << info << std::endl;  43.  44.    return;  45.}  46.  47.int _tmain(int argc, _TCHAR* argv[])  48.{  49.    cl_int ciErrNum;  50.    const char *fileName = "ImageConvolution.cl";  51.    int width = 0, height = 0;  52.    const char* imageName = "F:\\code\\pic\\test01.jpg";  53.    char *bufInput = NULL, *bufOutput = NULL;  54.    const float filter[] = {-1,0,-1,0,4,0,-1,0,-1};   55.      56.    //read one jpeg pic and store it in a Mat variable.  57.    Mat img = imread(imageName);  58.    if (!img.data) {  59.        std::cout << "fail to open the file:" << imageName << std::endl;  60.        return -1;  61.    }  62.  63.    //the type of img is RGB, convert to gray image.  64.    Mat imgGray;  65.    cvtColor(img, imgGray, CV_BGR2GRAY);  66.    width = imgGray.cols;  67.    height = imgGray.rows;  68.    std::cout << "picture width: " << width << ", height: " << height << std::endl;  69.  
    70.    //save the source data of original gray image.  71.    FILE *yuvFileOrg = NULL;  72.    fopen_s(&yuvFileOrg, "gray_org.yuv", "wb");  73.    fwrite(imgGray.data, width * height * sizeof(unsigned char), 1, yuvFileOrg);  74.    fclose(yuvFileOrg);  75.    yuvFileOrg = NULL;    76.  77.    //display the original gray image in a window.  78.    namedWindow( imageName, CV_WINDOW_AUTOSIZE );  79.    imshow(imageName, imgGray);  80.    //waitKey(0);  81.  82.    //allocate the input buffer to store the original gray image  83.    if (NULL == (bufInput = (char *)malloc(width * height * sizeof(char)))) {  84.        std::cerr << "Failed to malloc buffer for input image. " << std::endl;  85.        return -1;  86.    }  87.  88.    //allocate the output buffer to store the image rotated.  89.    if (NULL == (bufOutput = (char *)malloc(width * height * sizeof(char)))) {  90.        std::cerr << "Failed to malloc buffer for output image. " << std::endl;  91.        return -1;  92.    }  93.  94.    //copy the data of gray image to the input buffer. initialize the output buffer by zero.   95.    memcpy(bufInput, imgGray.data, width * height * sizeof(unsigned char));  96.    memset(bufOutput, 0x0, width * height * sizeof(unsigned char));  97.      98.    //use the first platform  99.    cl_platform_id platform;  100.    cl_platform_id *platforms;  101.    cl_uint numPlatform;  102.    ciErrNum = clGetPlatformIDs(0, NULL, &numPlatform); //get the number of platform  103.    if (ciErrNum != CL_SUCCESS) {  104.        std::cerr << "Failed to get the number of platform." << std::endl;  105.        return -1;  106.    }  107.    std::cout << "The number of the platform is: " << numPlatform << std::endl;  108.  109.    platforms = (cl_platform_id *)malloc(numPlatform * sizeof(cl_platform_id));  110.    ciErrNum = clGetPlatformIDs(numPlatform, platforms, NULL);  111.    if (ciErrNum != CL_SUCCESS) {  112.        std::cerr << "Failed to get the ID of platform." 
<< std::endl;  113.        return -1;  114.    }  115.    for(cl_uint i = 0; i < numPlatform; i++) {  116.        std::cout << "The platform " << i << ":" << std::endl;  117.        DisplayPlatformInfo(platforms[i], CL_PLATFORM_PROFILE, "CL_PLATFORM_PROFILE");  118.        DisplayPlatformInfo(platforms[i], CL_PLATFORM_VERSION, "CL_PLATFORM_VERSION");  119.        DisplayPlatformInfo(platforms[i], CL_PLATFORM_VENDOR, "CL_PLATFORM_VENDOR");  120.        DisplayPlatformInfo(platforms[i], CL_PLATFORM_EXTENSIONS, "CL_PLATFORM_EXTENSIONS");  121.    }  122.  123.    platform = platforms[1]; //choose the platform of NVIDIA, support the OpenCL 1.2  124.  125.    //use the first device  126.    cl_device_id device;  127.    ciErrNum = clGetDeviceIDs(  128.        platform,  129.        CL_DEVICE_TYPE_ALL,  130.        1,  131.        &device,  132.        NULL);  133.    if (ciErrNum != CL_SUCCESS) {  134.        std::cerr << "Failed to get the device." << std::endl;  135.        return -1;  136.    }  137.  138.    cl_context_properties cps[3] = {  139.        CL_CONTEXT_PLATFORM, (cl_context_properties)platform, 0  140.    };  141.    //create the context  142.    cl_context ctx = clCreateContext(  143.        cps,  144.        1,  145.        &device,  146.        NULL,  147.        NULL,  148.        &ciErrNum);  149.    if (ciErrNum != CL_SUCCESS) {  150.        std::cerr << "Failed to create the context." << std::endl;  151.        return -1;  152.    }  153.  154.    //create the command queue  155.    cl_command_queue myqueue = clCreateCommandQueue(  156.        ctx,  157.        device,  158.        0,  159.        &ciErrNum);  160.    if (ciErrNum != CL_SUCCESS) {  161.        std::cerr << "Failed to create the queue." << std::endl;  162.        return -1;  163.    }  164.  165.    //the convolution filter is 3x3  166.    int filterWidth = 3;  167.    int filterSize = filterWidth * filterWidth; //assume a square kernel  168.  169.    
//the image format describes how the data will be stored in memory  170.    cl_image_format format;  171.    format.image_channel_order = CL_R; //single channel  172.    format.image_channel_data_type = CL_UNSIGNED_INT8; //float  data type  173.    cl_int status;  174.    cl_image_desc desc;  175.    desc.image_type = CL_MEM_OBJECT_IMAGE2D;  176.    desc.image_width = width;  177.    desc.image_height = height;  178.    desc.image_depth = 0;  179.    desc.image_array_size = 0;  180.    desc.image_row_pitch = 0;  181.    desc.image_slice_pitch = 0;  182.    desc.num_mip_levels = 0;  183.    desc.num_samples = 0;  184.    desc.buffer = NULL;  185.  186.    //create the image buffers for input and output data.  187.    cl_mem bufferSourceImage = clCreateImage(ctx, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, &format, &desc, bufInput, &status);  188.    cl_mem bufferOutputImage = clCreateImage(ctx, CL_MEM_WRITE_ONLY, &format, &desc, bufOutput, &status);  189.     190.    //create space for the 3x3 filter on the device  191.    cl_mem bufferFilter = clCreateBuffer(  192.        ctx,  193.        0,  194.        filterSize * sizeof(float),  195.        NULL,  196.        NULL);  197.  198.    //copy the source image to the device  199.    size_t origin[3] = {0, 0, 0}; //offset within the image to copy from  200.    size_t region[3] = {width, height, 1}; //elements to per dimension  201.  202.    ciErrNum = clEnqueueWriteImage(  203.        myqueue,  204.        bufferSourceImage,  205.        CL_FALSE,  206.        origin,  207.        region,  208.        0,  209.        0,  210.        bufInput,  211.        0,  212.        NULL,  213.        NULL);  214.    if (ciErrNum != CL_SUCCESS) {  215.        std::cerr << "Failed to write the image to the device." << std::endl;  216.        return -1;  217.    }  218.      219.    //copy the 3x3 filter to the device  220.    ciErrNum = clEnqueueWriteBuffer(  221.        myqueue,  222.        bufferFilter,  223.        
CL_FALSE,  224.        0,  225.        filterSize * sizeof(float),  226.        filter,  227.        0,  228.        NULL,  229.        NULL);  230.    if (ciErrNum != CL_SUCCESS) {  231.        std::cerr << "Failed to write the filter to the device." << std::endl;  232.        return -1;  233.    }  234.  235.    //open kernel file and read the content to a string variable.  236.    std::ifstream kernelFile("ImageConvolution.cl", std::ios::in);  237.    //std::ifstream kernelFile("ImageRotate.cl", std::ios::in);  238.    if (!kernelFile.is_open()) {  239.        std::cerr << "Failed to open file for reading: " << fileName << std::endl;  240.        return NULL;  241.    }  242.    std::ostringstream oss;  243.    oss << kernelFile.rdbuf();  244.    std::string srcStdStr = oss.str();  245.    const char *srcStr = srcStdStr.c_str();  246.    kernelFile.close();  247.  248.    //create the program with source code of kernel.  249.    cl_program myprog = clCreateProgramWithSource(  250.        ctx,  251.        1,  252.        (const char**)&srcStr,  253.        NULL,  254.        &ciErrNum);  255.    if (ciErrNum != CL_SUCCESS) {  256.        std::cerr << "Failed to create the program." << std::endl;  257.        return -1;  258.    }  259.  260.    //compile the program. passing NULL for the 'device_list' argument targets all devices in the context  261.    ciErrNum = clBuildProgram(myprog, 0, NULL, NULL, NULL, NULL);  262.    if (ciErrNum != CL_SUCCESS) {  263.        std::cerr << "Failed to build the program." << std::endl;  264.        return -1;  265.    }  266.  267.    //create the kernel  268.    cl_kernel mykernel = clCreateKernel(  269.        myprog,  270.        "convolution",  271.        &ciErrNum);  272.    if (ciErrNum != CL_SUCCESS) {  273.        std::cerr << "Failed to create the kernel." << std::endl;  274.        return -1;  275.    }  276.  277.    //set the kernel arguments  278.    
clSetKernelArg(mykernel, 0, sizeof(cl_mem), (void *)&bufferSourceImage);  279.    clSetKernelArg(mykernel, 1, sizeof(cl_mem), (void *)&bufferOutputImage);  280.    clSetKernelArg(mykernel, 2, sizeof(cl_int), (void *)&width);  281.    clSetKernelArg(mykernel, 3, sizeof(cl_int), (void *)&height);   282.    clSetKernelArg(mykernel, 4, sizeof(cl_mem), (void *)&bufferFilter);   283.    clSetKernelArg(mykernel, 5, sizeof(cl_int), (void *)&filterWidth);  284.  285.    //set local and global workgroup sizes  286.    size_t localws[2] = {1, 1};  287.    size_t globalws[2] = {width, height};  288.  289.    //execute the kernel  290.    ciErrNum = clEnqueueNDRangeKernel(  291.        myqueue,  292.        mykernel,  293.        2,  294.        NULL,  295.        globalws,  296.        localws,  297.        0,  298.        NULL,  299.        NULL);  300.    if (ciErrNum != CL_SUCCESS) {  301.        std::cerr << "Failed to execute the kernel." << std::endl;  302.        return -1;  303.    }  304.  305.    //read the output data back to the host  306.    ciErrNum = clEnqueueReadImage(  307.        myqueue,  308.        bufferOutputImage,  309.        CL_TRUE,  310.        origin,  311.        region,  312.        0,  313.        0,  314.        bufOutput,  315.        0,  316.        NULL,  317.        NULL);  318.    if (ciErrNum != CL_SUCCESS) {  319.        std::cerr << "Failed to read the image from the device." << std::endl;  320.        return -1;  321.    }  322.      323.    //copy the output data from output buffer to Mat variable.   324.    memcpy(imgGray.data, bufOutput, width * height * sizeof(unsigned char));  325.  326.    //save the source data for gray image rotated  327.    FILE *yuvFile = NULL;  328.    fopen_s(&yuvFile, "gray.yuv", "wb");  329.    fwrite(imgGray.data, width * height * sizeof(unsigned char), 1, yuvFile);  330.    fclose(yuvFile);  331.    yuvFile = NULL;  332.  333.    //save the gray image rotated.  334.    
imwrite("test_gray.jpg", imgGray);  335.  336.    //show the gray image rotated.  337.    const char *winName = "gray_image_convolution";  338.    namedWindow(winName, CV_WINDOW_AUTOSIZE );  339.    imshow(winName, imgGray);     340.    waitKey(0);  341.    destroyAllWindows();  342.  343.    //release all resource  344.    if (bufInput != NULL)  345.        free(bufInput);  346.  347.    if (bufOutput != NULL)  348.        free(bufOutput);  349.  350.    if (bufferSourceImage != 0)  351.        clReleaseMemObject(bufferSourceImage);  352.  353.    if (bufferOutputImage != 0)  354.        clReleaseMemObject(bufferOutputImage);  355.  356.    if (bufferFilter != 0)  357.        clReleaseMemObject(bufferFilter);  358.  359.    if (myqueue != 0)  360.        clReleaseCommandQueue(myqueue);  361.  362.    if (mykernel != 0)  363.        clReleaseKernel(mykernel);  364.  365.    if (myprog != 0)  366.        clReleaseProgram(myprog);  367.  368.    if (ctx != 0)  369.        clReleaseContext(ctx);  370.  371.    return 0;  372.}

（未完待续）

阅读全文

0 0