我的CUDA学习之旅2——图像形态学腐蚀、膨胀CUDA实现

来源：互联网发布：进销存哪个好知乎编辑：程序博客网时间：2024/06/04 23:35

引言

由于上两周杂七杂八的事情比较多加上自己写的算法有些问题，一直改bug。。。。没时间继续写博客，今天开始补上博客。从这篇文章起，开始将一些较为典型的OpenCV算法通过CUDA进行实现，本文实现的为图像处理中最为常见的形态学腐蚀以及膨胀，由于本文目的在于算法移植后的验证，故在图片的选择上用小图像作为输入的示例图像，有不当之处欢迎评论或私信~

任务要求

输入一张图片，将其转为灰度图后，通过CUDA在GPU中对图片实现形态学腐蚀、膨胀操作，最后将结果输出至CPU并进行显示，要求输出图与用OpenCV实现后的结果一致。

实现思路

关于腐蚀与膨胀的算法原理，网上已有完备的资料，在这里不再赘述，具体原理可参见“图像的腐蚀原理”一文。
由于是对经典算法的移植，故在thread以及block的设计上不能单单针对某一张图片，而是要通用；同时为了尽可能提高运算速度，将每个block设计为32*32共1024个thread（本人显卡为NVIDIA GeForce GT 755M），block的数量则根据传入图片的大小动态变化。

实现环境

VS2013 + CUDA7.5 + Opencv2.4.13

实现代码

#include "cuda_runtime.h"
#include "device_launch_parameters.h"
#include <cuda.h>
#include <device_functions.h>
#include <opencv2/opencv.hpp>
#include <cstdio>
#include <cstdlib>
#include <iostream>

using namespace std;
using namespace cv;

// Abort with file/line and the CUDA error string when a runtime call fails.
// Kernel launches return nothing, so they are followed by
// CUDA_CHECK(cudaGetLastError()) to surface launch-configuration errors.
#define CUDA_CHECK(call)                                                   \
    do {                                                                   \
        cudaError_t err_ = (call);                                         \
        if (err_ != cudaSuccess) {                                         \
            fprintf(stderr, "CUDA error %s:%d: %s\n", __FILE__, __LINE__,  \
                    cudaGetErrorString(err_));                             \
            exit(EXIT_FAILURE);                                            \
        }                                                                  \
    } while (0)

// Shared per-pixel worker for the two morphology kernels.
//
// Each thread handles the pixel at (xIndex, yIndex) derived from its 2D
// block/thread index and writes the minimum (kErode == true) or maximum
// (kErode == false) of the rectangular neighbourhood to dataOut.
//
// The neighbourhood spans offsets [-anchor, size - 1 - anchor] with
// anchor = size / 2 in each direction, and is clipped to the image, so pixels
// outside the image are simply ignored. For odd element sizes this is the same
// symmetric window the original loops used.
// NOTE(review): this is intended to mirror cv::erode / cv::dilate with the
// default anchor and default border value -- confirm against the OpenCV docs.
template <bool kErode>
__device__ __forceinline__ void morphPixel(const unsigned char *dataIn, unsigned char *dataOut,
                                           int elementWidth, int elementHeight,
                                           int imgWidth, int imgHeight)
{
    // Index of this thread along x / y in the grid.
    const int xIndex = threadIdx.x + blockIdx.x * blockDim.x;
    const int yIndex = threadIdx.y + blockIdx.y * blockDim.y;

    // The grid is rounded up to whole blocks, so threads beyond the image
    // must exit before touching memory.
    if (xIndex >= imgWidth || yIndex >= imgHeight)
    {
        return;
    }

    const int anchorX = elementWidth / 2;
    const int anchorY = elementHeight / 2;

    // Window bounds (inclusive), clipped to the image rectangle.
    int xBegin = xIndex - anchorX;
    int xEnd = xIndex - anchorX + elementWidth - 1;
    int yBegin = yIndex - anchorY;
    int yEnd = yIndex - anchorY + elementHeight - 1;
    if (xBegin < 0) xBegin = 0;
    if (yBegin < 0) yBegin = 0;
    if (xEnd > imgWidth - 1) xEnd = imgWidth - 1;
    if (yEnd > imgHeight - 1) yEnd = imgHeight - 1;

    // Start from the centre pixel and accumulate in a register rather than
    // reading/writing dataOut on every comparison.
    const size_t centre = (size_t)yIndex * imgWidth + xIndex;
    unsigned char result = dataIn[centre];

    for (int y = yBegin; y <= yEnd; ++y)
    {
        const unsigned char *row = dataIn + (size_t)y * imgWidth;
        for (int x = xBegin; x <= xEnd; ++x)
        {
            const unsigned char v = row[x];
            if (kErode ? (v < result) : (v > result))
            {
                result = v;
            }
        }
    }

    dataOut[centre] = result;
}

// Erosion: dataOut(x, y) = min of dataIn over the erodeElement-sized rectangle
// around (x, y), clipped to the image.
//
// dataIn / dataOut: device buffers of imgWidth * imgHeight bytes, indexed as
//                   y * imgWidth + x (one byte per pixel, no row padding).
//                   They must not alias: other threads read dataIn while this
//                   thread writes dataOut.
// Launch layout:    2D grid of 2D blocks covering at least imgWidth x imgHeight
//                   threads; surplus threads return without accessing memory.
// No shared memory is used.
__global__ void erodeInCuda(const unsigned char *dataIn, unsigned char *dataOut, Size erodeElement, int imgWidth, int imgHeight)
{
    morphPixel<true>(dataIn, dataOut, erodeElement.width, erodeElement.height, imgWidth, imgHeight);
}

// Dilation: dataOut(x, y) = max of dataIn over the dilateElement-sized
// rectangle around (x, y), clipped to the image.
//
// Buffer layout, aliasing rule and launch layout are the same as erodeInCuda.
__global__ void dilateInCuda(const unsigned char *dataIn, unsigned char *dataOut, Size dilateElement, int imgWidth, int imgHeight)
{
    morphPixel<false>(dataIn, dataOut, dilateElement.width, dilateElement.height, imgWidth, imgHeight);
}

// Loads "1.jpg" as a grayscale image, runs erosion and dilation both on the GPU
// (kernels above) and on the CPU (OpenCV), and prints how many pixels differ.
// Returns EXIT_FAILURE if the image cannot be loaded, 0 otherwise.
int main()
{
    // Input image, decoded directly as single-channel grayscale.
    Mat grayImg = imread("1.jpg", IMREAD_GRAYSCALE);
    if (grayImg.empty())
    {
        cerr << "Failed to load image: 1.jpg" << endl;
        return EXIT_FAILURE;
    }
    // The flat cudaMemcpy below requires rows to be stored back to back.
    if (!grayImg.isContinuous())
    {
        grayImg = grayImg.clone();
    }

    const int imgWidth = grayImg.cols;
    const int imgHeight = grayImg.rows;
    const size_t imgBytes = (size_t)imgWidth * imgHeight * sizeof(unsigned char);

    // Host-side result images for the GPU erosion / dilation.
    Mat dstImg1(imgHeight, imgWidth, CV_8UC1, Scalar(0));
    Mat dstImg2(imgHeight, imgWidth, CV_8UC1, Scalar(0));

    // Device buffers: input image, eroded output, dilated output.
    unsigned char *d_in = NULL;
    unsigned char *d_out1 = NULL;
    unsigned char *d_out2 = NULL;
    CUDA_CHECK(cudaMalloc((void**)&d_in, imgBytes));
    CUDA_CHECK(cudaMalloc((void**)&d_out1, imgBytes));
    CUDA_CHECK(cudaMalloc((void**)&d_out2, imgBytes));

    // Upload the input image to the GPU.
    CUDA_CHECK(cudaMemcpy(d_in, grayImg.data, imgBytes, cudaMemcpyHostToDevice));

    // 32 x 32 threads per block; number of blocks is the image size rounded up.
    dim3 threadsPerBlock(32, 32);
    dim3 blocksPerGrid((imgWidth + threadsPerBlock.x - 1) / threadsPerBlock.x,
                       (imgHeight + threadsPerBlock.y - 1) / threadsPerBlock.y);

    // Structuring element size (width 3, height 5), shared by GPU and CPU paths.
    Size Element(3, 5);
    Mat element = getStructuringElement(MORPH_RECT, Element);

    // Erosion on the GPU; the blocking cudaMemcpy also waits for the kernel.
    erodeInCuda<<<blocksPerGrid, threadsPerBlock>>>(d_in, d_out1, Element, imgWidth, imgHeight);
    CUDA_CHECK(cudaGetLastError());
    CUDA_CHECK(cudaMemcpy(dstImg1.data, d_out1, imgBytes, cudaMemcpyDeviceToHost));

    // Erosion on the CPU (OpenCV reference).
    Mat erodeImg;
    erode(grayImg, erodeImg, element);

    // Dilation on the GPU.
    dilateInCuda<<<blocksPerGrid, threadsPerBlock>>>(d_in, d_out2, Element, imgWidth, imgHeight);
    CUDA_CHECK(cudaGetLastError());
    CUDA_CHECK(cudaMemcpy(dstImg2.data, d_out2, imgBytes, cudaMemcpyDeviceToHost));

    // Dilation on the CPU (OpenCV reference).
    Mat dilateImg;
    dilate(grayImg, dilateImg, element);

    // Report how many pixels differ between the GPU and OpenCV results.
    cout << "erode  mismatching pixels: " << countNonZero(dstImg1 != erodeImg) << endl;
    cout << "dilate mismatching pixels: " << countNonZero(dstImg2 != dilateImg) << endl;

    // Release device memory.
    CUDA_CHECK(cudaFree(d_in));
    CUDA_CHECK(cudaFree(d_out1));
    CUDA_CHECK(cudaFree(d_out2));

    return 0;
}

实现结果

原灰度图

腐蚀后图片
这里写图片描述

膨胀后图片
这里写图片描述

通过比对发现CUDA输出结果与OpenCV输出结果一致~

阅读全文

0 0