CUDA卷积操作—使用constant memory实现高斯滤波

来源:互联网 发布:2017淘宝如何投诉盗图 编辑:程序博客网 时间:2024/05/16 19:21

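高斯滤波就是使用高斯模板与图片进行卷积运算。二维高斯函数为 $G(x,y)=\dfrac{1}{2\pi\sigma^{2}}\,e^{-\frac{x^{2}+y^{2}}{2\sigma^{2}}}$;本文代码使用的 5×5 整数模板(系数总和为 273,卷积结果需除以 273 归一化)为 $\dfrac{1}{273}\begin{bmatrix}1&4&7&4&1\\4&16&26&16&4\\7&26&41&26&7\\4&16&26&16&4\\1&4&7&4&1\end{bmatrix}$。高斯函数及模板如下图所示: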


卷积前后的效果图如下:


constant memory的使用及CUDA编程的相关内容,在代码注释中有详细介绍。
GPU代码如下所示:
#include "cuda_runtime.h"
#include "device_launch_parameters.h"
#include <stdio.h>
#include <stdlib.h>
#include "Windows.h"
#include <math.h>
#include <iostream>
using namespace std;

#define BLOCKDIM_X 16
#define BLOCKDIM_Y 16
#define GRIDDIM_X 256
#define GRIDDIM_Y 256
#define MASK_WIDTH 5
#define MASK_RADIUS (MASK_WIDTH / 2)
#define MASK_SUM 273  // sum of all coefficients of the integer Gaussian mask

// Aborts with a readable message when a CUDA runtime call fails.
#define CUDA_CHECK(call)                                                   \
    do {                                                                   \
        cudaError_t err_ = (call);                                         \
        if (err_ != cudaSuccess) {                                         \
            fprintf(stderr, "CUDA error %s:%d: %s\n", __FILE__, __LINE__,  \
                    cudaGetErrorString(err_));                             \
            exit(EXIT_FAILURE);                                            \
        }                                                                  \
    } while (0)

// Gaussian mask coefficients, stored in constant memory (filled by main()).
__constant__ int d_const_Gaussian[MASK_WIDTH * MASK_WIDTH];

unsigned char *readBmp(char *bmpName, int *width, int *height, int *byteCount);
bool saveBmp(char *bmpName, unsigned char *imgBuf, int width, int height, int byteCount);
static __global__ void kernel_GaussianFilt(int width, int height, int byteCount,
                                           const unsigned char *__restrict__ d_src_imgbuf,
                                           unsigned char *__restrict__ d_guassian_imgbuf);

// Reads numbered BMP frames, blurs each one on the GPU with the 5x5 Gaussian
// mask held in constant memory, writes the result, and reports the elapsed time.
int main()
{
    // Query the device configuration.
    cudaDeviceProp prop;
    CUDA_CHECK(cudaGetDeviceProperties(&prop, 0));
    cout << "maxThreadsPerBlock=" << prop.maxThreadsPerBlock << endl;

    // Wall-clock timing; unsigned subtraction stays correct across tick wrap-around.
    const DWORD start = GetTickCount();

    // CUDA event timing.
    cudaEvent_t startt, stop;
    CUDA_CHECK(cudaEventCreate(&startt));
    CUDA_CHECK(cudaEventCreate(&stop));
    CUDA_CHECK(cudaEventRecord(startt, 0));

    // Upload the Gaussian mask to constant memory once; it is the same for every frame.
    const int Gaussian[MASK_WIDTH * MASK_WIDTH] = {
        1,  4,  7,  4, 1,
        4, 16, 26, 16, 4,
        7, 26, 41, 26, 7,
        4, 16, 26, 16, 4,
        1,  4,  7,  4, 1};  // coefficients sum to MASK_SUM (273)
    CUDA_CHECK(cudaMemcpyToSymbol(d_const_Gaussian, Gaussian, sizeof(Gaussian)));

    char rootPath1[] = "C:\\Users\\a404\\Desktop\\测试图片\\";
    char rootPath2[] = "C:\\Users\\a404\\Desktop\\测试结果\\";
    char readPath[1024];
    char writePath[1024];
    int frame = 1;

    for (int k = 1; k <= frame; k++)
    {
        int width = 0, height = 0, byteCount = 0;
        snprintf(readPath, sizeof(readPath), "%s%d.bmp", rootPath1, k);
        unsigned char *h_src_imgbuf = readBmp(readPath, &width, &height, &byteCount);
        if (h_src_imgbuf == 0)
        {
            // readBmp returns 0 when the file cannot be opened or parsed.
            cerr << "Failed to read " << readPath << endl;
            continue;
        }

        const size_t imgBytes = (size_t)width * height * byteCount;

        // Host-side output image.
        unsigned char *h_guassian_imgbuf = new unsigned char[imgBytes];

        // Device buffers for the source and the filtered image.
        unsigned char *d_src_imgbuf = 0;
        unsigned char *d_guassian_imgbuf = 0;
        CUDA_CHECK(cudaMalloc((void **)&d_src_imgbuf, imgBytes));
        CUDA_CHECK(cudaMalloc((void **)&d_guassian_imgbuf, imgBytes));

        // Host -> device.
        CUDA_CHECK(cudaMemcpy(d_src_imgbuf, h_src_imgbuf, imgBytes, cudaMemcpyHostToDevice));

        // Launch configuration: x covers columns, y covers rows. The grid is capped;
        // the strided loops in the kernel cover whatever the grid does not.
        int bx = (width + BLOCKDIM_X - 1) / BLOCKDIM_X;
        int by = (height + BLOCKDIM_Y - 1) / BLOCKDIM_Y;
        if (bx > GRIDDIM_X) bx = GRIDDIM_X;
        if (by > GRIDDIM_Y) by = GRIDDIM_Y;
        dim3 grid(bx, by);
        dim3 block(BLOCKDIM_X, BLOCKDIM_Y);

        // Gaussian filter kernel.
        kernel_GaussianFilt<<<grid, block>>>(width, height, byteCount, d_src_imgbuf, d_guassian_imgbuf);
        CUDA_CHECK(cudaGetLastError());  // catches invalid launch configurations

        // Device -> host; this blocking copy also surfaces kernel execution errors.
        CUDA_CHECK(cudaMemcpy(h_guassian_imgbuf, d_guassian_imgbuf, imgBytes, cudaMemcpyDeviceToHost));

        snprintf(writePath, sizeof(writePath), "%s%d.bmp", rootPath2, k);
        if (!saveBmp(writePath, h_guassian_imgbuf, width, height, byteCount))
            cerr << "Failed to write " << writePath << endl;

        // Progress report.
        cout << k << "  " << ((float)k / frame) * 100 << "%" << endl;

        // Release per-frame resources.
        CUDA_CHECK(cudaFree(d_src_imgbuf));
        CUDA_CHECK(cudaFree(d_guassian_imgbuf));
        delete[] h_src_imgbuf;
        delete[] h_guassian_imgbuf;
    }

    // Finish the event-based measurement and release the events.
    float eventMs = 0.0f;
    CUDA_CHECK(cudaEventRecord(stop, 0));
    CUDA_CHECK(cudaEventSynchronize(stop));
    CUDA_CHECK(cudaEventElapsedTime(&eventMs, startt, stop));
    CUDA_CHECK(cudaEventDestroy(startt));
    CUDA_CHECK(cudaEventDestroy(stop));

    const DWORD elapsed = GetTickCount() - start;
    cout << "Total time GPU:";
    cout << elapsed << endl;
    cout << "CUDA event time (ms):" << eventMs << endl;

    // Keep the console window open until the user enters something.
    int x;
    cin >> x;
    return 0;
}

// Applies the MASK_WIDTH x MASK_WIDTH mask in d_const_Gaussian to an interleaved
// image with `byteCount` channels per pixel.
//
// Launch layout: any 2D grid of 2D blocks. threadIdx.x / blockIdx.x walk image
// columns and threadIdx.y / blockIdx.y walk image rows, so neighbouring threads
// in x touch neighbouring pixels of the same row. Both dimensions are covered by
// strided loops, so the grid does not have to span the whole image.
// No shared memory is used. d_src_imgbuf and d_dst_imgbuf must not overlap.
//
// width, height  image size in pixels
// byteCount      bytes (channels) per pixel
// d_src_imgbuf   device pointer to the tightly packed source image
// d_dst_imgbuf   device pointer to the output image, same layout as the source
static __global__ void kernel_GaussianFilt(int width, int height, int byteCount,
                                           const unsigned char *__restrict__ d_src_imgbuf,
                                           unsigned char *__restrict__ d_dst_imgbuf)
{
    const int firstCol = blockDim.x * blockIdx.x + threadIdx.x;
    const int firstRow = blockDim.y * blockIdx.y + threadIdx.y;
    const int strideCols = blockDim.x * gridDim.x;
    const int strideRows = blockDim.y * gridDim.y;
    const size_t rowBytes = (size_t)width * byteCount;

    for (int row = firstRow; row < height; row += strideRows)
    {
        for (int col = firstCol; col < width; col += strideCols)
        {
            for (int k = 0; k < byteCount; k++)
            {
                int sum = 0;
                for (int m = -MASK_RADIUS; m <= MASK_RADIUS; m++)
                {
                    const int r = row + m;
                    // Pixels outside the image count as zero, so they add nothing.
                    if (r < 0 || r >= height) continue;
                    for (int n = -MASK_RADIUS; n <= MASK_RADIUS; n++)
                    {
                        const int c = col + n;
                        if (c < 0 || c >= width) continue;
                        const int pixel = d_src_imgbuf[r * rowBytes + (size_t)c * byteCount + k];
                        sum += pixel * d_const_Gaussian[(m + MASK_RADIUS) * MASK_WIDTH + (n + MASK_RADIUS)];
                    }
                }

                // Normalise and clamp to the 8-bit range.
                int value = sum / MASK_SUM;
                if (value < 0) value = 0;
                else if (value > 255) value = 255;
                d_dst_imgbuf[row * rowBytes + (size_t)col * byteCount + k] = (unsigned char)value;
            }
        }
    }
}

// Loads the pixel data of a BMP file into a tightly packed buffer (row padding removed).
//
// bmpName    path of the file to read
// width      out: image width in pixels
// height     out: image height in pixels
// byteCount  out: bytes per pixel (biBitCount / 8)
// Returns a buffer allocated with new[] (caller releases it with delete[]), or 0
// when the file cannot be opened, is not a BMP, has unsupported dimensions or
// bit depth, or is truncated. The out parameters are only written on success.
unsigned char *readBmp(char *bmpName, int *width, int *height, int *byteCount)
{
    FILE *fp = fopen(bmpName, "rb");
    if (fp == 0) return 0;

    BITMAPFILEHEADER fileHead;
    BITMAPINFOHEADER head;
    if (fread(&fileHead, sizeof(BITMAPFILEHEADER), 1, fp) != 1 ||
        fread(&head, sizeof(BITMAPINFOHEADER), 1, fp) != 1 ||
        fileHead.bfType != 0x4D42)  // "BM" signature
    {
        fclose(fp);
        return 0;
    }

    const int w = head.biWidth;
    const int h = head.biHeight;
    const int b = head.biBitCount / 8;
    if (w <= 0 || h <= 0 || b <= 0)
    {
        fclose(fp);
        return 0;
    }

    // Prefer the pixel-data offset recorded in the file header. If it is implausible,
    // fall back to "headers + 1024-byte palette for 8-bit images".
    const long headerBytes = (long)(sizeof(BITMAPFILEHEADER) + sizeof(BITMAPINFOHEADER));
    long dataOffset = (long)fileHead.bfOffBits;
    if (dataOffset < headerBytes)
        dataOffset = headerBytes + (b == 1 ? 1024 : 0);
    if (fseek(fp, dataOffset, SEEK_SET) != 0)
    {
        fclose(fp);
        return 0;
    }

    const int rowBytes = w * b;
    const int lineByte = (rowBytes + 3) / 4 * 4;  // rows in the file are padded to a multiple of 4 bytes

    unsigned char *imgBuf = new unsigned char[(size_t)rowBytes * h];
    for (int i = 0; i < h; i++)
    {
        if (fread(imgBuf + (size_t)i * rowBytes, rowBytes, 1, fp) != 1)
        {
            delete[] imgBuf;
            fclose(fp);
            return 0;
        }
        fseek(fp, lineByte - rowBytes, SEEK_CUR);  // skip the row padding
    }
    fclose(fp);

    *width = w;
    *height = h;
    *byteCount = b;
    return imgBuf;
}

// Writes a tightly packed image buffer as an uncompressed BMP file.
//
// bmpName    path of the file to create
// imgBuf     pixel data, width * height * byteCount bytes, no row padding
// width      image width in pixels
// height     image height in pixels
// byteCount  bytes per pixel; 1 produces an 8-bit image with a grayscale palette
// Returns true when the whole file was written, false otherwise.
bool saveBmp(char *bmpName, unsigned char *imgBuf, int width, int height, int byteCount)
{
    if (!imgBuf || width <= 0 || height <= 0 || byteCount <= 0) return false;

    // 8-bit images carry a 256-entry palette (1024 bytes); other depths have none.
    const int palettesize = (byteCount == 1) ? 1024 : 0;

    // Rows in the file are padded to a multiple of 4 bytes.
    const int rowBytes = width * byteCount;
    const int lineByte = (rowBytes + 3) / 4 * 4;
    const int padBytes = lineByte - rowBytes;

    FILE *fp = fopen(bmpName, "wb");
    if (fp == 0) return false;

    // File header.
    BITMAPFILEHEADER fileHead;
    fileHead.bfType = 0x4D42;
    fileHead.bfSize = sizeof(BITMAPFILEHEADER) + sizeof(BITMAPINFOHEADER) + palettesize + lineByte * height;
    fileHead.bfReserved1 = 0;
    fileHead.bfReserved2 = 0;
    fileHead.bfOffBits = 54 + palettesize;
    bool ok = fwrite(&fileHead, sizeof(BITMAPFILEHEADER), 1, fp) == 1;

    // Info header.
    BITMAPINFOHEADER head;
    head.biBitCount = byteCount * 8;
    head.biClrImportant = 0;
    head.biClrUsed = 0;
    head.biCompression = 0;
    head.biHeight = height;
    head.biPlanes = 1;
    head.biSize = 40;
    head.biSizeImage = lineByte * height;
    head.biWidth = width;
    head.biXPelsPerMeter = 0;
    head.biYPelsPerMeter = 0;
    ok = ok && fwrite(&head, sizeof(BITMAPINFOHEADER), 1, fp) == 1;

    // Grayscale palette: entry i is (i, i, i, 0).
    if (palettesize == 1024)
    {
        unsigned char palette[1024];
        for (int i = 0; i < 256; i++)
        {
            palette[i * 4 + 0] = (unsigned char)i;
            palette[i * 4 + 1] = (unsigned char)i;
            palette[i * 4 + 2] = (unsigned char)i;
            palette[i * 4 + 3] = 0;
        }
        ok = ok && fwrite(palette, 1024, 1, fp) == 1;
    }

    // Pixel rows, each followed by zero-filled padding.
    const unsigned char pad[3] = {0, 0, 0};
    for (int i = 0; ok && i < height; i++)
    {
        ok = fwrite(imgBuf + (size_t)i * rowBytes, rowBytes, 1, fp) == 1;
        if (ok && padBytes > 0)
            ok = fwrite(pad, padBytes, 1, fp) == 1;
    }

    if (fclose(fp) != 0) ok = false;
    return ok;
}
附录:高斯滤波CPU代码
#include "stdio.h"#include "Windows.h"#include <iostream>using namespace std;unsigned char *readBmp(char *bmpName, int *width, int *height, int *byteCount); //读入图像bool saveBmp(char *bmpName, unsigned char *imgBuf, int width, int height, int byteCount); //保存图像void GaussianFilt(int width,int height,int byteCount,int Gaussian[][5],unsigned char *gray_imgbuf,unsigned char *guassian_imgbuf); //高斯滤波void main(){//计时函数long start, end;long time = 0; start = GetTickCount();unsigned char *src_imgbuf; //图像指针int width, height, byteCount;char rootPath1[]="C:\\Users\\a404\\Desktop\\测试图片\\";char readPath[1024];int frame=300;  //读入图像副数for (int i=1;i<=frame;i++){sprintf(readPath, "%s%d.bmp", rootPath1, i);src_imgbuf=readBmp(readPath, &width, &height, &byteCount);//printf("宽=%d,高=%d,字节=%d\n",width, height, byteCount);//读入高斯模糊模板int Gaussian_mask[5][5]={{1,4,7,4,1},{4,16,26,16,4},{7,26,41,26,7},{4,16,26,16,4},{1,4,7,4,1}};//总和为273//输出图像内存分配unsigned char *guassian_imgbuf=new unsigned char[width*height*byteCount];//对原图高斯模糊GaussianFilt(width,height,byteCount,Gaussian_mask,src_imgbuf,guassian_imgbuf);char rootPath2[]="C:\\Users\\a404\\Desktop\\";char writePath[1024];sprintf(writePath, "%s%d.bmp", rootPath2, i);saveBmp(writePath, guassian_imgbuf, width, height, byteCount);cout<<i<<"  "<<((float)i/frame)*100<<"%"<<endl;delete []src_imgbuf;delete []guassian_imgbuf;}end = GetTickCount();InterlockedExchangeAdd(&time, end - start);cout << "Total time CPU:";cout << time << endl;int x;cin>>x;}void GaussianFilt(int width,int height,int byteCount,int Gaussian[][5],unsigned char *src_imgbuf,unsigned char *guassian_imgbuf){//高斯模糊处理 5层循环处理for(int i=0;i<height;i++){for(int j=0;j<width;j++){for(int k=0;k<byteCount;k++){int sum=0;//临时值int tempPixelValue=0;for (int m=-2;m<=2;m++){for (int n=-2;n<=2;n++){//边界处理,幽灵元素赋值为零if (i+m<0||j+n<0||i+m>=height||j+n>=width)tempPixelValue=0; 
elsetempPixelValue=*(src_imgbuf+(i+m)*width*byteCount+(j+n)*byteCount+k);//tempPixelValue=*(gray_imgbuf+(i+m)*width+(j+n)+k);sum+=tempPixelValue*Gaussian[m+2][n+2];}}//tempPixelValue=*(src_imgbuf+(i)*width*byteCount+(j)*byteCount+k);if (sum/273<0) *(guassian_imgbuf+i*width*byteCount+j*byteCount+k)=0;else if(sum/273>255)  *(guassian_imgbuf+i*width*byteCount+j*byteCount+k)=255;else  *(guassian_imgbuf+i*width*byteCount+j*byteCount+k)=sum/273;}}}}//给定一个图像文件及其路径,读入图像数据。 unsigned char *readBmp(char *bmpName, int *width, int *height, int *byteCount){//打开文件,FILE *fp=fopen(bmpName,"rb");if(fp==0) return 0;//跳过文件头fseek(fp, sizeof(BITMAPFILEHEADER),0);//读入信息头int w, h, b;BITMAPINFOHEADER head;fread(&head, sizeof(BITMAPINFOHEADER), 1,fp); w = head.biWidth;h = head.biHeight;b = head.biBitCount/8;int lineByte=(w * b+3)/4*4; //每行的字节数为4的倍数//跳过颜色表 (颜色表的大小为1024)(彩色图像并没有颜色表,不需要这一步)if(b==1)fseek(fp, 1024,1);//图像数据unsigned char *imgBuf=new unsigned char[w * h * b];for(int i=0;i<h;i++){fread(imgBuf+i*w*b,w*b, 1,fp);fseek(fp, lineByte-w*b, 1);}fclose(fp);*width=w,  *height=h, *byteCount=b;return imgBuf;}bool saveBmp(char *bmpName, unsigned char *imgBuf, int width, int height, int byteCount){if(!imgBuf)return 0;//灰度图像颜色表空间1024,彩色图像没有颜色表int palettesize=0;if(byteCount==1) palettesize=1024;//一行象素字节数为4的倍数int lineByte=(width * byteCount+3)/4*4;FILE *fp=fopen(bmpName,"wb");if(fp==0) return 0;//填写文件头BITMAPFILEHEADER fileHead;fileHead.bfType = 0x4D42;fileHead.bfSize= sizeof(BITMAPFILEHEADER)+sizeof(BITMAPINFOHEADER)+ palettesize + lineByte*height;fileHead.bfReserved1 = 0;fileHead.bfReserved2 = 0;fileHead.bfOffBits=54+palettesize;fwrite(&fileHead, sizeof(BITMAPFILEHEADER),1, fp);// 填写信息头BITMAPINFOHEADER head; head.biBitCount=byteCount*8;head.biClrImportant=0;head.biClrUsed=0;head.biCompression=0;head.biHeight=height;head.biPlanes=1;head.biSize=40;head.biSizeImage=lineByte*height;head.biWidth=width;head.biXPelsPerMeter=0;head.biYPelsPerMeter=0;fwrite(&head, sizeof(BITMAPINFOHEADER),1, fp);//颜色表拷贝  
if(palettesize==1024){unsigned char palette[1024];for(int i=0;i<256;i++){*(palette+i*4+0)=i;*(palette+i*4+1)=i;*(palette+i*4+2)=i;*(palette+i*4+3)=0;     }fwrite(palette, 1024,1, fp);}//准备数据并写文件unsigned char *buf=new unsigned char[height*lineByte];for(int i=0;i<height;i++){for(int j=0;j<width*byteCount; j++)*(buf+i*lineByte+j)=*(imgBuf+i*width*byteCount+j);}fwrite(buf, height*lineByte, 1, fp);delete []buf;fclose(fp);return 1;}



0 0