CUDA范例精解第6章

来源:互联网 发布:美工师 编辑:程序博客网 时间:2024/04/28 04:52

知识点:constant memory、CUDA event 的使用

 

#include "C://Users//XX//Desktop//CUDA//common//cpu_bitmap.h"
#include <stdio.h>
#include <stdlib.h>

// Image is DIM x DIM pixels, 4 bytes (RGBA) per pixel.
#define DIM 1024
// Sentinel depth meaning "the ray did not hit this sphere".
#define INF 2e10f
// Pseudo-random float in [0, x]; the argument is parenthesized so that
// expressions can be passed safely.
#define rnd(x) ((x) * rand() / RAND_MAX)
// Number of spheres in the scene.
#define SPHERES 20

// Abort with a readable message if a CUDA runtime call fails.
#define CUDA_CHECK(call)                                                   \
    do {                                                                   \
        cudaError_t err_ = (call);                                         \
        if (err_ != cudaSuccess) {                                         \
            fprintf(stderr, "CUDA error %s:%d: %s\n", __FILE__, __LINE__,  \
                    cudaGetErrorString(err_));                             \
            exit(EXIT_FAILURE);                                            \
        }                                                                  \
    } while (0)

// One sphere of the scene: a color, a radius and a center.
struct Sphere {
    float r, g, b;   // color components
    float radius;
    float x, y, z;   // sphere's center coordinate

    // Intersects a ray travelling along the z axis through (ox, oy) with
    // this sphere.
    //   ox, oy : ray position in the image plane
    //   n      : out-parameter, written only on a hit; receives
    //            dz / radius, which the kernel uses as a shading factor
    // Returns the z coordinate of the hit point, or -INF on a miss.
    __device__ float hit(float ox, float oy, float *n) {
        float dx = ox - x;
        float dy = oy - y;
        if (dx * dx + dy * dy < radius * radius) {
            float dz = sqrtf(radius * radius - dx * dx - dy * dy);
            *n = dz / sqrtf(radius * radius);
            return dz + z;
        }
        return -INF;
    }
};

// Scene description, stored in constant memory and filled from the host
// with cudaMemcpyToSymbol.
__constant__ Sphere s[SPHERES];

// Renders the scene into an RGBA byte buffer.
//   ptr : device buffer of DIM * DIM * 4 bytes
// Expects a 2D launch whose grid covers at least DIM x DIM threads; one
// thread shades one pixel. For each pixel the sphere with the largest hit
// z wins, and its color is scaled by the factor returned through hit().
__global__ void kernel(unsigned char *ptr) {
    // map from threadIdx/blockIdx to pixel position
    int x = threadIdx.x + blockIdx.x * blockDim.x;
    int y = threadIdx.y + blockIdx.y * blockDim.y;
    // Guard against launch configurations that overshoot the image.
    if (x >= DIM || y >= DIM) {
        return;
    }
    // Row stride is the image width; for the launch in main() this equals
    // blockDim.x * gridDim.x.
    int offset = x + y * DIM;

    // Shift the origin to the image center.
    float ox = (x - DIM / 2);
    float oy = (y - DIM / 2);

    float r = 0, g = 0, b = 0;
    float maxz = -INF;
    for (int i = 0; i < SPHERES; i++) {
        float n;
        float t = s[i].hit(ox, oy, &n);
        if (t > maxz) {
            // n is only read here, i.e. after a hit has written it
            // (a miss returns -INF, which never exceeds maxz).
            float fscale = n;
            r = s[i].r * fscale;
            g = s[i].g * fscale;
            b = s[i].b * fscale;
            maxz = t;
        }
    }

    ptr[offset * 4 + 0] = (int)(r * 255);
    ptr[offset * 4 + 1] = (int)(g * 255);
    ptr[offset * 4 + 2] = (int)(b * 255);
    ptr[offset * 4 + 3] = 255;
}

// Builds a random scene, renders it on the GPU, prints the elapsed time
// measured with CUDA events, and displays the resulting bitmap.
int main(void) {
    // The timed region spans allocation, scene upload, the kernel and the
    // copy back to the host.
    cudaEvent_t start, stop;
    CUDA_CHECK(cudaEventCreate(&start));
    CUDA_CHECK(cudaEventCreate(&stop));
    CUDA_CHECK(cudaEventRecord(start, 0));

    CPUBitmap bitmap(DIM, DIM);
    unsigned char *dev_bitmap;
    CUDA_CHECK(cudaMalloc((void **)&dev_bitmap, bitmap.image_size()));

    // Generate the spheres on the host first.
    Sphere *temp_s = (Sphere *)malloc(sizeof(Sphere) * SPHERES);
    if (temp_s == NULL) {
        fprintf(stderr, "failed to allocate host memory for spheres\n");
        return EXIT_FAILURE;
    }
    for (int i = 0; i < SPHERES; i++) {
        temp_s[i].r = rnd(1.0f);
        temp_s[i].g = rnd(1.0f);
        temp_s[i].b = rnd(1.0f);
        temp_s[i].x = rnd(1000.0f) - 500;
        temp_s[i].y = rnd(1000.0f) - 500;
        temp_s[i].z = rnd(1000.0f) - 500;
        temp_s[i].radius = rnd(100.0f) + 20;
    }

    // Copy the scene to constant memory. The trailing "kind" argument of
    // cudaMemcpyToSymbol defaults to cudaMemcpyHostToDevice.
    CUDA_CHECK(cudaMemcpyToSymbol(s, temp_s, sizeof(Sphere) * SPHERES));
    free(temp_s);

    // 16x16 threads per block; DIM is a multiple of 16, so the grid tiles
    // the image exactly.
    dim3 grids(DIM / 16, DIM / 16);
    dim3 threads(16, 16);
    kernel<<<grids, threads>>>(dev_bitmap);
    // Kernel launches return no status; fetch launch errors explicitly.
    CUDA_CHECK(cudaGetLastError());

    CUDA_CHECK(cudaMemcpy(bitmap.get_ptr(), dev_bitmap, bitmap.image_size(),
                          cudaMemcpyDeviceToHost));

    CUDA_CHECK(cudaEventRecord(stop, 0));
    // Wait for the stop event before reading the elapsed time.
    CUDA_CHECK(cudaEventSynchronize(stop));
    float elapsedTime;
    CUDA_CHECK(cudaEventElapsedTime(&elapsedTime, start, stop));
    printf("Time to generate:  %3.1f ms\n", elapsedTime);

    CUDA_CHECK(cudaEventDestroy(start));
    CUDA_CHECK(cudaEventDestroy(stop));

    // Release device memory before displaying: display_and_exit()
    // presumably does not return (TODO confirm against cpu_bitmap.h), so
    // cleanup placed after it would never run.
    CUDA_CHECK(cudaFree(dev_bitmap));

    bitmap.display_and_exit();
    return 0;
}


 

原创粉丝点击