cuda编程---第一个cuda程序

来源:互联网 发布:河南大学软件协会 编辑:程序博客网 时间:2024/05/22 04:26
前言:

1、参考: NVIDIA official tutorial

2、使用 nvcc 编译程序,并加上 -g -G 参数(-g 生成主机端调试信息,-G 生成设备端调试信息),以保存调试信息,之后可以用 cuda-gdb 进行单步调试。

程序:

#include <cmath>
#include <cstdlib>
#include <iostream>

// Evaluates a CUDA runtime call and terminates the program with a readable
// message if it did not return cudaSuccess. Runtime calls report failures only
// through their return value, so every call in this file is wrapped.
#define CUDA_CHECK(call)                                                   \
    do {                                                                   \
        cudaError_t cudaCheckStatus_ = (call);                             \
        if (cudaCheckStatus_ != cudaSuccess) {                             \
            std::cerr << "CUDA error at " << __FILE__ << ":" << __LINE__   \
                      << ": " << cudaGetErrorString(cudaCheckStatus_)      \
                      << std::endl;                                        \
            std::exit(EXIT_FAILURE);                                       \
        }                                                                  \
    } while (0)

// CUDA kernel: element-wise y[i] = x[i] + y[i] for i in [0, n).
//
// Launch layout: 1D grid of 1D blocks. Each thread starts at its flat global
// index and advances by the total number of launched threads (grid-stride
// loop), so any grid/block size produces a complete and correct result.
// Shared memory: none.
// Preconditions: x and y must each reference at least n floats that are
// accessible from the device.
__global__
void add(int n, float *x, float *y)
{
    if (n <= 0)
        return;

    // Index arithmetic is done in size_t so that blockIdx.x * blockDim.x and
    // blockDim.x * gridDim.x cannot wrap a 32-bit int on large launches.
    const size_t count = static_cast<size_t>(n);
    const size_t stride = static_cast<size_t>(blockDim.x) * gridDim.x;
    for (size_t i = static_cast<size_t>(blockIdx.x) * blockDim.x + threadIdx.x;
         i < count; i += stride)
        y[i] = x[i] + y[i];
}

// Adds two 1M-element float arrays on the GPU using unified memory and prints
// the maximum absolute deviation from the expected value 3.0f.
// Exits with EXIT_FAILURE if any CUDA call or the kernel launch fails.
int main(void)
{
    const int N = 1 << 20;  // 1M elements.

    // Allocate unified memory -- accessible from both CPU and GPU.
    float *x = nullptr;
    float *y = nullptr;
    CUDA_CHECK(cudaMallocManaged(&x, N * sizeof(float)));
    CUDA_CHECK(cudaMallocManaged(&y, N * sizeof(float)));

    // Initialize the x and y arrays on the host.
    for (int i = 0; i < N; i++) {
        x[i] = 1.0f;
        y[i] = 2.0f;
    }

    // One thread per element, rounded up to a whole number of blocks.
    const int blockSize = 256;
    const int numBlocks = (N + blockSize - 1) / blockSize;

    // Run the kernel on 1M elements on the GPU.
    add<<<numBlocks, blockSize>>>(N, x, y);
    // A launch does not return a status; an invalid launch configuration is
    // only visible through cudaGetLastError().
    CUDA_CHECK(cudaGetLastError());

    // Wait for the GPU to finish before touching the results on the host.
    // Errors raised while the kernel was executing are reported here.
    CUDA_CHECK(cudaDeviceSynchronize());

    // Check for errors (all values should be 3.0f).
    float maxError = 0.0f;
    for (int i = 0; i < N; i++)
        maxError = fmaxf(maxError, fabsf(y[i] - 3.0f));
    std::cout << "Max error: " << maxError << std::endl;

    // Free memory.
    CUDA_CHECK(cudaFree(x));
    CUDA_CHECK(cudaFree(y));

    return 0;
}
原创粉丝点击