cuda编程---第一个cuda程序

来源：互联网发布：河南大学软件协会编辑：程序博客网时间：2024/05/22 04:26

前言：1、参考：NVIDIA official tutorial；2、使用 nvcc 编译程序，并加上 -g -G 参数进行编译，以保存调试信息，之后可以用 cuda-gdb 进行单步调试。

程序:

#include <iostream>
#include <math.h>
#include <cstdio>
#include <cstdlib>

// Abort with a readable message if a CUDA runtime call fails. Runtime calls
// report failure only through their return value, so an unchecked error would
// otherwise surface later as a confusing, unrelated failure.
#define CUDA_CHECK(call)                                                     \
    do {                                                                     \
        cudaError_t err_ = (call);                                           \
        if (err_ != cudaSuccess) {                                           \
            std::fprintf(stderr, "CUDA error at %s:%d: %s\n", __FILE__,      \
                         __LINE__, cudaGetErrorString(err_));                \
            std::exit(EXIT_FAILURE);                                         \
        }                                                                    \
    } while (0)

// CUDA kernel: element-wise add of two arrays on the GPU, y[i] = x[i] + y[i].
//
// Launch layout: 1D grid of 1D blocks. Each thread starts at its global index
// and advances by the total number of launched threads, so any grid/block
// size (including <<<1, 1>>>) covers all n elements.
//
// n: number of elements in x and y; nothing is written when n <= 0.
// x: input array, readable from the device, at least n elements.
// y: input/output array, accessible from the device, at least n elements.
__global__
void add(int n, const float *x, float *y)
{
    // 64-bit indices: with a 32-bit int, `i += stride` could overflow when n
    // is close to INT_MAX, and blockIdx.x * blockDim.x is computed in
    // unsigned 32-bit arithmetic.
    const long long index =
        static_cast<long long>(blockIdx.x) * blockDim.x + threadIdx.x;
    const long long stride = static_cast<long long>(blockDim.x) * gridDim.x;

    for (long long i = index; i < n; i += stride)
        y[i] = x[i] + y[i];
}

// Allocates two 1M-element arrays in unified memory, fills them on the host,
// adds them on the GPU, and prints the maximum deviation from the expected
// value 3.0f. Returns 0 on success; exits with EXIT_FAILURE on any CUDA error.
int main(void)
{
    const int N = 1 << 20;  // 1M elements.
    const size_t bytes = static_cast<size_t>(N) * sizeof(float);

    // Allocate Unified Memory -- accessible from CPU or GPU.
    float *x = NULL;
    float *y = NULL;
    CUDA_CHECK(cudaMallocManaged(&x, bytes));
    CUDA_CHECK(cudaMallocManaged(&y, bytes));

    // Initialize x and y arrays on the host.
    for (int i = 0; i < N; i++) {
        x[i] = 1.0f;
        y[i] = 2.0f;
    }

    // Ceil-divide so that every element is covered by at least one thread.
    const int blockSize = 256;
    const int numBlocks = (N + blockSize - 1) / blockSize;

    // Run the kernel on 1M elements on the GPU.
    add<<<numBlocks, blockSize>>>(N, x, y);
    // A kernel launch returns nothing; launch-configuration errors are only
    // visible through cudaGetLastError().
    CUDA_CHECK(cudaGetLastError());

    // Wait for the GPU to finish before accessing the results on the host.
    // Errors raised while the kernel was executing are reported here.
    CUDA_CHECK(cudaDeviceSynchronize());

    // Check for errors (all values should be 3.0f).
    float maxError = 0.0f;
    for (int i = 0; i < N; i++)
        maxError = fmaxf(maxError, fabsf(y[i] - 3.0f));
    std::cout << "Max error: " << maxError << std::endl;

    // Free memory.
    CUDA_CHECK(cudaFree(x));
    CUDA_CHECK(cudaFree(y));

    return 0;
}

阅读全文

0 0