CUDA向设备端传递多个一维数组并在kernel中引用的方法

来源：互联网发布：网络骂人的歌曲最火的编辑：程序博客网时间：2024/06/05 01:52

向device端传递N个一维数组：
1. 先在主机端开辟N个一维数组，再在设备端开辟N个一维数组，并将数据从主机端复制到设备端；
2. 分别在主机端和设备端各开辟一个含N个指针的指针数组，主机端指针数组的各个指针分别保存设备端各个数组的首地址；
3. 将主机端指针数组拷贝到设备端指针数组。
这样，设备端指针数组的各个指针就分别指向设备端各个数组的首地址了。

代码示例
// Purpose: create several 1-D arrays, modify their elements on the device, then copy them back to the host.
#include "cuda_runtime.h"
#include "device_launch_parameters.h"
#include "helper_cuda.h"
#include <stdio.h>
#include<iostream>
using namespace std;
#define N 10
// Writes the value 2 into the leading (row + 1) elements of each row of a
// jagged table: row r gets columns 0..r set, everything else is untouched.
//
// Launch layout: blockIdx.x selects the row and threadIdx.x selects the column,
// so a launch covers rows 0..gridDim.x-1 and columns 0..blockDim.x-1.
//
// ptr  - table of row pointers; ptr and every ptr[r] it holds are dereferenced
//        here, so both must be addressable from device code, and ptr[r] must
//        have room for at least r + 1 ints.
// rows - number of valid entries in ptr.
__global__ void gpuKernel(int **ptr, int rows)
{
    const int row = blockIdx.x;
    const int col = threadIdx.x;

    // Skip blocks past the last row and threads past the diagonal.
    if (row >= rows || col > row)
    {
        return;
    }

    ptr[row][col] = 2;
}
// Reports a failed CUDA runtime call on stderr.
// Returns true when `status` is cudaSuccess, false otherwise; `what` names the
// call being checked so the message identifies where the failure happened.
static bool cudaSucceeded(cudaError_t status, const char *what)
{
    if (status == cudaSuccess)
    {
        return true;
    }
    fprintf(stderr, "CUDA error (%s): %s\n", what, cudaGetErrorString(status));
    return false;
}

// Demo driver: builds N host rows of N ints, mirrors them into N separate
// device allocations, uploads a table of the device row pointers, launches
// gpuKernel on that table, then copies back and prints the first (i + 1)
// elements of row i.
// Returns 0 on success and 1 if any CUDA call fails; all host and device
// allocations are released on both paths.
int main()
{
    int **hptr = new int *[N];       // host staging copy of the device row pointers
    int **hostArray = new int *[N];  // host rows
    int **devArray = new int *[N](); // device rows; null-initialized so cleanup can always cudaFree them
    int **dptr = NULL;               // device-side table of row pointers
    bool ok = true;

    for (int i = 0; i < N; i++)
    {
        // Value-initialize the whole row: all N ints are copied to the device
        // below, but only the first i + 1 are assigned here.
        hostArray[i] = new int[N]();
        for (int j = 0; j < i + 1; j++)
        {
            hostArray[i][j] = 1;
        }
    }

    ok = cudaSucceeded(cudaMalloc((void **)&dptr, N * sizeof(int *)), "cudaMalloc(dptr)");

    for (int i = 0; ok && i < N; i++)
    {
        ok = cudaSucceeded(cudaMalloc((void **)&devArray[i], N * sizeof(int)), "cudaMalloc(devArray[i])") &&
             cudaSucceeded(cudaMemcpy(devArray[i], hostArray[i], N * sizeof(int), cudaMemcpyHostToDevice),
                           "cudaMemcpy(host row -> device row)");
        hptr[i] = devArray[i]; // remember the start address of each device row
    }

    if (ok)
    {
        // Upload the pointer table so the kernel can index ptr[row][col].
        ok = cudaSucceeded(cudaMemcpy((void *)dptr, (void *)hptr, N * sizeof(int *), cudaMemcpyHostToDevice),
                           "cudaMemcpy(pointer table -> device)");
    }

    if (ok)
    {
        dim3 dimblock = N;
        dim3 dimgrid = N;
        gpuKernel<<<dimgrid, dimblock>>>(dptr, N);
        // A launch returns no status itself; configuration errors show up here.
        ok = cudaSucceeded(cudaGetLastError(), "gpuKernel launch");
    }

    // Errors raised while the kernel runs are reported by these copies,
    // the next CUDA calls after the launch.
    for (int i = 0; ok && i < N; i++)
    {
        ok = cudaSucceeded(cudaMemcpy(hostArray[i], devArray[i], (i + 1) * sizeof(int), cudaMemcpyDeviceToHost),
                           "cudaMemcpy(device row -> host row)");
    }

    if (ok)
    {
        for (int i = 0; i < N; i++)
        {
            for (int j = 0; j < i + 1; j++)
            {
                cout<<hostArray[i][j]<<",";
            }
            cout<<endl;
        }
    }

    // Free memory. devArray entries that were never allocated are still null,
    // and cudaFree on a null pointer performs no operation.
    for (int i = 0; i < N; i++)
    {
        delete[] hostArray[i];
        cudaFree(devArray[i]);
    }
    cudaFree(dptr);
    delete[] hptr;
    delete[] hostArray;
    delete[] devArray; // host array that merely stores device pointers, hence delete[] and not cudaFree

    return ok ? 0 : 1;
}

0 0