CUDA二维矩阵加法

来源:互联网 发布:手机强制删除文件软件 编辑:程序博客网 时间:2024/06/05 09:33
#include <cuda_runtime_api.h>
#include <device_launch_parameters.h>
#include <stdio.h>
#include <stdlib.h>

// Side length of the square matrices handled by this sample.
const int kDim = 2;

// Evaluates a CUDA runtime call and aborts with a readable message when it
// does not return cudaSuccess, so a failure is reported at the call that
// caused it instead of surfacing later as garbage output.
#define CUDA_CHECK(call)                                                    \
    do {                                                                    \
        cudaError_t cuda_check_err_ = (call);                               \
        if (cuda_check_err_ != cudaSuccess) {                               \
            fprintf(stderr, "CUDA error at %s:%d: %s\n", __FILE__,          \
                    __LINE__, cudaGetErrorString(cuda_check_err_));         \
            exit(EXIT_FAILURE);                                             \
        }                                                                   \
    } while (0)

// Element-wise addition of two kDim x kDim integer matrices: c = a + b.
//
// Launch layout: a 2D grid/block configuration in which each thread handles
// one element; the x dimension selects the column and the y dimension selects
// the row. Threads whose (row, col) fall outside the matrix do nothing, so the
// launch may cover more than kDim x kDim threads.
//
// a, b: device pointers to the input matrices (row-major, kDim columns).
// c:    device pointer to the output matrix (row-major, kDim columns).
__global__ void add(int a[][kDim], int b[][kDim], int c[][kDim])
{
    // x maps to the column so that neighbouring threads touch neighbouring
    // addresses within a row.
    int col = threadIdx.x + blockIdx.x * blockDim.x;
    int row = threadIdx.y + blockIdx.y * blockDim.y;

    if (row < kDim && col < kDim) {
        c[row][col] = a[row][col] + b[row][col];
    }
}

// Adds two fixed 2x2 matrices on the GPU and prints the result, one matrix
// row per output line. Returns 0 on success; exits with EXIT_FAILURE if any
// CUDA runtime call fails.
int main()
{
    // Host-side matrices live on the stack; nothing to release afterwards.
    int a[kDim][kDim] = { {1, 2}, {3, 4} };
    int b[kDim][kDim] = { {1, 2}, {3, 4} };
    int c[kDim][kDim] = { {0, 0}, {0, 0} };

    const size_t bytes = sizeof(int) * kDim * kDim;

    int (*device_a)[kDim] = NULL;
    int (*device_b)[kDim] = NULL;
    int (*device_c)[kDim] = NULL;

    CUDA_CHECK(cudaMalloc((void **)&device_a, bytes));
    CUDA_CHECK(cudaMalloc((void **)&device_b, bytes));
    CUDA_CHECK(cudaMalloc((void **)&device_c, bytes));

    CUDA_CHECK(cudaMemcpy(device_a, a, bytes, cudaMemcpyHostToDevice));
    CUDA_CHECK(cudaMemcpy(device_b, b, bytes, cudaMemcpyHostToDevice));

    // One block of kDim x kDim threads: one thread per matrix element.
    dim3 blocks(1, 1);
    dim3 threads(kDim, kDim);
    add<<<blocks, threads>>>(device_a, device_b, device_c);

    // A kernel launch returns nothing; an invalid launch configuration is
    // only visible through cudaGetLastError().
    CUDA_CHECK(cudaGetLastError());

    // This blocking copy on the default stream waits for the kernel, and its
    // return code also reports any fault raised while the kernel executed.
    CUDA_CHECK(cudaMemcpy(c, device_c, bytes, cudaMemcpyDeviceToHost));

    for (int i = 0; i < kDim; i++) {
        for (int j = 0; j < kDim; j++) {
            printf("%10d", c[i][j]);
        }
        printf("\n");
    }

    CUDA_CHECK(cudaFree(device_a));
    CUDA_CHECK(cudaFree(device_b));
    CUDA_CHECK(cudaFree(device_c));

    return 0;
}

// Screenshot of the program output from the original post:
// http://img.blog.csdn.net/20150914122105544?watermark/2/text/aHR0cDovL2Jsb2cuY3Nkbi5uZXQv/font/5a6L5L2T/fontsize/400/fill/I0JBQkFCMA==/dissolve/70/gravity/Center

0 0
原创粉丝点击