cudaMallocPitch

来源:互联网 发布:complete java 编辑:程序博客网 时间:2024/06/05 02:26



#include "cuda_runtime.h"
#include "device_launch_parameters.h"
#include "device_functions.h"
#include <stdio.h>
#include <stdlib.h>

#define W 8  // matrix width: number of float elements in each row
#define H 8  // matrix height: number of rows

// Evaluates a CUDA runtime call and aborts with file/line and the runtime's
// error string if it did not return cudaSuccess.
#define CUDA_CHECK(call)                                                     \
    do {                                                                     \
        cudaError_t err_ = (call);                                           \
        if (err_ != cudaSuccess) {                                           \
            fprintf(stderr, "CUDA error %s:%d: %s\n", __FILE__, __LINE__,    \
                    cudaGetErrorString(err_));                               \
            exit(EXIT_FAILURE);                                              \
        }                                                                    \
    } while (0)

// Kernel: fills an H x W pitched float matrix with 10, except the centre
// element (row H/2, column W/2), which is set to 666.
//
//   devPtr - base address of the pitched device allocation
//   pitch  - byte distance between the starts of consecutive rows, as
//            returned by cudaMallocPitch
//
// Expected launch: a single block of dim3(W, H) threads; each thread owns
// exactly one element (threadIdx.x = column, threadIdx.y = row).
__global__ void kernel(float* devPtr, size_t pitch)
{
    int col = threadIdx.x;
    int row = threadIdx.y;

    // Guard against a block that is larger than the matrix.
    if (col >= W || row >= H)
        return;

    // Rows are `pitch` bytes apart, so the row offset must be applied to a
    // byte pointer before casting back to float*.
    float* rowPtr = (float*)((char*)devPtr + row * pitch);

    // One store per element: the owning thread decides the value, instead of
    // every thread also writing the centre cell.
    rowPtr[col] = (row == H / 2 && col == W / 2) ? 666.0f : 10.0f;
}

// Host driver: builds an H x W matrix, prints it, copies it to pitched device
// memory, runs the kernel, copies the result back and prints it again.
int main()
{
    float* dev_a = NULL;
    size_t pitch = 0;

    // Allocate H rows of W floats on the device. cudaMallocPitch may pad each
    // row; the actual row stride in bytes is returned in `pitch`.
    // (A plain cudaMalloc of sizeof(float) * W * H would give unpadded rows.)
    CUDA_CHECK(cudaMallocPitch((void**)&dev_a, &pitch, W * sizeof(float), H));

    // Host matrix stored row-major: H rows of W columns.
    float myArray2[H][W];

    printf("修改前的数组内容:\n");
    for (int i = 0; i < H; i++)
    {
        for (int j = 0; j < W; j++)
        {
            // Evaluated in double and then narrowed, exactly as before.
            myArray2[i][j] = (float)(i + j * 6.1);
            printf("%f ", myArray2[i][j]);
        }
        printf("\n");
    }

    // Host -> device. Host rows are tightly packed (W * sizeof(float) bytes
    // apart); device rows are `pitch` bytes apart.
    CUDA_CHECK(cudaMemcpy2D(dev_a, pitch,
                            myArray2, W * sizeof(float),
                            W * sizeof(float), H,
                            cudaMemcpyHostToDevice));

    dim3 threads(W, H);
    kernel<<<1, threads>>>(dev_a, pitch);
    // A kernel launch returns no status itself; query it explicitly.
    CUDA_CHECK(cudaGetLastError());

    // Device -> host. This blocking copy also surfaces any error raised while
    // the kernel was executing.
    CUDA_CHECK(cudaMemcpy2D(myArray2, W * sizeof(float),
                            dev_a, pitch,
                            W * sizeof(float), H,
                            cudaMemcpyDeviceToHost));

    CUDA_CHECK(cudaFree(dev_a));

    printf("\n 修改后的数组内容:\n");
    for (int i = 0; i < H; i++)
    {
        for (int j = 0; j < W; j++)
        {
            printf("%f ", myArray2[i][j]);
        }
        printf("\n");
    }

    return 0;
}



修改前的数组内容:
0.000000 6.100000 12.200000 18.299999 24.400000 30.500000 36.599998 42.700001
1.000000 7.100000 13.200000 19.299999 25.400000 31.500000 37.599998 43.700001
2.000000 8.100000 14.200000 20.299999 26.400000 32.500000 38.599998 44.700001
3.000000 9.100000 15.200000 21.299999 27.400000 33.500000 39.599998 45.700001
4.000000 10.100000 16.200001 22.299999 28.400000 34.500000 40.599998 46.700001
5.000000 11.100000 17.200001 23.299999 29.400000 35.500000 41.599998 47.700001
6.000000 12.100000 18.200001 24.299999 30.400000 36.500000 42.599998 48.700001
7.000000 13.100000 19.200001 25.299999 31.400000 37.500000 43.599998 49.700001

 修改后的数组内容:
10.000000 10.000000 10.000000 10.000000 10.000000 10.000000 10.000000 10.000000
10.000000 10.000000 10.000000 10.000000 10.000000 10.000000 10.000000 10.000000
10.000000 10.000000 10.000000 10.000000 10.000000 10.000000 10.000000 10.000000
10.000000 10.000000 10.000000 10.000000 10.000000 10.000000 10.000000 10.000000
10.000000 10.000000 10.000000 10.000000 666.000000 10.000000 10.000000 10.000000
10.000000 10.000000 10.000000 10.000000 10.000000 10.000000 10.000000 10.000000
10.000000 10.000000 10.000000 10.000000 10.000000 10.000000 10.000000 10.000000
10.000000 10.000000 10.000000 10.000000 10.000000 10.000000 10.000000 10.000000