mexcuda中矩阵数据的传输

来源:互联网 发布:淘宝马桶盖阻尼器 编辑:程序博客网 时间:2024/06/09 18:44

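在 MEX CUDA 核函数中,gpuArray 矩阵的数据以一维线性数组的形式传入:线性下标从 0 开始,元素按列优先(column-major)顺序排列。下面的例子让下标为 4 的元素保持不变、其余元素乘以 2;对于 4×4 的输入,下标 4 对应 MATLAB 中第 1 行第 2 列的元素。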

/*
 * Example of how to use the mxGPUArray API in a MEX file.  This example shows
 * how to write a MEX function that takes a gpuArray input and returns a
 * gpuArray output, e.g. B=mexFunction(A).
 *
 * Copyright 2012 The MathWorks, Inc.
 *
 * The kernel below carries one deliberate modification to the stock example:
 * the element at zero-based linear index 4 is copied instead of doubled, so
 * that the memory layout of the input matrix can be observed from MATLAB.
 */

#include <limits.h>

#include "mex.h"
#include "gpu/mxGPUArray.h"

/*
 * Device code
 */

/*
 * TimesTwo - element-wise kernel over a flat array of N doubles.
 *
 * Writes B[i] = 2 * A[i] for every i in [0, N), except for i == 4, where the
 * input value is copied unchanged. MATLAB stores matrices in column-major
 * order, so for a 4-by-4 input that element is row 1, column 2 in MATLAB's
 * one-based notation.
 *
 * Launch layout: 1-d grid of 1-d blocks, one thread per element; threads past
 * the end of the data do nothing. No shared memory is used.
 *
 * A  device pointer to N readable doubles
 * B  device pointer to N writable doubles
 * N  number of elements (non-negative)
 */
void __global__ TimesTwo(double const * const A,
                         double * const B,
                         int const N)
{
    /*
     * Calculate the global linear index, assuming a 1-d grid. The index is
     * kept unsigned: with N close to INT_MAX the rounded-up tail of the last
     * block would wrap to a negative int and slip through a signed "i < N".
     */
    unsigned int const i = blockDim.x * blockIdx.x + threadIdx.x;

    if (i < (unsigned int)N) {
        if (i == 4) {
            /* Marker element: left untouched to expose the storage order. */
            B[i] = A[i];
        } else {
            B[i] = 2 * A[i];
        }
    }
}

/*
 * Host code
 */

/*
 * mexFunction - MEX entry point: B = mexGPUExample(A).
 *
 * nlhs, plhs  number of / array of output arguments; plhs[0] receives a
 *             gpuArray with the same size as the input
 * nrhs, prhs  number of / array of input arguments; exactly one real double
 *             gpuArray is expected
 *
 * Raises a MATLAB error (and therefore does not return) when the GPU library
 * cannot be initialized, when the input is not a single real double gpuArray,
 * when the input has more elements than an int can index, or when the kernel
 * launch is rejected by the CUDA runtime.
 */
void mexFunction(int nlhs, mxArray *plhs[],
                 int nrhs, mxArray const *prhs[])
{
    /* Declare all variables.*/
    mxGPUArray const *A;
    mxGPUArray *B;
    double const *d_A;
    double *d_B;
    mwSize const *dims;
    mwSize numElements;
    int N;
    cudaError_t launchStatus;
    char const * const errId = "parallel:gpu:mexGPUExample:InvalidInput";
    char const * const errMsg = "Invalid input to MEX file.";
    char const * const initErrId = "parallel:gpu:mexGPUExample:InitFailed";
    char const * const launchErrId = "parallel:gpu:mexGPUExample:KernelLaunchFailed";

    /* Choose a reasonably sized number of threads for the block. */
    int const threadsPerBlock = 256;
    int blocksPerGrid;

    /* Initialize the MathWorks GPU API. */
    if (mxInitGPU() != MX_GPU_SUCCESS) {
        mexErrMsgIdAndTxt(initErrId, "Unable to initialize the MATLAB GPU library.");
    }

    /* Throw an error if the input is not a GPU array. */
    if ((nrhs != 1) || !(mxIsGPUArray(prhs[0]))) {
        mexErrMsgIdAndTxt(errId, errMsg);
    }

    A = mxGPUCreateFromMxArray(prhs[0]);

    /*
     * Verify that A really is a real double array before extracting the
     * pointer. Complex doubles also report mxDOUBLE_CLASS but hold two doubles
     * per element, which this kernel does not handle. mexErrMsgIdAndTxt does
     * not return, so the host-side handle is released first.
     */
    if ((mxGPUGetClassID(A) != mxDOUBLE_CLASS) ||
        (mxGPUGetComplexity(A) != mxREAL)) {
        mxGPUDestroyGPUArray(A);
        mexErrMsgIdAndTxt(errId, errMsg);
    }

    /*
     * The kernel takes its length as an int; refuse inputs that would be
     * truncated by the conversion instead of silently processing a prefix.
     */
    numElements = mxGPUGetNumberOfElements(A);
    if (numElements > (mwSize)INT_MAX) {
        mxGPUDestroyGPUArray(A);
        mexErrMsgIdAndTxt(errId, "Input has too many elements for this example.");
    }
    N = (int)numElements;

    /*
     * Now that we have verified the data type, extract a pointer to the input
     * data on the device.
     */
    d_A = (double const *)(mxGPUGetDataReadOnly(A));

    /*
     * Create a GPUArray to hold the result and get its underlying pointer.
     * The dimensions array returned by the API is released with mxFree once
     * the output has been created.
     */
    dims = mxGPUGetDimensions(A);
    B = mxGPUCreateGPUArray(mxGPUGetNumberOfDimensions(A),
                            dims,
                            mxGPUGetClassID(A),
                            mxGPUGetComplexity(A),
                            MX_GPU_DO_NOT_INITIALIZE);
    mxFree((void *)dims);
    d_B = (double *)(mxGPUGetData(B));

    /*
     * Call the kernel using the CUDA runtime API. We are using a 1-d grid
     * here. An empty input is skipped entirely, because a grid of zero blocks
     * is not a valid launch configuration.
     */
    if (N > 0) {
        /* Ceiling division written so that it cannot overflow an int. */
        blocksPerGrid = N / threadsPerBlock
                      + ((N % threadsPerBlock) != 0 ? 1 : 0);

        TimesTwo<<<blocksPerGrid, threadsPerBlock>>>(d_A, d_B, N);

        /* A kernel launch reports configuration errors only through this call. */
        launchStatus = cudaGetLastError();
        if (launchStatus != cudaSuccess) {
            mxGPUDestroyGPUArray(A);
            mxGPUDestroyGPUArray(B);
            mexErrMsgIdAndTxt(launchErrId,
                              "TimesTwo kernel launch failed: %s",
                              cudaGetErrorString(launchStatus));
        }
    }

    /* Wrap the result up as a MATLAB gpuArray for return. */
    plhs[0] = mxGPUCreateMxArrayOnGPU(B);

    /*
     * The mxGPUArray pointers are host-side structures that refer to device
     * data. These must be destroyed before leaving the MEX function.
     */
    mxGPUDestroyGPUArray(A);
    mxGPUDestroyGPUArray(B);
}



>> x = ones(4, 'gpuArray');   % 4×4 全 1 矩阵(由下面的输出推断)
>> y = mexGPUExample(x)


y =


     2     1     2     2
     2     2     2     2
     2     2     2     2
     2     2     2     2



0 0