CUDA by Example 第四章纠错

来源:互联网 发布:虚拟机网络没有连接 编辑:程序博客网 时间:2024/05/21 01:52

本书给出的第四章代码如下:

// Julia-set renderer (GPU version) as listed in chapter 4 of the book.
// The statements are reproduced as in the listing; only line breaks and
// comments have been restored/added so that the code is readable and the
// `//` comments no longer swallow the code that follows them.
#include "../common/book.h"
#include "../common/cpu_bitmap.h"

// Width and height of the image in pixels; also the grid size in each dimension.
#define DIM 1000

// Minimal complex number providing just the operations julia() needs.
struct cuComplex {
    float   r;  // real part
    float   i;  // imaginary part

    // NOTE: this constructor carries no __device__ qualifier, so it is a
    // host function. julia() and the operators below call it from device
    // code, and the compiler reports:
    //   calling a __host__ function("cuComplex::cuComplex") from a
    //   __device__ function("julia") is not allowed
    // Prefixing this constructor with __device__ resolves that error.
    cuComplex( float a, float b ) : r(a), i(b)  {}

    // Squared magnitude r^2 + i^2 (no square root is taken).
    __device__ float magnitude2( void ) {
        return r * r + i * i;
    }

    // Complex product (*this) * a.
    __device__ cuComplex operator*(const cuComplex& a) {
        return cuComplex(r*a.r - i*a.i, i*a.r + r*a.i);
    }

    // Complex sum (*this) + a.
    __device__ cuComplex operator+(const cuComplex& a) {
        return cuComplex(r+a.r, i+a.i);
    }
};

// Decides whether pixel (x, y) is treated as part of the Julia set.
//
// The pixel is mapped to the complex point (jx, jy), with the image centre
// at the origin and coordinates scaled by `scale`. Starting from that
// point, a = a*a + c is iterated up to 200 times with c = -0.8 + 0.156i.
//
// Returns 0 as soon as the squared magnitude exceeds 1000, or 1 if all
// 200 iterations stay within that bound.
__device__ int julia( int x, int y ) {
    const float scale = 1.5;
    float jx = scale * (float)(DIM/2 - x)/(DIM/2);
    float jy = scale * (float)(DIM/2 - y)/(DIM/2);

    cuComplex c(-0.8, 0.156);
    cuComplex a(jx, jy);

    int i = 0;
    for (i=0; i<200; i++) {
        a = a * a + c;
        if (a.magnitude2() > 1000)
            return 0;
    }

    return 1;
}

// Computes one pixel per block and writes it into `ptr`.
//
// Launch layout: main() launches a DIM x DIM grid with one thread per
// block; blockIdx.x / blockIdx.y are used directly as the pixel position.
// `ptr` must have room for 4 bytes per pixel for gridDim.x * gridDim.y
// pixels. Byte 0 is 255 for points julia() accepts and 0 otherwise,
// bytes 1 and 2 are 0, byte 3 is 255 (presumably R, G, B, A - confirm
// against CPUBitmap).
__global__ void kernel( unsigned char *ptr ) {
    // map from blockIdx to pixel position
    int x = blockIdx.x;
    int y = blockIdx.y;
    int offset = x + y * gridDim.x;

    // now calculate the value at that position
    int juliaValue = julia( x, y );
    ptr[offset*4 + 0] = 255 * juliaValue;
    ptr[offset*4 + 1] = 0;
    ptr[offset*4 + 2] = 0;
    ptr[offset*4 + 3] = 255;
}

// globals needed by the update routine
struct DataBlock {
    unsigned char   *dev_bitmap;
};

// Allocates a device buffer of bitmap.image_size() bytes, fills it with
// the kernel, copies the result into the host bitmap, frees the device
// buffer and hands the bitmap to display_and_exit().
int main( void ) {
    DataBlock   data;
    CPUBitmap bitmap( DIM, DIM, &data );
    unsigned char    *dev_bitmap;

    HANDLE_ERROR( cudaMalloc( (void**)&dev_bitmap, bitmap.image_size() ) );
    data.dev_bitmap = dev_bitmap;

    // One block per pixel, one thread per block.
    dim3    grid(DIM,DIM);
    kernel<<<grid,1>>>( dev_bitmap );

    HANDLE_ERROR( cudaMemcpy( bitmap.get_ptr(), dev_bitmap,
                              bitmap.image_size(),
                              cudaMemcpyDeviceToHost ) );

    HANDLE_ERROR( cudaFree( dev_bitmap ) );

    bitmap.display_and_exit();
}

但是在实际编译时,会报出这样一个错误:

calling a __host__ function("cuComplex::cuComplex") from a __device__ function("julia") is not allowed

错误信息已经告诉我们:不能在设备函数中调用主机函数。解决方法很简单,只需要在构造函数 cuComplex( float a, float b ) : r(a), i(b) {} 前面添加 __device__,声明其为设备函数就 OK。

但是,接下来运行代码,我们仍然无法得到书上的效果图。这是由于代码中 DIM 的值设置过大造成的,不妨将其改为 500 试试。

参考

  • CUDA BY EXAMPLES第4章代码错误解决方法

  • CUDA by example 第四章的例子运行不通过

原创粉丝点击