Cuda里面的Texture Memory

来源：互联网发布：财务战略矩阵案例编辑：程序博客网时间：2024/05/21 09:06

1-dimension linear memory

How to use?

texture<float,1,cudaReadModeElementType> texreference;建立一个全局的texture，<type，dimension , readtype>
type 是数据类型，int ，uchar，float等等
dimension是1，2 或 3
readtype有cudaReadModeNormalizedFloat或者cudaReadModeElementType，默认是cudaReadModeElementType。如果是选择cudaReadModeNormalizedFloat，则会对输出数据进行转换，归一化为[0.0，1.0]（对无符号整型），或者[-1.0，1.0]（对有符号整型）
在device端开辟一段空间，并在这段空间传入需要的数据
如：cudaMemcpy(diarray, harray, sizeof(float)*size, cudaMemcpyHostToDevice);
绑定texture

cudaBindTexture(size_t *offset, const struct textureReference *texref, const void *devPtr, size_t size )

offset是偏移空间，一般写NULL，即不偏移
texref是第一步的texture
devPtr是第二步开辟空间的指针
size是大小

最后记得解绑

// Unbind the texture reference declared in step 1 once the kernels that read it have finished.
cudaUnbindTexture(texreference);

代码示例

#include <cstdio>
#include <cstdlib>
#include <iostream>
#include <cuda.h>
#include <cuda_runtime.h>
#include <device_functions.h>
#include <device_launch_parameters.h>

using namespace std;

// Abort with file/line and the CUDA error string as soon as a runtime call fails,
// so that an early failure is not silently carried into later calls.
#define CUDA_CHECK(call)                                                   \
    do {                                                                   \
        cudaError_t err_ = (call);                                         \
        if (err_ != cudaSuccess) {                                         \
            fprintf(stderr, "CUDA error %s:%d: %s\n", __FILE__, __LINE__,  \
                    cudaGetErrorString(err_));                             \
            exit(EXIT_FAILURE);                                            \
        }                                                                  \
    } while (0)

// File-scope 1D texture reference returning raw float elements.
// NOTE: texture references are a legacy API (deprecated in CUDA 11, removed in
// CUDA 12); new code should use texture objects (cudaTextureObject_t) instead.
texture<float, 1, cudaReadModeElementType> texreference;

// Copies `size` floats from the linear memory bound to `texreference` into
// `doarray` and prints each value.
// Launch layout: 1D grid of 1D blocks, one thread per element; surplus threads
// in the last block exit immediately.
__global__ void kernel(float* doarray, int size)
{
    // global index of this thread
    int index = blockIdx.x * blockDim.x + threadIdx.x;

    // the grid is rounded up to whole blocks, so guard the tail
    if (index >= size)
        return;

    // fetch from linear memory through the texture reference
    doarray[index] = tex1Dfetch(texreference, index);
    printf("%f\n", doarray[index]);
}

// Host driver: fills an array with random values, binds it to the texture,
// runs the kernel and copies the result back.
int main(int argc, char** argv)
{
    const int size = 3200;
    const int threadsPerBlock = 64;
    const size_t bytes = sizeof(float) * size;

    float* harray = NULL;
    float* diarray = NULL;
    float* doarray = NULL;

    // allocate host and device memory
    harray = (float*)malloc(bytes);
    if (harray == NULL) {
        fprintf(stderr, "host allocation of %zu bytes failed\n", bytes);
        return EXIT_FAILURE;
    }
    CUDA_CHECK(cudaMalloc((void**)&diarray, bytes));
    CUDA_CHECK(cudaMalloc((void**)&doarray, bytes));

    // initialize host array with values in [0, 1]
    for (int loop = 0; loop < size; loop++)
        harray[loop] = (float)rand() / (float)RAND_MAX;

    // copy array from host to device memory
    CUDA_CHECK(cudaMemcpy(diarray, harray, bytes, cudaMemcpyHostToDevice));

    // bind texture reference to the linear device memory (no offset requested)
    CUDA_CHECK(cudaBindTexture(0, texreference, diarray, bytes));

    // launch with an integer ceil-div so every element gets a thread
    const int blocks = (size + threadsPerBlock - 1) / threadsPerBlock;
    kernel<<<blocks, threadsPerBlock>>>(doarray, size);
    CUDA_CHECK(cudaGetLastError());       // launch-configuration errors
    CUDA_CHECK(cudaDeviceSynchronize());  // execution errors; also flushes device printf

    // unbind exactly once, after the kernel has finished using the texture
    CUDA_CHECK(cudaUnbindTexture(texreference));

    // copy result array from device to host memory
    CUDA_CHECK(cudaMemcpy(harray, doarray, bytes, cudaMemcpyDeviceToHost));

    // free host and device memory
    free(harray);
    CUDA_CHECK(cudaFree(diarray));
    CUDA_CHECK(cudaFree(doarray));
    return 0;
}

2-dimension cuda array
1.声明CUDA数组之前，必须先以结构体channelDesc描述CUDA数组中的数据类型。

// Describes the element format of a CUDA array.
struct cudaChannelFormatDesc {
    // Per-channel sizes for the x, y, z and w components; the example in this
    // article passes (8, 8, 0, 0) for a two-channel 8-bit element, i.e. these
    // are bit counts and 0 marks an unused channel.
    int x, y, z, w;
    // Whether the channels hold signed integers, unsigned integers or floats.
    enum cudaChannelFormatKind f;
};

cudaChannelFormatKind有三种类型
cudaChannelFormatKindSigned，如果这些成员是有符号整型；
cudaChannelFormatKindUnsigned，如果这些成员是无符号整型；
cudaChannelFormatKindFloat，如果这些成员是浮点型；
示例

// Two 8-bit unsigned channels (x and y), i.e. the layout of a uchar2.
cudaChannelFormatDesc channelDesc = cudaCreateChannelDesc(8, 8, 0, 0, cudaChannelFormatKindUnsigned);

若觉得这麻烦，还有简单的使用方法，使用模板

cudaChannelFormatDesc desc = cudaCreateChannelDesc<uchar>();

2.为array开辟一段空间，把数据放进array中
示例

// Allocate a 2D CUDA array matching the image and upload the host pixels into it.
// `desc` is the channel descriptor from step 1; `a` is the host image
// (an OpenCV Mat in the full example below) and is assumed to be stored contiguously.
cudaArray* arr;
cudaMallocArray(&arr, &desc, a.cols, a.rows);
cudaMemcpyToArray(arr, 0, 0, a.data, sizeof(uchar) * a.cols * a.rows, cudaMemcpyHostToDevice);

3.绑定

cudaBindTextureToArray(&texture, arr, &desc);

4.最后别忘记解绑和释放空间

cudaUnbindTexture(&texture);cudaFreeArray(arr);

代码示例

#include <cstdio>
#include <cstdlib>
#include <iostream>
#include <cuda.h>
#include <cuda_runtime.h>
#include <device_functions.h>
#include <device_launch_parameters.h>
#include <opencv.hpp>

using namespace std;
using namespace cv;

// Abort with file/line and the CUDA error string as soon as a runtime call fails,
// so that an early failure is not silently carried into later calls.
#define CUDA_CHECK(call)                                                   \
    do {                                                                   \
        cudaError_t err_ = (call);                                         \
        if (err_ != cudaSuccess) {                                         \
            fprintf(stderr, "CUDA error %s:%d: %s\n", __FILE__, __LINE__,  \
                    cudaGetErrorString(err_));                             \
            exit(EXIT_FAILURE);                                            \
        }                                                                  \
    } while (0)

// File-scope 2D texture reference returning raw 8-bit elements.
// NOTE: texture references are a legacy API (deprecated in CUDA 11, removed in
// CUDA 12); new code should use texture objects (cudaTextureObject_t) instead.
texture<uchar, 2, cudaReadModeElementType> text;

// Prints "value x y" for every pixel of the width x height image bound to `text`.
// Launch layout: 2D grid of 2D blocks, one thread per pixel; threads that fall
// outside the image (grid rounded up to whole blocks) exit without printing.
__global__ void test(int width, int height)
{
    int x = threadIdx.x + blockDim.x * blockIdx.x;
    int y = threadIdx.y + blockDim.y * blockIdx.y;

    // skip the padding threads of the rounded-up grid
    if (x >= width || y >= height)
        return;

    uchar pixel = tex2D(text, x, y);
    printf("%d %d %d\n", pixel, x, y);
}

// Host driver: loads t.jpg as grayscale, uploads it into a CUDA array, binds
// the array to the texture and launches the kernel over the whole image.
int main()
{
    Mat a = imread("t.jpg", 0);
    if (a.empty()) {
        fprintf(stderr, "failed to load image t.jpg\n");
        return EXIT_FAILURE;
    }

    cudaChannelFormatDesc desc = cudaCreateChannelDesc<uchar>();
    cudaArray* arr = NULL;
    CUDA_CHECK(cudaMallocArray(&arr, &desc, a.cols, a.rows));

    // 2D copy honours the Mat row stride (a.step), so it is also correct for
    // non-contiguous images, and replaces the deprecated cudaMemcpyToArray.
    CUDA_CHECK(cudaMemcpy2DToArray(arr, 0, 0, a.data, a.step,
                                   sizeof(uchar) * a.cols, a.rows,
                                   cudaMemcpyHostToDevice));

    // Point sampling with unnormalized (pixel) coordinates. Wrap addressing is
    // only supported for normalized coordinates, so clamp is the mode that
    // actually applies here.
    text.normalized = false;
    text.filterMode = cudaFilterModePoint;
    text.addressMode[0] = cudaAddressModeClamp;
    text.addressMode[1] = cudaAddressModeClamp;
    CUDA_CHECK(cudaBindTextureToArray(&text, arr, &desc));

    // integer ceil-div so the grid covers the whole image
    dim3 th(16, 16);
    dim3 bl((a.cols + th.x - 1) / th.x, (a.rows + th.y - 1) / th.y);
    test<<<bl, th>>>(a.cols, a.rows);
    CUDA_CHECK(cudaGetLastError());       // launch-configuration errors
    CUDA_CHECK(cudaDeviceSynchronize());  // execution errors; also flushes device printf

    // release the texture binding and the array only after the kernel is done
    CUDA_CHECK(cudaUnbindTexture(&text));
    CUDA_CHECK(cudaFreeArray(arr));
    return 0;
}

补充说明
texture的属性
1.normalized是设置纹理坐标是否进行归一化，如果是非0，则归一化到[0，1)的坐标进行寻址，否则对尺寸为width, height, depth的纹理使用坐标[0,width-1], [0,height-1], [0,depth-1]寻址。

2.filterMode用于设置纹理的滤波模式，即如何根据坐标计算返回的纹理值。滤波模式可以是cudaFilterModePoint或者cudaFilterModeLinear。滤波模式为cudaFilterModePoint时，返回值是与坐标最接近的像元的值。cudaFilterModeLinear模式只能对返回值为浮点型的纹理使用，启用这一种模式时将拾取纹理坐标周围的像元，然后根据坐标与这些像元之间的距离进行插值计算。对一维纹理可以使用线性滤波，对二维纹理可以使用双线性滤波。返回值会是对最接近纹理坐标的两个像元（对一维纹理），四个像元（对二维纹理）或者八个像元（对三维纹理）进行插值后得到的值。**说人话就是**cudaFilterModePoint返回的就是该点的值，否则就是附近的点计算出来的值。

3.addressMode说明了寻址模式，即如何处理超出寻址范围的纹理坐标；addressMode是一个大小为3的数组，三个元素分别说明对第一、二、三个纹理坐标的寻址模式；寻址模式可以是cudaAddressModeClamp或cudaAddressModeWrap中的一种，前者将超出寻址范围的纹理坐标“钳位”到寻址范围内的最大或最小值，后者将超出寻址范围的纹理坐标“折叠”进合理范围。cudaAddressModeWrap只支持归一化的纹理坐标。

参考 http://blog.csdn.net/darkstorm2111203/article/details/4294012 向这位作者致敬

0 0