cuda编程---cuda硬件信息与错误处置
来源:互联网 发布:java外卖cms 编辑:程序博客网 时间:2024/06/06 10:06
一、硬件信息查询:
#include <stdio.h>
#include <cuda_runtime.h>

// Device query: prints the name, memory clock rate, memory bus width and the
// derived peak memory bandwidth of each device reported by
// cudaGetDeviceCount().
//
// This first listing does not inspect any cudaError_t return value (error
// reporting is the subject of the next listing). To stay well-defined even
// when a runtime call fails, the outputs are zero-initialized: a failed
// cudaGetDeviceCount() leaves nDevices at 0 so the loop body never runs, and
// a failed cudaGetDeviceProperties() leaves prop zeroed instead of
// indeterminate.
int main()
{
    int nDevices = 0;  // stays 0 if the count query fails
    cudaGetDeviceCount(&nDevices);

    for (int i = 0; i < nDevices; i++) {
        cudaDeviceProp prop = {};  // zeroed so a failed query prints empty/0 fields
        cudaGetDeviceProperties(&prop, i);

        printf("Device Number: %d\n", i);
        printf(" Device name: %s\n", prop.name);
        printf(" Memory Clock Rate (KHz): %d\n", prop.memoryClockRate);
        printf(" Memory Bus Width (bits): %d\n", prop.memoryBusWidth);
        // clock (kHz) * 2 * bus width in bytes, scaled by 1e6 to GB/s;
        // the factor 2.0 presumably accounts for double-data-rate memory.
        printf(" Peak Memory Bandwidth (GB/s): %f\n\n",
               2.0 * prop.memoryClockRate * (prop.memoryBusWidth / 8) / 1.0e6);
    }
    return 0;
}
二、错误处置:
1、代码段一:
#include <stdio.h>
#include <cuda_runtime.h>

// Device query with error handling: prints the name, memory clock rate,
// memory bus width and derived peak memory bandwidth of each device reported
// by cudaGetDeviceCount().
//
// Returns 0 on success and 1 if the device count cannot be obtained. A device
// whose properties cannot be read is reported and skipped.
int main()
{
    int nDevices = 0;
    cudaError_t err = cudaGetDeviceCount(&nDevices);
    if (err != cudaSuccess) {
        printf("%s\n", cudaGetErrorString(err));
        // nDevices is not meaningful after a failure, so do not enter the loop.
        return 1;
    }

    for (int i = 0; i < nDevices; i++) {
        cudaDeviceProp prop;
        err = cudaGetDeviceProperties(&prop, i);
        if (err != cudaSuccess) {
            // prop was not filled in; printing it would read indeterminate data.
            printf("%s\n", cudaGetErrorString(err));
            continue;
        }

        printf("Device Number: %d\n", i);
        printf(" Device name: %s\n", prop.name);
        printf(" Memory Clock Rate (KHz): %d\n", prop.memoryClockRate);
        printf(" Memory Bus Width (bits): %d\n", prop.memoryBusWidth);
        // clock (kHz) * 2 * bus width in bytes, scaled by 1e6 to GB/s;
        // the factor 2.0 presumably accounts for double-data-rate memory.
        printf(" Peak Memory Bandwidth (GB/s): %f\n\n",
               2.0 * prop.memoryClockRate * (prop.memoryBusWidth / 8) / 1.0e6);
    }
    return 0;
}
与第一部分的代码相比,这段代码的改动在于检查了 cudaGetDeviceCount 的返回值,并在出错时打印错误信息: cudaError_t err = cudaGetDeviceCount(&nDevices);
if (err != cudaSuccess)
printf("%s\n", cudaGetErrorString(err));
2、代码段二:
#include <iostream>
#include <math.h>
#include <stdio.h>
#include <stdlib.h>

// Aborts the program with a message if a CUDA runtime call does not return
// cudaSuccess. Used for every runtime call except the kernel-launch checks in
// main(), which are spelled out explicitly.
#define CUDA_CHECK(call)                                                   \
    do {                                                                   \
        cudaError_t err_ = (call);                                         \
        if (err_ != cudaSuccess) {                                         \
            fprintf(stderr, "CUDA error %s:%d: %s\n", __FILE__, __LINE__,  \
                    cudaGetErrorString(err_));                             \
            exit(EXIT_FAILURE);                                            \
        }                                                                  \
    } while (0)

// SAXPY kernel: y[i] = a * x[i] + y[i] for every i in [0, n).
//
// Expects a 1D grid of 1D blocks with at least n threads in total; each thread
// handles one element and threads with index >= n do nothing. Uses no shared
// memory. x and y must be device-accessible arrays of at least n floats.
__global__
void saxpy(int n, float a, float *x, float *y)
{
    int i = blockIdx.x * blockDim.x + threadIdx.x;
    if (i < n) y[i] = a * x[i] + y[i];
}

// Runs SAXPY on 1M elements, checks the kernel for launch and execution
// errors, verifies the result on the host and prints the effective bandwidth.
int main(void)
{
    const int N = 1 << 20;  // 1M elements
    const size_t bytes = (size_t)N * sizeof(float);
    const int threadsPerBlock = 256;
    const int blocks = (N + threadsPerBlock - 1) / threadsPerBlock;  // ceil-div

    // Separate host (malloc) and device (cudaMalloc) buffers; data is moved
    // between them with explicit cudaMemcpy calls.
    float *x = (float *)malloc(bytes);
    float *y = (float *)malloc(bytes);
    if (x == NULL || y == NULL) {
        fprintf(stderr, "Host allocation of %zu bytes failed\n", bytes);
        free(x);
        free(y);
        return EXIT_FAILURE;
    }

    float *d_x = NULL;
    float *d_y = NULL;
    CUDA_CHECK(cudaMalloc(&d_x, bytes));
    CUDA_CHECK(cudaMalloc(&d_y, bytes));

    // Initialize x and y arrays on the host.
    for (int i = 0; i < N; i++) {
        x[i] = 1.0f;
        y[i] = 2.0f;
    }

    cudaEvent_t start, stop;
    CUDA_CHECK(cudaEventCreate(&start));
    CUDA_CHECK(cudaEventCreate(&stop));

    CUDA_CHECK(cudaMemcpy(d_x, x, bytes, cudaMemcpyHostToDevice));
    CUDA_CHECK(cudaMemcpy(d_y, y, bytes, cudaMemcpyHostToDevice));

    CUDA_CHECK(cudaEventRecord(start));
    saxpy<<<blocks, threadsPerBlock>>>(N, 2.0f, d_x, d_y);
    // Launch-configuration errors are reported immediately by cudaGetLastError().
    cudaError_t errSync = cudaGetLastError();
    // Record the stop event right behind the kernel, before the host blocks,
    // so the measured interval does not include host-side synchronization.
    cudaError_t errStop = cudaEventRecord(stop);
    // Errors raised while the kernel executes surface at the next synchronizing call.
    cudaError_t errAsync = cudaDeviceSynchronize();

    if (errSync != cudaSuccess)
        printf("Sync kernel error: %s\n", cudaGetErrorString(errSync));
    if (errAsync != cudaSuccess)
        printf("Async kernel error: %s\n", cudaGetErrorString(errAsync));
    if (errSync != cudaSuccess || errAsync != cudaSuccess)
        return EXIT_FAILURE;  // results and timing are meaningless after a kernel failure
    CUDA_CHECK(errStop);

    CUDA_CHECK(cudaMemcpy(y, d_y, bytes, cudaMemcpyDeviceToHost));

    CUDA_CHECK(cudaEventSynchronize(stop));
    float milliseconds = 0;
    CUDA_CHECK(cudaEventElapsedTime(&milliseconds, start, stop));

    // Check for errors (all values should be 4.0f: 2.0f * 1.0f + 2.0f).
    float maxError = 0.0f;
    for (int i = 0; i < N; i++)
        maxError = fmaxf(maxError, fabsf(y[i] - 4.0f));
    printf("Max error: %f . \n", maxError);

    // Each element is read twice (x, y) and written once (y): 3 * bytes moved.
    printf("Effective Bandwidth (GB/s): %f .\n", 3.0 * bytes / milliseconds / 1e6);

    // Host buffers came from malloc and must be released with free, not cudaFree.
    free(x);
    free(y);
    CUDA_CHECK(cudaFree(d_x));
    CUDA_CHECK(cudaFree(d_y));
    CUDA_CHECK(cudaEventDestroy(start));
    CUDA_CHECK(cudaEventDestroy(stop));
    return 0;
}
阅读全文
0 0
- cuda编程---cuda硬件信息与错误处置
- CUDA编程—硬件基础
- (CUDA 编程3).CUDA硬件实现分析(一)------安营扎寨
- (CUDA 编程4).CUDA硬件实现分析(二)------规行矩步
- CUDA编程的错误处理
- CUDA编程的错误处理
- CUDA错误
- C++与CUDA混合编程
- Gtk与Cuda混合编程
- Java与Cuda混合编程
- CUDA编程
- cuda编程
- CUDA编程
- CUDA编程
- CUDA编程
- cuda 编程
- CUDA编程
- cuda编程
- 线性表 C
- 网络是怎样连接的学习笔记(一)
- Andrew NG机器学习线性回归编程作业
- Python笔记——python简介、特点、安装及helloworld
- Java内存模型和JVM内存管理
- cuda编程---cuda硬件信息与错误处置
- 01
- POJ-2774-后缀数组
- 112. Path Sum
- arcgis for js 船舶 拉旗
- 我们是如何做数据库运维和优化
- 想要购买二手域名先进行的四项检查
- Python笔记——类定义
- 设计模式之——观察者设计模式