CUDA 做排列组合
来源:互联网 发布:淘宝无法拍照怎么设置 编辑:程序博客网 时间:2024/06/05 03:18
#include <fstream>
#include <stdio.h>
#include <stdlib.h>
#include <cuda_runtime.h>
#include <iostream>

using namespace std;

// Number of parameter combinations produced per kernel launch.
#define ROWS 10
// Number of parameters per combination (rough, n, alpha, musp, mua, q).
#define COLS 6

#if COLS != 6
#error "Kerneltest's sweep tables are written for exactly 6 parameters"
#endif

// Evaluate a CUDA runtime call once; on failure print where and why, then exit.
#define CHECK(res)                                                         \
    do {                                                                   \
        cudaError_t check_err_ = (res);                                    \
        if (check_err_ != cudaSuccess) {                                   \
            fprintf(stderr, "CUDA error at %s:%d: %s\n", __FILE__,         \
                    __LINE__, cudaGetErrorString(check_err_));             \
            exit(-1);                                                      \
        }                                                                  \
    } while (0)

/*
 * Enumerates parameter combinations as the digits of a mixed-radix counter.
 *
 * Launch layout: threadIdx.x (plus the block offset) selects the combination
 * row, threadIdx.y selects the parameter column.  Threads with
 * row >= ROWS or col >= COLS do nothing, so the launch may be larger than the
 * ROWS x COLS table.  No shared memory is used.
 *
 * dp_out_params   device array of ROWS row pointers, each to COLS doubles;
 *                 element [row][col] receives the parameter value.
 * d_out_Iqsd1     device array of at least ROWS doubles; receives threadIdx.x.
 * d_MTFac         device array of at least ROWS doubles (read only here).
 * d_errIqsd_MTFac device array of at least ROWS doubles; receives
 *                 d_out_Iqsd1[row] - d_MTFac[row].
 * loop            launch counter; the linear combination index of a row is
 *                 row + ROWS * loop.
 */
__global__ void Kerneltest(double **dp_out_params, double *d_out_Iqsd1,
                           double *d_MTFac, double *d_errIqsd_MTFac,
                           unsigned int loop)
{
    const unsigned int row = blockDim.x * blockIdx.x + threadIdx.x;
    const unsigned int col = threadIdx.y;

    // Surplus threads leave immediately.  There is no barrier below, so an
    // early return cannot strand other threads of the block.
    if (row >= ROWS || col >= COLS) {
        return;
    }

    // Per-column sweep definition: column 0 = rough, 1 = n, 2 = alpha,
    // 3 = musp, 4 = mua, 5 = q.  Column 5 is the fastest-varying digit.
    const double lower[COLS] = {0.0, 0.0, 0.0, 0.0, 0.0,   0.0};
    const double upper[COLS] = {1.0, 1.0, 1.0, 1.0, 0.007, 0.027};
    const int    steps[COLS] = {100, 100, 100, 100, 100,   10};

    // 64-bit linear index: the full sweep has 10 * 100^5 combinations, which
    // does not fit in 32 bits.
    const unsigned long long linear =
        (unsigned long long)row + (unsigned long long)ROWS * loop;

    // Strip the faster-varying digits (columns COLS-1 .. col+1), then take
    // this column's digit.
    unsigned long long rest = linear;
    for (int c = COLS - 1; c > (int)col; --c) {
        rest /= steps[c];
    }
    const int weight = (int)(rest % steps[col]);
    const double stepLen = (upper[col] - lower[col]) / steps[col];
    dp_out_params[row][col] = lower[col] + weight * stepLen;

    // The per-row outputs do not depend on the column, so a single thread per
    // row writes them; this stays inside the ROWS-element d_out_Iqsd1 buffer.
    if (col == 0) {
        const double iqsd = threadIdx.x;
        d_out_Iqsd1[row] = iqsd;
        d_errIqsd_MTFac[row] = iqsd - d_MTFac[row];
    }
}

/*
 * Host driver: allocates the buffers, runs Kerneltest once, copies the
 * results back, prints them and writes the parameter table to a text file.
 */
int main(int argc, char **argv)
{
    (void)argc;
    (void)argv;

    const int MTFlen = 4000;
    const size_t mtfBytes = MTFlen * sizeof(double);
    const size_t paramBytes = ROWS * COLS * sizeof(double);
    const size_t rowBytes = ROWS * sizeof(double);
    const size_t rowPtrBytes = ROWS * sizeof(double *);

    // Host buffers.  calloc is used for the buffers that are uploaded or
    // printed in full, so no indeterminate values are ever read.
    double *h_MTFac = (double *)calloc(MTFlen, sizeof(double));
    double *h_errIqsd_MTFac = (double *)calloc(MTFlen, sizeof(double));
    double **hp_out_params = (double **)malloc(rowPtrBytes);
    double *h_out_params = (double *)malloc(paramBytes);
    double *h_out_Iqsd1 = (double *)malloc(rowBytes);
    if (h_MTFac == NULL || h_errIqsd_MTFac == NULL || hp_out_params == NULL ||
        h_out_params == NULL || h_out_Iqsd1 == NULL) {
        fprintf(stderr, "host allocation failed\n");
        exit(-1);
    }

    for (int i = 0; i < 100; i++) {
        h_MTFac[i] = 3.2;
    }

    // Device buffers.
    double *d_MTFac = NULL;
    CHECK(cudaMalloc((void **)&d_MTFac, mtfBytes));
    CHECK(cudaMemcpy(d_MTFac, h_MTFac, mtfBytes, cudaMemcpyHostToDevice));

    double *d_errIqsd_MTFac = NULL;
    CHECK(cudaMalloc((void **)&d_errIqsd_MTFac, mtfBytes));
    CHECK(cudaMemcpy(d_errIqsd_MTFac, h_errIqsd_MTFac, mtfBytes,
                     cudaMemcpyHostToDevice));

    double *d_out_params = NULL;
    CHECK(cudaMalloc((void **)&d_out_params, paramBytes));

    double **dp_out_params = NULL;
    CHECK(cudaMalloc((void **)&dp_out_params, rowPtrBytes));

    double *d_out_Iqsd1 = NULL;
    CHECK(cudaMalloc((void **)&d_out_Iqsd1, rowBytes));

    // Build the row-pointer table on the host (the values are device
    // addresses into d_out_params) and upload it.
    for (int r = 0; r < ROWS; r++) {
        hp_out_params[r] = d_out_params + r * COLS;
    }
    CHECK(cudaMemcpy(dp_out_params, hp_out_params, rowPtrBytes,
                     cudaMemcpyHostToDevice));

    // The block is larger than ROWS in x; the kernel masks the extra rows.
    dim3 dimBlock(15, 6, 1);
    dim3 dimGrid(1, 1, 1);
    for (unsigned int loop = 0; loop < 1; loop++) {
        Kerneltest<<<dimGrid, dimBlock>>>(dp_out_params, d_out_Iqsd1, d_MTFac,
                                          d_errIqsd_MTFac, loop);
        // Launch-configuration errors are only reported through this call.
        CHECK(cudaGetLastError());
        cout << "loop: " << loop << endl;
    }

    // The blocking copies below also surface any in-kernel execution error.
    CHECK(cudaMemcpy(h_out_params, d_out_params, paramBytes,
                     cudaMemcpyDeviceToHost));
    CHECK(cudaMemcpy(h_out_Iqsd1, d_out_Iqsd1, rowBytes,
                     cudaMemcpyDeviceToHost));
    CHECK(cudaMemcpy(h_MTFac, d_MTFac, mtfBytes, cudaMemcpyDeviceToHost));
    CHECK(cudaMemcpy(h_errIqsd_MTFac, d_errIqsd_MTFac, mtfBytes,
                     cudaMemcpyDeviceToHost));

    // Print the parameter table and mirror it to the text file.
    ofstream f1("/home/zlf/Documents/cuda.txt");
    int zz = 0;
    cout << endl << "h_out_params: " << endl;
    for (int r = 0; r < ROWS; r++) {
        for (int c = 0; c < COLS; c++) {
            printf("%f ", h_out_params[r * COLS + c]);
            f1 << h_out_params[r * COLS + c] << " ";
        }
        zz = zz + 1;
        cout << " 行数: " << zz;
        printf("\n");
        f1 << " 行数: " << zz << "\n";
    }
    f1.close();

    cout << endl << "h_out_Iqsd1: " << endl;
    for (int r = 0; r < ROWS; r++) {
        cout << h_out_Iqsd1[r] << " ";
        if ((r % 10) == 9) {
            cout << endl;
        }
    }

    cout << "h_errIqsd_MTFac[i]: " << endl;
    for (int i = 0; i < MTFlen; i++) {
        cout << h_errIqsd_MTFac[i] << " ";
    }
    cout << zz << endl;

    CHECK(cudaFree((void *)d_out_params));
    CHECK(cudaFree((void *)dp_out_params));
    CHECK(cudaFree((void *)d_out_Iqsd1));
    CHECK(cudaFree((void *)d_MTFac));
    CHECK(cudaFree((void *)d_errIqsd_MTFac));

    free(h_out_params);
    free(hp_out_params);
    free(h_out_Iqsd1);
    free(h_MTFac);
    free(h_errIqsd_MTFac);

    // Keep the console window open until a key is pressed.
    getchar();
    return 0;
}
阅读全文
0 0
- CUDA 做排列组合
- CUDA排列组合整数
- 排列组合
- 排列组合
- 排列组合
- 排列组合
- 排列组合
- 排列组合
- 排列组合
- 排列组合
- 排列组合
- 排列组合
- 排列组合
- 排列组合
- 排列组合
- 排列组合
- 排列组合
- 排列组合
- RROR com.alibaba.dubbo.rpc.filter.ExceptionFilter.invoke(ExceptionFilter.java:87): [DUBBO] Got unch
- 干货|angular ng-content在paletx2.0模式开发中的应用
- 程序员进阶 成为更好程序员的8种途径
- 【膜摸摸】网红步进电机驱动A4988和DRV8825的对比和区别及选用实践 for 四轴CNC雕刻/3D打印
- JavaScript代码风格要素
- CUDA 做排列组合
- JavaScript语法结构,关键字,保留字,变量
- mysql的sql语句优化
- android写入文件读取文件
- BC95 NB-IOT模块通过云平台实现到任意设备的透传
- 音视频解码基础的知识积累
- change、keypress、input和propertychange
- 扩展Spring的几种方式
- Angularjs的入门