多人多车求距离——cpu&gpu
来源:互联网 发布:步步高9688软件下载 编辑:程序博客网 时间:2024/04/27 22:48
#include <stdio.h>
#include <stdlib.h>
#include <ctime>
#include <iostream>
#include <cmath>
using namespace std;
#define M 3200 //num of person
#define N 3200 //num of car
#define B_S 32
//#define SHOW
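// Persons P[M] x cars C[N] -> distance matrix D[M][N], stored row-major: D[i*N + j] is the distance from person i to car j.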
/*
 * Kernel: Euclidean distance between every person and every car.
 *
 * x, y     : car coordinates, n elements each (device pointers)
 * px, py   : person coordinates, m elements each (device pointers)
 * distance : output, m*n elements, row-major; element [row*n + col] receives
 *            the distance between person `row` and car `col`
 * m, n     : number of persons / number of cars
 *
 * Launch layout: 2D grid of 2D blocks. The x dimension indexes cars and the
 * y dimension indexes persons. Threads that fall outside the m x n matrix
 * return without touching memory, so the grid may be rounded up.
 */
__global__ void distance_gpu(float *x, float *y, float *px, float *py, float *distance, int m, int n)
{
    int col = blockIdx.x * blockDim.x + threadIdx.x; // car index
    int row = blockIdx.y * blockDim.y + threadIdx.y; // person index

    // Bound against the runtime sizes passed by the caller, not compile-time macros.
    if (col >= n || row >= m) return;

    float dx = px[row] - x[col];
    float dy = py[row] - y[col];

    // size_t row offset so row*n cannot overflow int for large matrices.
    distance[(size_t)row * n + col] = sqrtf(dx * dx + dy * dy);
}
/*
 * CPU reference: Euclidean distance between every person and every car.
 *
 * x, y     : car coordinates, n elements each
 * px, py   : person coordinates, m elements each
 * distance : output, m*n elements, row-major; distance[i*n + j] is the
 *            distance between person i and car j
 * m, n     : number of persons / number of cars
 */
void distance_cpu(float *x, float *y, float *px, float *py, float *distance, int m, int n)
{
    for (int i = 0; i < m; i++)
    {
        for (int j = 0; j < n; j++)
        {
            // Keep the differences in float: storing them in int would
            // truncate fractional coordinates and give wrong distances.
            float dx = px[i] - x[j];
            float dy = py[i] - y[j];
            distance[(size_t)i * n + j] = sqrtf(dx * dx + dy * dy);
        }
    }
}
// Prints a diagnostic and terminates the program if a CUDA runtime call failed.
static void check_cuda(cudaError_t status, const char *what)
{
    if (status != cudaSuccess)
    {
        fprintf(stderr, "CUDA error during %s: %s\n", what, cudaGetErrorString(status));
        exit(EXIT_FAILURE);
    }
}

/*
 * Computes the m x n person-to-car distance matrix on the GPU and prints the
 * elapsed time.
 *
 * x, y     : host arrays of car coordinates, n elements each
 * px, py   : host arrays of person coordinates, m elements each
 * distance : host output array, m*n elements, row-major
 * m, n     : number of persons / number of cars
 *
 * The timed region covers the host-to-device copies, the kernel and the
 * device-to-host copy; device allocation and release are outside it.
 * Any CUDA failure is reported on stderr and terminates the program.
 */
void compute_gpu(float *x, float *y, float *px, float *py, float *distance, int m, int n)
{
    // Nothing to compute, and a zero-sized grid is an invalid launch configuration.
    if (m <= 0 || n <= 0) return;

    // Sizes come from the runtime arguments so the function works for any m, n.
    const size_t person_bytes = sizeof(float) * (size_t)m;
    const size_t car_bytes = sizeof(float) * (size_t)n;
    const size_t dist_bytes = sizeof(float) * (size_t)m * (size_t)n;

    float *dx = NULL, *dy = NULL, *dpx = NULL, *dpy = NULL, *dd = NULL;
    check_cuda(cudaMalloc((void **)&dpx, person_bytes), "cudaMalloc(dpx)");
    check_cuda(cudaMalloc((void **)&dpy, person_bytes), "cudaMalloc(dpy)");
    check_cuda(cudaMalloc((void **)&dx, car_bytes), "cudaMalloc(dx)");
    check_cuda(cudaMalloc((void **)&dy, car_bytes), "cudaMalloc(dy)");
    check_cuda(cudaMalloc((void **)&dd, dist_bytes), "cudaMalloc(dd)");

    // Start timing.
    float elapsedTime = 0.0f;
    cudaEvent_t start, stop;
    check_cuda(cudaEventCreate(&start), "cudaEventCreate(start)");
    check_cuda(cudaEventCreate(&stop), "cudaEventCreate(stop)");
    check_cuda(cudaEventRecord(start, 0), "cudaEventRecord(start)");

    check_cuda(cudaMemcpy(dx, x, car_bytes, cudaMemcpyHostToDevice), "copy x to device");
    check_cuda(cudaMemcpy(dy, y, car_bytes, cudaMemcpyHostToDevice), "copy y to device");
    check_cuda(cudaMemcpy(dpx, px, person_bytes, cudaMemcpyHostToDevice), "copy px to device");
    check_cuda(cudaMemcpy(dpy, py, person_bytes, cudaMemcpyHostToDevice), "copy py to device");

    // One thread per output element; grid rounded up to cover the whole matrix.
    dim3 dimGrid((n + B_S - 1) / B_S, (m + B_S - 1) / B_S);
    dim3 dimBlock(B_S, B_S);
    distance_gpu<<<dimGrid, dimBlock>>>(dx, dy, dpx, dpy, dd, m, n);
    // Launch-configuration errors are only visible through cudaGetLastError().
    check_cuda(cudaGetLastError(), "distance_gpu launch");

    // Blocking copy: it also reports errors raised while the kernel executed.
    check_cuda(cudaMemcpy(distance, dd, dist_bytes, cudaMemcpyDeviceToHost), "copy distance to host");

    // Stop timing.
    check_cuda(cudaEventRecord(stop, 0), "cudaEventRecord(stop)");
    check_cuda(cudaEventSynchronize(stop), "cudaEventSynchronize(stop)");
    check_cuda(cudaEventElapsedTime(&elapsedTime, start, stop), "cudaEventElapsedTime");
    printf("the time on gpu is %f ms\n", elapsedTime);

    check_cuda(cudaFree(dx), "cudaFree(dx)");
    check_cuda(cudaFree(dy), "cudaFree(dy)");
    check_cuda(cudaFree(dpx), "cudaFree(dpx)");
    check_cuda(cudaFree(dpy), "cudaFree(dpy)");
    check_cuda(cudaFree(dd), "cudaFree(dd)");
    check_cuda(cudaEventDestroy(start), "cudaEventDestroy(start)");
    check_cuda(cudaEventDestroy(stop), "cudaEventDestroy(stop)");
}
/*
 * Runs the CPU reference implementation and prints how long it took.
 *
 * x, y     : car coordinates, n elements each
 * px, py   : person coordinates, m elements each
 * distance : output array, m*n elements, filled by distance_cpu
 * m, n     : number of persons / number of cars
 *
 * The time is measured with clock(), i.e. processor time used by the
 * program, and converted to milliseconds.
 */
void compute_cpu(float *x, float *y, float *px, float *py, float *distance, int m, int n)
{
    clock_t start, finish;
    start = clock();
    distance_cpu(x, y, px, py, distance, m, n);
    finish = clock();
    // clock() counts ticks; CLOCKS_PER_SEC is platform dependent, so convert
    // explicitly instead of printing raw ticks as if they were milliseconds.
    double elapsed_ms = 1000.0 * (double)(finish - start) / (double)CLOCKS_PER_SEC;
    printf("the time on cpu is %f ms\n", elapsed_ms);
}
/*
 * Compares two m x n row-major matrices element by element.
 *
 * C1, C2 : matrices to compare, m*n elements each
 * m, n   : number of rows / number of columns
 *
 * Prints an error message once, at the first element whose absolute
 * difference exceeds 1e-5, and stops. Prints nothing if all elements agree.
 */
void verify(float *C1, float *C2, int m, int n)
{
    for (int i = 0; i < m; i++)
    {
        for (int j = 0; j < n; j++)
        {
            // Both matrices have n columns, so both use the same row stride.
            size_t idx = (size_t)i * n + j;
            // Absolute value: a result that is too small is as wrong as one that is too large.
            if (fabsf(C2[idx] - C1[idx]) > 1e-5)
            {
                printf("error! results are not equel!");
                return; // report once instead of once per mismatching row
            }
        }
    }
}
/*
 * Benchmark driver: fills M persons and N cars with pseudo-random integer
 * coordinates in [0, 9], computes the M x N distance matrix on the CPU and on
 * the GPU, and compares the two results.
 *
 * When SHOW is defined, the inputs and both result matrices are printed.
 * Returns 0 on completion, EXIT_FAILURE if host memory cannot be allocated.
 */
int main()
{
    float* px = (float*)malloc(M*sizeof(float));
    float* py = (float*)malloc(M*sizeof(float));
    float* x = (float*)malloc(N*sizeof(float));
    float* y = (float*)malloc(N*sizeof(float));
    float* distance1 = (float*)malloc((size_t)N*M*sizeof(float));
    float* distance2 = (float*)malloc((size_t)N*M*sizeof(float));
    if (!px || !py || !x || !y || !distance1 || !distance2)
    {
        fprintf(stderr, "host memory allocation failed\n");
        // free(NULL) is a no-op, so releasing everything unconditionally is safe.
        free(x);
        free(y);
        free(px);
        free(py);
        free(distance1);
        free(distance2);
        return EXIT_FAILURE;
    }

    // Inputs must always be initialised; only the printing depends on SHOW.
    for (int i = 0; i<N; i++)
    {
        x[i] = rand() % 10;
        y[i] = rand() % 10;
#ifdef SHOW
        cout << " (" << x[i] << "," << y[i] << ")" ;
#endif // SHOW
    }
    for (int i = 0; i<M; i++)
    {
        px[i] = rand() % 10;
        py[i] = rand() % 10;
#ifdef SHOW
        cout << endl <<"("<<px[i] << "," << py[i] << ")" << endl;
#endif // SHOW
    }

    compute_cpu(x, y, px, py, distance1, M, N);
#ifdef SHOW
    for (int i = 0; i< M; i++)
    {
        for (int j = 0; j< N; j++)
            cout << distance1[i*N + j] << " ";
        cout << endl;
    }
#endif // SHOW

    compute_gpu(x, y, px, py, distance2, M, N);
#ifdef SHOW
    for (int i = 0; i< M; i++)
    {
        for (int j = 0; j< N; j++)
            cout << distance2[i*N + j] << " ";
        cout << endl;
    }
#endif // SHOW

    verify(distance1, distance2, M, N);

    free(x);
    free(y);
    free(px);
    free(py);
    free(distance1);
    free(distance2);
    return 0;
}
来自为知笔记(Wiz)
0 0
- 多人多车求距离——cpu&gpu
- 多人多车求距离_cpu&gpu
- 多人多车求距离_cpu&&gpu_寄存器优化_sharememory优化
- 二维矩阵实现的多人多车求距离
- 二维矩阵相乘——cpu&&gpu
- C求多人多门课平均分
- Shader学习——CPU与GPU之间的通信
- cpu gpu
- 动态规划求编辑距离——算法解题报告
- 求两点间的距离——撞错
- 第七周——友元函数求坐标距离
- OC——NSPoint 求两学生座位距离
- 动态规划求编辑距离——算法解题报告
- Julia曲线绘制-——CPU版本与GPU版本对比
- HP中国新能源—热能(CPU和GPU热能)产品说明及宣传册!
- GPU 显卡的CPU
- 移动GPU 移动CPU
- 搭建CPU+GPU 集群
- MATLAB MCR安装
- 多人多车求距离_cpu&&gpu_寄存器优化_sharememory优化
- 多人多车求距离_cpu&gpu
- Intel Parameter-Framework简介(不定期更新)
- 二维矩阵相乘——cpu&&gpu
- 多人多车求距离——cpu&gpu
- 二维矩阵实现矩阵相乘
- 二维矩阵实现的多人多车求距离
- CUDA编程快速入门
- 欢迎使用CSDN-markdown编辑器
- 无人驾驶(一)——探测设备比较
- linux中使用pthread和opencv多线程编程
- 【转载】景深 (Depth of field)
- Windows利用Github创建共享库