多人多车求距离——CPU 与 GPU 实现对比

来源:互联网 发布:步步高9688软件下载 编辑:程序博客网 时间:2024/04/27 22:48
  1. #include <stdio.h>
  2. #include <stdlib.h>
  3. #include <ctime>
  4. #include <iostream>
  5. #include <cmath>
  6. using namespace std;
  7. #define M 3200 // number of persons (rows of the distance matrix)
  8. #define N 3200 // number of cars (columns of the distance matrix)
  9. #define B_S 32 // thread-block edge length; blocks are launched as B_S x B_S
  // Uncomment the next line to enable the #ifdef SHOW sections in main.
  10. //#define SHOW
  11. // M persons x N cars -> distance matrix D[M][N], stored flat as D[i*N + j]
  // Kernel: each thread computes the Euclidean distance for one (person, car) pair.
  //
  // Launch layout: 2D grid of 2D blocks. The x dimension indexes cars (0..n-1),
  // the y dimension indexes persons (0..m-1). Threads outside that range exit.
  //
  //   x, y     : car coordinates, n elements each (device-accessible)
  //   px, py   : person coordinates, m elements each (device-accessible)
  //   distance : output, m*n floats, flat index = person * n + car
  //   m, n     : number of persons / cars
  __global__ void distance_gpu(float *x, float *y, float *px, float *py, float *distance, int m, int n)
  {
      int index_x = blockIdx.x * blockDim.x + threadIdx.x;  // car index
      int index_y = blockIdx.y * blockDim.y + threadIdx.y;  // person index

      // Guard with the runtime sizes (not compile-time macros) so the kernel
      // is correct for whatever m and n the caller passes.
      if (index_x >= n || index_y >= m) return;

      float dx = px[index_y] - x[index_x];
      float dy = py[index_y] - y[index_x];

      // size_t keeps the flat index from overflowing int for large m*n.
      distance[(size_t)index_y * n + index_x] = sqrtf(dx * dx + dy * dy);
  }
  // CPU reference: fills distance[i*n + j] with the Euclidean distance between
  // person i (px[i], py[i]) and car j (x[j], y[j]).
  //
  //   x, y     : car coordinates, n elements each
  //   px, py   : person coordinates, m elements each
  //   distance : output, m*n floats, flat index = person * n + car
  //   m, n     : number of persons / cars
  void distance_cpu(float *x, float *y, float *px, float *py, float *distance, int m, int n)
  {
      for (int i = 0; i < m; i++)
      {
          for (int j = 0; j < n; j++)
          {
              // Keep the differences in float: storing them in int would drop
              // the fractional part and give wrong distances for non-integer
              // coordinates.
              float dx = px[i] - x[j];
              float dy = py[i] - y[j];
              distance[(size_t)i * n + j] = std::sqrt(dx * dx + dy * dy);
          }
      }
  }
  // Reports a failed CUDA runtime call on stderr; returns true when status is cudaSuccess.
  static bool cuda_ok(cudaError_t status, const char *what)
  {
      if (status == cudaSuccess) return true;
      fprintf(stderr, "CUDA error in %s: %s\n", what, cudaGetErrorString(status));
      return false;
  }

  // Computes the m x n person-to-car distance matrix on the GPU and prints the
  // elapsed time (host-to-device copies + kernel + device-to-host copy).
  //
  //   x, y     : host arrays of car coordinates, n elements each
  //   px, py   : host arrays of person coordinates, m elements each
  //   distance : host output buffer, m*n floats, flat index = person * n + car
  //   m, n     : number of persons / cars
  //
  // On any CUDA failure a diagnostic is written to stderr, the timing line is
  // not printed, and all device resources acquired so far are released.
  void compute_gpu(float *x, float *y, float *px, float *py, float *distance, int m, int n)
  {
      // Nothing to compute; a zero-sized grid would be an invalid launch.
      if (m <= 0 || n <= 0) return;

      // Size everything from the runtime arguments so callers may pass any m, n.
      const size_t car_bytes = sizeof(float) * (size_t)n;
      const size_t person_bytes = sizeof(float) * (size_t)m;
      const size_t out_bytes = person_bytes * (size_t)n;

      float *dx = NULL, *dy = NULL, *dpx = NULL, *dpy = NULL, *dd = NULL;
      cudaEvent_t start = NULL, stop = NULL;
      float elapsedTime = 0.0f;

      // && short-circuits: the first failing step stops the chain.
      bool ok = cuda_ok(cudaMalloc((void **)&dpx, person_bytes), "cudaMalloc(dpx)")
             && cuda_ok(cudaMalloc((void **)&dpy, person_bytes), "cudaMalloc(dpy)")
             && cuda_ok(cudaMalloc((void **)&dx, car_bytes), "cudaMalloc(dx)")
             && cuda_ok(cudaMalloc((void **)&dy, car_bytes), "cudaMalloc(dy)")
             && cuda_ok(cudaMalloc((void **)&dd, out_bytes), "cudaMalloc(dd)")
             && cuda_ok(cudaEventCreate(&start), "cudaEventCreate(start)")
             && cuda_ok(cudaEventCreate(&stop), "cudaEventCreate(stop)");

      // Timed region starts here.
      ok = ok
        && cuda_ok(cudaEventRecord(start, 0), "cudaEventRecord(start)")
        && cuda_ok(cudaMemcpy(dx, x, car_bytes, cudaMemcpyHostToDevice), "cudaMemcpy(x)")
        && cuda_ok(cudaMemcpy(dy, y, car_bytes, cudaMemcpyHostToDevice), "cudaMemcpy(y)")
        && cuda_ok(cudaMemcpy(dpx, px, person_bytes, cudaMemcpyHostToDevice), "cudaMemcpy(px)")
        && cuda_ok(cudaMemcpy(dpy, py, person_bytes, cudaMemcpyHostToDevice), "cudaMemcpy(py)");

      if (ok)
      {
          // Ceil-divide so partial tiles at the right/bottom edges are covered.
          dim3 dimBlock(B_S, B_S);
          dim3 dimGrid((n + B_S - 1) / B_S, (m + B_S - 1) / B_S);
          distance_gpu<<<dimGrid, dimBlock>>>(dx, dy, dpx, dpy, dd, m, n);

          // A launch does not return a status; cudaGetLastError() surfaces
          // configuration errors, and the blocking copy below surfaces
          // execution errors.
          ok = cuda_ok(cudaGetLastError(), "distance_gpu launch")
            && cuda_ok(cudaMemcpy(distance, dd, out_bytes, cudaMemcpyDeviceToHost), "cudaMemcpy(distance)")
            // Timed region ends here.
            && cuda_ok(cudaEventRecord(stop, 0), "cudaEventRecord(stop)")
            && cuda_ok(cudaEventSynchronize(stop), "cudaEventSynchronize(stop)")
            && cuda_ok(cudaEventElapsedTime(&elapsedTime, start, stop), "cudaEventElapsedTime");
      }

      if (ok)
      {
          printf("the time on gpu is %f ms\n", elapsedTime);
      }

      // cudaFree accepts null pointers, so this is safe after a partial failure.
      cudaFree(dx);
      cudaFree(dy);
      cudaFree(dpx);
      cudaFree(dpy);
      cudaFree(dd);
      if (start) cudaEventDestroy(start);
      if (stop) cudaEventDestroy(stop);
  }
  // Runs the CPU reference implementation and prints the processor time it took.
  //
  //   x, y     : car coordinates, n elements each
  //   px, py   : person coordinates, m elements each
  //   distance : output buffer, m*n floats
  //   m, n     : number of persons / cars
  void compute_cpu(float *x, float *y, float *px, float *py, float *distance, int m, int n)
  {
      clock_t start = clock();
      distance_cpu(x, y, px, py, distance, m, n);
      clock_t finish = clock();

      // clock() counts in ticks of 1/CLOCKS_PER_SEC seconds; convert to
      // milliseconds so the printed unit matches the label.
      double elapsed_ms = 1000.0 * (double)(finish - start) / CLOCKS_PER_SEC;
      printf("the time on cpu is %f ms\n", elapsed_ms);
  }
  // Compares two m x n result matrices element by element and prints an error
  // message at the first element whose absolute difference exceeds 1e-5
  // (or is NaN). Prints nothing when the matrices agree.
  //
  //   C1, C2 : matrices to compare, m*n floats each, flat index = i * n + j
  //   m, n   : number of rows / columns
  void verify(float *C1, float *C2, int m, int n)
  {
      const float tolerance = 1e-5f;
      for (int i = 0; i < m; i++)
      {
          for (int j = 0; j < n; j++)
          {
              // Both matrices share the same layout, so they share one index.
              size_t idx = (size_t)i * n + j;
              float diff = std::fabs(C2[idx] - C1[idx]);

              // Written as !(diff <= tol) so that a NaN difference is also
              // reported rather than silently passing.
              if (!(diff <= tolerance))
              {
                  printf("error! results are not equel!");
                  return;  // report once, then stop scanning
              }
          }
      }
  }
  // Driver: generates random integer coordinates in [0, 9] for N cars and M
  // persons, computes the M x N distance matrix on the CPU and on the GPU,
  // and checks that both results agree. When SHOW is defined, the inputs and
  // both result matrices are also printed.
  int main()
  {
      float* px = (float*)malloc(M*sizeof(float));
      float* py = (float*)malloc(M*sizeof(float));
      float* x = (float*)malloc(N*sizeof(float));
      float* y = (float*)malloc(N*sizeof(float));
      float* distance1 = (float*)malloc((size_t)M*N*sizeof(float));
      float* distance2 = (float*)malloc((size_t)M*N*sizeof(float));

      if (!px || !py || !x || !y || !distance1 || !distance2)
      {
          fprintf(stderr, "host memory allocation failed\n");
          // free(NULL) is a no-op, so releasing all six pointers is safe.
          free(x);
          free(y);
          free(px);
          free(py);
          free(distance1);
          free(distance2);
          return 1;
      }

      // Inputs must always be initialised; only the printing depends on SHOW.
      // Otherwise both computations would read indeterminate memory.
      for (int i = 0; i<N; i++)
      {
          x[i] = rand() % 10;
          y[i] = rand() % 10;
  #ifdef SHOW
          cout << " (" << x[i] << "," << y[i] << ")" ;
  #endif // SHOW
      }
      for (int i = 0; i<M; i++)
      {
          px[i] = rand() % 10;
          py[i] = rand() % 10;
  #ifdef SHOW
          cout << endl <<"("<<px[i] << "," << py[i] << ")" << endl;
  #endif // SHOW
      }

      compute_cpu(x, y, px, py, distance1, M, N);
  #ifdef SHOW
      for (int i = 0; i< M; i++)
      {
          for (int j = 0; j< N; j++)
              cout << distance1[i*N + j] << " ";
          cout << endl;
      }
  #endif // SHOW

      compute_gpu(x, y, px, py, distance2, M, N);
  #ifdef SHOW
      for (int i = 0; i< M; i++)
      {
          for (int j = 0; j< N; j++)
              cout << distance2[i*N + j] << " ";
          cout << endl;
      }
  #endif // SHOW

      verify(distance1, distance2, M, N);

      free(x);
      free(y);
      free(px);
      free(py);
      free(distance1);
      free(distance2);
      return 0;
  }



来自为知笔记(Wiz)


0 0
原创粉丝点击