多人多车求距离_cpu&gpu

来源:互联网 发布:步步高9688软件下载 编辑:程序博客网 时间:2024/04/28 07:24
  1. #include <stdio.h>
  2. #include <stdlib.h>
  3. #include <ctime>
  4. #include <iostream>
  5. #include <cmath>
  6. using namespace std;
  7. #define M 3200 //num of person
  8. #define N 3200 //num of car
  9. #define B_S 32
  10. //#define SHOW
  11. // Persons P[M] paired with cars C[N] give the row-major distance matrix D[M][N] (element [i][j] at i*N + j)
  // Kernel: writes the Euclidean distance between every person and every car.
  //
  // Launch layout: 2D grid of 2D blocks. The x dimension of the launch indexes
  // cars (0..n-1) and the y dimension indexes persons (0..m-1). Threads that
  // fall outside the m x n problem do nothing, so the grid may overshoot.
  //
  //   x, y      car coordinates, n elements each (device pointers)
  //   px, py    person coordinates, m elements each (device pointers)
  //   distance  output, m*n elements, row-major: distance[person * n + car]
  //   m, n      number of persons / number of cars
  __global__ void distance_gpu(float *x, float *y, float *px, float *py, float *distance, int m, int n)
  {
      const int car = blockIdx.x * blockDim.x + threadIdx.x;
      const int person = blockIdx.y * blockDim.y + threadIdx.y;

      // Guard against the runtime sizes, not the compile-time macros, so the
      // kernel is correct for any m and n the caller passes.
      if (car >= n || person >= m) return;

      const float dx = px[person] - x[car];
      const float dy = py[person] - y[car];

      // size_t keeps the flat index from overflowing int for large matrices.
      distance[(size_t)person * n + car] = sqrtf(dx * dx + dy * dy);
  }
  // CPU reference: Euclidean distance between every person and every car.
  //
  //   x, y      car coordinates, n elements each
  //   px, py    person coordinates, m elements each
  //   distance  output, m*n elements, row-major: distance[i * n + j] is the
  //             distance from person i to car j
  //   m, n      number of persons / number of cars
  void distance_cpu(float *x, float *y, float *px, float *py, float *distance, int m, int n)
  {
      for (int i = 0; i < m; i++)
      {
          float *row = distance + (size_t)i * n;
          for (int j = 0; j < n; j++)
          {
              // Keep the differences in float: storing them in int would
              // truncate fractional coordinates and corrupt the result.
              const float dx = px[i] - x[j];
              const float dy = py[i] - y[j];
              row[j] = sqrtf(dx * dx + dy * dy);
          }
      }
  }
  // Terminates the program with a readable message when a CUDA runtime call fails.
  static void check_cuda(cudaError_t status, const char *what)
  {
      if (status != cudaSuccess)
      {
          fprintf(stderr, "CUDA error in %s: %s\n", what, cudaGetErrorString(status));
          exit(EXIT_FAILURE);
      }
  }

  // Computes the m x n person-to-car distance matrix on the GPU and prints the
  // elapsed time. The timed region covers the host-to-device copies, the kernel
  // and the device-to-host copy of the result.
  //
  //   x, y      host arrays of car coordinates, n elements each
  //   px, py    host arrays of person coordinates, m elements each
  //   distance  host output buffer, m*n elements, row-major (person * n + car)
  //   m, n      number of persons / number of cars
  //
  // Any CUDA failure aborts the process via check_cuda.
  void compute_gpu(float *x, float *y, float *px, float *py, float *distance, int m, int n)
  {
      // An empty problem would produce a zero-sized grid, which is an invalid launch.
      if (m <= 0 || n <= 0) return;

      // Size everything from the runtime arguments rather than the M/N macros.
      const size_t person_bytes = sizeof(float) * (size_t)m;
      const size_t car_bytes = sizeof(float) * (size_t)n;
      const size_t matrix_bytes = sizeof(float) * (size_t)m * (size_t)n;

      float *dx = NULL, *dy = NULL, *dpx = NULL, *dpy = NULL, *dd = NULL;
      check_cuda(cudaMalloc((void **)&dpx, person_bytes), "cudaMalloc(dpx)");
      check_cuda(cudaMalloc((void **)&dpy, person_bytes), "cudaMalloc(dpy)");
      check_cuda(cudaMalloc((void **)&dx, car_bytes), "cudaMalloc(dx)");
      check_cuda(cudaMalloc((void **)&dy, car_bytes), "cudaMalloc(dy)");
      check_cuda(cudaMalloc((void **)&dd, matrix_bytes), "cudaMalloc(dd)");

      // Start timing.
      float elapsedTime = 0.0f;
      cudaEvent_t start, stop;
      check_cuda(cudaEventCreate(&start), "cudaEventCreate(start)");
      check_cuda(cudaEventCreate(&stop), "cudaEventCreate(stop)");
      check_cuda(cudaEventRecord(start, 0), "cudaEventRecord(start)");

      check_cuda(cudaMemcpy(dx, x, car_bytes, cudaMemcpyHostToDevice), "cudaMemcpy(x)");
      check_cuda(cudaMemcpy(dy, y, car_bytes, cudaMemcpyHostToDevice), "cudaMemcpy(y)");
      check_cuda(cudaMemcpy(dpx, px, person_bytes, cudaMemcpyHostToDevice), "cudaMemcpy(px)");
      check_cuda(cudaMemcpy(dpy, py, person_bytes, cudaMemcpyHostToDevice), "cudaMemcpy(py)");

      // Grid x covers cars, grid y covers persons; round up so the tail is covered.
      dim3 dimGrid((n + B_S - 1) / B_S, (m + B_S - 1) / B_S);
      dim3 dimBlock(B_S, B_S);
      distance_gpu<<<dimGrid, dimBlock>>>(dx, dy, dpx, dpy, dd, m, n);
      // Launch-configuration errors are only reported through cudaGetLastError.
      check_cuda(cudaGetLastError(), "distance_gpu launch");

      // The blocking copy also surfaces any fault raised while the kernel ran.
      check_cuda(cudaMemcpy(distance, dd, matrix_bytes, cudaMemcpyDeviceToHost), "cudaMemcpy(distance)");

      // Stop timing.
      check_cuda(cudaEventRecord(stop, 0), "cudaEventRecord(stop)");
      check_cuda(cudaEventSynchronize(stop), "cudaEventSynchronize(stop)");
      check_cuda(cudaEventElapsedTime(&elapsedTime, start, stop), "cudaEventElapsedTime");
      printf("the time on gpu is %f ms\n", elapsedTime);

      cudaFree(dx);
      cudaFree(dy);
      cudaFree(dpx);
      cudaFree(dpy);
      cudaFree(dd);
      cudaEventDestroy(start);
      cudaEventDestroy(stop);
  }
  // Runs the CPU reference implementation and prints how long it took.
  //
  // Parameters are forwarded unchanged to distance_cpu. The time is measured
  // with clock(), i.e. processor time consumed by the program.
  void compute_cpu(float *x, float *y, float *px, float *py, float *distance, int m, int n)
  {
      const clock_t start = clock();
      distance_cpu(x, y, px, py, distance, m, n);
      const clock_t finish = clock();

      // clock() counts ticks, not milliseconds: scale by CLOCKS_PER_SEC so the
      // printed number really matches its "ms" label.
      const double elapsed_ms = 1000.0 * (double)(finish - start) / (double)CLOCKS_PER_SEC;
      printf("the time on cpu is %f ms\n", elapsed_ms);
  }
  // Compares two m x n row-major result matrices element by element and prints
  // a single error message as soon as one pair differs by more than 1e-5.
  //
  //   C1, C2  matrices to compare, m*n elements each
  //   m, n    number of rows / number of columns
  void verify(float *C1, float *C2, int m, int n)
  {
      const float tolerance = 1e-5f;
      for (int i = 0; i < m; i++)
      {
          for (int j = 0; j < n; j++)
          {
              // Both matrices share the same layout, so both use the row stride n.
              const size_t idx = (size_t)i * n + j;
              // Absolute difference: a mismatch in either direction must be caught.
              if (fabsf(C2[idx] - C1[idx]) > tolerance)
              {
                  printf("error! results are not equel!");
                  // Stop at the first mismatch instead of repeating the message per row.
                  return;
              }
          }
      }
  }
  // Entry point: fills M persons and N cars with pseudo-random integer
  // coordinates in [0, 9], computes the distance matrix on the CPU and on the
  // GPU, and compares the two results. Define SHOW to dump inputs and outputs.
  // Returns 0 on completion, 1 if a host allocation fails.
  int main()
  {
      // Count elements in size_t so the product cannot overflow int.
      const size_t pair_count = (size_t)M * (size_t)N;

      float *px = (float *)malloc(M * sizeof(float));
      float *py = (float *)malloc(M * sizeof(float));
      float *x = (float *)malloc(N * sizeof(float));
      float *y = (float *)malloc(N * sizeof(float));
      float *distance1 = (float *)malloc(pair_count * sizeof(float));
      float *distance2 = (float *)malloc(pair_count * sizeof(float));

      // The result matrices are large; fail cleanly instead of dereferencing NULL.
      if (!px || !py || !x || !y || !distance1 || !distance2)
      {
          fprintf(stderr, "host memory allocation failed\n");
          free(x);
          free(y);
          free(px);
          free(py);
          free(distance1);
          free(distance2);
          return 1;
      }

      // Car coordinates.
      for (int i = 0; i < N; i++)
      {
          x[i] = (float)(rand() % 10);
          y[i] = (float)(rand() % 10);
  #ifdef SHOW
          cout << " (" << x[i] << "," << y[i] << ")" ;
  #endif // SHOW
      }

      // Person coordinates.
      for (int i = 0; i < M; i++)
      {
          px[i] = (float)(rand() % 10);
          py[i] = (float)(rand() % 10);
  #ifdef SHOW
          cout << endl <<"("<<px[i] << "," << py[i] << ")" << endl;
  #endif // SHOW
      }

      compute_cpu(x, y, px, py, distance1, M, N);
  #ifdef SHOW
      for (int i = 0; i < M; i++)
      {
          for (int j = 0; j < N; j++)
              cout << distance1[i*N + j] << " ";
          cout << endl;
      }
  #endif // SHOW

      compute_gpu(x, y, px, py, distance2, M, N);
  #ifdef SHOW
      for (int i = 0; i < M; i++)
      {
          for (int j = 0; j < N; j++)
              cout << distance2[i*N + j] << " ";
          cout << endl;
      }
  #endif // SHOW

      verify(distance1, distance2, M, N);

      free(x);
      free(y);
      free(px);
      free(py);
      free(distance1);
      free(distance2);
      return 0;
  }



来自为知笔记(Wiz)


附件列表

     

    0 0
    原创粉丝点击