加法

来源:互联网 发布:淘宝网购物女装晚礼服 编辑:程序博客网 时间:2024/05/02 06:44

下面的 CUDA 并行归约求和(加法)代码参考了他人的博客[1]。

#include <iostream>
#include <stdio.h>
#include <stdlib.h>
//#include "kmeans.h"

// NOTE: `using namespace std;` was dropped on purpose. The global constant
// below is called `count`, which can become ambiguous with std::count once
// the whole std namespace is pulled into scope.

// Number of integers that are generated on the host and summed on the device.
const int count = 1000;

// Hardware limit on threads per block for all current CUDA architectures.
const int kMaxThreadsPerBlock = 1024;

// Aborts with a readable message when a CUDA runtime call fails.
#define CUDA_CHECK(call)                                                   \
    do {                                                                   \
        cudaError_t err_ = (call);                                         \
        if (err_ != cudaSuccess) {                                         \
            fprintf(stderr, "CUDA error %s:%d: %s\n", __FILE__, __LINE__,  \
                    cudaGetErrorString(err_));                             \
            exit(EXIT_FAILURE);                                            \
        }                                                                  \
    } while (0)

/*
 * Fills arr[0 .. count-1] with the values 1 .. count.
 * arr must point to at least `count` ints.
 */
void generate_data(int *arr)
{
    for (int i = 0; i < count; i++)
    {
        arr[i] = i + 1;
    }
}

/*
 * Returns the smallest power of two that is >= n for 1 <= n <= 2^30.
 * Works by smearing the highest set bit of (n - 1) into every lower bit
 * and then adding one. For n <= 0 the result is 0.
 */
int nextPowerOfTwo(int n)
{
    n--;
    n = n >> 1 | n;
    n = n >> 2 | n;
    n = n >> 4 | n;
    n = n >> 8 | n;
    n = n >> 16 | n;
    //n = n >> 32 | n; //For 64-bits int
    return ++n;
}

/*
 * Sums array[0 .. cnt-1] with a shared-memory tree reduction and stores the
 * result in array[0].
 *
 * cnt  : count
 * cnt2 : next power of two of count
 *
 * Launch requirements:
 *   - exactly one block (only threadIdx.x is used for indexing);
 *   - any block size >= 1: loads and reduction steps use block-stride loops,
 *     so the block does not need one thread per element;
 *   - dynamic shared memory of at least cnt2 * sizeof(unsigned int) bytes.
 */
__global__ static void compute_sum(int *array, int cnt, int cnt2)
{
    extern __shared__ unsigned int sharedMem[];

    // Nothing to sum; the condition is uniform across the block, so returning
    // before the barriers below is safe.
    if (cnt <= 0 || cnt2 <= 0)
    {
        return;
    }

    // Populate all cnt2 slots, zero-padding the tail [cnt, cnt2). The stride
    // loop guarantees the padding is written even when the block has fewer
    // than cnt2 threads; otherwise the reduction would read uninitialised
    // shared memory.
    for (int i = threadIdx.x; i < cnt2; i += blockDim.x)
    {
        sharedMem[i] = (i < cnt) ? array[i] : 0;
    }
    __syncthreads();

    //cnt2 "must" be a power of two!
    for (unsigned int s = cnt2 / 2; s > 0; s >>= 1)
    {
        // Each iteration folds the upper half [s, 2s) onto the lower half.
        for (unsigned int i = threadIdx.x; i < s; i += blockDim.x)
        {
            sharedMem[i] += sharedMem[i + s];
        }
        // Reached by every thread: `s` does not depend on the thread index.
        __syncthreads();
    }

    if (threadIdx.x == 0)
    {
        array[0] = sharedMem[0];
    }
}

/*
 * Generates 1..count on the host, sums the values on the GPU and prints the
 * result as "sum = <value>".
 */
int main()
{
    int *a = new int[count];
    generate_data(a);

    int *deviceArray = NULL;
    CUDA_CHECK(cudaMalloc(&deviceArray, count * sizeof(int)));
    CUDA_CHECK(cudaMemcpy(deviceArray, a, count * sizeof(int), cudaMemcpyHostToDevice));

    int npt_count = nextPowerOfTwo(count); //next power of two of count
    //std::cout<<"npt_count = "<<npt_count<<std::endl;
    size_t blockSharedDataSize = npt_count * sizeof(int);

    // One thread per padded slot, capped at the per-block hardware limit; the
    // kernel's stride loops cover any remaining slots.
    int threads = (npt_count < kMaxThreadsPerBlock) ? npt_count : kMaxThreadsPerBlock;

    compute_sum<<<1, threads, blockSharedDataSize>>>(deviceArray, count, npt_count);
    CUDA_CHECK(cudaGetLastError()); // reports invalid launch configurations

    int sum = 0;
    // Blocking copy on the default stream: also waits for the kernel to finish
    // and surfaces any error raised while it was running.
    CUDA_CHECK(cudaMemcpy(&sum, deviceArray, sizeof(int), cudaMemcpyDeviceToHost));
    std::cout << "sum = " << sum << std::endl;

    CUDA_CHECK(cudaFree(deviceArray));
    delete[] a;

    return 0;
}

[1] http://blog.csdn.net/lavorange/article/details/43031419

0 0