CUDA 实现字符统计

来源:互联网 发布:淘宝卖家千牛收入图片 编辑:程序博客网 时间:2024/05/20 21:20


#include "cuda_runtime.h"
#include "device_launch_parameters.h"
#include<string.h>
#include<stdlib.h>
#include<stdio.h>
#include<assert.h>
#include<time.h>
#define TEXT_FILE "kafka.txt"
typedef unsigned int uint;
/*
 * Byte-value histogram kernel.
 *
 * Each block accumulates a private 256-bin histogram in shared memory and
 * then merges it into the global histogram with one atomicAdd per bin.
 *
 * Parameters:
 *   buffer - device pointer to the input bytes (only read).
 *   size   - number of bytes in buffer; 0 is allowed.
 *   histo  - device pointer to 256 unsigned counters. The kernel only adds to
 *            them, so the caller must zero them before the launch.
 *
 * Launch layout: 1D grid of 1D blocks (only the .x components are used).
 * Any block size works because every per-bin step strides over the 256 bins
 * by blockDim.x. Uses 1 KiB of static shared memory per block.
 */
__global__ void histo_kernel(char* buffer,size_t size,unsigned int* histo){
    __shared__ unsigned int temp[256];

    // Zero the block-local bins. Striding by blockDim.x keeps this correct
    // (and in bounds) when the block has fewer or more than 256 threads.
    for (unsigned int bin = threadIdx.x; bin < 256; bin += blockDim.x) {
        temp[bin] = 0;
    }
    __syncthreads();

    // Index arithmetic is done in size_t so large inputs cannot overflow int.
    const size_t stride = (size_t)blockDim.x * gridDim.x;
    for (size_t i = (size_t)blockIdx.x * blockDim.x + threadIdx.x;
         i < size; i += stride) {
        // char may be signed: convert to unsigned char so bytes >= 0x80 map
        // to bins 128..255 instead of a negative (out-of-bounds) index.
        atomicAdd(&temp[(unsigned char)buffer[i]], 1u);
    }
    __syncthreads();

    // Merge this block's partial counts into the global histogram.
    for (unsigned int bin = threadIdx.x; bin < 256; bin += blockDim.x) {
        atomicAdd(&histo[bin], temp[bin]);
    }
}


/* Abort with a readable message when a CUDA runtime call fails. */
#define HISTO_CUDA_CHECK(call)                                               \
    do {                                                                     \
        cudaError_t histo_err_ = (call);                                     \
        if (histo_err_ != cudaSuccess) {                                     \
            fprintf(stderr, "CUDA error %s:%d: %s\n", __FILE__, __LINE__,    \
                    cudaGetErrorString(histo_err_));                         \
            exit(1);                                                         \
        }                                                                    \
    } while (0)

/*
 * Reads TEXT_FILE, counts how often each byte value occurs on the GPU
 * (histo_kernel) and on the CPU, prints the counts for codes 65..122
 * ('A'..'z') together with timings, and reports whether the two histograms
 * agree on all 256 bins.
 *
 * Returns 0 on normal completion; exits with status 1 if the file cannot be
 * read, a host allocation fails, or any CUDA call fails.
 */
int main(){
    clock_t cpu_start, cpu_end, gpu_start, gpu_end;
    cudaEvent_t start, stop;
    uint* result = (uint*)calloc(256, sizeof(uint));  /* all 256 bins zeroed */
    uint* dev_result = NULL;
    uint cpu_result[256] = {0};
    int blockSize = 256;
    size_t text_size;
    long file_len;
    char* text;
    char* dev_text = NULL;
    FILE* text_handle;

    if (result == NULL) {
        perror("could not allocate the result buffer");
        exit(1);
    }

    /* Binary mode so the length reported by ftell matches what fread returns. */
    text_handle = fopen(TEXT_FILE, "rb");
    if (text_handle == NULL) {
        perror("could't find the text file");
        exit(1);
    }
    if (fseek(text_handle, 0, SEEK_END) != 0 ||
        (file_len = ftell(text_handle)) < 0) {
        perror("could not determine the text file size");
        fclose(text_handle);
        exit(1);
    }
    /* Use the full length: subtracting 1 dropped the last byte and wrapped
       around to a huge size_t for an empty file. */
    text_size = (size_t)file_len;
    rewind(text_handle);

    /* Allocate at least one byte so an empty file does not yield NULL. */
    text = (char*)calloc(text_size ? text_size : 1, sizeof(char));
    if (text == NULL) {
        perror("could not allocate the text buffer");
        fclose(text_handle);
        exit(1);
    }
    {
        size_t got = fread(text, sizeof(char), text_size, text_handle);
        if (got != text_size) {
            if (ferror(text_handle)) {
                perror("could not read the text file");
                fclose(text_handle);
                exit(1);
            }
            text_size = got;  /* short read: only count what was delivered */
        }
    }
    fclose(text_handle);

    /* ---- GPU histogram (events bracket alloc + copies + kernel) ---- */
    gpu_start = clock();
    HISTO_CUDA_CHECK(cudaEventCreate(&start));
    HISTO_CUDA_CHECK(cudaEventCreate(&stop));
    HISTO_CUDA_CHECK(cudaEventRecord(start, 0));

    HISTO_CUDA_CHECK(cudaMalloc((void**)&dev_text,
                                (text_size ? text_size : 1) * sizeof(char)));
    HISTO_CUDA_CHECK(cudaMemcpy(dev_text, text, text_size * sizeof(char),
                                cudaMemcpyHostToDevice));
    HISTO_CUDA_CHECK(cudaMalloc((void**)&dev_result, 256 * sizeof(uint)));
    HISTO_CUDA_CHECK(cudaMemset(dev_result, 0, 256 * sizeof(uint)));

    cudaDeviceProp prop;
    HISTO_CUDA_CHECK(cudaGetDeviceProperties(&prop, 0));
    int blocks = prop.multiProcessorCount;

    histo_kernel<<<blocks * 2, blockSize>>>(dev_text, text_size, dev_result);
    HISTO_CUDA_CHECK(cudaGetLastError());  /* launch-configuration errors */

    /* Blocking copy: also surfaces any fault raised while the kernel ran. */
    HISTO_CUDA_CHECK(cudaMemcpy(result, dev_result, 256 * sizeof(uint),
                                cudaMemcpyDeviceToHost));
    HISTO_CUDA_CHECK(cudaEventRecord(stop, 0));
    HISTO_CUDA_CHECK(cudaEventSynchronize(stop));
    /* Sample the host clock only after the GPU work has finished; the kernel
       launch itself returns immediately. */
    gpu_end = clock();

    float elapsedTime;
    HISTO_CUDA_CHECK(cudaEventElapsedTime(&elapsedTime, start, stop));
    HISTO_CUDA_CHECK(cudaEventDestroy(start));
    HISTO_CUDA_CHECK(cudaEventDestroy(stop));

    /* ---- CPU reference histogram ---- */
    cpu_start = clock();
    for (size_t i = 0; i < text_size; i++) {
        /* unsigned char cast: a signed char >= 0x80 would index negatively */
        cpu_result[(unsigned char)text[i]]++;
    }
    cpu_end = clock();

    /* Verify every bin, but only print the 'A'..'z' range. */
    bool flag = true;
    for (int i = 0; i < 256; i++) {
        if (result[i] != cpu_result[i]) {
            flag = false;
        }
    }
    printf("\nResults: \n");
    for (int i = 65; i < 123; i++) {
        printf("Number of occurrences of %c : %u\n", i, cpu_result[i]);
        printf("GPU Number of occurrences of %c : %u\n", i, result[i]);
    }
    /* clock_t differences are printed as long; values are in clock() ticks. */
    printf("cpu time:%ld\n", (long)(cpu_end - cpu_start));
    printf("gpu time:%ld\n", (long)(gpu_end - gpu_start));
    printf("gpu elapsed time:%3.1f\n", elapsedTime);

    if (flag == true)
        printf("Count success!");
    else
        printf("Count failed!");

    HISTO_CUDA_CHECK(cudaFree(dev_result));
    HISTO_CUDA_CHECK(cudaFree(dev_text));
    free(text);
    free(result);
    return 0;
}
0 0
原创粉丝点击