多线程技术模拟并行计算之二:数组前缀和(Prefix Sum)
来源:互联网 发布:js 子字符串 编辑:程序博客网 时间:2024/06/18 11:40
一、前缀和(Prefix Sum)定义:
给定一个数组A[1..n],前缀和数组PrefixSum[1..n]定义为:PrefixSum[i] = A[0]+A[1]+...+A[i-1];
例如:A[5,6,7,8] --> PrefixSum[5,11,18,26]
PrefixSum[0] =A[0] ;
PrefixSum[1] =A[0] + A[1] ;
PrefixSum[2] =A[0] + A[1] + A[2] ;
PrefixSum[3] =A[0] + A[1] + A[2] + A[3] ;
二、串行程序(Sequential Program):
时间复杂度:O(n);空间复杂度:O(n)。n是数组A的长度。
伪代码:
Procedure PS(A,n,S) S[0] = A[0]; if n = 1 then exit; else { for j = 1 to n do S[j] = S[j-1] +A[j] }</span>
串行代码sequence.c:
#include<stdio.h>#include<time.h>#include<stdlib.h>#include<unistd.h>#include<memory.h> #define n 1000#define RANGE 2 int A[n]={0};intprefix_sum[n]={0};int num; void work(){ int i ; prefix_sum[0]=A[0]; for(i=1;i<num;i++) { prefix_sum[i] = prefix_sum[i-1]+A[i]; }} void generate(){ FILE *file; if( (file=fopen("/home/huawei/leaguenew/parallel_computing/A.txt","wt"))==NULL ) { perror("open fail\n"); exit(1); } int i; srand((unsigned)time(NULL)) ; for( i =0 ; i < num ; i++ ) fprintf(file,"%-4d",rand()%RANGE); //fprintf(file,"\n"); fclose(file);} void Read(){ FILE *file; if( (file =fopen("/home/huawei/leaguenew/parallel_computing/A.txt","rt"))==NULL ) { perror("open fail\n"); exit(1); } int i; srand((unsigned)time(NULL)); for(i=0;i<num;i++) fscanf(file,"%d",&A[i]); fclose(file);} int main(){ printf("please input the scale ofinput : "); scanf("%d",&num); generate(); Read(); int h; clock_t start = clock(); work(); clock_t finish = clock(); printf("the time cost is : %.6fseconds\n",(double)(finish-start)/CLOCKS_PER_SEC); for(h=0;h<num;h++) printf("%d ",A[h]); printf("\nThe prefix sum is :\n"); for(h=0;h<num;h++) printf("%d ",prefix_sum[h]); printf("\n"); return 0;}
代码中的文件路径需要自行修改。
递归实现:
#include<iostream>using namespace std; int ps[10]; int prefix_sum(inta[] ,int n){ if( n == 0 ) { ps[0] = a[0]; return ps[0]; } ps[n] = a[n]+prefix_sum(a,n-1); return ps[n];} int main(){ int a[] = {3,1,2,5,7,4,9}; int len = sizeof(a)/sizeof(a[0]); prefix_sum(a,len-1) ; for(int i=0;i<len;i++) { cout<<ps[i]<<""; } cout<<endl; system("pause"); return 0;}
三、并行算法
3.1.Non RecursivePrefix_Sum
首先让A[1..n]来表示n个整数的数组。让B[h][j]作为辅助数据在二叉树的前向遍历中保存中间信息(后面会说明方法),其中1<=h<=log2(n) and 1<=j<=n/2^h,然而数组C被用作后向遍历树下存放结果。
Input:大小为n=2^h的一个数组A,h是一个非负整数
Output:矩阵C存放结果,C[0][j]就是前缀和的第j项,1<=j<=n
伪代码:
beginfor 1<=j<=n <strong>pardo</strong> set B[0,j] = A[j]; for h = 1 to log2(n) do for 1<=j<=n/2^h <strong>pardo</strong> set B[h,j] = B[h-1,2*j-1] + B[h-1,2*j]; for h = log2(n) to 0 do for 1<=j<n/2^h <strong>pardo</strong> { j is even : set C[h,j] = C[h+1,j/2]; j = 1 : set C[h,1] = B[h,1]; j is odd : set C[h,j] = C[h+1,(j-1)/2] + B[h,j]; }end
Data Flow(Forward)
运行时间:并行的运行时间就是O(log2(n))其实也就是树的高度,每一层并行一次时间复杂度为O(1)。
注意:这是一个前向遍历,从底而上。对于C的后向遍历同样的,区别在于从上而下。
DataFlow(Backward)
数组C的生成根据上述伪代码生成,最终的C[0][j](1<=j<=n)就是最后的前缀和的结果。
上代码:
#include<stdio.h>#include<time.h>#include<pthread.h>#include<stdlib.h>#include<unistd.h>#include<memory.h>#include<stdlib.h>#include<math.h>#define RANGE 99#define M 1000void generate(int n);void Read(int n);void *func1(void *arg);void *func2(void *arg);void *func3(void *arg);void calculate();void resultToFile();int A[M];int B[20][1000];int prefix_sum[20][1000]={0};pthread_t tids[20][1000];pthread_mutex_t mutex;int n;struct S{int i ;int j ;};int main(){printf("please input the scale of input : ");scanf("%d",&n);generate(n);Read(n);clock_t start = clock();calculate();resultToFile();clock_t finish = clock();printf("the time cost is : %.6f seconds\n",(double)(finish-start)/CLOCKS_PER_SEC);return 0;}void calculate(){int h,i,j,k,l,flag;/*======set B[0][j]=A[j] in parallel==================*/for(i=1;i<=n;i++){struct S *s;s = (struct S *)malloc(sizeof(struct S));s->i = i;if( pthread_create(&tids[0][i],NULL,func1,s) ){perror("Fail to create thread!");exit(1);}}for(j=1;j<=n;j++)pthread_join(tids[0][j],NULL);/***Show the B[0][j]***************************/#if 1printf("Show the A[i]: ");for(j=1;j<=n;j++)printf("%d ",A[j]);printf("\nShow the B[0][j]:");for(j=1;j<=n;j++)printf("%d ",B[0][j]);printf("\n");#endif/***********************************************//**Data Flow Forward Algorithm:build the tree***/for(h=1;h<=log2(n);h++){for(j=1;j<=n/pow(2,h);j++){struct S *s;s = (struct S*)malloc(sizeof(struct S));s->i = h;s->j = j;if( pthread_create(&tids[h][j],NULL,func2,s) ){perror("sth is wrong");exit(1);}}/*****wait all the threads terminate to continue*****/for(j=1;j<=n/pow(2,h);j++)pthread_join(tids[h][j],NULL);}#if 1/******Print Data Flow Forward************/printf("Data Flow Forward:\n");for(h=0;h<=log2(n);h++){for(l=1;l<=2*h+1;l++)printf(" ");for(j=1;j<=n/pow(2,h);j++){printf("%d",B[h][j]);for(l=1;l<=1+h;l++)printf(" ");}printf("\n");}#endif/*********************************************//***********Data Flow Backward:Traverse*******/for(h=log2(n);h>=0;h--){for(j=1;j<=n/pow(2,h);j++){struct S *s;s = (struct S*)malloc(sizeof(struct S));s->i = h;s->j = j;if( pthread_create(&tids[h][j],NULL,func3,s) ){perror("sth is wrong");exit(1);}}for(j=1;j<=n/pow(2,h);j++)pthread_join(tids[h][j],NULL);}#if 1/********Print Data Flow Backward************/printf("Data Flow Backward:\n");for(h=log2(n);h>=0;h--){for(l=1;l<=2*h+1;l++)printf(" ");for(j=1;j<=n/pow(2,h);j++){printf("%d ",prefix_sum[h][j]);for(l=1;l<=h;l++)printf(" ");}printf("\n");}#endif/*********************************************/}void resultToFile(){int i;/***********write the result to file**********/FILE *file;file = fopen("/home/leaguenew/parallel_computing/prefixSumResult.txt","wt");for(i=1;i<=n;i++){fprintf(file,"%-6d",prefix_sum[0][i]);}/*********************************************/}void *func1(void *arg)//set B[0][j]=A[j]{int i;struct S *p;p = (struct S*)arg;i = p->i;pthread_mutex_lock(&mutex);B[0][i]=A[i];pthread_mutex_unlock(&mutex);pthread_exit(NULL);}void *func2(void *args)//B[h][j]=B[h-1][2*j-1]+B[h-1][2*j];{int h , j;struct S *p;p = (struct S*)args;h = p->i;j = p->j;pthread_mutex_lock(&mutex);B[h][j]=B[h-1][2*j-1]+B[h-1][2*j];pthread_mutex_unlock(&mutex);pthread_exit(NULL);}void *func3(void *arg)//get the prefix sum{int h,j;struct S *p;p = (struct S*)arg;h = p->i;j = p->j;pthread_mutex_lock(&mutex);if(j==1) prefix_sum[h][1] = B[h][1];else if (j%2==0) prefix_sum[h][j] = prefix_sum[h+1][j/2] ;else prefix_sum[h][j] = prefix_sum[h+1][(j-1)/2] + B[h][j] ;pthread_mutex_unlock(&mutex);pthread_exit(NULL);}void generate(int n){FILE *file1;if( (file1=fopen("/home/leaguenew/parallel_computing/arrayA.txt","wt") )==NULL ){perror("fopen");exit(1);}int i,j;srand( (unsigned)time(NULL) );for( i = 1 ; i <= n ;i++)fprintf(file1,"%-8d",rand()%RANGE);fprintf(file1,"\n");fclose(file1);}void Read(int n){FILE *file1;if( (file1=fopen("/home/leaguenew/parallel_computing/arrayA.txt","rt") )==NULL ){perror("fopen");exit(1);}int i,j;srand( (unsigned)time(NULL) );for( i = 1 ; i <= n ;i++)fscanf(file1,"%d",&A[i]);fclose(file1);}
程序的流程是:
Main(){ Generate();//生成随机数据 Read();//读取随机数据到数组A中 Calculate() { Pthread_create(func1); Pthread_create(func2); Pthread_create(func3); } ResultToFile();//把结果写入文件}Func1();//计算:B[0][j] = A[j]Func2();//计算:B[h][j]=B[h-1][2*j-1]+B[h-1][2*j]Func3();//计算:C[i][j]
描述:首先通过随机数生成一组随机数,存放到文件中去,然后通过读取文件中的数据到数组A中。然后开始计时,调用计算函数calculate,创建多线程计算:
<span style="font-family:Microsoft YaHei;font-size:14px;">B[0][j] = A[j];B[h][j]=B[h-1][2*j-1]+B[h-1][2*j];{ if(j==1) prefix_sum[h][1]= B[h][1]; elseif (j%2==0) prefix_sum[h][j] =prefix_sum[h+1][j/2] ; elseprefix_sum[h][j] = prefix_sum[h+1][(j-1)/2] + B[h][j] ;}</span>
最后输出结果到文件。
结果截图:
n=2:
n=4:
n=8:
n=16:
n=500:
n=600:
n=800:
3.2.Get rid of the 2-D array
这是计算prefix sum的另外一种算法,去除二维数组。
伪代码:
in:A[1..n],out:Prefix[1..n]for 1<=i<=n do in parallel Prefix[i] = A[i-1] + A[i];end in parallelk = 2:while(k<n) do for k+1<=i<=n do in parallel Prefix[i] = Prefix[i-k] + Prefix[i] end in parallel k = k + k;end while
#include<stdio.h>#include<time.h>#include<pthread.h>#include<stdlib.h>#include<unistd.h>#include<memory.h>#include<stdlib.h>#include<math.h>#define RANGE 99#define M 1000void generate();void Read();void *func1(void *arg);void *func2(void *arg);void calculate();void resultToFile();int A[M]={0};int prefix_sum[M]={0};int temp[M]={0};int n;pthread_t tids[1000][1000];pthread_mutex_t mutex;struct S{int i ;int j ;};int main(){printf("please input the scale of input : ");scanf("%d",&n);generate();Read();clock_t start = clock();calculate();resultToFile();clock_t finish = clock();printf("The time cost is : %.6f second\n",(double)(finish-start)/CLOCKS_PER_SEC);return 0 ;}void resultToFile(){int i;/***********write the result to file**************/FILE *file;file = fopen("/home/leaguenew/parallel_computing/GetRidOf2DArray.txt","wt");for(i=1;i<=n;i++){fprintf(file,"%-6d",prefix_sum[i]);}/*************************************************/}void calculate(){int h,i,j,k,l ;/************************************************/for(i=1;i<=n;i++){struct S* s;s = (struct S*)malloc(sizeof(struct S));s->i = i;if( pthread_create(&tids[0][i],NULL,func1,s)){perror("Fail to create thread!");exit(1);}}for(j=1;j<=n;j++)pthread_join(tids[0][j],NULL);/************************************************/k=2;while(k<n){for(i=1;i<=n;i++)temp[i] = prefix_sum[i];for(i=k+1;i<=n;i++){struct S* s;s = (struct S*)malloc(sizeof(struct S));s->i = i;s->j = k;if( pthread_create(&tids[k][i],NULL,func2,s)){perror("Fail to create thread!");exit(1);}}for(j=k+1;j<=n;j++)pthread_join(tids[k][j],NULL);for(i=1;i<=n;i++)prefix_sum[i] = temp[i];k = k + k;}#if 0/************show the result***************/printf("A: \n");for(i=1;i<=n;i++)printf("%d ",A[i]);printf("\nprefix_sum:\n");for(i=1;i<=n;i++)printf("%d ",prefix_sum[i]);printf("\n");/******************************************/#endif}void *func1(void *arg){int i ;struct S *p;p = (struct S*)arg;i = p->i;pthread_mutex_lock(&mutex);if(i==1) prefix_sum[i] = A[i];else prefix_sum[i] = A[i-1] + A[i];pthread_mutex_unlock(&mutex);pthread_exit(NULL);}void *func2(void *arg){int i ,k ;struct S *p;p = (struct S*)arg;i = p->i;k = p->j;pthread_mutex_lock(&mutex);temp[i] = prefix_sum[i-k] + prefix_sum[i];pthread_mutex_unlock(&mutex);pthread_exit(NULL);}void generate(){FILE *file;if( (file = fopen("/home/leaguenew/parallel_computing/arrayB.txt","wt"))==NULL ){perror("fopen");exit(1);}int i , j ;srand((unsigned)time(NULL) );for( i=1;i<=n;i++ )fprintf(file,"%-6d",rand()%RANGE);fprintf(file , "\n");fclose(file);}void Read(){FILE *file;if( (file = fopen("/home/leaguenew/parallel_computing/arrayB.txt","rt"))==NULL ){perror("fopen");exit(1);}int i , j ;srand((unsigned)time(NULL) );for( i=1;i<=n;i++ )fscanf(file,"%d",&A[i]);fclose(file);}
程序的流程是:
Main(){Generate();//生成随机数据写入文件Read();//读取文件中的数据到数组中Calculate(){Pthread_create(func1);Pthread_create(func2);}ResultToFile();//把结果写入文件}Func1();//计算:prefix_sum[i] = A[i-1] + A[i];Func2();//计算:prefix[i] = prefix_sum[i-k] + prefix_sum[i];<strong>
结果截图:
n=4:
n=6:
n=16:
四、总结
a.pthread的链接库问题:编译的时候要添加–lpthread。
b.Scanf参数没有添加&造成段错误。
c.通过打印调试调试程序,及打印相关的输出语句来判断哪个位置发生错误。
d.sleep()不占计算clock()时间。
f.在UNIX和LINUX系统中在要命令中加入-lm这个功能是把数学库与源程序链接在一起。
g.Pthread_join的理解,等待所有创建的线程结束后继续进行。
h.通过动态创建结构体来给pthread_create()传递多个参数。
转载注明:http://blog.csdn.net/lavorange/article/details/39103609
- 多线程技术模拟并行计算之二:数组前缀和(Prefix Sum)
- 数组的前缀和(Prefix Sum)问题及其并行算法
- 使用MPI并行求解前缀和(prefix sum)
- 【并行计算】前缀和并行思路
- 多线程技术模拟并行计算之一:查找极大独立集(Find the Maximal Independent Set)
- 并行算法之前缀和
- 多核中的并行前缀和计算
- 多核中的并行前缀和计算
- MPI编程中并行前缀和计算
- 浅谈.NET下的多线程和并行计算(二)线程基本知识
- java 多线程并行计算之矩阵乘法(星星笔记)
- java 多线程并行计算之求素数(星星笔记)
- 我的并行计算之路(二)MPI点对点通信MPI_Send和MPI_Recv
- CSU1642-Problem B-前缀和数组/模拟
- 前缀和并行算法
- LeetCode OJ 之 Longest Common Prefix (最长公共前缀)
- 并行计算与多核多线程技术 课程报告
- 浅谈.NET下的多线程和并行计算(七)基于多线程的基本组件
- GTK GDK 事件 信号
- android项目 之 记事本(3) -------- 添加拍照功能
- “聊天剽窃手"技术分析 - java部分
- 黑马程序员——java基础:数据类型
- 互联网公司待遇
- 多线程技术模拟并行计算之二:数组前缀和(Prefix Sum)
- leetcode 之 Word Ladder
- UVA - 10588 Queuing at the doctors (队列)
- Splay tree 区间翻转 模板
- python中字符串和列表的相互转换
- hdu 1426 数独, dfs
- C#实现万年历(农历、节气、节日、星座、属相、生肖、闰年等)
- CSDN开源夏令营 百度数据可视化实践 ECharts 11 (期末总结)
- K&R---函数与程序结构中,逆波兰计算器的实现