MPI计算Pi后10万位过程

来源：互联网发布：吴昕淘宝店为什么关了编辑：程序博客网时间：2024/06/05 02:18

1) 搭建Beowulf机群

2) 并行计算Pi(任务分配)

二. 实验环境

1) 硬件环境：2核CPU、8G内存计算机；

2) 软件环境：Centos 6.5 32位、GCC、MPICH2、GotoBlas2、GMP

本教程只能作为参考，不是一步一步记下实验步骤，

实验过程中如果遇到问题，自己百度解决。关键是搭建环境，ssh，然后是mpi。

搭建beowulf机群

1.SSH无密码登录

首先配置hosts文件

10.199.156.142 master

10.199.157.195 node1

每个节点安装sshserver

yum install openssh-server

将每个节点上的id_rsa.pub文件追加到master的~/.ssh/authorized_keys末尾

cat ~/.ssh/id_rsa.pub | ssh root@master "cat - >> ~/.ssh/authorized_keys"

追加后的authorized_keys文件为

在ssh-client一边使用ssh-add将刚生成的private key加入到ssh agent中

ssh-add ~/.ssh/id_rsa

关闭防火墙

/etc/init.d/iptables status

/etc/init.d/iptables stop

将master上的authorized_keys文件复制到所有子节点的 ~/.ssh目录下

scp root@master:~/.ssh/authorized_keys ~/.ssh/

将authorized_keys的权限改为600

chmod 600 ~/.ssh/authorized_keys

无密码登陆效果：

2.安装MPICH2

1) 下载mpich2（版本1.4.1p1）

我用到的mpi压缩包

2)解压到当前文件夹

tar zxvf mpich2-1.4.1p1.tar.gz

进入解压后目录

cd mpich2-1.4.1p1

安装命令

./configure --prefix=/opt/mpich2 --with-pm=hydra:mpd --with-mpe

make

sudo make install

3)配置MPI

1.配置环境变量（/etc/profile）

sudo vi /etc/profile

在/etc/profile的底部添加下面几行

export MPI_ROOT=/opt/mpich2

export PATH=$MPI_ROOT/bin:$PATH

export MANPATH=$MPI_ROOT/bin:$MANPATH

环境搭建完成测试：

执行下面命令使/etc/profile生效

source /etc/profile

2. 测试环境变量

which mpd

which mpicc

which mpiexec

which mpirun

3.创建配置文件.mpd.conf

root用户下mpd.conf位于/etc路径下，文件名为"mpd.conf"

4.创建配置文件mpd.hosts

路径：~/mpd.hosts。

并行计算pi:

安装GMP

gmp库下载地址https://gmplib.org/#DOWNLOAD

tar -jvxf gmp-5.1.0.tar.bz2

cd gmp-5.1.0

./configure --enable-cxx

make

make check

make install

算法bbp:

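贝利-波尔温-普劳夫公式（BBP公式）提供了一个计算圆周率π的第n位二进制数的spigot算法（spigot algorithm）。这个求和公式是在1995年由西蒙·普劳夫提出的，并以公布这个公式的论文作者大卫·贝利（David H. Bailey）、皮特·波尔温（Peter Borwein）和普劳夫的名字命名。在论文发表之前，普劳夫已将此公式在他的网站上公布。这个公式是：

$$\pi = \sum_{k=0}^{\infty} \frac{1}{16^{k}} \left( \frac{4}{8k+1} - \frac{2}{8k+4} - \frac{1}{8k+5} - \frac{1}{8k+6} \right)$$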

这个公式的发现曾震惊学界。数百年来，求出π的第n位小数而不求出它的前n-1位曾被认为是不可能的。

自从这个发现以来，发现了更多的无理数常数的类似公式，它们都有一个类似的形式：

$$\alpha = \sum_{k=0}^{\infty} \frac{1}{b^{k}} \, \frac{p(k)}{q(k)}$$

其中α是目标常数，p和q是整系数多项式，b ≥ 2是整数的数制。

这种形式的公式被称为BBP式公式（BBP-type formulas）。由特定的p、q和b可组合出一些著名的常数。

算法流程图：

每个线程分配相应循环的任务，然后把计算的结果加到一起，得到更精确的pi。

其中用到了高精度算法库，gmp。本实验主要用到mpf_t数据类型并且是10进制，进行高精度浮点运算。

并且在传输计算结果的时候，通过mpf_get_str把子节点计算的数据转换为字符串，并且获取数据的指数和字符串的长度。在master上收到子节点送过来的字符串、长度和指数后，用mpf_set_str把字符串转换为mpf_t类型，从而可以进行加法运算。

部分源代码:

/*该函数实现归约求和的功能*/ void Myreduce (mpf_t sendbuf){ MPI_Status status; char block_floor[DPREC+1],block_roof[DPREC+1];char *word; int i;//gmp variables  //mp_exp_t *exponent; // holds the exponent for the result stringmp_exp_t exponent ;mpf_t rExponent, tmp ,tmp2;    //MPI variables ;    long int lExponent;int length;    MPI_Request *request ; int myid,numprocs;//*recvbuf=0.0;int node; MPI_Comm_rank(MPI_COMM_WORLD,&myid); MPI_Comm_size(MPI_COMM_WORLD,&numprocs);//initial mpf_t variable             mpf_init(tmp);        mpf_init(tmp2) ;    //mpf_set_prec(tmp, DPREC) ;    //mpf_set_prec(tmp2, DPREC) ;    mpf_init(rExponent) ; //非root节点向root节点发送数据 if(myid!=0){/*send mpf_t*/ mpf_out_str(stdout,10,dprec + 10, sendbuf);  MPI_Send(&myid, 1, MPI_INT, 0, 0, MPI_COMM_WORLD);//So the master knows who I amword = mpf_get_str(NULL, &exponent, 10, 0, sendbuf) ;                length = strlen(word); MPI_Send(&exponent, 1, MPI_LONG, 0, 0, MPI_COMM_WORLD) ;//Send the exponentMPI_Send(&length, 1, MPI_INT, 0, 0, MPI_COMM_WORLD);//Send the length of the string    MPI_Send(word, strlen(word)+1, MPI_CHAR, 0, 0,MPI_COMM_WORLD) ;//Send the stringfree(word);} //master节点接收数据并对数据求和，完成规约操作 else if(myid==0){     mpf_add(pi,pi,sendbuf);              mpf_out_str(stdout,10,dprec + 10, pi);   for(i=1;i<numprocs; i++){                MPI_Recv(&node, 1, MPI_INT, MPI_ANY_SOURCE, 0,MPI_COMM_WORLD, &status) ;            printf("Node %i just called! 
", node) ;        MPI_Recv(&lExponent, 1, MPI_LONG, node, 0,MPI_COMM_WORLD, &status) ;            mpf_set_d(rExponent, pow(10.0,(double)lExponent)) ;                    MPI_Recv(&length, 1, MPI_INT, node, 0,MPI_COMM_WORLD, &status) ;                       //printf("length received from node%d.\n",node) ;            word = (char *)malloc(sizeof(char)*length+10) ;                       char su[40];                        sprintf(su,"@-%d",length);//update exponent            MPI_Recv(word, length+1, MPI_CHAR, node, 0,MPI_COMM_WORLD, &status) ;                          strcat(word,su);            mpf_set_str(tmp, word, 10) ;            mpf_mul(tmp, tmp, rExponent) ;                  mpf_add(pi, pi, tmp);              mpf_out_str(stdout,10,dprec + 10, pi);                free(word) ;                       }                                    }//mpf_set(pi,recvbuf); }  main(int argc, char *argv[]) {   //pthread_t thread_context[PNUM];   struct timeval t0, t1;   int my_rank,num_procs;    char processor_name[MPI_MAX_PROCESSOR_NAME];   mpf_t  pi_tmp;    int proc_len;    /* Define mpi variables for convenience */    int len=VECLEN, myid, numprocs;   // struct pthread_arg *t; /* MPI Initialization */ MPI_Init (&argc, &argv); MPI_Comm_size (MPI_COMM_WORLD, &numprocs); MPI_Comm_rank (MPI_COMM_WORLD, &myid); MPI_Get_processor_name(processor_name,&proc_len);FILE *f; f=fopen("thread.txt","a");     gettimeofday(&t0, NULL);   dprec=DPREC;            /* decimal precision */   loopdiv = dprec / 10000;   prec= bprec(dprec+10);     mpf_set_default_prec(prec); printf("myid=%d",myid);  //thread_func(); mpf_init(pi);   mpf_set_si(pi,0);   time_t t00, t11;   int loopcounter=0;   //int i = 0;   int j;   int k;   float percent;   float fprec = (prec * 0.78) / ( LOG_TEN_TWO * pnum ) ;   time(&t00);   mpf_init(A);   mpf_init(B);   mpf_init(C);   mpf_init(O);   mpf_init(fta);   mpf_init(ftb);   mpf_init(ftc);   mpf_set_si(A,1) ;                /*  A=1  */   mpf_set_si(B,0) ;                /*  
B=0  */   mpf_set_si(O,-1) ;                /*  O=-1 */    mpf_set_ui (fta, 16);              /* 16^n  (init)*/   mpf_pow_ui(A, fta, myid);     //MPI_Bcast(&dprec,1,MPI_INT,0,MPI_COMM_WORLD);   //把n广播给本通信环境中的所有进程int mycounter=0;printf("\n------------------numberprocs=%d\n",numprocs);if(myid==0) fprintf(f,"\n------------------numberprocs=%d\n",numprocs);   for ( j = myid ; j < dprec; j+=numprocs) {   /* but, j never be reached dprec */     mpf_set_ui (fta, 8);            /* 8  */     mpf_mul_ui (B, fta, j);    /* 8n -> B */          /* B is 8n */     mpf_add_ui(fta,B,1);            /*  8n + 1 -> B + 1 */     mpf_ui_div(ftb,4,fta);    /*  4/(8n+1) */      mpf_add_ui(fta,B,4);            /* 8n + 4 -> B + 4 */     mpf_ui_div(ftc,2,fta);    /* 2/(8n+4) */      mpf_sub(fta, ftb, ftc);    /* 4/(8n+1) -  2/(8n+4) */            mpf_add_ui(ftb,B,5);            /*  8n + 5 -> B + 5*/     mpf_ui_div(ftc,1,ftb);    /*  1/(8n+5) */            mpf_sub(ftb, fta, ftc);    /*  4/(8n+1) -  2/(8n+4)  - 1/(8n+5)  */      mpf_add_ui(fta,B,6);            /*  8n + 6 -> B + 6*/     mpf_ui_div(ftc,1,fta);    /*  1/(8n+6) */      mpf_sub(fta, ftb, ftc);    /*  4/(8n+1) - 2/(8n+4) - 1/(8n+5) - 1/(8n+6) */      mpf_div(ftb,fta,A);    /* M /16^n */      /*      * For Next Loop       */     mpf_set(O,C);     mpf_add(C,O,ftb);        /* O will be used as Pi(n-1) */       /*      *  Precision  check       */     if ( !mpf_cmp(O,C ) ) {              break;               /* precision is enough  */     }           for(k=0; k < numprocs ; k++) {         //16^(n + pnum)  for next loop       mpf_mul_ui(fta,A,16);                 mpf_set(A,fta);     }       loopcounter=j;mycounter++;     if ( mycounter>loopdiv) {mycounter=0;       time(&t11);       percent = (((loopcounter)/(fprec))*100.0);           if ( percent < 98.0 ) {           printf("(Thread #%d)%.0f% (%d sec)\n",myid,percent,t11-t00); //在屏幕打印实验结果        fprintf(f,"(Thread #%d)%.0f% (%d sec)\n",myid,percent,t11-t00); //记录实验结果          }     }    } 
     Myreduce(C);     if(myid==0){   printf("------------print final pi-------------------\n");      mpf_out_str(stdout,10,dprec + 10, pi);gmp_fprintf (f,"fixed point mpf %.*Ff with %d digits\n", dprec + 10, pi, dprec + 10);   gettimeofday(&t1, NULL);//获取执行完的时间      sprintf(printbuf,"Time: %.3f",      ((t1.tv_sec-t0.tv_sec)*1000000 + (t1.tv_usec-t0.tv_usec))/1000000.0);        }  fclose(f);MPI_Finalize();    }

实验结果:

下载地址:

源代码地址

0 0