MPI(Message Passing Interface)是目前最重要的一个基于消息传递的并行编程工具,它具有移植性好、功能强大、效率高等许多优点,而且有多种不同的免费、高效、实用的实现版本,几乎所有的并行计算机厂商都提供对它的支持,成为了事实上的并行编程标准。

MPI是一个库,而不是一门语言,因此对MPI的使用必须和特定的语言结合起来进行。MPI不是一个独立的自包含系统,而是建立在本地并行程序设计环境之上,其进程管理和I/O均由本地并行程序设计环境提供。例如,MPI可以建立在IBM SP2的POE/MPL之上,也可以建立在Intel Paragon的OSF/NX之上。除了这些商业版本的MPI实现,还有一些免费版的MPI实现,主要有MPICH、LAM和CHIMP。


sudo apt-get install mpich
编写C语言或C++代码时,在源文件开头加入 #include "mpi.h",便可以在代码中调用MPI的并行接口了。
C语言编译mpi程序:mpicc example.c -o example
C++编译mpi程序:mpic++ example.cpp -o example
运行mpi程序:mpiexec -n 4 ./example (4为指定运行的进程数)



    int n = 100, x,sum;    int h = 1.0/n;    for(int i=1; i<=n; i++)    {        x= (i - 0.5)/n;        sun += 4.0/(1+x*x);    }    pi = sum*h;    printf("pi = %d\n",pi);


//*计算π的C语言 MPI编程代码段*// #include "mpi.h"#include <stdio.h>#include <math.h>double f(double);double f(double a){    return (4.0/(1.0 + a*a));} int main(int argc, char *argv[]){    int done = 0, n, myid, numprocs, i;    double PI25DT = 3.141592653589793238462643;    double mypi, pi, h, sum, x;    double startwtime = 0.0, endwtime;    int namelen;    char processor_name[MPI_MAX_PROCESSOR_NAME];    MPI_Init(&argc, &argv);  //mpi的初始化    MPI_Comm_size(MPI_COMM_WORLD, &numprocs);  //获取线程数    MPI_Comm_rank(MPI_COMM_WORLD, &myid);  //获取线程id值    MPI_Get_processor_name(processor_name, &namelen);  //获取处理器名称    fprintf(stderr, "Process %d on %s\n", myid, processor_name);    n = 0;    while(!done)    {        if(myid == 0)        {    /*      printf("Enter the number of intervals: (0 quits)");            scanf("%d",&n); */            if(n == 0)                n = 100;            else                n = 0;            startwtime = MPI_Wtime();        }        MPI_Bcast(&n, 1, MPI_INT, 0, MPI_COMM_WORLD);  //进行广播传送消息        if(n == 0)            done = 1;        else        {            h = 1.0/(double)n;            sum = 0.0;            for(i=myid+1; i<=n; i+=numprocs)  //各线程计算自己的面积            {                x = h * ((double)i - 0.5);                sum += f(x);            }            mypi = h * sum;            MPI_Reduce(&mypi, &pi, 1, MPI_DOUBLE, MPI_SUM, 0, MPI_COMM_WORLD);   //归约,mypi为发送方,pi为接收方            if(myid == 0)            {                printf("pi is approximately %.16f,Error is %.16f\n",pi, fabs(pi-PI25DT));                endwtime = MPI_Wtime();                printf("wall clock time = %f\n", endwtime-startwtime);            }        }    }    MPI_Finalize();   //mpi结束    return 0;}






#include <stdlib.h>#include <stdio.h>#include <limits.h>#include <assert.h>#include <sys/time.h>#include <unistd.h>#include "mpi.h"int i,j,k;int N = 36;int cmp(const void * a, const void * b) {  if (*(int*)a < *(int*)b) return -1;  if (*(int*)a > *(int*)b) return 1;  else return 0;}void phase1(int *array, int N, int startIndex, int subArraySize, int *pivots, int p) {  // 对子数组进行局部排序  qsort(array + startIndex, subArraySize, sizeof(array[0]), cmp);  // 正则采样  for (i = 0; i < p; i++) {    pivots[i] = array[startIndex + (i * (N / (p * p)))];      }  return;}void phase2(int *array, int startIndex, int subArraySize, int *pivots, int *partitionSizes, int p, int myId) {  int *collectedPivots = (int *) malloc(p * p * sizeof(pivots[0]));  int *phase2Pivots = (int *) malloc((p - 1) * sizeof(pivots[0]));          //主元  int index = 0;  //收集消息,根进程在它的接受缓冲区中包含所有进程的发送缓冲区的连接。  MPI_Gather(pivots, p, MPI_INT, collectedPivots, p, MPI_INT, 0, MPI_COMM_WORLD);         if (myId == 0) {    qsort(collectedPivots, p * p, sizeof(pivots[0]), cmp);          //对正则采样的样本进行排序    // 采样排序后进行主元的选择    for (i = 0; i < (p -1); i++) {      phase2Pivots[i] = collectedPivots[(((i+1) * p) + (p / 2)) - 1];    }  }  //发送广播  MPI_Bcast(phase2Pivots, p - 1, MPI_INT, 0, MPI_COMM_WORLD);  // 进行主元划分,并计算划分部分的大小  for ( i = 0; i < subArraySize; i++) {    if (array[startIndex + i] > phase2Pivots[index]) {      //如果当前位置的数字大小超过主元位置,则进行下一个划分      index += 1;    }    if (index == p) {      //最后一次划分,子数组总长减掉当前位置即可得到最后一个子数组划分的大小      partitionSizes[p - 1] = subArraySize - i + 1;      break;    }    partitionSizes[index]++ ;   //划分大小自增  }  free(collectedPivots);  free(phase2Pivots);  return;}void phase3(int *array, int startIndex, int *partitionSizes, int **newPartitions, int *newPartitionSizes, int p) {  int totalSize = 0;  int *sendDisp = (int *) malloc(p * sizeof(int));  int *recvDisp = (int *) malloc(p * sizeof(int));  // 全局到全局的发送,每个进程可以向每个接收者发送数目不同的数据.  
MPI_Alltoall(partitionSizes, 1, MPI_INT, newPartitionSizes, 1, MPI_INT, MPI_COMM_WORLD);  // 计算划分的总大小,并给新划分分配空间  for ( i = 0; i < p; i++) {    totalSize += newPartitionSizes[i];  }  *newPartitions = (int *) malloc(totalSize * sizeof(int));  // 在发送划分之前计算相对于sendbuf的位移,此位移处存放着输出到进程的数据  sendDisp[0] = 0;  recvDisp[0] = 0;      //计算相对于recvbuf的位移,此位移处存放着从进程接受到的数据  for ( i = 1; i < p; i++) {    sendDisp[i] = partitionSizes[i - 1] + sendDisp[i - 1];    recvDisp[i] = newPartitionSizes[i - 1] + recvDisp[i - 1];  }  //发送数据,实现n次点对点通信  MPI_Alltoallv(&(array[startIndex]), partitionSizes, sendDisp, MPI_INT, *newPartitions, newPartitionSizes, recvDisp, MPI_INT, MPI_COMM_WORLD);  free(sendDisp);  free(recvDisp);  return;}void phase4(int *partitions, int *partitionSizes, int p, int myId, int *array) {  int *sortedSubList;  int *recvDisp, *indexes, *partitionEnds, *subListSizes, totalListSize;  indexes = (int *) malloc(p * sizeof(int));  partitionEnds = (int *) malloc(p * sizeof(int));  indexes[0] = 0;  totalListSize = partitionSizes[0];  for ( i = 1; i < p; i++) {    totalListSize += partitionSizes[i];    indexes[i] = indexes[i-1] + partitionSizes[i-1];    partitionEnds[i-1] = indexes[i];  }  partitionEnds[p - 1] = totalListSize;  sortedSubList = (int *) malloc(totalListSize * sizeof(int));  subListSizes = (int *) malloc(p * sizeof(int));  recvDisp = (int *) malloc(p * sizeof(int));  // 归并排序  for ( i = 0; i < totalListSize; i++) {    int lowest = INT_MAX;    int ind = -1;    for (j = 0; j < p; j++) {      if ((indexes[j] < partitionEnds[j]) && (partitions[indexes[j]] < lowest)) {    lowest = partitions[indexes[j]];    ind = j;      }    }    sortedSubList[i] = lowest;    indexes[ind] += 1;  }  // 发送各子列表的大小回根进程中  MPI_Gather(&totalListSize, 1, MPI_INT, subListSizes, 1, MPI_INT, 0, MPI_COMM_WORLD);  // 计算根进程上的相对于recvbuf的偏移量  if (myId == 0) {    recvDisp[0] = 0;    for ( i = 1; i < p; i++) {      recvDisp[i] = subListSizes[i - 1] + recvDisp[i - 1];    }  }  //发送各排好序的子列表回根进程中  
MPI_Gatherv(sortedSubList, totalListSize, MPI_INT, array, subListSizes, recvDisp, MPI_INT, 0, MPI_COMM_WORLD);  free(partitionEnds);  free(sortedSubList);  free(indexes);  free(subListSizes);  free(recvDisp);  return;}//PSRS排序函数,调用了4个过程函数void psrs_mpi(int *array, int N)    {    int p, myId, *partitionSizes, *newPartitionSizes, nameLength;    int subArraySize, startIndex, endIndex, *pivots, *newPartitions;    char processorName[MPI_MAX_PROCESSOR_NAME];    MPI_Comm_size(MPI_COMM_WORLD,&p);    MPI_Comm_rank(MPI_COMM_WORLD,&myId);    MPI_Get_processor_name(processorName,&nameLength);    printf("Process %d is on %s\n",myId, processorName);    pivots = (int *) malloc(p*sizeof(int));    partitionSizes = (int *) malloc(p*sizeof(int));    newPartitionSizes = (int *) malloc(p*sizeof(int));    for ( k = 0; k < p; k++) {      partitionSizes[k] = 0;    }    // 获取起始位置和子数组大小    startIndex = myId * N / p;    if (p == (myId + 1)) {      endIndex = N;    }     else {      endIndex = (myId + 1) * N / p;    }    subArraySize = endIndex - startIndex;    MPI_Barrier(MPI_COMM_WORLD);    //调用各阶段函数    phase1(array, N, startIndex, subArraySize, pivots, p);    if (p > 1) {      phase2(array, startIndex, subArraySize, pivots, partitionSizes, p, myId);      phase3(array, startIndex, partitionSizes, &newPartitions, newPartitionSizes, p);      phase4(newPartitions, newPartitionSizes, p, myId, array);    }    if (myId == 0)      for(k = 0; k < N; k++){        printf("%d ",array[k]);     }     printf("\n");    if (p > 1) {      free(newPartitions);    }    free(partitionSizes);    free(newPartitionSizes);    free(pivots);  free(array);  MPI_Finalize();}int main(int argc, char *argv[]) {  int *array;  array = (int *) malloc(N*sizeof(int));    srand(100);    for ( k = 0; k < N; k++) {      array[k] = rand()%100;    }    MPI_Init(&argc,&argv);      //MPI初始化    psrs_mpi(array,N);          //调用PSRS算法进行并行排序  return 0;}
