openMP编程(上篇)之指令和锁

来源：互联网发布：python二分法编辑：程序博客网时间：2024/05/22 06:47

parallel :

#include "omp.h"    /* OpenMP runtime API */
#include "stdio.h"

#define NUM_THREADS 4

/*
 * Demonstrates a bare `parallel` region: every thread of the team executes
 * the whole block, so each thread prints the greeting once and then prints
 * its own i = 0..4 sequence.
 */
int main()
{
    omp_set_num_threads(NUM_THREADS);   /* request a team of NUM_THREADS threads */

    #pragma omp parallel
    {
        /*
         * The loop index is declared inside the region so that every thread
         * owns a private copy. Declared outside the region it would be
         * shared by the whole team, and the threads would race on it.
         */
        int i;

        printf ("hello world! \n");
        for (i = 0; i < 5; i++)
            printf("i=%d,thread = %d\n", i, omp_get_thread_num());
    }

    return 0;
}

hello world! i=0,thread = 0i=1,thread = 0i=2,thread = 0i=3,thread = 0i=4,thread = 0hello world! i=0,thread = 3i=1,thread = 3i=2,thread = 3i=3,thread = 3i=4,thread = 3hello world! i=0,thread = 1i=1,thread = 1i=2,thread = 1i=3,thread = 1i=4,thread = 1hello world! i=0,thread = 2i=1,thread = 2i=2,thread = 2i=3,thread = 2i=4,thread = 2

parallel for :

牵扯到for循环时，往往需要用到parallel for指令。

#include "omp.h"
#include "stdio.h"

#define NUM_THREADS 3

/*
 * Demonstrates the combined `parallel for` construct next to an ordinary
 * serial loop.
 */
int main()
{
    int i, j;

    omp_set_num_threads(NUM_THREADS);

    /*
     * `parallel for` applies to the for-loop that immediately follows it:
     * the iterations of that loop are divided among the threads of the team.
     */
    #pragma omp parallel for
    for (i = 0; i < 5; i++)
        printf("i= %d,thread=%d\n", i, omp_get_thread_num());

    /* Ordinary loop outside any parallel region: executed by one thread. */
    for (j = 0; j < 4; j++)
        printf("j= %d,thread=%d\n", j, omp_get_thread_num());

    return 0;
}

i= 0,thread=0i= 1,thread=0i= 4,thread=2i= 2,thread=1i= 3,thread=1j= 0,thread=0j= 1,thread=0j= 2,thread=0j= 3,thread=0

这种写法很有局限，就是#pragma omp parallel for 只能作用到紧跟着的for循环，也就是说，并行块中第一句话只能是for循环，不能是其他代码。因为这个写法为for循环专属。可以将上述写成如下形式：

#include "omp.h"
#include "stdio.h"

#define NUM_THREADS 3

/*
 * Demonstrates a `parallel` region that contains per-thread code followed
 * by two separate worksharing loops (`omp for`).
 */
int main()
{
    int i, j;

    omp_set_num_threads(NUM_THREADS);

    #pragma omp parallel
    {
        /* Not inside a worksharing construct: every thread runs this line. */
        printf("HelloWorld! , thread=%d\n", omp_get_thread_num());

        /* The iterations of the next loop are split among the team. */
        #pragma omp for
        for (i = 0; i < 5; i++) {
            printf("i= %d,thread=%d\n", i, omp_get_thread_num());
        }

        /* A second worksharing loop inside the same parallel region. */
        #pragma omp for
        for (j = 0; j < 4; j++) {
            printf("j= %d,thread=%d\n", j, omp_get_thread_num());
        }
    }

    return 0;
}

HelloWorld! , thread=0i= 0,thread=0i= 1,thread=0HelloWorld! , thread=2i= 4,thread=2HelloWorld! , thread=1i= 2,thread=1i= 3,thread=1j= 0,thread=0j= 1,thread=0j= 2,thread=1j= 3,thread=1

可见，第二种写法完全能够完成对for循环的拆分并行，而且能够在同一个并行块中对多个for循环分别进行拆分。更好的是，这种写法还带来了另一种功能：像输出HelloWorld的那条语句一样，放在并行块内、for循环之外的语句会被所有线程各执行一次。如果for循环需要为每个线程初始化一个变量，那么这个变量的赋值就可以放在此输出语句的位置，示例请看文章最后的例子。

barrier:

#include <stdio.h>
#include "omp.h"

/*
 * Demonstrates explicit `barrier` directives placed between the phases of a
 * parallel region: a greeting phase followed by two worksharing loops.
 */
int main (void)
{
    omp_set_num_threads (5);

    #pragma omp parallel
    {
        printf ("hello world!,thread=%d\n", omp_get_thread_num ());

        /* No thread goes past this point until every thread has printed
         * its greeting. */
        #pragma omp barrier

        #pragma omp for
        for (int first = 0; first < 5; first++) {
            printf ("i= %d,thread=%d\n", first, omp_get_thread_num ());
        }

        /* Explicit synchronisation point between the two loops: all
         * iterations of the first loop finish before the second starts. */
        #pragma omp barrier

        #pragma omp for
        for (int second = 0; second < 5; second++) {
            printf ("j= %d ,thread= %d\n", second, omp_get_thread_num ());
        }
    }

    return 0;
}

hello world!,thread=4hello world!,thread=1hello world!,thread=3hello world!,thread=2hello world!,thread=0i= 4,thread=4i= 0,thread=0i= 3,thread=3i= 1,thread=1i= 2,thread=2j= 0 ,thread= 0j= 1 ,thread= 1j= 2 ,thread= 2j= 4 ,thread= 4j= 3 ,thread= 3

master / single :

看了对于for循环的并行之后，产生了一个新的问题，如果要在两个并行的for循环之间插入一个单线程执行的语句，应该如下做：

#include "omp.h"
#include "stdio.h"

#define NUM_THREADS 5

/*
 * Runs a serial statement between two parallel loops by using two separate
 * `parallel for` regions with ordinary code in between.
 */
int main(void)
{
    omp_set_num_threads(NUM_THREADS);

    /* First parallel loop. */
    #pragma omp parallel for
    for (int i = 0; i < 4; i++) {
        printf ("i = %d ,thread=%d \n", i, omp_get_thread_num());
    }

    /* Between the two parallel regions only one thread is running, so this
     * line is printed exactly once. */
    printf ("I am a single thread %d \n", omp_get_thread_num());

    /* Second parallel loop. */
    #pragma omp parallel for
    for (int j = 0; j < 4; j++) {
        printf ("j = %d ,thread=%d \n", j, omp_get_thread_num());
    }

    return 0;
}

i = 0 ,thread=0 i = 3 ,thread=3 i = 2 ,thread=2 i = 1 ,thread=1 I am a single thread 0 j = 3 ,thread=3 j = 1 ,thread=1 j = 0 ,thread=0 j = 2 ,thread=2

但是上述的程序看起来很麻烦，master和single指令就是解决这个问题的：

#include <stdio.h>
#include "omp.h"

#define NUM_THREADS 5

/*
 * Same idea as running a serial statement between two parallel loops, but
 * done inside one parallel region: the statement in the middle is wrapped
 * in `single` so that only one thread of the team executes it.
 */
int main (void)
{
    omp_set_num_threads (NUM_THREADS);

    #pragma omp parallel
    {
        #pragma omp for
        for (int i = 0; i < 4; i++) {
            printf ("i= %d, thread= %d\n", i, omp_get_thread_num ());
        }

        #pragma omp barrier

        /* Replace `single` with `master` to have the master thread execute
         * the statement instead of an arbitrary thread of the team. */
        #pragma omp single
        {
            printf ("I am a single thread ! thread= %d\n", omp_get_thread_num ());
        }

        #pragma omp barrier

        #pragma omp for
        for (int j = 0; j < 5; j++) {
            printf ("j= %d, thread= %d\n", j, omp_get_thread_num ());
        }
    }

    return 0;
}

i= 2, thread= 2i= 0, thread= 0i= 1, thread= 1i= 3, thread= 3I am a single thread ! thread= 2j= 2, thread= 2j= 0, thread= 0j= 3, thread= 3j= 1, thread= 1j= 4, thread= 4

效果是一样的。区别在于：master 指定由主线程（线程0）执行，而 single 由任意一个线程（不一定是主线程）执行。

parallel sections:

#include <stdio.h>
#include "omp.h"

#define  NUM_THREADS 10

/*
 * Demonstrates the combined `parallel sections` construct: each `section`
 * is an independent piece of work that is executed once, by one thread of
 * the team.
 */
int main (void)
{
    omp_set_num_threads (NUM_THREADS);

    #pragma omp parallel sections
    {
        #pragma omp section
        {
            printf ("thread %d section A!\n", omp_get_thread_num ());
        }

        #pragma omp section
        {
            printf ("thread %d section B!\n", omp_get_thread_num ());
        }

        #pragma omp section
        {
            printf ("thread %d section C!\n", omp_get_thread_num ());
        }

        #pragma omp section
        {
            printf ("thread %d section D!\n", omp_get_thread_num ());
        }

        #pragma omp section
        {
            printf ("thread %d section E!\n", omp_get_thread_num ());
        }
    }

    return 0;
}

thread 4 section A!thread 4 section E!thread 8 section D!thread 3 section C!thread 0 section B!

同parallel for 相似，可以写成如下形式：

#include <stdio.h>
#include "omp.h"

#define  NUM_THREADS 3

/*
 * Demonstrates two separate `sections` constructs inside a single
 * `parallel` region: sections A and B form the first group, sections C, D
 * and E form the second group.
 */
int main (void)
{
    omp_set_num_threads (NUM_THREADS);

    #pragma omp parallel
    {
        /* First group of sections. */
        #pragma omp sections
        {
            #pragma omp section
            {
                printf ("thread %d section A!\n", omp_get_thread_num ());
            }

            #pragma omp section
            {
                printf ("thread %d section B!\n", omp_get_thread_num ());
            }
        }

        /* Second group of sections. */
        #pragma omp sections
        {
            #pragma omp section
            {
                printf ("thread %d section C!\n", omp_get_thread_num ());
            }

            #pragma omp section
            {
                printf ("thread %d section D!\n", omp_get_thread_num ());
            }

            #pragma omp section
            {
                printf ("thread %d section E!\n", omp_get_thread_num ());
            }
        }
    }

    return 0;
}

ordered:

#include <stdio.h>
#include <omp.h>

/*
 * Demonstrates the `ordered` clause and directive: the loop is divided
 * among the threads, but the statement inside the `ordered` region is
 * executed in the sequential order of the loop iterations.
 */
int main (void)
{
    int i;

    omp_set_num_threads(5);

    /* The `ordered` clause on the loop is required for the `ordered`
     * directive inside the loop body. */
    #pragma omp parallel for ordered
    for (i = 1; i <= 5; i++) {
        /* The statement below runs in iteration order (i = 1, 2, ..., 5). */
        #pragma omp ordered
        printf ("i=%d,thread=%d\n", i, omp_get_thread_num());
    }

    return 0;
}

i=1,thread=0i=2,thread=1i=3,thread=2i=4,thread=3i=5,thread=4

critical:

这个指令可以起到加锁的效果：它所指定的代码在同一时间只允许一个线程执行。

/*
 * Summation program: adds up the integers from 1 to 100.
 *
 * Each thread handles the values id+1, id+1+nthread, id+1+2*nthread, ...
 * and adds them to the shared total inside a `critical` section.
 */
#include <stdio.h>
#include "omp.h"

int main()
{
    int sum = 0;

    #pragma omp parallel
    {
        int i = 0;
        int id = omp_get_thread_num();        /* number of the calling thread */
        int nthread = omp_get_num_threads();  /* number of threads in the current team */

        for (i = id + 1; i <= 100; i += nthread) {
            /* Mutual exclusion on sum: only one thread at a time may
             * execute the update below. */
            #pragma omp critical
            sum = sum + i;
        }
    }

    printf("sum=%d\n", sum);

    return 0;
}

sum=5050

使用锁

另一个互斥访问资源的方法就是使用锁

#include <stdio.h>
#include <omp.h>

/*
 * Adds up the integers from 1 to 100 in a parallel loop, using an OpenMP
 * lock to serialise the updates of the shared total.
 */
int main(void)
{
    int sum = 0;
    omp_lock_t sum_lock;            /* the lock guarding sum */

    omp_init_lock(&sum_lock);       /* a lock must be initialised before use */

    #pragma omp parallel for
    for (int i = 1; i <= 100; i++) {
        omp_set_lock(&sum_lock);    /* acquire: one thread at a time past here */
        sum += i;
        omp_unset_lock(&sum_lock);  /* release */
    }

    printf("sum=%d\n", sum);

    omp_destroy_lock(&sum_lock);    /* dispose of the lock */

    return 0;
}

sum=5050

上述代码中，只定义了一把锁。如果要定义并使用多把锁，请看下面的代码：

/*
 * Generates 1000 random numbers in the range 0..9 and counts how many
 * times each value occurs. histogram[] holds the counts; one lock per
 * histogram bucket guards its counter.
 */
#include <stdio.h>
#include <stdlib.h>
#include "omp.h"

enum { SAMPLE_COUNT = 1000, BUCKET_COUNT = 10 };

int main (void)
{
    int samples[SAMPLE_COUNT];
    omp_lock_t bucket_locks[BUCKET_COUNT];  /* one lock per bucket */
    int histogram[BUCKET_COUNT];
    int idx;

    omp_set_num_threads (5);
    srandom (10);

    /* Fill the sample array in parallel. */
    #pragma omp parallel for
    for (idx = 0; idx < SAMPLE_COUNT; idx++) {
        samples[idx] = random () % 10;
    }

    /* Initialise the locks and clear the counters in parallel. */
    #pragma omp parallel for
    for (idx = 0; idx < BUCKET_COUNT; idx++) {
        omp_init_lock (&bucket_locks[idx]);
        histogram[idx] = 0;
    }

    /* Count occurrences; each increment is done while holding the lock of
     * the bucket being updated. */
    #pragma omp parallel for
    for (idx = 0; idx < SAMPLE_COUNT; idx++) {
        int bucket = samples[idx];

        omp_set_lock (&bucket_locks[bucket]);
        histogram[bucket] += 1;
        omp_unset_lock (&bucket_locks[bucket]);
    }

    for (idx = 0; idx < BUCKET_COUNT; idx++) {
        printf ("histogram[%d]=%d\n", idx, histogram[idx]);
    }

    /* Destroy the locks in an ordinary single-threaded loop. */
    for (idx = 0; idx < BUCKET_COUNT; idx++) {
        omp_destroy_lock (&bucket_locks[idx]);
    }

    return 0;
}

histogram[0]=97histogram[1]=109histogram[2]=95histogram[3]=108histogram[4]=89histogram[5]=103histogram[6]=85histogram[7]=111histogram[8]=110histogram[9]=93

/*
 * Computes pi the ordinary way, without any multithreading.
 *
 * pi is approximated by summing 4/(1+x*x) at the midpoint of each of
 * num_steps equal-width intervals on [0, 1] and multiplying by the width.
 */
#include <stdio.h>

static long num_steps = 100000;   /* number of intervals (100000) */

int main(void)
{
    int i;
    double x, pi, sum = 0.0;
    double step = 1.0 / (double)num_steps;   /* width of one interval */

    for (i = 1; i <= num_steps; i++) {
        x = (i - 0.5) * step;                /* midpoint of interval i */
        sum = sum + 4.0 / (1.0 + x * x);
    }

    pi = step * sum;
    printf("%lf\n", pi);

    return 0;
}

3.141593

/*
 * Computes pi with multiple threads using a worksharing loop.
 *
 * Each thread accumulates its share of the midpoint sum into its own slot
 * of sum[]; the slots are combined serially after the parallel region.
 */
#include <stdio.h>
#include <omp.h>

static long num_steps = 100000;   /* number of intervals */
double step;                      /* width of one interval */

#define NUM_THREADS 2

int main (void)
{
    int i;
    double pi;
    /* Zero-initialised so that a slot stays well-defined even if the
     * runtime provides fewer than NUM_THREADS threads. */
    double sum[NUM_THREADS] = {0.0};

    step = 1.0 / (double) num_steps;
    omp_set_num_threads(NUM_THREADS);   /* request NUM_THREADS threads */

    #pragma omp parallel
    {
        double x;   /* declared inside the region: private to each thread */
        int id;

        id = omp_get_thread_num();
        sum[id] = 0;

        /* The loop iterations are divided among the threads of the team. */
        #pragma omp for
        for (i = 0; i < num_steps; i++) {
            x = (i + 0.5) * step;            /* midpoint of interval i */
            sum[id] += 4.0 / (1.0 + x * x);
        }
    }

    /* Combine the per-thread partial sums. */
    for (i = 0, pi = 0.0; i < NUM_THREADS; i++)
        pi += sum[i] * step;

    printf("%lf\n", pi);

    return 0;
}

0 0