openMP编程(上篇)之指令和锁
来源:互联网 发布:python二分法 编辑:程序博客网 时间:2024/05/22 06:47
parallel :
/*
 * Minimal "parallel" demo: every thread of the team executes the whole
 * parallel block, so each thread prints the greeting and then its own
 * five loop iterations.
 */
#include "omp.h"    /* OpenMP runtime API */
#include "stdio.h"

#define NUM_THREADS 4

int main(void)
{
    omp_set_num_threads(NUM_THREADS);   /* request NUM_THREADS threads for the next parallel region */

    #pragma omp parallel
    {
        /*
         * The counter is declared inside the parallel block so each thread
         * owns a private copy.  A counter declared before the block would be
         * shared, and all threads would race on it while looping.
         */
        int i;

        printf("hello world! \n");
        for (i = 0; i < 5; i++)
            printf("i=%d,thread = %d\n", i, omp_get_thread_num());
    }

    return 0;
}
hello world! i=0,thread = 0i=1,thread = 0i=2,thread = 0i=3,thread = 0i=4,thread = 0hello world! i=0,thread = 3i=1,thread = 3i=2,thread = 3i=3,thread = 3i=4,thread = 3hello world! i=0,thread = 1i=1,thread = 1i=2,thread = 1i=3,thread = 1i=4,thread = 1hello world! i=0,thread = 2i=1,thread = 2i=2,thread = 2i=3,thread = 2i=4,thread = 2
parallel for :
涉及到for循环时,往往需要用到 parallel for 指令。
/*
 * "parallel for" demo: the iterations of the first loop are divided among
 * the threads of the team; the second loop is an ordinary serial loop.
 */
#include "omp.h"
#include "stdio.h"

#define NUM_THREADS 3

int main(void)
{
    int i, j;

    omp_set_num_threads(NUM_THREADS);

    /*
     * The combined directive applies only to the for loop that immediately
     * follows it; that loop's iterations are split across the threads.
     */
    #pragma omp parallel for
    for (i = 0; i < 5; i++)
        printf("i= %d,thread=%d\n", i, omp_get_thread_num());

    /* Plain loop outside any parallel region: executed by a single thread. */
    for (j = 0; j < 4; j++)
        printf("j= %d,thread=%d\n", j, omp_get_thread_num());

    return 0;
}
i= 0,thread=0i= 1,thread=0i= 4,thread=2i= 2,thread=1i= 3,thread=1j= 0,thread=0j= 1,thread=0j= 2,thread=0j= 3,thread=0
这种写法很有局限,就是 #pragma omp parallel for 只能作用到紧跟着的for循环,也就是说,该指令后面的第一条语句只能是for循环,不能是其他代码,因为这个写法为for循环专属。可以将上述程序写成如下形式:
/*
 * Split form of "parallel for": one parallel region containing a statement
 * executed by every thread plus two work-shared loops.
 */
#include "omp.h"
#include "stdio.h"

#define NUM_THREADS 3

int main(void)
{
    int i, j;

    omp_set_num_threads(NUM_THREADS);

    #pragma omp parallel
    {
        /* Executed once by every thread of the team. */
        printf("HelloWorld! , thread=%d\n", omp_get_thread_num());

        /* Iterations of this loop are divided among the threads. */
        #pragma omp for
        for (i = 0; i < 5; i++) {
            printf("i= %d,thread=%d\n", i, omp_get_thread_num());
        }

        /* A second work-shared loop inside the same parallel region. */
        #pragma omp for
        for (j = 0; j < 4; j++) {
            printf("j= %d,thread=%d\n", j, omp_get_thread_num());
        }
    }

    return 0;
}
HelloWorld! , thread=0i= 0,thread=0i= 1,thread=0HelloWorld! , thread=2i= 4,thread=2HelloWorld! , thread=1i= 2,thread=1i= 3,thread=1j= 0,thread=0j= 1,thread=0j= 2,thread=1j= 3,thread=1
可见,第二种写法完全能够完成对for循环的拆分并行,而且能够多次对多个for循环进行操作。更好的是,这种写法衍生了另一种功能:像输出 HelloWorld 的那条语句一样,放在并行块中、for循环之外的语句会被所有的线程各执行一次。如果for循环需要为每个线程赋值一个变量,那么这个变量的赋值可以放在此输出语句的位置,示例请看文章最后的例子。
barrier:
/*
 * Barrier demo: explicit barriers separate the greeting phase and the two
 * work-shared loops so the phases do not interleave.
 */
#include <stdio.h>
#include "omp.h"

int main(void)
{
    omp_set_num_threads(5);

    #pragma omp parallel
    {
        printf("hello world!,thread=%d\n", omp_get_thread_num());

        /* No thread goes past this point until every thread has printed its greeting. */
        #pragma omp barrier

        #pragma omp for
        for (int a = 0; a < 5; a++)
            printf("i= %d,thread=%d\n", a, omp_get_thread_num());

        /*
         * Wait until the whole first loop is finished.  (A work-shared loop
         * without "nowait" already ends with an implicit barrier; the
         * explicit one is kept to make the synchronisation point visible.)
         */
        #pragma omp barrier

        #pragma omp for
        for (int b = 0; b < 5; b++)
            printf("j= %d ,thread= %d\n", b, omp_get_thread_num());
    }

    return 0;
}
hello world!,thread=4hello world!,thread=1hello world!,thread=3hello world!,thread=2hello world!,thread=0i= 4,thread=4i= 0,thread=0i= 3,thread=3i= 1,thread=1i= 2,thread=2j= 0 ,thread= 0j= 1 ,thread= 1j= 2 ,thread= 2j= 4 ,thread= 4j= 3 ,thread= 3
master / single :
看了对于for循环的并行之后,产生了一个新的问题,如果要在两个并行的for循环之间插入一个单线程执行的语句,应该如下做:
/*
 * Two separate "parallel for" regions with a serial statement between them.
 * Code outside a parallel region is run by a single thread.
 */
#include "omp.h"
#include "stdio.h"

#define NUM_THREADS 5

int main(void)
{
    omp_set_num_threads(NUM_THREADS);

    #pragma omp parallel for
    for (int a = 0; a < 4; a++)
        printf("i = %d ,thread=%d \n", a, omp_get_thread_num());

    /* Between the two parallel loops: only one thread executes this. */
    printf("I am a single thread %d \n", omp_get_thread_num());

    #pragma omp parallel for
    for (int b = 0; b < 4; b++)
        printf("j = %d ,thread=%d \n", b, omp_get_thread_num());

    return 0;
}
i = 0 ,thread=0 i = 3 ,thread=3 i = 2 ,thread=2 i = 1 ,thread=1 I am a single thread 0 j = 3 ,thread=3 j = 1 ,thread=1 j = 0 ,thread=0 j = 2 ,thread=2
但是上述的程序看起来很麻烦,master和single指令就是解决这个问题的:
/*
 * master/single demo: one parallel region holds two work-shared loops with
 * a statement in between that is executed by exactly one thread.
 */
#include <stdio.h>
#include "omp.h"

#define NUM_THREADS 5

int main(void)
{
    omp_set_num_threads(NUM_THREADS);

    #pragma omp parallel
    {
        #pragma omp for
        for (int a = 0; a < 4; a++)
            printf("i= %d, thread= %d\n", a, omp_get_thread_num());

        #pragma omp barrier

        /*
         * "single": the next statement is run by one (unspecified) thread of
         * the team.  Replacing the directive with "#pragma omp master" would
         * make the master thread run it instead.
         */
        #pragma omp single
        printf("I am a single thread ! thread= %d\n", omp_get_thread_num());

        #pragma omp barrier

        #pragma omp for
        for (int b = 0; b < 5; b++)
            printf("j= %d, thread= %d\n", b, omp_get_thread_num());
    }

    return 0;
}
i= 2, thread= 2i= 0, thread= 0i= 1, thread= 1i= 3, thread= 3I am a single thread ! thread= 2j= 2, thread= 2j= 0, thread= 0j= 3, thread= 3j= 1, thread= 1j= 4, thread= 4
效果是一样的:master 指定由主线程(线程0)执行,而 single 由线程组中任意一个线程(通常是最先到达的那个)执行。另外,single 结束处带有隐式同步(barrier),master 则没有。
parallel sections:
/*
 * "parallel sections" demo: each section is an independent unit of work
 * that is handed to one thread of the team; sections may run concurrently.
 */
#include <stdio.h>
#include "omp.h"

#define NUM_THREADS 10

int main(void)
{
    omp_set_num_threads(NUM_THREADS);

    #pragma omp parallel sections
    {
        #pragma omp section
        { printf("thread %d section A!\n", omp_get_thread_num()); }

        #pragma omp section
        { printf("thread %d section B!\n", omp_get_thread_num()); }

        #pragma omp section
        { printf("thread %d section C!\n", omp_get_thread_num()); }

        #pragma omp section
        { printf("thread %d section D!\n", omp_get_thread_num()); }

        #pragma omp section
        { printf("thread %d section E!\n", omp_get_thread_num()); }
    }

    return 0;
}
thread 4 section A!thread 4 section E!thread 8 section D!thread 3 section C!thread 0 section B!
同 parallel for 相似,可以写成如下形式:
/*
 * Split form of "parallel sections": one parallel region containing two
 * consecutive "sections" constructs.
 */
#include <stdio.h>
#include "omp.h"

#define NUM_THREADS 3

int main(void)
{
    omp_set_num_threads(NUM_THREADS);

    #pragma omp parallel
    {
        /* First group: sections A and B. */
        #pragma omp sections
        {
            #pragma omp section
            { printf("thread %d section A!\n", omp_get_thread_num()); }

            #pragma omp section
            { printf("thread %d section B!\n", omp_get_thread_num()); }
        }

        /* Second group: sections C, D and E. */
        #pragma omp sections
        {
            #pragma omp section
            { printf("thread %d section C!\n", omp_get_thread_num()); }

            #pragma omp section
            { printf("thread %d section D!\n", omp_get_thread_num()); }

            #pragma omp section
            { printf("thread %d section E!\n", omp_get_thread_num()); }
        }
    }

    return 0;
}
ordered:
/*
 * "ordered" demo: the loop is work-shared across threads, but the ordered
 * region inside the body is executed in the loop's sequential iteration
 * order.
 */
#include <stdio.h>
#include <omp.h>

/* Explicit return type: implicit int is not valid since C99. */
int main(void)
{
    int i;

    omp_set_num_threads(5);

    /* The "ordered" clause is required on the loop for the inner ordered region. */
    #pragma omp parallel for ordered
    for (i = 1; i <= 5; i++) {
        /* The statement below runs in iteration order i = 1, 2, ..., 5. */
        #pragma omp ordered
        printf("i=%d,thread=%d\n", i, omp_get_thread_num());
    }

    return 0;
}
i=1,thread=0i=2,thread=1i=3,thread=2i=4,thread=3i=5,thread=4
critical:
这个指令可以起到加锁的效果,所指定的代码同一时间只允许一个线程进行操作
/*
 * Summation demo: adds the integers 1..100 using a critical section to
 * protect the shared accumulator.
 */
#include <stdio.h>
#include "omp.h"

int main(void)
{
    int sum = 0;

    #pragma omp parallel
    {
        int tid = omp_get_thread_num();         /* number of the calling thread */
        int team_size = omp_get_num_threads();  /* threads active in this parallel region */

        /* Thread tid handles tid+1, tid+1+team_size, tid+1+2*team_size, ... */
        for (int value = tid + 1; value <= 100; value += team_size) {
            /* Mutual exclusion: only one thread at a time may update sum. */
            #pragma omp critical
            {
                sum += value;
            }
        }
    }

    printf("sum=%d\n", sum);
    return 0;
}
sum=5050
使用锁
另一个互斥访问资源的方法就是使用锁
/*
 * Summation demo using an explicit OpenMP lock instead of a critical
 * section to serialise updates of the shared accumulator.
 */
#include <stdio.h>
#include <omp.h>

int main(void)
{
    int sum = 0;
    omp_lock_t sum_lock;            /* the single lock guarding sum */

    omp_init_lock(&sum_lock);       /* a lock must be initialised before use */

    #pragma omp parallel for
    for (int value = 1; value <= 100; value++) {
        omp_set_lock(&sum_lock);    /* acquire: only one thread may touch sum */
        sum += value;
        omp_unset_lock(&sum_lock);  /* release */
    }

    printf("sum=%d\n", sum);

    omp_destroy_lock(&sum_lock);    /* dispose of the lock */
    return 0;
}
sum=5050
上述代码中,只定义了一把锁,如果要定义并使用多把锁,看下面的代码:
/*
 * Generates 1000 random numbers in the range 0..9 and counts how often each
 * value occurs.  histogram[] holds the counts; every bucket is protected by
 * its own lock.
 */
#include <stdio.h>
#include <stdlib.h>
#include "omp.h"

int main(void)
{
    enum { SAMPLE_COUNT = 1000, BUCKET_COUNT = 10 };

    int samples[SAMPLE_COUNT];
    omp_lock_t bucket_locks[BUCKET_COUNT];   /* one lock per histogram bucket */
    int histogram[BUCKET_COUNT];
    int k;

    omp_set_num_threads(5);
    srandom(10);

    /* Fill the sample array in parallel. */
    #pragma omp parallel for
    for (k = 0; k < SAMPLE_COUNT; k++)
        samples[k] = random() % 10;

    /* Initialise the locks and clear the histogram in parallel. */
    #pragma omp parallel for
    for (k = 0; k < BUCKET_COUNT; k++) {
        omp_init_lock(&bucket_locks[k]);
        histogram[k] = 0;
    }

    /* Count occurrences; only the bucket being updated is locked. */
    #pragma omp parallel for
    for (k = 0; k < SAMPLE_COUNT; k++) {
        int bucket = samples[k];

        omp_set_lock(&bucket_locks[bucket]);
        histogram[bucket] += 1;
        omp_unset_lock(&bucket_locks[bucket]);
    }

    for (k = 0; k < BUCKET_COUNT; k++)
        printf("histogram[%d]=%d\n", k, histogram[k]);

    /* Destroy the locks serially (single thread). */
    for (k = 0; k < BUCKET_COUNT; k++)
        omp_destroy_lock(&bucket_locks[k]);

    return 0;
}
histogram[0]=97histogram[1]=109histogram[2]=95histogram[3]=108histogram[4]=89histogram[5]=103histogram[6]=85histogram[7]=111histogram[8]=110histogram[9]=93
/*
 * Serial computation of pi (no multithreading): midpoint-rule integration
 * of 4 / (1 + x^2) over [0, 1].
 */
#include <stdio.h>

static long num_steps = 100000;   /* number of integration intervals */

int main(void)
{
    long i;   /* same type as num_steps, so the comparison is not mixed-width */
    double x, pi, sum = 0.0;
    double step = 1.0 / (double)num_steps;

    for (i = 1; i <= num_steps; i++) {
        x = (i - 0.5) * step;           /* midpoint of the i-th interval */
        sum = sum + 4.0 / (1.0 + x * x);
    }

    pi = step * sum;
    printf("%lf\n", pi);
    return 0;
}
3.141593
/*
 * Multithreaded computation of pi: the integration loop is work-shared with
 * "omp for" and every thread accumulates into its own slot of sum[].
 */
#include <stdio.h>
#include <omp.h>

#define NUM_THREADS 2

static long num_steps = 100000;   /* number of integration intervals */

int main(void)
{
    /*
     * Zero-initialised up front: omp_set_num_threads() is only a request and
     * the runtime may provide fewer threads.  A slot whose thread never runs
     * must contribute 0 to the final sum instead of an indeterminate value.
     */
    double sum[NUM_THREADS] = {0.0};
    double step = 1.0 / (double)num_steps;
    double pi = 0.0;
    int t;

    omp_set_num_threads(NUM_THREADS);   /* request NUM_THREADS threads */

    #pragma omp parallel
    {
        int id = omp_get_thread_num();  /* index of this thread's partial sum */
        double x;                       /* per-thread midpoint */
        long i;

        #pragma omp for
        for (i = 0; i < num_steps; i++) {
            x = (i + 0.5) * step;
            sum[id] += 4.0 / (1.0 + x * x);
        }
    }

    /* Combine the per-thread partial sums serially. */
    for (t = 0; t < NUM_THREADS; t++)
        pi += sum[t] * step;

    printf("%lf\n", pi);
    return 0;
}
0 0
- openMP编程(上篇)之指令和锁
- OpenMP Tutorial学习笔记(3)OpenMP指令之指令格式和指令范围
- OpenMP指令之THREADPRIVATE指令
- OpenMP Tutorial学习笔记(9)OpenMP指令之THREADPRIVATE指令
- 并行编程之OpenMP
- OpenMP: OpenMP和MPI是并行编程
- 多核并行编程之OpenMP
- OpenMP: OpenMP编程指南
- OpenMP: OpenMP编程指南
- OpenMP 之for指令并行求和(学习笔记)
- OpenMP Tutorial学习笔记(4)OpenMP指令之同步构造(Parallel)
- OpenMP Tutorial学习笔记(5)OpenMP指令之共享工作构造(Work-Sharing)
- OpenMP Tutorial学习笔记(6)OpenMP指令之组合共享工作构造(Combined Work-Sharing)
- OpenMP Tutorial学习笔记(7)OpenMP指令之任务构造(Task Constructs)
- OpenMP Tutorial学习笔记(8)OpenMP指令之同步构造(Synchronization Constructs)
- OpenMP Tutorial学习笔记(10)OpenMP指令之数据范围属性的子句
- OpenMp编程
- OpenMP和MPI是并行编程
- Centos 7安装Gnome图形界面
- 数据的导出
- IMWeb训练营作业
- 十分钟学会Fiddler,学不会算我输
- 洛谷 P1550 [USACO08OCT]打井Watering Hole
- openMP编程(上篇)之指令和锁
- 【IMWeb训练营作业】---Vue2.0 学习(一)
- DHT12温湿度传感器STM32驱动IIC
- C++的new
- bzoj3771 Triple
- Android 官方数据绑定框架 Data Binding 简单使用
- 使用Calendar创建可视化日历
- 20170419 关于绘制余弦曲线和直线的一些问题
- js语句出现不能建立Date()