Windows下在GSL(GNU Scientific Library)中使用OpenBLAS库

来源:互联网 发布:淘宝 低版本接口不存在 编辑:程序博客网 时间:2024/05/02 00:21
GNU Scientific Library是一个用于科学计算的开源库,拥有丰富的数学及矩阵计算函数,但是其自带的矩阵计算库BLAS效率明显没有ATLAS、OpenBLAS等优化后的计算库高,本文主要介绍如何在GSL中链接OpenBLAS库。

1. 下载并编译OpenBLAS库

OpenBLAS库最早是老外开发的GotoBLAS库,使用了大量的内联汇编来匹配不同计算平台的硬件差异,在Goto放弃更新GotoBLAS库后,中科院软件所的一帮人负责起了更新,并改名OpenBLAS,网址:https://github.com/xianyi/OpenBLAS
编译OpenBLAS非常简单,只需要安装好mingw32或者mingw64,配合msys搭建好类unix的编译环境,然后cd到OpenBLAS的源码根目录下:

make BINARY=64 USE_OPENMP=1 USE_THREAD=1

成功后:

make install PREFIX="path to install"

注意在目前最新的0.28版中,USE_OPENMP=1在WIN64平台上貌似有错误,lapack_tesing无法通过

cd lapack-netlibmake lapack_testing

2. 下载并编译GSL库

a. 下载GSL源码

地址:http://www.gnu.org/software/gsl/


b. 下载VS2012版GSL工程

地址:http://brgladman.org/oldsite/computing/gnu_scientific_library.php

解压GSL源码文件,并将VS2012版工程文件解压到GSL源码的根目录下

本文中使用的是VS2010,只需把VS2012工程中的

<PlatformToolset>v110</PlatformToolset>
全部替换为

<PlatformToolset>v100</PlatformToolset>
即可。


c. 用OpenBLAS库编译GSL库

打开build.vc11目录下的gsl.dll.sln,首先编译gslhdrs,这个是VS2012版工程作者写的给GSL添加导出函数的软件包。

然后将步骤1中编译好的libopenblas.lib和libopenblas.dll放入dll/x64/release目录中,并将gsl库的链接依赖从cblas.lib改为libopenblas.lib

编译后得到的gsl.lib和gsl.dll即为所需的GSL库


3. 测试

a. 测试代码1

#include <stdio.h>  #include <gsl/gsl_cblas.h>#pragma comment (lib, "libopenblas.lib")//#pragma comment (lib, "gsl.lib")     typedef struct { float real; float imag; } CBLAS_TEST_COMPLEX;        int main()  {      /*CBLAS_TEST_COMPLEX a[10*20], b[20*30], c[10*30];     CBLAS_TEST_COMPLEX alpha = {1.0, 0.0};     CBLAS_TEST_COMPLEX beta = {0.0, 0.0};     cblas_cgemm(CblasRowMajor, CblasNoTrans, CblasNoTrans, 10, 30,                 20, &alpha, a, 20, b, 30, &beta, c, 30);*/                                                        int i,j,k;                 CBLAS_TEST_COMPLEX a[2*2], b[2*3], c[2*3];                   for(i=0;i<4;i++)      { a[i].real=1.0;          a[i].imag=2.0;      }                   for(i=0;i<6;i++)      { b[i].real=2.0;          b[i].imag=3.0;      }      //初始化为0      for(i=0;i<6;i++)      { c[i].real=0.0;          c[i].imag=0.0;      }                                CBLAS_TEST_COMPLEX alpha = {1.0, 0.0};      CBLAS_TEST_COMPLEX beta = {0.0, 0.0};      cblas_cgemm(CblasRowMajor,CblasNoTrans,CblasNoTrans,2,3,2, &alpha,a,2,b,3,&beta,c,3);                                printf("A[]=\n");                for(i=0;i<4;i++)                    printf("%.2f+(%.2fi)\n",a[0].real,a[0].imag);      printf("B[]=\n");       for(i=0;i<6;i++)            printf("%.2f +(%.2fi)\n",b[0].real,b[0].imag);      printf("C=A*B\n");      for(i=0;i<6;i++)                                printf("%.2f +(%.2fi)\n",c[0].real,c[0].imag);                                  }  

结果:


b. 测试代码2

#include <stdio.h>#include <gsl/gsl_cblas.h>#pragma comment (lib, "libopenblas.lib")//#pragma comment (lib, "gsl.lib")intmain (){int lda = 3;float A[] = { 0.11, 0.12, 0.13,0.21, 0.22, 0.23 };int ldb = 2;float B[] = { 1011, 1012,1021, 1022,1031, 1032 };int ldc = 2;float C[] = { 0.00, 0.00,0.00, 0.00 };/* Compute C = A B */cblas_sgemm(CblasRowMajor, CblasNoTrans, CblasNoTrans, 2, 2, 3, 1.0, A, lda, B, ldb, 0.0, C, ldc);printf("[ %g, %g\n", C[0], C[1]);printf("  %g, %g ]\n", C[2], C[3]);return 0;  }

结果:


c. 测试代码3

#include <iostream>#include <gsl/gsl_sf.h>//#pragma comment (lib, "libopenblas.lib")#pragma comment (lib, "gsl.lib")int main(){ std::cout << gsl_sf_gamma_inc( 1.5, 0.5 ) << std::endl; std::cout << gsl_sf_gamma_inc_Q( 1.5, 0.5 ) << std::endl; std::cout << gsl_sf_gamma_inc_P( 1.5, 0.5 ) << std::endl;}

结果:


d. 测试代码4

/* C source code is found in dgemm_example.c */#define min(x,y) (((x) < (y)) ? (x) : (y))  #include <stdio.h>  #include <stdlib.h>  #include "cblas.h"  #pragma comment (lib, "libopenblas.dll.a")  int main(){double *A, *B, *C;int m, n, k, i, j;double alpha, beta;int thread = openblas_get_parallel();penblas_set_num_threads(thread);printf("Threads = %d\n", thread);char *cpuname = new char[50];cpuname = openblas_get_corename();printf("CPU name = %s\n", cpuname);printf("\n This example computes real matrix C=alpha*A*B+beta*C using \n"" Intel MKL function dgemm, where A, B, and  C are matrices and \n"" alpha and beta are double precision scalars\n\n");m = 2000, k = 200, n = 1000;printf(" Initializing data for matrix multiplication C=A*B for matrix \n"" A(%ix%i) and matrix B(%ix%i)\n\n", m, k, k, n);alpha = 1.0; beta = 0.0;printf(" Allocating memory for matrices aligned on 64-byte boundary for better \n"" performance \n\n");A = new double[m*k]; //(double *)mkl_malloc( m*k*sizeof( double ), 64 );  B = new double[k*n]; //(double *)mkl_malloc( k*n*sizeof( double ), 64 );  C = new double[m*n]; //(double *)mkl_malloc( m*n*sizeof( double ), 64 );  if (A == NULL || B == NULL || C == NULL) {printf("\n ERROR: Can't allocate memory for matrices. Aborting... \n\n");delete A; //mkl_free(A);  delete B; //mkl_free(B);  delete C; //mkl_free(C);  return 1;}printf(" Intializing matrix data \n\n");for (i = 0; i < (m*k); i++) {A[i] = (double)(i + 1);}for (i = 0; i < (k*n); i++) {B[i] = (double)(-i - 1);}for (i = 0; i < (m*n); i++) {C[i] = 0.0;}printf(" Computing matrix product using Intel MKL dgemm function via CBLAS interface \n\n");cblas_dgemm(CblasRowMajor, CblasNoTrans, CblasNoTrans,m, n, k, alpha, A, k, B, n, beta, C, n);printf("\n Computations completed.\n\n");printf(" Top left corner of matrix A: \n");for (i = 0; i < min(m, 6); i++) {for (j = 0; j < min(k, 6); j++) {printf("%12.0f", A[j + i*k]);}printf("\n");}printf("\n Top left corner of matrix B: \n");for (i = 0; i < min(k, 6); i++) {for (j = 0; j < min(n, 6); j++) {printf("%12.0f", B[j + i*n]);}printf("\n");}printf("\n Top left corner of matrix C: \n");for (i = 0; i < min(m, 6); i++) {for (j = 0; j < min(n, 6); j++) {printf("%12.5G", C[j + i*n]);}printf("\n");}printf("\n Deallocating memory \n\n");delete A; //mkl_free(A);  delete B; //mkl_free(B);  delete C; //mkl_free(C);  printf(" Example completed. \n\n");return 0;}
参考资料:Multiplying Matrices Using dgemm

e. 测试代码5

#include "stdio.h"#include "stdlib.h"#include "cblas.h"#define WIN32#include <time.h>#ifdef WIN32#include <windows.h>#else#include <sys/time.h>#endif#ifdef WIN32intgettimeofday(struct timeval *tp, void *tzp){time_t clock;struct tm tm;SYSTEMTIME wtm;GetLocalTime(&wtm);tm.tm_year     = wtm.wYear - 1900;tm.tm_mon     = wtm.wMonth - 1;tm.tm_mday     = wtm.wDay;tm.tm_hour     = wtm.wHour;tm.tm_min     = wtm.wMinute;tm.tm_sec     = wtm.wSecond;tm. tm_isdst    = -1;clock = mktime(&tm);tp->tv_sec = clock;tp->tv_usec = wtm.wMilliseconds * 1000;return (0);}#endif#pragma comment (lib, "libopenblas1.lib") //extern void dgemm_(char*, char*, int*, int*,int*, double*, double*, int*, double*, int*, double*, double*, int*); int main(int argc, char* argv[]){int i;printf("test!\n");// if(argc<4)// {// printf("Input Error\n");// return 1;// } int m = atoi(argv[1]);int n = atoi(argv[2]);int k = atoi(argv[3]);int thread = atoi(argv[4]);int sizeofa = m * k;int sizeofb = k * n;int sizeofc = m * n;char ta = 'N';char tb = 'N';double alpha = 1.2;double beta = 0.001; struct timeval start,finish;double duration; double* A = new double[sizeofa];//(double*)malloc(sizeof(double) * sizeofa);double* B = new double[sizeofb];//(double*)malloc(sizeof(double) * sizeofb);double* C = new double[sizeofc];//(double*)malloc(sizeof(double) * sizeofc); srand((unsigned)time(NULL)); for (i=0; i<sizeofa; i++)A[i] = i%3+1;//(rand()%100)/10.0; for (i=0; i<sizeofb; i++)B[i] = i%3+1;//(rand()%100)/10.0; for (i=0; i<sizeofc; i++)C[i] = i%3+1;//(rand()%100)/10.0;//#if 0printf("m=%d,n=%d,k=%d,alpha=%lf,beta=%lf,sizeofc=%d\n",m,n,k,alpha,beta,sizeofc);openblas_set_num_threads(thread);gettimeofday(&start, NULL); cblas_dgemm(CblasColMajor, CblasNoTrans, CblasNoTrans, m, n, k, alpha, A, m, B, k, beta, C, m);gettimeofday(&finish, NULL); duration = ((double)(finish.tv_sec-start.tv_sec)*1000000 + (double)(finish.tv_usec-start.tv_usec)) / 1000000;double gflops = 2.0 * m *n*k;gflops = gflops/duration*1.0e-6; FILE *fp;fp = fopen("timeDGEMM.txt", "a");fprintf(fp, "%d threads -> %dx%dx%d\t%lf s\t%lf MFLOPS\n", thread,m, n, k, duration, gflops);fclose(fp); delete A;//free(A);delete B;//free(B);delete C;//free(C);return 0;}

参考资料:time_clbas_dgemm.c

原创粉丝点击