Windows下在GSL(GNU Scientific Library)中使用OpenBLAS库
来源:互联网 发布:淘宝 低版本接口不存在 编辑:程序博客网 时间:2024/05/02 00:21
GNU Scientific Library是一个用于科学计算的开源库,拥有丰富的数学及矩阵计算函数,但是其自带的矩阵计算库BLAS效率明显没有ATLAS、OpenBLAS等优化后的计算库高,本文主要介绍如何在GSL中链接OpenBLAS库。
编译OpenBLAS非常简单,只需要安装好mingw32或者mingw64,配合msys搭建好类unix的编译环境,然后cd到OpenBLAS的源码根目录下:
成功后:
注意在目前最新的0.28版中,USE_OPENMP=1在WIN64平台上貌似有错误,lapack_tesing无法通过
结果:
结果:
结果:
1. 下载并编译OpenBLAS库
OpenBLAS库最早是老外开发的GotoBLAS库,使用了大量的内联汇编来匹配不同计算平台的硬件差异,在Goto放弃更新GotoBLAS库后,中科院软件所的一帮人负责起了更新,并改名OpenBLAS,网址:https://github.com/xianyi/OpenBLAS编译OpenBLAS非常简单,只需要安装好mingw32或者mingw64,配合msys搭建好类unix的编译环境,然后cd到OpenBLAS的源码根目录下:
make BINARY=64 USE_OPENMP=1 USE_THREAD=1
成功后:
make install PREFIX="path to install"
注意在目前最新的0.28版中,USE_OPENMP=1在WIN64平台上貌似有错误,lapack_tesing无法通过
cd lapack-netlibmake lapack_testing
2. 下载并编译GSL库
a. 下载GSL源码
地址:http://www.gnu.org/software/gsl/
b. 下载VS2012版GSL工程
地址:http://brgladman.org/oldsite/computing/gnu_scientific_library.php
解压GSL源码文件,并将VS2012版工程文件解压到GSL源码的根目录下
本文中使用的是VS2010,只需把VS2012工程中的
<PlatformToolset>v110</PlatformToolset>全部替换为
<PlatformToolset>v100</PlatformToolset>即可。
c. 用OpenBLAS库编译GSL库
打开build.vc11目录下的gsl.dll.sln,首先编译gslhdrs,这个是VS2012版工程作者写的给GSL添加导出函数的软件包。
然后将步骤1中编译好的libopenblas.lib和libopenblas.dll放入dll/x64/release目录中,并将gsl库的链接依赖从cblas.lib改为libopenblas.lib
编译后得到的gsl.lib和gsl.dll即为所需的GSL库
3. 测试
a. 测试代码1
#include <stdio.h> #include <gsl/gsl_cblas.h>#pragma comment (lib, "libopenblas.lib")//#pragma comment (lib, "gsl.lib") typedef struct { float real; float imag; } CBLAS_TEST_COMPLEX; int main() { /*CBLAS_TEST_COMPLEX a[10*20], b[20*30], c[10*30]; CBLAS_TEST_COMPLEX alpha = {1.0, 0.0}; CBLAS_TEST_COMPLEX beta = {0.0, 0.0}; cblas_cgemm(CblasRowMajor, CblasNoTrans, CblasNoTrans, 10, 30, 20, &alpha, a, 20, b, 30, &beta, c, 30);*/ int i,j,k; CBLAS_TEST_COMPLEX a[2*2], b[2*3], c[2*3]; for(i=0;i<4;i++) { a[i].real=1.0; a[i].imag=2.0; } for(i=0;i<6;i++) { b[i].real=2.0; b[i].imag=3.0; } //初始化为0 for(i=0;i<6;i++) { c[i].real=0.0; c[i].imag=0.0; } CBLAS_TEST_COMPLEX alpha = {1.0, 0.0}; CBLAS_TEST_COMPLEX beta = {0.0, 0.0}; cblas_cgemm(CblasRowMajor,CblasNoTrans,CblasNoTrans,2,3,2, &alpha,a,2,b,3,&beta,c,3); printf("A[]=\n"); for(i=0;i<4;i++) printf("%.2f+(%.2fi)\n",a[0].real,a[0].imag); printf("B[]=\n"); for(i=0;i<6;i++) printf("%.2f +(%.2fi)\n",b[0].real,b[0].imag); printf("C=A*B\n"); for(i=0;i<6;i++) printf("%.2f +(%.2fi)\n",c[0].real,c[0].imag); }
结果:
b. 测试代码2
#include <stdio.h>#include <gsl/gsl_cblas.h>#pragma comment (lib, "libopenblas.lib")//#pragma comment (lib, "gsl.lib")intmain (){int lda = 3;float A[] = { 0.11, 0.12, 0.13,0.21, 0.22, 0.23 };int ldb = 2;float B[] = { 1011, 1012,1021, 1022,1031, 1032 };int ldc = 2;float C[] = { 0.00, 0.00,0.00, 0.00 };/* Compute C = A B */cblas_sgemm(CblasRowMajor, CblasNoTrans, CblasNoTrans, 2, 2, 3, 1.0, A, lda, B, ldb, 0.0, C, ldc);printf("[ %g, %g\n", C[0], C[1]);printf(" %g, %g ]\n", C[2], C[3]);return 0; }
结果:
c. 测试代码3
#include <iostream>#include <gsl/gsl_sf.h>//#pragma comment (lib, "libopenblas.lib")#pragma comment (lib, "gsl.lib")int main(){ std::cout << gsl_sf_gamma_inc( 1.5, 0.5 ) << std::endl; std::cout << gsl_sf_gamma_inc_Q( 1.5, 0.5 ) << std::endl; std::cout << gsl_sf_gamma_inc_P( 1.5, 0.5 ) << std::endl;}
结果:
d. 测试代码4
/* C source code is found in dgemm_example.c */#define min(x,y) (((x) < (y)) ? (x) : (y)) #include <stdio.h> #include <stdlib.h> #include "cblas.h" #pragma comment (lib, "libopenblas.dll.a") int main(){double *A, *B, *C;int m, n, k, i, j;double alpha, beta;int thread = openblas_get_parallel();penblas_set_num_threads(thread);printf("Threads = %d\n", thread);char *cpuname = new char[50];cpuname = openblas_get_corename();printf("CPU name = %s\n", cpuname);printf("\n This example computes real matrix C=alpha*A*B+beta*C using \n"" Intel MKL function dgemm, where A, B, and C are matrices and \n"" alpha and beta are double precision scalars\n\n");m = 2000, k = 200, n = 1000;printf(" Initializing data for matrix multiplication C=A*B for matrix \n"" A(%ix%i) and matrix B(%ix%i)\n\n", m, k, k, n);alpha = 1.0; beta = 0.0;printf(" Allocating memory for matrices aligned on 64-byte boundary for better \n"" performance \n\n");A = new double[m*k]; //(double *)mkl_malloc( m*k*sizeof( double ), 64 ); B = new double[k*n]; //(double *)mkl_malloc( k*n*sizeof( double ), 64 ); C = new double[m*n]; //(double *)mkl_malloc( m*n*sizeof( double ), 64 ); if (A == NULL || B == NULL || C == NULL) {printf("\n ERROR: Can't allocate memory for matrices. Aborting... \n\n");delete A; //mkl_free(A); delete B; //mkl_free(B); delete C; //mkl_free(C); return 1;}printf(" Intializing matrix data \n\n");for (i = 0; i < (m*k); i++) {A[i] = (double)(i + 1);}for (i = 0; i < (k*n); i++) {B[i] = (double)(-i - 1);}for (i = 0; i < (m*n); i++) {C[i] = 0.0;}printf(" Computing matrix product using Intel MKL dgemm function via CBLAS interface \n\n");cblas_dgemm(CblasRowMajor, CblasNoTrans, CblasNoTrans,m, n, k, alpha, A, k, B, n, beta, C, n);printf("\n Computations completed.\n\n");printf(" Top left corner of matrix A: \n");for (i = 0; i < min(m, 6); i++) {for (j = 0; j < min(k, 6); j++) {printf("%12.0f", A[j + i*k]);}printf("\n");}printf("\n Top left corner of matrix B: \n");for (i = 0; i < min(k, 6); i++) {for (j = 0; j < min(n, 6); j++) {printf("%12.0f", B[j + i*n]);}printf("\n");}printf("\n Top left corner of matrix C: \n");for (i = 0; i < min(m, 6); i++) {for (j = 0; j < min(n, 6); j++) {printf("%12.5G", C[j + i*n]);}printf("\n");}printf("\n Deallocating memory \n\n");delete A; //mkl_free(A); delete B; //mkl_free(B); delete C; //mkl_free(C); printf(" Example completed. \n\n");return 0;}参考资料:Multiplying Matrices Using dgemm
e. 测试代码5
#include "stdio.h"#include "stdlib.h"#include "cblas.h"#define WIN32#include <time.h>#ifdef WIN32#include <windows.h>#else#include <sys/time.h>#endif#ifdef WIN32intgettimeofday(struct timeval *tp, void *tzp){time_t clock;struct tm tm;SYSTEMTIME wtm;GetLocalTime(&wtm);tm.tm_year = wtm.wYear - 1900;tm.tm_mon = wtm.wMonth - 1;tm.tm_mday = wtm.wDay;tm.tm_hour = wtm.wHour;tm.tm_min = wtm.wMinute;tm.tm_sec = wtm.wSecond;tm. tm_isdst = -1;clock = mktime(&tm);tp->tv_sec = clock;tp->tv_usec = wtm.wMilliseconds * 1000;return (0);}#endif#pragma comment (lib, "libopenblas1.lib") //extern void dgemm_(char*, char*, int*, int*,int*, double*, double*, int*, double*, int*, double*, double*, int*); int main(int argc, char* argv[]){int i;printf("test!\n");// if(argc<4)// {// printf("Input Error\n");// return 1;// } int m = atoi(argv[1]);int n = atoi(argv[2]);int k = atoi(argv[3]);int thread = atoi(argv[4]);int sizeofa = m * k;int sizeofb = k * n;int sizeofc = m * n;char ta = 'N';char tb = 'N';double alpha = 1.2;double beta = 0.001; struct timeval start,finish;double duration; double* A = new double[sizeofa];//(double*)malloc(sizeof(double) * sizeofa);double* B = new double[sizeofb];//(double*)malloc(sizeof(double) * sizeofb);double* C = new double[sizeofc];//(double*)malloc(sizeof(double) * sizeofc); srand((unsigned)time(NULL)); for (i=0; i<sizeofa; i++)A[i] = i%3+1;//(rand()%100)/10.0; for (i=0; i<sizeofb; i++)B[i] = i%3+1;//(rand()%100)/10.0; for (i=0; i<sizeofc; i++)C[i] = i%3+1;//(rand()%100)/10.0;//#if 0printf("m=%d,n=%d,k=%d,alpha=%lf,beta=%lf,sizeofc=%d\n",m,n,k,alpha,beta,sizeofc);openblas_set_num_threads(thread);gettimeofday(&start, NULL); cblas_dgemm(CblasColMajor, CblasNoTrans, CblasNoTrans, m, n, k, alpha, A, m, B, k, beta, C, m);gettimeofday(&finish, NULL); duration = ((double)(finish.tv_sec-start.tv_sec)*1000000 + (double)(finish.tv_usec-start.tv_usec)) / 1000000;double gflops = 2.0 * m *n*k;gflops = gflops/duration*1.0e-6; FILE *fp;fp = fopen("timeDGEMM.txt", "a");fprintf(fp, "%d threads -> %dx%dx%d\t%lf s\t%lf MFLOPS\n", thread,m, n, k, duration, gflops);fclose(fp); delete A;//free(A);delete B;//free(B);delete C;//free(C);return 0;}
参考资料:time_clbas_dgemm.c
- Windows下在GSL(GNU Scientific Library)中使用OpenBLAS库
- Windows下使用GSL(GNU Scientific Library)
- GSL - GNU Scientific Library 在 windows 上的使用
- Windows下使用VC++编译GNU科学计算库 GNU Scientific Library(GSL)
- Windows下使用VC++编译GNU科学计算库 GNU Scientific Library(GSL)
- GSL(GNU Scientific Library)在vs2010下的配置
- GSL(GNU Scientific Library)在vs2010下的配置
- Qt 环境下使用Gsl( GNU Scientific Library)库
- Visual Stdio 环境下使用 GSL (GNU Scientific Library)
- GSL - GNU Scientific Library
- GSL--GNU Scientific Library
- GSL--GNU Scientific Library
- GSL - GNU Scientific Library
- linux 下安装gsl(GNU Scientific Library)
- 数值计算库gsl(GNU Scientific Library)
- 使用GNU Scientific Library(GSL)及GSL的配置
- GSL--GNU Scientific Library 小记
- C/C++ GNU Scientific Library(GSL) for Windows
- 九度OJ 题目1532:棋盘寻宝扩展(动态规划)
- HDU1019 Least Common Multiple 就是求最小公倍数
- C++函数中那些不可以被声明为虚函数的函数
- gdb调试多进程程序和多线程程序
- 两道容斥题
- Windows下在GSL(GNU Scientific Library)中使用OpenBLAS库
- Killing An Oracle Session Using SQL
- Oracle存储过程基本语法
- struts2使用struts2-json-plugin, 返回json值为空..
- java验证码-- jsp +servlet-- 根据网上整理
- POJ 2676 Sudoku
- 非阻塞connect的实现
- 如何尽快熟悉一个行业
- Ubuntu编译Linux内核过程