CBLAS/GotoBLAS2/MKL/CUDA HPL Configuration

来源:互联网 发布:淘宝批发市场 编辑:程序博客网 时间:2024/05/15 09:04

1. CBLAS

* Need BLAS Lib

TOPdir       = /home/shir/mv/hpl
INCdir       = $(TOPdir)/include
BINdir       = $(TOPdir)/bin
LIBdir       = $(TOPdir)/lib
HPLlib       = $(LIBdir)/libhpl.a

MPdir        = /home/shir/mv/mv2
MPinc        = -I/home/shir/mv/mv2/include
MPlib        = /home/shir/mv/mv2/lib/libmpich.a /home/shir/mv/mv2/lib/libmpichf90.a

LAdir        = /home/shir/mv/BLAS /home/shir/mv/CBLAS
LAinc        =
LAlib        = /home/shir/mv/CBLAS/lib/cblas_LINUX.a /home/shir/mv/BLAS/blas_LINUX.a

F2CDEFS      = -DAdd_ -DF77_INTEGER=int -DStringSunStyle
HPL_INCLUDES = -I$(INCdir) -I$(INCdir)/$(ARCH) $(LAinc) $(MPinc)
HPL_LIBS     = $(HPLlib) $(LAlib) $(MPlib) -lgfortran

HPL_OPTS     = -DHPL_CALL_CBLAS

HPL_DEFS     = $(F2CDEFS) $(HPL_OPTS) $(HPL_INCLUDES)
CC           = /home/shir/mv/mv2/bin/mpicc
CCNOOPT      = $(HPL_DEFS)
CCFLAGS      = $(HPL_DEFS)
#
LINKER       = /home/shir/mv/mv2/bin/mpif77
LINKFLAGS    =
#
ARCHIVER     = ar
ARFLAGS      = r
RANLIB       = echo


2. GotoBLAS2

LAdir       =
LAinc        =
LAlib       = /home/shir/mv/GotoBLAS2/libgoto2.a
F2CDEFS       = -DAdd_
HPL_OPTS     = -DHPL_COPY_L -DHPL_CALL_CBLAS -DHPL_DETAILED_TIMING
CC           = /home/shir/mv/mv2/bin/mpicc
CCFLAGS      = $(HPL_DEFS) -fomit-frame-pointer -O3 -funroll-loops
LINKER       = /home/shir/mv/mv2/bin/mpif77


3. MKL

LAdir        = /opt/intel/mkl
LAinc        = -I$(LAdir)/include
LAlib        = -L/opt/intel/mkl/lib/intel64  /opt/intel/mkl/lib/intel64/libmkl_intel_lp64.a -Wl,--start-group /opt/intel/mkl/lib/intel64/libmkl_sequential.a /opt/intel/mkl/lib/intel64/libmkl_core.a -Wl,--end-group  -L/opt/intel/mkl/lib/intel64 -lpthread -lm
# F2CDEFS: keep blank
F2CDEFS      =
CCFLAGS      = $(HPL_DEFS) -fomit-frame-pointer -O3 -funroll-loops
LINKER       = $(MPdir)/bin/mpicxx


4. CUDA

TOPdir = /home/shir/mv/hpl-2.0_FERMI_v13
# MPdir: the enable-shared version of mvapich2
MPdir        = /home/shir/cmvapich2
MPinc        = -I$(MPdir)/include
MPlib        = $(MPdir)/lib/libmpich.so
LAdir        = /opt/intel/mkl/lib/intel64
LAlib        = -L $(TOPdir)/src/cuda -ldgemm -g -L/usr/local/cuda/lib64 -L/usr/lib64 -lcuda -lcudart -lcublas -L$(LAdir) -lmkl_intel_lp64 -lmkl_intel_thread -lmkl_core -L/opt/intel/Compiler/11.1/069/lib/intel64 -liomp5 -lpthread
F2CDEFS      = -DAdd__ -DF77_INTEGER=int -DStringSunStyle
HPL_INCLUDES = -I$(INCdir) -I$(INCdir)/$(ARCH) $(LAinc) $(MPinc) -I/usr/local/cuda/include
HPL_LIBS     = $(HPLlib) $(LAlib) $(MPlib)
HPL_OPTS     =  -DCUDA
CC      = $(MPdir)/bin/mpicc
# Use -fopenmp; don't use -openmp
CCFLAGS = $(HPL_DEFS) -O3 -w -fomit-frame-pointer -funroll-loops -fopenmp
CCNOOPT      = $(HPL_DEFS) -O0 -w
LINKER       = $(MPdir)/bin/mpicxx


run_linpack注意

export LD_LIBRARY_PATH=$HPL_DIR/src/cuda:$LD_LIBRARY_PATH #保持原样

export MKL_NUM_THREADS=8 #改成数字8


module load intel/latest

LAdir        = /opt/intel/composer_xe_2011_sp1.9.293/mkl/lib/intel64

 LAlib        = -L $(TOPdir)/src/cuda -ldgemm -g -L/usr/local/cuda/lib64 -L/usr/lib64 -lcuda -lcudart -lcublas -L$(LAdir) -lmkl_intel_lp64 -lmkl_intel_thread -lmkl_core -L/opt/intel/composer_xe_2011_sp1.9.293/compiler/lib/intel64 -liomp5 -lpthread


Running Command

module load cuda intel

~/cmvapich2/bin/mpirun_rsh -np 2 -hostfile hosts MV2_ENABLE_AFFINITY=0 ./run_linpack

Check status

nvidia-smi #information for NVIDIA GPU

cat /proc/cpuinfo | less  # or meminfo

ssh gpu01 --> top --> 1 # CPU usage


continued


原创粉丝点击