Strassen Parallel Iterative

来源:互联网 发布:sql注入举例 编辑:程序博客网 时间:2024/06/05 10:52

"2DArray2.h":

----------------------------------------------------------------------------------------------------------------------------------

#include <stddef.h>
#include <iostream>
//OR #define NULL ((void *)0)

template < typename T >
T **Allocate2DArray( int nRows, int nCols)
{
    //(step 1) allocate memory for array of elements of column
    T **ppi = new T *[nRows];

    //(step 2) allocate memory for array of elements of each row
    T *curPtr = new T [nRows * nCols];
 
 for (int j = 0; j < nRows * nCols; j++)
  curPtr[j] = 0;

    // Now point the pointers in the right place
    for( int i = 0; i < nRows; ++i)
    {
        *(ppi + i) = curPtr;
         curPtr += nCols;
    }
    return ppi;
}

template < typename T >
void Free2DArray(T** Array)
{
    delete [] *Array;
    delete [] Array;
}
// int mf, nf, pf;
// int ml, nl, pl;
template < typename T >
struct cloud {

 T **C;
 T **A, **B;
 T **M1, **M2, **M3, **M4, **M5, **M6, **M7;

 T **tAM1, **tBM1;
 T **tAM2;
 T **tBM3;
 T **tBM4;
 T **tAM5;
 T **tAM6, **tBM6;
 T **tAM7, **tBM7;
 
 T **A11, **A12, **A21, **A22;
 T **B11, **B12, **B21, **B22;
// T **C11, **C12, **C21, **C22;

 friend void PIcopyQtrMatrix3(double, int, double, int, int);
 friend void PIAddMatBlocks3(double, int, int, double, double);

 cloud () {
//  mf = nf = pf = 0;
//  ml = nl = pl = 0;
  C = NULL;
  A = B = NULL;
  M1 = M2 = M3 = M4 = M5 = M6 = M7 = NULL;

  tAM1 = tBM1 = NULL;
  tAM2 = NULL;
  tBM3 = NULL;
  tBM4 = NULL;
  tAM5 = NULL;
  tAM6 = tBM6 = NULL;
  tAM7 = tBM7 = NULL;

  A11 = A12 = A21 = A22 = NULL;
  B11 = B12 = B21 = B22 = NULL;
//  C11 = C12 = C21 = C22 = NULL;
 }
 cloud (
//  int _mf, int _nf, int _pf,
//  int _ml, int _nl, int _pl,
  T **_C,
  T **_A, T **_B,
  T **_M1,T **_M2,T **_M3,T **_M4,T **_M5,T **_M6,T **_M7,

  T **_tAM1, T **_tBM1,
  T **_tAM2,
  T **_tBM3,
  T **_tBM4,
  T **_tAM5,
  T **_tAM6, T **_tBM6,
  T **_tAM7, T **_tBM7,

  T **_A11, T **_A12, T **_A21, T **_A22,
  T **_B11, T **_B12, T **_B21, T **_B22
//  T **_C11, T **_C12, T **_C21, T **_C22
 ) {
//  mf = _mf, nf = _nf, pf = _pf,
//  ml = _ml, nl = _nl, pl = _pl,
  C = _C,
  A = _A, B = _B,
  M1=_M1, M2=_M2, M3=_M3, M4=_M4, M5=_M5, M6=_M6, M7=_M7;

  tAM1 = _tAM1, tBM1 = _tBM1,
  tAM2 = _tAM2,
  tBM3 = _tBM3,
  tBM4 = _tBM4,
  tAM5 = _tAM5,
  tAM6 = _tAM6, tBM6 = _tBM6,
  tAM7 = _tAM7, tBM7 = _tBM7,

  A11 = _A11, A12 = _A12, A21 = _A21, A22 = _A22,
  B11 = _B11, B12 = _B12, B21 = _B21, B22 = _B22;
//  C11 = _C11, C12 = _C12, C21 = _C21, C22 = _C22;
 }
 void unpack (
//  int (&_mf), int (&_nf), int (&_pf),
//  int (&_ml), int (&_nl), int (&_pl),
  T **(&_C),
  T **(&_A), T **(&_B),
  T **(&_M1),T **(&_M2),T **(&_M3),T **(&_M4),T **(&_M5),T **(&_M6),T **(&_M7),

  T **(&_tAM1), T **(&_tBM1),
  T **(&_tAM2),
  T **(&_tBM3),
  T **(&_tBM4),
  T **(&_tAM5),
  T **(&_tAM6), T **(&_tBM6),
  T **(&_tAM7), T **(&_tBM7),

  T **(&_A11), T **(&_A12), T **(&_A21), T **(&_A22),
  T **(&_B11), T **(&_B12), T **(&_B21), T **(&_B22)
//  T **(&_C11), T **(&_C12), T **(&_C21), T **(&_C22)
  ) {
//  _mf = mf, _nf = nf, _pf = pf,
//  _ml = ml, _nl = nl, _pl = pl,
  _C = C,
  _A = A, _B = B,
  _M1=M1, _M2=M2, _M3=M3, _M4=M4, _M5=M5, _M6=M6, _M7=M7;

  _tAM1 = tAM1, _tBM1 = tBM1,
  _tAM2 = tAM2,
  _tBM3 = tBM3,
  _tBM4 = tBM4,
  _tAM5 = tAM5,
  _tAM6 = tAM6, _tBM6 = tBM6,
  _tAM7 = tAM7, _tBM7 = tBM7,

  _A11 = A11, _A12 = A12, _A21 = A21, _A22 = A22,
  _B11 = B11, _B12 = B12, _B21 = B21, _B22 = B22;
//  _C11 = C11, _C12 = C12, _C21 = C21, _C22 = C22;
 }
 //M1 = (A11 + A22)*(B11 + B22)
 void section1(int m2, int n2, int p2, T **(&A), T **(&B), T **(&C)) {
  PIAddMatBlocks3(tAM1, m2, p2, A11, A22),
  PIAddMatBlocks3(tBM1, p2, n2, B11, B22),
  A = tAM1, B = tBM1, C = M1;
 }

 //M2 = (A21 + A22)*B11
 void section2(int m2, int n2, int p2, T **(&A), T **(&B), T **(&C)) {
  PIAddMatBlocks3(tAM2, m2, p2, A21, A22),
  A = tAM2, B = B11, C = M2 ;
 }

 //M3 = A11*(B12 - B22)
 void section3(int m2, int n2, int p2, T **(&A), T **(&B), T **(&C)) {
  PISubMatBlocks3(tBM3, p2, n2, B12, B22),
  A = A11, B = tBM3, C = M3 ;
 }

 //M4 = A22*(B21 - B11)
 void section4(int m2, int n2, int p2, T **(&A), T **(&B), T **(&C)) {
  PISubMatBlocks3(tBM4, p2, n2, B21, B11),
  A = A22, B = tBM4, C = M4 ;
 }

 //M5 = (A11 + A12)*B22
 void section5(int m2, int n2, int p2, T **(&A), T **(&B), T **(&C)) {
  PIAddMatBlocks3(tAM5, m2, p2, A11, A12),
  A = tAM5, B = B22, C = M5 ;
 }

 //M6 = (A21 - A11)*(B11 + B12)
 void section6(int m2, int n2, int p2, T **(&A), T **(&B), T **(&C)) {
  PISubMatBlocks3(tAM6, m2, p2, A21, A11),
  PIAddMatBlocks3(tBM6, p2, n2, B11, B12),
  A = tAM6, B = tBM6, C = M6 ;
 }

 //M7 = (A12 - A22)*(B21 + B22)
 void section7(int m2, int n2, int p2, T **(&A), T **(&B), T **(&C)) {
  PISubMatBlocks3(tAM7, m2, p2, A12, A22),
  PIAddMatBlocks3(tBM7, p2, n2, B21, B22),
  A = tAM7, B = tBM7, C = M7 ;
 }
};

----------------------------------------------------------------------------------------------------------------------------------

 

#include <assert.h>
#include <malloc.h>
#include <math.h>
#include <omp.h>
#include <stack>
#include <stdio.h>
#include <time.h>
#include "2DArray2.h"

using namespace std;

#define IGRAIN  8192
#define PIGRAIN 8192
#define PIMX 7
#define MaxThreads 16

stack< cloud<double> >PISt3[MaxThreads];
const int threads = omp_get_num_procs();
 
void PImatmultleaf3(int mf, int ml, int nf, int nl, int pf, int pl, double **A, double **B, double **C)   
/* 
  subroutine that uses the simple triple loop to multiply 
  a submatrix from A with a submatrix from B and store the 
  result in a submatrix of C.  
*/ 
// mf, ml; /* first and last+1 i index */ 
// nf, nl; /* first and last+1 j index */ 
// pf, pl; /* first and last+1 k index */ 
{      for (int k = pf; k < pl; k++) 
 for (int i = mf; i < ml; i++)
  for (int j = nf; j < nl; j++)
   
    C[i][j] += A[i][k]*B[k][j];
}  

void PIcopyQtrMatrix3(double **X, int m, double **Y, int mf, int nf)
{
 for (int i = 0; i < m; i++)
  X[i] = &Y[mf+i][nf];
}

void PIAddMatBlocks3(double **T, int m, int n, double **X, double **Y)
{
 for (int i = 0; i < m; i++)
  for (int j = 0; j < n; j++)
   T[i][j] = X[i][j] + Y[i][j];
}

void PISubMatBlocks3(double **T, int m, int n, double **X, double **Y)
{
 for (int i = 0; i < m; i++)
  for (int j = 0; j < n; j++)
   T[i][j] = X[i][j] - Y[i][j];
}

int PItreeLength3(int mf, int ml, int nf, int nl, int pf, int pl) {
 return  (int)( 2 + log( (ml-mf)*(nl-nf)*(pl-pf)/(double)IGRAIN )/log( 8.0 ) );
}

void PIstrassenMMult3(int mf, int ml, int nf, int nl, int pf, int pl, double **A, double **B, double **C,int id)

 int m2, n2, p2;

 double **A11, **A12, **A21, **A22;
 double **B11, **B12, **B21, **B22;
 double **C11, **C12, **C21, **C22;

 double **M1, **M2, **M3, **M4, **M5, **M6, **M7;

 double **tAM1, **tBM1, **tAM2, **tBM3, **tBM4, **tAM5, **tAM6, **tBM6, **tAM7, **tBM7;

 typedef void (cloud<double>::*PF)(int, int, int, double **&, double **&, double **&);
 PF section[8] = {
  NULL,
  &cloud<double>::section1, &cloud<double>::section2, &cloud<double>::section3,
  &cloud<double>::section4, &cloud<double>::section5, &cloud<double>::section6,
  &cloud<double>::section7
 }; 
 
 cloud<double> co;
 int t = 0;
 int *x = new int[PItreeLength3(mf, ml, nf, nl, pf, pl)]; 
 memset(x, 0, _msize(x));

 do {
  if ((ml-mf) * (nl-nf) * (pl-pf) < IGRAIN) {
   PImatmultleaf3 (mf, ml, nf, nl, pf, pl, A, B, C);
   ml <<= 1; nl <<= 1; pl <<= 1;
   x[t--] = 0;
  }
  else {
    m2 = (ml-mf)/2;   assert(m2!=1);
    n2 = (nl-nf)/2;   assert(n2!=1);
    p2 = (pl-pf)/2;   assert(p2!=1);

   if (x[t] == 0)
   {
    M1 = Allocate2DArray< double >(m2, n2);
    M2 = Allocate2DArray< double >(m2, n2);
    M3 = Allocate2DArray< double >(m2, n2);
    M4 = Allocate2DArray< double >(m2, n2);
    M5 = Allocate2DArray< double >(m2, n2);
    M6 = Allocate2DArray< double >(m2, n2);
    M7 = Allocate2DArray< double >(m2, n2);

    A11 = new double*[m2];
    A12 = new double*[m2];
    A21 = new double*[m2];
    A22 = new double*[m2];

    B11 = new double*[p2];
    B12 = new double*[p2];
    B21 = new double*[p2];
    B22 = new double*[p2];

    tAM1 = Allocate2DArray< double >(m2, p2);
    tBM1 = Allocate2DArray< double >(p2, n2);
    tAM2 = Allocate2DArray< double >(m2, p2);
    tBM3 = Allocate2DArray< double >(p2, n2);
    tBM4 = Allocate2DArray< double >(p2, n2);
    tAM5 = Allocate2DArray< double >(m2, p2);
    tAM6 = Allocate2DArray< double >(m2, p2);
    tBM6 = Allocate2DArray< double >(p2, n2);
    tAM7 = Allocate2DArray< double >(m2, p2);
    tBM7 = Allocate2DArray< double >(p2, n2);

    PIcopyQtrMatrix3(A11, m2, A, mf, pf);
    PIcopyQtrMatrix3(A12, m2, A, mf, p2);
    PIcopyQtrMatrix3(A21, m2, A, m2, pf);
    PIcopyQtrMatrix3(A22, m2, A, m2, p2);

    PIcopyQtrMatrix3(B11, p2, B, pf, nf);
    PIcopyQtrMatrix3(B12, p2, B, pf, n2);
    PIcopyQtrMatrix3(B21, p2, B, p2, nf);
    PIcopyQtrMatrix3(B22, p2, B, p2, n2);

    PISt3[id].push( cloud<double> (
      C, A, B,
      M1, M2, M3, M4, M5, M6, M7,
      tAM1, tBM1, tAM2, tBM3, tBM4, tAM5, tAM6, tBM6, tAM7, tBM7,
      A11, A12, A21, A22,
      B11, B12, B21, B22
     ));
   }// end if x[t] = 0

   cloud<double>&co = PISt3[id].top(); 
   x[t]++;
   if (x[t] <= 7)
   { //do each one of 7 sections
    
    (co.*section[x[t]])(m2, n2, p2, A, B, C);

    ml >>= 1; nl >>= 1; pl >>= 1;
    t++;
    continue;
   }

   co.unpack (
     C, A, B,
     M1, M2, M3, M4, M5, M6, M7,
     tAM1, tBM1, tAM2, tBM3, tBM4, tAM5, tAM6, tBM6, tAM7, tBM7,
     A11, A12, A21, A22,
     B11, B12, B21, B22
    );
   PISt3[id].pop();
   x[t--] = 0;
   ml <<= 1; nl <<= 1; pl <<= 1;

   C11 = new double*[m2];
   C12 = new double*[m2];
   C21 = new double*[m2];
   C22 = new double*[m2];

   PIcopyQtrMatrix3(C11, m2, C, mf, nf);
   PIcopyQtrMatrix3(C12, m2, C, mf, n2);
   PIcopyQtrMatrix3(C21, m2, C, m2, nf);
   PIcopyQtrMatrix3(C22, m2, C, m2, n2);

   for (int i = 0; i < m2; i++)
   for (int j = 0; j < n2; j++) {
    C11[i][j] = M1[i][j] + M4[i][j] - M5[i][j] + M7[i][j];
    C12[i][j] = M3[i][j] + M5[i][j];
    C21[i][j] = M2[i][j] + M4[i][j];
    C22[i][j] = M1[i][j] - M2[i][j] + M3[i][j] + M6[i][j];
   }

   Free2DArray< double >(M1);
   Free2DArray< double >(M2);
   Free2DArray< double >(M3);
   Free2DArray< double >(M4);
   Free2DArray< double >(M5);
   Free2DArray< double >(M6);
   Free2DArray< double >(M7);

   delete[] A11; delete[] A12; delete[] A21; delete[] A22;
   delete[] B11; delete[] B12; delete[] B21; delete[] B22;
   delete[] C11; delete[] C12; delete[] C21; delete[] C22;

   Free2DArray< double >(tAM1);
   Free2DArray< double >(tBM1);
   Free2DArray< double >(tAM2);
   Free2DArray< double >(tBM3);
   Free2DArray< double >(tBM4);
   Free2DArray< double >(tAM5);
   Free2DArray< double >(tAM6);
   Free2DArray< double >(tBM6);
   Free2DArray< double >(tAM7);
   Free2DArray< double >(tBM7);
  }// end else
 }
 while (PISt3[id].empty() == false);

 delete[] x;
}
/*
// serial iterative algorithm
void PImatmultS3(int m, int n, int p, double **A, double **B, double **C)
{
  int i,j;
  for (i=0; i < m; i++)
    for (j=0; j < n; j++)
       C[i][j] = 0;
    PIstrassenMMult3(0, m, 0, n, 0, p, A, B, C, 0);
}
*/

void PIcopy3(
  int &mf, int &ml, int &nf, int &nl, int &pf, int &pl, double **(&A), double **(&B), double **(&C),
  int mf2, int ml2, int nf2, int nl2, int pf2, int pl2, double **A2,   double **B2,   double **C2
  ) {
   mf = mf2;ml = ml2;nf = nf2;nl = nl2;pf = pf2;pl = pl2;A = A2;B = B2;C = C2;
}
// parrallel iterative algorithm
void PImatmultS3(int m, int n, int p, double **A, double **B, double **C)
{  
 int mf; int ml; int nf; int nl; int pf; int pl;
 int i,j;
 for (i=0; i < m; i++)
  for (j=0; j < n; j++)
       C[i][j] = 0;

 PIcopy3( mf, ml, nf, nl, pf, pl, A, B, C,  
   0,  m,  0,  n,  0,  p,  A, B, C);

 if ((ml-mf)*(nl-nf)*(pl-pf) < PIGRAIN) {
  PImatmultleaf3(mf, ml, nf, nl, pf, pl, A, B, C);
 }
 else {
  int m2 = (ml-mf)/2;    assert(m2!=1);
  int n2 = (nl-nf)/2;    assert(n2!=1);
  int p2 = (pl-pf)/2;    assert(p2!=1);

  double **M1 = Allocate2DArray< double >(m2, n2);
  double **M2 = Allocate2DArray< double >(m2, n2);
  double **M3 = Allocate2DArray< double >(m2, n2);
  double **M4 = Allocate2DArray< double >(m2, n2);
  double **M5 = Allocate2DArray< double >(m2, n2);
  double **M6 = Allocate2DArray< double >(m2, n2);
  double **M7 = Allocate2DArray< double >(m2, n2);

  double **A11 = new double*[m2];
  double **A12 = new double*[m2];
  double **A21 = new double*[m2];
  double **A22 = new double*[m2];

  double **B11 = new double*[p2];
  double **B12 = new double*[p2];
  double **B21 = new double*[p2];
  double **B22 = new double*[p2];

  double **C11 = new double*[m2];
  double **C12 = new double*[m2];
  double **C21 = new double*[m2];
  double **C22 = new double*[m2];

  double **tAM1 = Allocate2DArray< double >(m2, p2);
  double **tBM1 = Allocate2DArray< double >(p2, n2);
  double **tAM2 = Allocate2DArray< double >(m2, p2);
  double **tBM3 = Allocate2DArray< double >(p2, n2);
  double **tBM4 = Allocate2DArray< double >(p2, n2);
  double **tAM5 = Allocate2DArray< double >(m2, p2);
  double **tAM6 = Allocate2DArray< double >(m2, p2);
  double **tBM6 = Allocate2DArray< double >(p2, n2);
  double **tAM7 = Allocate2DArray< double >(m2, p2);
  double **tBM7 = Allocate2DArray< double >(p2, n2);

  PIcopyQtrMatrix3(A11, m2, A, mf, pf);
  PIcopyQtrMatrix3(A12, m2, A, mf, p2);
  PIcopyQtrMatrix3(A21, m2, A, m2, pf);
  PIcopyQtrMatrix3(A22, m2, A, m2, p2);

  PIcopyQtrMatrix3(B11, p2, B, pf, nf);
  PIcopyQtrMatrix3(B12, p2, B, pf, n2);
  PIcopyQtrMatrix3(B21, p2, B, p2, nf);
  PIcopyQtrMatrix3(B22, p2, B, p2, n2);

  PIcopyQtrMatrix3(C11, m2, C, mf, nf);
  PIcopyQtrMatrix3(C12, m2, C, mf, n2);
  PIcopyQtrMatrix3(C21, m2, C, m2, nf);
  PIcopyQtrMatrix3(C22, m2, C, m2, n2);

#pragma omp parallel sections num_threads(PIMX) default(shared)
  {  
#pragma omp section
   {
  //int id = omp_get_thread_num();
//    clock_t before, after;
//    before = clock();
 //M1 = (A11 + A22)*(B11 + B22)
  PIAddMatBlocks3(tAM1, m2, p2, A11, A22);
  PIAddMatBlocks3(tBM1, p2, n2, B11, B22);
  PIstrassenMMult3(0, m2, 0, n2, 0, p2, tAM1, tBM1, M1, 1);
//    after = clock();
//    printf("section M1 done in %7.4f msecs/n",(float)(after - before));
    //printf("id=%d/n",id);
   }
#pragma omp section
   {
  //int id = omp_get_thread_num();
//    clock_t before, after;
//    before = clock();
 //M6 = (A21 - A11)*(B11 + B12)
  PISubMatBlocks3(tAM6, m2, p2, A21, A11);
  PIAddMatBlocks3(tBM6, p2, n2, B11, B12);
  PIstrassenMMult3(0, m2, 0, n2, 0, p2, tAM6, tBM6, M6, 6);
//    after = clock();
//    printf("section M6 done in %7.4f msecs/n",(float)(after - before));
    //printf("id=%d/n",id);
   }
#pragma omp section
   {
  //int id = omp_get_thread_num();
//    clock_t before, after;
//    before = clock();
 //M7 = (A12 - A22)*(B21 + B22)
  PISubMatBlocks3(tAM7, m2, p2, A12, A22);
  PIAddMatBlocks3(tBM7, p2, n2, B21, B22);
  PIstrassenMMult3(0, m2, 0, n2, 0, p2, tAM7, tBM7, M7, 7);
//    after = clock();
//    printf("section M7 done in %7.4f msecs/n",(float)(after - before));
    //printf("id=%d/n",id);
   }
#pragma omp section
   {
  //int id = omp_get_thread_num();
//    clock_t before, after;
//    before = clock();
 //M2 = (A21 + A22)*B11
  PIAddMatBlocks3(tAM2, m2, p2, A21, A22);
  PIstrassenMMult3(0, m2, 0, n2, 0, p2, tAM2, B11, M2, 2);
//    after = clock();
//    printf("section M2 done in %7.4f msecs/n",(float)(after - before));
    //printf("id=%d/n",id);
   }
#pragma omp section
   {
  //int id = omp_get_thread_num();
//    clock_t before, after;
//    before = clock();
 //M3 = A11*(B12 - B22)
  PISubMatBlocks3(tBM3, p2, n2, B12, B22);
  PIstrassenMMult3(0, m2, 0, n2, 0, p2, A11, tBM3, M3, 3);
//    after = clock();
//    printf("section M3 done in %7.4f msecs/n",(float)(after - before));
    //printf("id=%d/n",id);
   }
#pragma omp section
   {
  //int id = omp_get_thread_num();
//    clock_t before, after;
//    before = clock();
 //M4 = A22*(B21 - B11)
  PISubMatBlocks3(tBM4, p2, n2, B21, B11);
  PIstrassenMMult3(0, m2, 0, n2, 0, p2, A22, tBM4, M4, 4);
//    after = clock();
//    printf("section M4 done in %7.4f msecs/n",(float)(after - before));
    //printf("id=%d/n",id);
   }
#pragma omp section
   {
  //int id = omp_get_thread_num();
//    clock_t before, after;
//    before = clock();
 //M5 = (A11 + A12)*B22
  PIAddMatBlocks3(tAM5, m2, p2, A11, A12);
  PIstrassenMMult3(0, m2, 0, n2, 0, p2, tAM5, B22, M5, 5);
//    after = clock();
//    printf("section M5 done in %7.4f msecs/n",(float)(after - before));
    //printf("id=%d/n",id);
   }
  }// end parallel sections

#pragma omp parallel for schedule(guided,20)
  for (int i = 0; i < m2; i++)
   for (int j = 0; j < n2; j++) {
    C11[i][j] = M1[i][j] + M4[i][j] - M5[i][j] + M7[i][j];
    C22[i][j] = M1[i][j] - M2[i][j] + M3[i][j] + M6[i][j];
    C12[i][j] = M3[i][j] + M5[i][j];
    C21[i][j] = M2[i][j] + M4[i][j];
   }

  Free2DArray< double >(M1);
  Free2DArray< double >(M2);
  Free2DArray< double >(M3);
  Free2DArray< double >(M4);
  Free2DArray< double >(M5);
  Free2DArray< double >(M6);
  Free2DArray< double >(M7);

  //只 delete 各行首地址,不 delete main 函数里原矩阵 A,B 各列元素内存
  delete[] A11; delete[] A12; delete[] A21; delete[] A22;
  delete[] B11; delete[] B12; delete[] B21; delete[] B22;
  delete[] C11; delete[] C12; delete[] C21; delete[] C22;

  Free2DArray< double >(tAM1);
  Free2DArray< double >(tBM1);
  Free2DArray< double >(tAM2);
  Free2DArray< double >(tBM3);
  Free2DArray< double >(tBM4);
  Free2DArray< double >(tAM5);
  Free2DArray< double >(tAM6);
  Free2DArray< double >(tBM6);
  Free2DArray< double >(tAM7);
  Free2DArray< double >(tBM7);
 }// end else
}

 

 http://hi.csdn.net/link.php?url=http://blog.csdn.net%2Ftiandyoin