pthread多线程加速示例(大型矩阵乘法)

来源:互联网 发布:期货软件开发 编辑:程序博客网 时间:2024/05/29 17:32
/*
 * pthread speed-up demo: multiplies two N x N matrices by splitting the rows
 * of the result across C worker threads, and writes progress to a trace log.
 *
 * NOTE(review): cmnheader.h is not visible here; it is presumed to provide
 * the pthread declarations and getpid() -- confirm.
 */
#include <cmnheader.h>
#include <time.h>
#include "MatrixLib.h"
#pragma comment(lib,"MatrixLib.lib")
#pragma warning(disable:4996)

/*
 * Terminates the program when a pthread call reported an error.
 *
 * str   - text identifying the failing call (written to the log as-is)
 * value - return code of the call; 0 means success
 * pflog - log stream that receives the failure message
 */
void checkResult(const char* str, int value, FILE* pflog)
{
    if (value != 0)
    {
        fprintf(pflog, "Failed with %d at %s", value, str);
        exit(1);
    }
}

/* Work description handed to one worker thread. */
typedef struct
{
    FILE* pflog;     /* shared log stream */
    double** R;      /* result matrix */
    double** A;      /* left operand */
    double** B;      /* right operand, already transposed by the caller */
    int start_row;   /* first row of R computed by this worker (inclusive) */
    int end_row;     /* one past the last row computed by this worker */
    int num_col;     /* matrix order (number of columns) */
    int index;       /* zero-based worker number, used only for logging */
} threadParm_t;

/*
 * Thread entry point: computes rows [start_row, end_row) of R = A * B^T,
 * where B holds the transposed right operand so the inner loop walks both
 * operands row-wise.
 *
 * param - pointer to the worker's threadParm_t
 * Returns NULL always.
 */
void *oneThread(void *param)
{
    threadParm_t *p = (threadParm_t *)param;

    /* The format has two conversions, so two arguments must be supplied:
       the process id and the worker number. */
    fprintf(p->pflog, "# Thread  \'%.8X %.8X\'  is now running.\n",
            (unsigned int)getpid(), (unsigned int)p->index);

    double** R = p->R;
    double** A = p->A;
    double** B = p->B;
    const int start_row = p->start_row;
    const int end_row = p->end_row;
    const int num_col = p->num_col;

    for (int i = start_row; i < end_row; ++i)
    {
        for (int j = 0; j < num_col; ++j)
        {
            double tmp = 0;
            for (int k = 0; k < num_col; ++k)
            {
                tmp += A[i][k] * B[j][k];
            }
            R[i][j] = tmp;
        }
    }
    return NULL;
}

/*
 * Runs one timed N x N multiplication using C worker threads and logs the
 * elapsed time.
 *
 * N     - matrix order
 * C     - number of worker threads
 * pflog - log stream
 *
 * NOTE(review): clock() is specified to report processor time; on platforms
 * where that is summed over all threads the logged figure is not wall-clock
 * time -- confirm on the target platform.
 */
void OneTry(const int N, const int C, FILE* pflog)
{
    fprintf(pflog,"== %4d * %4d Matrix Multiply, %d Threads. ==\n", N, N, C);
    if (N <= 0 || C <= 0)
    {
        /* Avoids the division by zero in N / C and zero/negative allocations. */
        fprintf(pflog,"!! Invalid matrix order or thread count\n");
        return;
    }

    clock_t start = clock();
    double** X = NewSquareMatrix(N);
    double** Y = NewSquareMatrix(N);
    double** Z = NewSquareMatrix(N);

    /* Give the operands defined contents before they are read. */
    RndInitSquareMat(Y, N);
    RndInitSquareMat(Z, N);
    TransformSquareMat(Z, N); /* transpose once so workers read B row-wise */

    const int inc_row = N / C;
    int start_row = 0;
    int i, rc;
    pthread_t* threads = new pthread_t[C];
    threadParm_t* tparams = new threadParm_t[C];

    for (i = 0; i < C; ++i)
    {
        /* Ranges are half-open, so the next range starts exactly at the
           previous end; the last worker also takes the N % C leftover rows. */
        const int end_row = (i == C - 1) ? N : start_row + inc_row;

        tparams[i].pflog = pflog;
        tparams[i].R = X;
        tparams[i].A = Y;
        tparams[i].B = Z;
        tparams[i].num_col = N;
        tparams[i].start_row = start_row;
        tparams[i].end_row = end_row;
        tparams[i].index = i;
        start_row = end_row;

        rc = pthread_create(&threads[i], NULL, oneThread, &tparams[i]);
        checkResult("!! pthread_create()\n", rc, pflog);
        fprintf(pflog,"**********  %2d of %2d threads created  **********\n", i + 1, C);
    }

    fprintf(pflog,"@ Waiting for worker threads' end...\n");

    /* pthread_join stores a void*, so the slots must be pointer-sized. */
    void** status = new void*[C];
    for (i = 0; i < C; ++i)
    {
        status[i] = NULL;
        rc = pthread_join(threads[i], &status[i]);
        checkResult("!! pthread_join()\n", rc, pflog);
    }

    fprintf(pflog,"@ Check all thread's results\n");
    for (i = 0; i < C; ++i)
    {
        if (status[i] != NULL)
        {
            fprintf(pflog,"!! Unexpected thread status\n");
        }
    }

    delete[] status;
    delete[] tparams;
    delete[] threads;

    /* Z is released right away, so it is not transposed back. */
    SafeDeleteSquareMat(X, N);
    SafeDeleteSquareMat(Y, N);
    SafeDeleteSquareMat(Z, N);

    clock_t finish = clock();
    fprintf(pflog,"@ All finished. Total time:%.8f(sec).\n\n",(finish-start)/(1.0*CLOCKS_PER_SEC));
}

/* Opens the trace log, runs one multiplication and waits for a key press. */
int main(int argc, char **argv)
{
    (void)argc;
    (void)argv;

    FILE* pflog = fopen("trace_log.txt", "a");
    if (pflog == NULL)
    {
        perror("trace_log.txt");
        return 1;
    }

    const int N = 4096, C = 32;
    printf("Matrix N=%d,Thread C=%d, now running...", N, C);

    time_t rawtime;
    time(&rawtime);
    tm* tminfo = localtime(&rawtime);
    fprintf(pflog, "NEW LOG @%s", asctime(tminfo));

    /* Use the same N and C that were just announced. */
    OneTry(N, C, pflog);

    fflush(pflog);
    fclose(pflog);
    printf("finshed!\n");
    system("pause");
    return 0;
}


cmnheader.h同之前有关pthread的文章
MatrixLib.dll是自己写的,代码如下
MatrixLib.h文件内容

/** 矩阵操作的定义* 用于导出DLL*/#ifndef MATRIX_LIB_H#define MATRIX_LIB_H// 兼容C版本导出符号#ifdef __cplusplus    // 定义DLLEXPORT时启用导出#ifdef DLLEXPORT#define MAPI extern "C" __declspec (dllexport)// 未定义DLLEXPORT宏时即为导入#else  /* DLLEXPORT */#define MAPI extern "C" __declspec (dllimport)#endif /* DLLEXPORT  */#else /* __cplusplus     */#ifdef DLLIMPORT#define MAPI  __declspec (dllexport)#else /* DLLIMPORT */#define MAPI  __declspec (dllimport)#endif /* DLLIMPORT */#endif /* __cplusplus */#include <stdio.h>#include <stdlib.h>#include <time.h>MAPI double** NewSquareMatrix(const int n);MAPI void TransformSquareMat(double** mat,const int n);MAPI double  SafeDeleteSquareMat(double** mat, const int n);MAPI double  SquareMatMultiply(double** R, double** A, double** B, const int n);MAPI double  SquareMatMultiplyTrans(double** R,double** A,double** B,const int n);MAPI double  RndInitSquareMat(double **mat, const int n);MAPI void TraceLogInfo(char* strInfo, FILE* pflog = stdout);MAPI void TraceLogDuration(double durationTime, FILE* pflog = stdout);#endif /* MATRIX_LIB_H */


MatrixLib.cpp文件内容

// 启用DLLEXPORT宏定义,声明将要导出DLL#define DLLEXPORT#include "MatrixLib.h"// 创建一个n阶方阵mat并分配内存,返回首地址MAPI double **NewSquareMatrix(const int n){double** mat = new double*[n];for (int i = 0; i < n; ++i){mat[i] = new double[n];}return mat;}// 方阵的转置MAPI void TransformSquareMat(double** mat,const int n){    double tmp;for(int i=0;i<n;++i)    for(int j=i+1;j<n;++j){tmp=mat[i][j];            mat[i][j]=mat[j][i];            mat[j][i]=tmp;}}// 删除方阵mat所占有的内存空间并置空指针MAPI double SafeDeleteSquareMat(double** mat, const int n){clock_t start = clock();if (mat != NULL){for (int i = 0; i < n; ++i){delete[] mat[i];mat[i] = NULL;}delete[] mat;mat = NULL;}clock_t finish = clock();return (1.0*(finish - start) / CLOCKS_PER_SEC);}// 矩阵乘法(方阵) R<-A*B (正常版本)MAPI double SquareMatMultiply(double** R, double** A, double** B,const int n){clock_t start = clock();for (int i = 0; i < n; ++i){for (int j = 0; j < n; ++j){double tmp = 0;for (int k = 0; k < n; ++k){tmp += A[i][k] * B[k][j];}R[i][j] = tmp;}}clock_t finish = clock();return (1.0*(finish - start) / CLOCKS_PER_SEC);}// 矩阵乘法(方阵) R<-A*B, 转置加速版本MAPI double SquareMatMultiplyTrans(double** R, double** A, double** B,const int n){    // 增加了两次转置的时间占用// 利用辅助空间O(n^2)可降至一次clock_t start = clock();TransformSquareMat(B,n); // 转置一次for (int i = 0; i < n; ++i){for (int j = 0; j < n; ++j){double tmp = 0;for (int k = 0; k < n; ++k){tmp += A[i][k] * B[j][k];  // 更少的跳跃(非跨行)}R[i][j] = tmp;}}TransformSquareMat(B,n); // 再次转置以恢复矩阵Bclock_t finish = clock();return (1.0*(finish - start) / CLOCKS_PER_SEC);}// 以随机数填充矩阵,返回操作持续时间MAPI double RndInitSquareMat(double **mat, const int n){clock_t start = clock();for (int i = 0; i < n; ++i){for (int j = 0; j < n; ++j){mat[i][j] = rand();}}clock_t finish = clock();return (1.0*(finish - start) / CLOCKS_PER_SEC);}// 将信息打印到(日志)文件MAPI void TraceLogInfo(char* pszInfo, FILE* pflog){fprintf(pflog, "%s\n", pszInfo);}// 将信息打印到(日志)文件MAPI void TraceLogDuration(double durationTime, FILE* pflog){fprintf(pflog, "DurationTime = %10.6f(sec).\n", 
durationTime);}


日志片段

NEW LOG @Fri Apr 18 16:42:44 2014== 4096 * 4096 Matrix Multiply, 32 Threads. ==**********   1 of 32 threads created  ********************   2 of 32 threads created  ********************   3 of 32 threads created  ********************   4 of 32 threads created  **********# Thread  '00000C48 00B728A0'  is now running.# Thread  '00000C48 00B72B80'  is now running.**********   5 of 32 threads created  **********# Thread  '00000C48 00B72E60'  is now running.# Thread  '00000C48 00B73140'  is now running.**********   6 of 32 threads created  ********************   7 of 32 threads created  ********************   8 of 32 threads created  ********************   9 of 32 threads created  ********************  10 of 32 threads created  ********************  11 of 32 threads created  ********************  12 of 32 threads created  ********************  13 of 32 threads created  ********************  14 of 32 threads created  ********************  15 of 32 threads created  ********************  16 of 32 threads created  ********************  17 of 32 threads created  **********# Thread  '00000C48 00B73640'  is now running.**********  18 of 32 threads created  ********************  19 of 32 threads created  ********************  20 of 32 threads created  ********************  21 of 32 threads created  ********************  22 of 32 threads created  ********************  23 of 32 threads created  ********************  24 of 32 threads created  ********************  25 of 32 threads created  ********************  26 of 32 threads created  ********************  27 of 32 threads created  ********************  28 of 32 threads created  ********************  29 of 32 threads created  ********************  30 of 32 threads created  ********************  31 of 32 threads created  ********************  32 of 32 threads created  **********@ Waiting for worker threads' end...# Thread  '00000C48 00B73920'  is now running.# Thread  '00000C48 00B731E8'  is now running.# Thread  '00000C48 
00B73290'  is now running.# Thread  '00000C48 00B73338'  is now running.# Thread  '00000C48 00B74248'  is now running.# Thread  '00000C48 00B747F0'  is now running.# Thread  '00000C48 00B74AD0'  is now running.# Thread  '00000C48 00B74DB0'  is now running.# Thread  '00000C48 00B78AB0'  is now running.# Thread  '00000C48 00B7B8A8'  is now running.# Thread  '00000C48 00B7BAA0'  is now running.# Thread  '00000C48 00B7B950'  is now running.# Thread  '00000C48 00B704C0'  is now running.# Thread  '00000C48 00B781E0'  is now running.# Thread  '00000C48 00B7ACD8'  is now running.# Thread  '00000C48 00B7AED0'  is now running.# Thread  '00000C48 00B7AE28'  is now running.# Thread  '00000C48 00B7B170'  is now running.# Thread  '00000C48 00B7B800'  is now running.# Thread  '00000C48 00B7AD80'  is now running.# Thread  '00000C48 00B7B9F8'  is now running.# Thread  '00000C48 00B7B218'  is now running.# Thread  '00000C48 00B7AF78'  is now running.# Thread  '00000C48 00B7B020'  is now running.# Thread  '00000C48 00B7B0C8'  is now running.# Thread  '00000C48 00B7B2C0'  is now running.# Thread  '00000C48 00B74510'  is now running.@ Check all thread's results@ All finished. Total time:44.18600000(sec).


0 0
原创粉丝点击