加速指令集和计时的应用

来源:互联网 发布:中国移动网络投诉电话 编辑:程序博客网 时间:2024/05/18 01:48
#define WIN  #include <Windows.h>#include <intrin.h>  #include <stdlib.h>  #include <math.h>  #include <iostream>using namespace std;#define N 4*100000      // 注意:必须是4的倍数,否则使用SSE指令计算,要进行一些处理,从而保证正确。  _MM_ALIGN16 float op1[N];_MM_ALIGN16 float op2[N];_MM_ALIGN16 float result1[N];_MM_ALIGN16 float result2[N];void init(){    for (int i = 0; i < N; i++)    {        op1[i] = (float)rand() / (float)RAND_MAX;        op2[i] = (float)rand() / (float)RAND_MAX;    }}void checkResult(int debug){    bool isSame = true;    for (int i = 0; i < N; i++)    {        if (debug)        {            cout << "result1:" << result1[i] << "result2:" << result2[i] << endl;        }        else        {            if (fabs(result1[i] - result2[i]) > 0.000001)            {                isSame = false;                break;            }        }    }    if (!debug) {        if (isSame)            printf("Result is Same\n");        else            printf("Result is not same\n");    }}void add1(){    for (int i = 0; i < N; i++)        result1[i] = op1[i] + op2[i];}void add2(){    __m128  a;    __m128  b;    __m128  c;    for (int i = 0; i < N; i = i + 4)    {        // Load          a = _mm_load_ps(op1 + i);        b = _mm_load_ps(op2 + i);        c = _mm_add_ps(a, b);   // c = a + b          _mm_store_ps(result2 + i, c);    }}int main(){    init();    _LARGE_INTEGER time_start;    /*开始时间*/    _LARGE_INTEGER time_over;        /*结束时间*/    double dqFreq;                /*计时器频率*/    LARGE_INTEGER f;            /*计时器频率*/    QueryPerformanceFrequency(&f);    dqFreq = (double)f.QuadPart;    QueryPerformanceCounter(&time_start);    //---    printf("Add a vector:\n");    add1();    //    QueryPerformanceCounter(&time_over);    cout << ((time_over.QuadPart - time_start.QuadPart) / dqFreq) << endl;//单位为秒,精度为1000 000/(cpu主频)微秒    printf("\n");    _LARGE_INTEGER time_start1;    /*开始时间*/    _LARGE_INTEGER time_over1;        /*结束时间*/    double dqFreq1;                /*计时器频率*/    LARGE_INTEGER f1;            /*计时器频率*/    QueryPerformanceFrequency(&f1);    dqFreq1 = (double)f.QuadPart;    QueryPerformanceCounter(&time_start1);    printf("Add a vector with SSE instructions:\n");    add2();    QueryPerformanceCounter(&time_over1);    cout << ((time_over1.QuadPart - time_start1.QuadPart) / dqFreq1) << endl;    printf("\n");    checkResult(0);    return 0;}
0 0
原创粉丝点击