尾递归优化【-O2】

来源:互联网 发布:wp8软件 快手 编辑:程序博客网 时间:2024/04/27 01:12


用了“尾递归”,不一定有尾递归优化。优化要开启-O2选项才行!而且有其他约束条件哦。

c代码:

#include <stdio.h>#include <string.h>int geta(int n1, int n2, int n){     if(n<=1)        return n2;    else    {       // printf("&n1: %p\n&n2: %p\n&n:%p\n", &n1, &n2, &n);        return geta(n2,n1+n2,n-1);    }} #define N 30#define ASD(x) #xint main(){     printf("The " ASD(N) " %d\n", geta(1,1,N));    return 0;    }  

geta函数尾递归优化前:

(gdb) disas getaDump of assembler code for function geta:   0x080483e4 <+0>:push   ebp   0x080483e5 <+1>:mov    ebp,esp   0x080483e7 <+3>:sub    esp,0x18   0x080483ea <+6>:cmp    DWORD PTR [ebp+0x10],0x1   0x080483ee <+10>:jg     0x80483f5 <geta+17>   0x080483f0 <+12>:mov    eax,DWORD PTR [ebp+0xc]   0x080483f3 <+15>:jmp    0x8048416 <geta+50>   0x080483f5 <+17>:mov    eax,DWORD PTR [ebp+0x10]   0x080483f8 <+20>:lea    edx,[eax-0x1]   0x080483fb <+23>:mov    eax,DWORD PTR [ebp+0xc]   0x080483fe <+26>:mov    ecx,DWORD PTR [ebp+0x8]   0x08048401 <+29>:add    eax,ecx   0x08048403 <+31>:mov    DWORD PTR [esp+0x8],edx   0x08048407 <+35>:mov    DWORD PTR [esp+0x4],eax   0x0804840b <+39>:mov    eax,DWORD PTR [ebp+0xc]   0x0804840e <+42>:mov    DWORD PTR [esp],eax   0x08048411 <+45>:call   0x80483e4 <geta>      // !!函数递归调用!!   0x08048416 <+50>:leave     0x08048417 <+51>:ret    End of assembler dump.

优化后:

(gdb) disas getaDump of assembler code for function geta:   0x08048450 <+0>:push   ebx   0x08048451 <+1>:mov    edx,DWORD PTR [esp+0x10]   0x08048455 <+5>:mov    ecx,DWORD PTR [esp+0xc]   0x08048459 <+9>:mov    ebx,DWORD PTR [esp+0x8]   0x0804845d <+13>:cmp    edx,0x1   0x08048460 <+16>:mov    eax,ecx   0x08048462 <+18>:jg     0x804846c <geta+28>   0x08048464 <+20>:jmp    0x8048477 <geta+39>   0x08048466 <+22>:xchg   ax,ax   0x08048468 <+24>:mov    ebx,ecx   0x0804846a <+26>:mov    ecx,eax   0x0804846c <+28>:sub    edx,0x1   0x0804846f <+31>:cmp    edx,0x1   0x08048472 <+34>:lea    eax,[ebx+ecx*1]   0x08048475 <+37>:jne    0x8048468 <geta+24>   0x08048477 <+39>:pop    ebx   0x08048478 <+40>:ret    End of assembler dump.
优化后没有函数调用,栈帧不生长,相当于变递归为迭代了。


回到上一篇博客的例子,用尾递归计算Fibonacci,尾递归未优化,因此,尾递归虽然优化了时间性能(从指数级变为线性),但是空间复杂度依然是线性,比不上迭代版本的O(1)。这里加以优化,再对比。先再贴一遍代码:



PS:顺便提一下其他优化。

1. 不使用栈帧。call一个函数的时候,压入参数,push eip,然后就开始sub esp了,没有保存ebp和ebp切换的工作。

-fomit-frame-pointer

Dump of assembler code for function fib_rw(int, int, int):   0x080486e4 <+0>:sub    esp,0x1c                      <----没有针对ebp的操作   0x080486e7 <+3>:cmp    DWORD PTR [esp+0x28],0x1   0x080486ec <+8>:jg     0x80486f4 <fib_rw(int, int, int)+16>   0x080486ee <+10>:mov    eax,DWORD PTR [esp+0x24]   0x080486f2 <+14>:jmp    0x8048719 <fib_rw(int, int, int)+53>   0x080486f4 <+16>:mov    eax,DWORD PTR [esp+0x28]   0x080486f8 <+20>:lea    edx,[eax-0x1]   0x080486fb <+23>:mov    eax,DWORD PTR [esp+0x24]   0x080486ff <+27>:mov    ecx,DWORD PTR [esp+0x20]   0x08048703 <+31>:add    eax,ecx   0x08048705 <+33>:mov    DWORD PTR [esp+0x8],edx   0x08048709 <+37>:mov    DWORD PTR [esp+0x4],eax   0x0804870d <+41>:mov    eax,DWORD PTR [esp+0x24]   0x08048711 <+45>:mov    DWORD PTR [esp],eax   0x08048714 <+48>:call   0x80486e4 <fib_rw(int, int, int)>   0x08048719 <+53>:add    esp,0x1c   0x0804871c <+56>:ret    End of assembler dump.

2.循环展开。对编译时能确定执行几次的循环,进行展开。 -funroll-loops

3.尾递归单独的优化选项据说是

-foptimize-sibling-calls.
,但是我单独试了下好像没用。




测试:

#include <iostream>#include <sys/time.h>#include <stdlib.h>using namespace std;//尾递归int fib_rw(int a, int b, int n){    if(n<=1)return b;     // cout<<"a: "<<&a  <<" b: "<<&b<<" n: "<<&n<<endl;    return fib_rw(b, a+b, n-1);    }//迭代int fib_dp(int n){    int a=1;    int b=1;    int i;    for(i=2;i<=n;i++)    {        b=a+b;        a=b-a;    }    return b;}////// mainint main(int argc, char*argv[]){    int N=20;    int TIMES=1000;        if(argc>=2)        N=atoi(argv[1]);    if(argc>=3)        TIMES=atoi(argv[2]);        struct timeval begin,end;    int re;        ////////////////////////    gettimeofday(&begin,0); //尾递归    {        int i=TIMES;        while(--i)        {            re=fib_rw(1,1,N);        }    }        gettimeofday(&end,0);     if(begin.tv_usec>end.tv_usec)      {        end.tv_sec--;        end.tv_usec+=1000000;    }    cout<<"尾递归 "<<re<<" time: "<<end.tv_sec - begin.tv_sec<<" s "<<end.tv_usec - begin.tv_usec<<" us"<<endl;           ////////////////////////    gettimeofday(&begin,0); // 迭代    {        int i=TIMES;        while(--i)        {            re=fib_dp(N);        }    }        gettimeofday(&end,0);     if(begin.tv_usec>end.tv_usec)      {        end.tv_sec--;        end.tv_usec+=1000000;    }    cout<<"迭代  "<<re<<" time: "<<end.tv_sec - begin.tv_sec<<" s "<<end.tv_usec - begin.tv_usec<<" us"<<endl;     return 0;}

编译

chen@chen-book1:~$ g++ a.cpp -o a -O2

运行

chen@chen-book1:~$ ./a 5 1000
尾递归 8 time: 0 s 14 us
迭代  8 time: 0 s 12 us
chen@chen-book1:~$ ./a 10 1000
尾递归 89 time: 0 s 31 us
迭代  89 time: 0 s 24 us
chen@chen-book1:~$ ./a 15 1000
尾递归 987 time: 0 s 39 us
迭代  987 time: 0 s 37 us
chen@chen-book1:~$ ./a 20 1000
尾递归 10946 time: 0 s 51 us
迭代  10946 time: 0 s 49 us
chen@chen-book1:~$ ./a 25 1000
尾递归 121393 time: 0 s 61 us
迭代  121393 time: 0 s 62 us
chen@chen-book1:~$ ./a 30 1000
尾递归 1346269 time: 0 s 74 us
迭代  1346269 time: 0 s 74 us
chen@chen-book1:~$ ./a 35 1000
尾递归 14930352 time: 0 s 83 us
迭代  14930352 time: 0 s 87 us
chen@chen-book1:~$ ./a 40 1000
尾递归 165580141 time: 0 s 104 us
迭代  165580141 time: 0 s 100 us
chen@chen-book1:~$ ./a 45 1000
尾递归 1836311903 time: 0 s 106 us
迭代  1836311903 time: 0 s 112 us


蓝的是迭代版本(真直!)红的是尾递归,两者差不多吧~

原创粉丝点击