C++宏，普通函数，内联函数的运行速度以及三者的差异

来源：互联网发布：淘宝买家素质差编辑：程序博客网时间：2024/05/18 18:55

下面论证一下：C/C++ 中的运行速度对比（宏 > 内联函数 > 普通函数），以及三者的差异。

首先用简单的代码验证一下：

#include <iostream>

#include <boost/timer.hpp>

// Function-like macro that expands to the sum of its two arguments.
// Both arguments and the whole expansion are parenthesized so the macro keeps
// its meaning inside larger expressions: with the bare form `x+y`,
// `2 * _SUM(a, b)` would expand to `2 * a+b`.
// NOTE(review): identifiers that start with an underscore followed by an
// uppercase letter are reserved for the implementation; the name is kept
// unchanged only because existing callers use it.
#define _SUM(x,y) ((x) + (y))
using std::cout;
using std::endl;
using boost::timer;

// Number of elements in each operand array; also the iteration count of every
// timed loop in main.
const int MAX_ARR_SIZE = 50000;

// Adds two integers. This is the ordinary (non-inline) function measured by
// the benchmark.
int sum1(int a,int b)
{
   const int total = a + b;
   return total;
}

// Adds two integers. Same computation as the ordinary function, but declared
// `inline` so the benchmark can compare the two forms.
inline int sum2(int a,int b)
{
   int total = a;
   total += b;
   return total;
}

// Benchmark driver.
//
// Fills two arrays with the values 0 .. MAX_ARR_SIZE-1, then adds them element
// by element three times: through the ordinary function sum1, through the
// inline function sum2 and through the macro _SUM. After each pass it prints
// the last sum and the time the pass took, taken as the difference between two
// boost::timer::elapsed() readings.
//
// Returns 0.
int main(void)
{
   // Static storage: as automatic variables these two arrays would need up to
   // ~800 KB of stack (2 * 50000 * sizeof(long)), which is close to or above
   // the default stack limit on some platforms.
   static long iarray1[MAX_ARR_SIZE];
   static long iarray2[MAX_ARR_SIZE];

   // One unsigned bound for all loops, so the loop conditions do not compare a
   // size_t index against a signed int.
   const size_t count = static_cast<size_t>(MAX_ARR_SIZE);
   const size_t last = count - 1;

   timer t1;
   cout<<t1.elapsed()<<endl;

   // Both arrays receive the same contents, so one pass fills them together.
   for (size_t idx = 0; idx < count; ++idx)
   {
       const long value = static_cast<long>(idx);
       iarray1[idx] = value;
       iarray2[idx] = value;
   }

   // Print the very value that is used as the baseline for the first pass.
   const double tt1 = t1.elapsed();
   cout << tt1 <<endl;

   // Pass 1: ordinary function call.
   for (size_t idx = 0; idx < count; ++idx)
   {
       long result = sum1(iarray1[idx], iarray2[idx]);
       if (idx == last)
       {
           cout << "非内联:" << iarray1[idx] << " + " << iarray2[idx]<<" = " << result << endl;
       }
   }
   cout << "执行非内联的sum1的时间为：" << t1.elapsed() - tt1<<endl;

   // Baseline for pass 2 is taken after the previous report has been printed,
   // so that output is not charged to the next measurement.
   const double tt2 = t1.elapsed();

   // Pass 2: inline function.
   for (size_t idx = 0; idx < count; ++idx)
   {
       long result = sum2(iarray1[idx], iarray2[idx]);
       if (idx == last)
       {
           cout << "内联:" << iarray1[idx] << " + " << iarray2[idx] << " = " << result << endl;
       }
   }
   cout << "执行内联的sum2的时间为：" << t1.elapsed() - tt2 << endl;

   const double tt3 = t1.elapsed();

   // Pass 3: macro, expanded in place by the preprocessor.
   for (size_t idx = 0; idx < count; ++idx)
   {
       long result = _SUM(iarray1[idx], iarray2[idx]);
       if (idx == last)
       {
           cout << "宏:" << iarray1[idx] << " + " << iarray2[idx] << " = " << result << endl;
       }
   }
   cout << "执行宏的_SUM的时间为：" << t1.elapsed() - tt3 << endl;

   return 0;
}

运行结果为下：

0
0.047
非内联:49999 + 49999 = 99998
执行非内联的sum1的时间为：0.014
内联:49999 + 49999 = 99998
执行内联的sum2的时间为：0.006
宏:49999 + 49999 = 99998
执行宏的_SUM的时间为：0.004

老爷机，性能比较差，但结果比较明显，宏>内联函数>普通函数

原因下面分析：

写个最简单的测试程序：

test1.c 这个测试下宏

#include<stdio.h>

/* Expands textually to `x+y`; neither the arguments nor the result are
   parenthesized. */
#define _SUM(x,y) x+y

/* Prints the value of _SUM(5,6) followed by a newline. */
/* NOTE(review): `void main()` is not one of the standard signatures for main;
   it is kept as written because the preprocessor and assembler listings that
   follow were produced from this exact text. */
void main()
{
printf("%d\n",_SUM(5,6));
}

对test1.c执行预编译 :

gcc -E test1.c -o test1.i

得到下面结果 test1.i

...以上省略啊（包含的头文件加进来）

# 2 "test1.c" 2

void main()
{
printf("%d\n",5 +6);
}

由此可见，在预编译时期，宏被展开了

再将预编译后的文件test1.i 进行编译

gcc -S test1.i -o test1.s

得到以下结果：test1.s

    .file   "test1.c"
   .section   .rodata
.LC0:
   .string   "%d\n"
   .text
.globl main
   .type   main, @function
# main for test1.c: passes the format string .LC0 and the constant 11 to printf.
main:
   # Set up the frame pointer.
   pushl   %ebp
   movl   %esp, %ebp
   # Round %esp down to a multiple of 16, then reserve 16 bytes for the
   # outgoing printf arguments.
   andl   $-16, %esp
   subl   $16, %esp
   movl   $.LC0, %eax
   movl   $11, 4(%esp)       # the expression 5 +6 was evaluated at compile time and turned into the immediate 11
   movl   %eax, (%esp)
   call   printf
   leave
   ret
   .size   main, .-main
   .ident   "GCC: (Ubuntu 4.4.3-4ubuntu5) 4.4.3"
   .section   .note.GNU-stack,"",@progbits

宏表达式被转换成了立即数

下面是测试代码2：普通函数的

test2.c

#include<stdio.h>

/* Returns the sum of x and y. Written as an ordinary (non-inline) function. */
int sum(int x,int y)
{
return x+y;
}

/* Prints the value returned by sum(5,6) followed by a newline. */
/* NOTE(review): `void main()` is not one of the standard signatures for main;
   it is kept as written because the preprocessor and assembler listings that
   follow were produced from this exact text. */
void main()
{
printf("%d\n",sum(5,6));
}

同样将其预编译：

gcc -E test2.c -o test2.i

得到下面代码：

....

# 2 "test2.c" 2

int sum(int x,int y)
{
return x+y;
}

void main()
{
printf("%d\n",sum(5,6));
}

再将其编译：

gcc -S test2.i -o test2.S

得到下面代码：

    .file   "test2.c"
   .text
.globl sum
   .type   sum, @function
# sum for test2.c: loads its two stack arguments, adds them and returns the
# result in %eax.
sum:
   pushl   %ebp
   movl   %esp, %ebp
   # The two arguments sit at 8(%ebp) and 12(%ebp).
   movl   12(%ebp), %eax
   movl   8(%ebp), %edx
   # leal is used as an adder here: %eax = %edx + %eax.
   leal   (%edx,%eax), %eax
   popl   %ebp
   ret
   .size   sum, .-sum
   .section   .rodata
.LC0:
   .string   "%d\n"
   .text
.globl main
   .type   main, @function
# main for test2.c: calls sum with the constants 5 and 6, then passes the value
# left in %eax to printf together with the format string .LC0.
main:
   pushl   %ebp
   movl   %esp, %ebp
   andl   $-16, %esp
   subl   $16, %esp
   # Store the two arguments for sum on the stack.
   movl   $6, 4(%esp)
   movl   $5, (%esp)
   call   sum                             # a real function call is emitted here, which is also what slows the program down
   movl   $.LC0, %edx
   # The value sum left in %eax becomes the second printf argument.
   movl   %eax, 4(%esp)
   movl   %edx, (%esp)
   call   printf
   leave
   ret
   .size   main, .-main
   .ident   "GCC: (Ubuntu 4.4.3-4ubuntu5) 4.4.3"
   .section   .note.GNU-stack,"",@progbits

可见上面程序产生了函数调用，即参数的压栈

下面是测试代码3:测试内联函数的

test3.c

#include<stdio.h>

// Declaration of sum carrying the GCC always_inline attribute.
inline int sum(int x, int y) __attribute__((always_inline)); // NOTE: this attribute is added so that the compiler always expands the inline function; without it the compiled output is no different from an ordinary function

// Returns the sum of x and y.
inline int sum(int x, int y)
{
return x + y;
}

// Prints the value of sum(5,6) followed by a newline.
// NOTE(review): `void main()` is not one of the standard signatures for main;
// it is kept as written because the preprocessor and assembler listings that
// follow were produced from this exact text.
void main()
{
printf("%d\n",sum(5,6));
}

同样先预编译：

gcc -E test3.c -o test3.i

得到下面代码： test3.i

....

# 2 "test3.c" 2

inline int sum(int x, int y) __attribute__((always_inline));

inline int sum(int x, int y)
{
return x + y;
}

void main()
{
printf("%d\n",sum(5,6));
}

由此可见，内联函数的展开不关预编译的事情，明显是在编译时展开的

下面将 test3.i编译

gcc -S test3.i -o test3.S

得到下面的代码 test3.S:

    .file   "test4.c"
   .text
.globl sum
   .type   sum, @function
# Out-of-line copy of sum for test3.c: loads its two stack arguments, adds them
# and returns the result in %eax. main below does not call it.
sum:
   pushl   %ebp
   movl   %esp, %ebp
   # The two arguments sit at 8(%ebp) and 12(%ebp).
   movl   12(%ebp), %eax
   movl   8(%ebp), %edx
   # leal is used as an adder here: %eax = %edx + %eax.
   leal   (%edx,%eax), %eax
   popl   %ebp
   ret
   .size   sum, .-sum
   .section   .rodata
.LC0:
   .string   "%d\n"
   .text
.globl main
   .type   main, @function
# main for test3.c: adds 5 and 6 in place (no call to sum) and passes the result
# to printf together with the format string .LC0.
main:
   pushl   %ebp
   movl   %esp, %ebp
   andl   $-16, %esp
   subl   $32, %esp
   # The constants 5 and 6 are stored in stack slots and loaded back into
   # registers.
   movl   $5, 28(%esp)
   movl   $6, 24(%esp)
   movl   24(%esp), %eax
   movl   28(%esp), %edx
   leal   (%edx,%eax), %eax # no function call is emitted here; the code of sum is expanded in place
   # Move the sum to %edx so that %eax can hold the format string address.
   movl   %eax, %edx
   movl   $.LC0, %eax
   movl   %edx, 4(%esp)
   movl   %eax, (%esp)
   call   printf
   leave
   ret
   .size   main, .-main
   .ident   "GCC: (Ubuntu 4.4.3-4ubuntu5) 4.4.3"
   .section   .note.GNU-stack,"",@progbits

上面可以清楚地看到，编译器将内联函数展开到了函数的调用处，省去了函数调用和压栈操作带来的开销，所以效率比普通函数调用要高一些

但是比起宏直接转换为立即数肯定还是要低点，毕竟内联函数展开后，代码量都比宏展开的多，很明显。而宏的话在预编译的时候就已经被展开了，在编译时期就是当成一个普通表达式处理，而无关任何函数调用的事情。

好，现在已经论证完，宏，普通函数，内联函数的差异，以及三者的运行速度问题了。关于三者什么时候用，什么情况下用，后面再讨论吧。

0 0