GCC-3.4.6源代码学习笔记(178)

来源:互联网 发布:福建安全知识网络竞赛 编辑:程序博客网 时间:2024/05/29 10:06

5.13.5.3.2.2.3.         普通的内联函数

处理完了强制内联函数(使用“always_inline”)后,剩下的函数由编译器便宜行事。只要我们不通过编译选项-fno-inline明确禁止函数内联,1326行的flag_really_no_inline就是0。

 

cgraph_decide_inline (continue)

 

1320 #ifdef ENABLE_CHECKING

1321   for (node = cgraph_nodes;node; node = node->next)

1322     if (node->aux || node->output)

1323       abort ();

1324 #endif

1325

1326   if (!flag_really_no_inline)

1327   {

1328     cgraph_decide_inlining_of_small_functions(inlined, inlined_callees);

1329 #ifdef ENABLE_CHECKING

1330     for (node =cgraph_nodes;node; node = node->next)

1331      if (node->aux || node->output)

1332         abort ();

1333 #endif

1334

1335     if (cgraph_dump_file)

1336       fprintf (cgraph_dump_file, "\nDeciding on functions called once:\n");

 

这里,参数inlined及inlined_callees作为缓存使用,其在前面所设置的内容无关紧要,将在后面被改写。下面的分析中使用了Fibonacci heaps来根据展开所带来的指令数增长量排序。关于Fibonacci heaps的细节这里不深究。

 

1100       static void

1101       cgraph_decide_inlining_of_small_functions (struct cgraph_node**inlined,      in cgraphunit.c

1102                                           struct cgraph_node **inlined_callees)

1103       {

1104        int i;

1105        struct cgraph_node *node;

1106        fibheap_t heap = fibheap_new ();

1107        struct fibnode **heap_node =

1108          xcalloc (cgraph_max_uid, sizeof (struct fibnode *));

1109        int ninlined, ninlined_callees;

1110  int max_insns = ((HOST_WIDEST_INT) initial_insns

1111              * (100 + PARAM_VALUE(PARAM_INLINE_UNIT_GROWTH)) / 100);

1112

1113   /* Put all inline candidates into the heap.  */

1114

1115   for (node = cgraph_nodes; node; node = node->next)

1116  {

1117    if (!node->local.inlinable || !node->callers

1118        || node->local.disregard_inline_limits)

1119      continue;

1120      

1121          if (!cgraph_default_inline_p(node))

1122          {

1123            cgraph_set_inline_failed (node,

1124                                 N_("--param max-inline-insns-single limit reached"));

1125            continue;

1126          }

1127          heap_node[node->uid] =

1128                 fibheap_insert (heap, cgraph_estimate_growth (node), node);

1129        }

 

因为现在是编译器做主,那么函数能否内联,除了它本身适合内联外,还取决于其展开大小。对于声明为“inline”的函数,这个限制是MAX_INLINE_INSNS_SINGLE(默认为500),对于没有声明为“inline”的函数,其限制则是MAX_INLINE_INSNS_AUTO(默认为100)。注意,单位是指令,不是行数。

 

1068 static bool

1069 cgraph_default_inline_p (struct cgraph_node *n)                                   in cgraphunit.c

1070 {

1071   if (!DECL_INLINE (n->decl) ||!DECL_SAVED_TREE (n->decl))

1072     return false;

1073   if (DECL_DECLARED_INLINE_P (n->decl))

1074     return n->global.insns < MAX_INLINE_INSNS_SINGLE;

1075   else

1076     return n->global.insns < MAX_INLINE_INSNS_AUTO;

1077 }

 

对于不能内联的函数,同时要把其失败的原因告诉其调用者,除非调用者已经被强制内联了(彼时,其inline_failed为NULL)。

 

1081 static void

1082 cgraph_set_inline_failed (struct cgraph_node *node, const char *reason)        in cgraphunit.c

1083 {

1084   struct cgraph_edge *e;

1085

1086   if (cgraph_dump_file)

1087     fprintf (cgraph_dump_file, "Inlining failed: %s\n", reason);

1088   for (e = node->callers; e; e = e->next_caller)

1089     if (e->inline_failed)

1090       e->inline_failed = reason;

1091 }

 

一旦函数的大小在限制之内,就把它加入Fibonacci heaps,不过作为键值使用的,则是展开它所带来的指令数的增加量。

 

918  static int

919  cgraph_estimate_growth (struct cgraph_node *node)                             in cgraphunit.c

920  {

921    int growth = 0;

922    int calls_saved = 0;

923    int clones_added = 0;

924    struct cgraph_edge *e;

925 

926    for (e = node->callers; e; e = e->next_caller)

927      if (e->inline_failed)

928      {

929        growth += ((cgraph_estimate_size_after_inlining(1, e->caller, node)

930                  -

931                  e->caller->global.insns)*e->caller->global.cloned_times);

932        calls_saved +=e->caller->global.cloned_times;

933        clones_added +=e->caller->global.cloned_times;

934      }

935 

936    /* ??? Wrong forself recursive functions or cases where we decide to not

937      inline for different reasons, but it is notbig deal as in that case

938      we will keep thebody around, but we will also avoid some inlining.  */

939    if (!node->needed &&!node->origin && !DECL_EXTERNAL (node->decl))

940      growth -= node->global.insns,clones_added--;

941 

942    if (!calls_saved)

943      calls_saved = 1;

944 

945    return growth;

946  }

 

不过因为现在还没有对调用者、被调用者进行分析,这是个比较粗略的估算(没有考虑该函数所直接、间接调用函数的大小)。随着下面对函数分析的深入,这个估算会趋向准确。也正是因为下面将频繁改写节点的键值,出于效率考虑,采用了Fibonacci heaps。

在1133行,fibheap_extract_min从Fibonacci heaps中移出键值最小的节点。在一开始,这个键值仅是该函数本身的指令数乘以被调用的次数。显然,从键值最小的函数开始处理,是很好的开始,它最有可能是在调用栈最底层的函数。

 

cgraph_decide_inlining_of_small_functions (continue)

 

1131        if (cgraph_dump_file)

1132          fprintf (cgraph_dump_file, "\nDeciding on smaller functions:\n");

1133        while (overall_insns <= max_insns&& (node = fibheap_extract_min (heap)))

1134        {

1135          struct cgraph_edge *e;

1136          int old_insns = overall_insns;

1137      

1138          heap_node[node->uid] = NULL;

1139          if (cgraph_dump_file)

1140             fprintf (cgraph_dump_file,

1141                   "\nConsidering %s with %i insns\n"

1142                   " Estimated growth is %+i insns.\n",

1143                   cgraph_node_name (node),node->global.insns,

1144                   cgraph_estimate_growth (node));

1145          if (!cgraph_default_inline_p(node))

1146          {

1147            cgraph_set_inline_failed (node,

1148               N_("--param max-inline-insns-single limit reached after inlining into the callee"));

1149            continue;

1150          }

1151          ninlined_callees = cgraph_inlined_callees(node, inlined_callees);

1152           for (e = node->callers; e; e = e->next_caller)

1153             if (e->inline_failed)

1154            {

1155              /* Marking recursive function inlinine has sane semanticand

1156                thus we should notwarn on it.  */

1157              if(e->caller == node)

1158              {

1159                e->inline_failed= "";

1160                continue;

1161              }

1162              ninlined = cgraph_inlined_into (e->caller, inlined);

1163              if(e->callee->output)

1164                e->inline_failed = "";

1165              if(e->callee->output

1166                  || !cgraph_check_inline_limits(e->caller, node, inlined,

1167                                           ninlined,&e->inline_failed))

1168               {

1169                 for(i = 0; i < ninlined; i++)

1170                   inlined[i]->output = 0, inlined[i]->aux= 0;

1171                 if (cgraph_dump_file)

1172                   fprintf (cgraph_dump_file, " Not inlining into %s.\n",

1173                         cgraph_node_name (e->caller));

1174                 continue;

1175               }

1176              cgraph_mark_inline (e->caller, node, inlined,ninlined,

1177                                inlined_callees, ninlined_callees);

1178              if(heap_node[e->caller->uid])

1179                fibheap_replace_key (heap,heap_node[e->caller->uid],

1180                                  cgraph_estimate_growth(e->caller));

1181      

1182              /* Size of the functions we updated into has changed, soupdate

1183                the keys.  */

1184               for (i = 0; i < ninlined; i++)

1185               {

1186                 inlined[i]->output = 0,inlined[i]->aux = 0;

1187                if(heap_node[inlined[i]->uid])

1188                   fibheap_replace_key (heap,heap_node[inlined[i]->uid],

1189                                    cgraph_estimate_growth(inlined[i]));

1190              }

1191              if (cgraph_dump_file)

1192                fprintf (cgraph_dump_file,

1193                      " Inlined into %s which now has %i insns.\n",

1194                       cgraph_node_name (e->caller),

1195                       e->caller->global.insns);

1196            }

1197

1198          /*Similarly all functions called by the function we just inlined

1199             are now called more times; update keys.  */

1200      

1201    for (e = node->callees; e; e = e->next_callee)

1202      if (e->inline_failed &&heap_node[e->callee->uid])

1203        fibheap_replace_key(heap, heap_node[e->callee->uid],

1204                          cgraph_estimate_growth(e->callee));

1205

1206    for (i = 0;i < ninlined_callees; i++)

1207    {

1208      struct cgraph_edge *e;

1209

1210      for (e = inlined_callees[i]->callees; e; e =e->next_callee)

1211              if (e->inline_failed &&heap_node[e->callee->uid])

1212          fibheap_replace_key (heap,heap_node[e->callee->uid],

1213                            cgraph_estimate_growth(e->callee));

1214

1215      inlined_callees[i]->output = 0;

1216      inlined_callees[i]->aux = 0;

1217    }

1218    if (cgraph_dump_file)

1219      fprintf (cgraph_dump_file,

1220            " Inlined %i times for a net change of %+i insns.\n",

1221            node->global.cloned_times, overall_insns- old_insns);

1222  }

1223  while ((node= fibheap_extract_min (heap)) != NULL)

1224    if(!node->local.disregard_inline_limits)

1225      cgraph_set_inline_failed(node, N_("--param inline-unit-growth limit reached"));

1226  fibheap_delete (heap);

1227  free (heap_node);

1228}

 

除了给估算展开大小比较小的函数优先处理外,编译器对内联函数基本上一视同仁,一旦展开到了一定程度,就不再允许内联了。这个规则,一方面是总体指令数的增加率,它由1133行的max_insns控制(默认情况下,编译器允许指令数50%的增长);另一方面则体现在cgraph_check_inline_limits中。

 

1018 static bool

1019 cgraph_check_inline_limits (struct cgraph_node *to, struct cgraph_node *what,

1020                        struct cgraph_node **inlined, int ninlined,

1021                        const char**reason)

1022 {

1023   int i;

1024   int times = 0;

1025   struct cgraph_edge *e;

1026   int newsize;

1027   int limit;

1028

1029   for (e =to->callees; e; e = e->next_callee)

1030     if (e->callee == what)

1031       times++;

1032

1033   /* When inlininglarge function body called once into small function,

1034     take the inlinedfunction as base for limiting the growth. */

1035   if (to->local.self_insns >what->local.self_insns)

1036     limit = to->local.self_insns;

1037   else

1038     limit = what->local.self_insns;

1039

1040   limit += limit * PARAM_VALUE(PARAM_LARGE_FUNCTION_GROWTH) / 100;

1041

1042   newsize = cgraph_estimate_size_after_inlining(times, to, what);

1043   if (newsize > PARAM_VALUE(PARAM_LARGE_FUNCTION_INSNS)

1044       && newsize > limit)

1045   {

1046     *reason = N_("--param large-function-growth limit reached");

1047     return false;

1048   }

1049   for (i = 0; i< ninlined; i++)

1050   {

1051     newsize =

1052        cgraph_estimate_size_after_inlining(INLINED_TIMES (inlined[i]) *

1053                                      times, inlined[i], what);

1054     if (newsize > PARAM_VALUE(PARAM_LARGE_FUNCTION_INSNS)

1055         && newsize >

1056           inlined[i]->local.self_insns *

1057           (100 + PARAM_VALUE(PARAM_LARGE_FUNCTION_GROWTH)) / 100)

1058     {

1059       *reason = N_("--param large-function-growth limit reached while inlining the caller");

1060       return false;

1061     }

1062   }

1063   return true;

1064 }

 

PARAM_LARGE_FUNCTION_GROWTH用于控制因为内联了大函数而导致该部分指令数增长的百分率(默认是100%),显然如果该大函数被内联超过1次就会超标。而PARAM_LARGE_FUNCTION_INSNS则是一个值,当函数的估算指令数超过这个值,就被认为是大函数。

1163行的output,在目前的情形下,它是由1151行的cgraph_inlined_callees设置的。因为一开始output都是0,而且在1152行的循环中,每次处理完都会重置相关节点的output域(1170及1186行),因此如果现在发现被调用函数的output已经设置了,显然还有别的函数内联了该函数,那么当前函数就不能被其调用者内联了(但依然内联这个被调用函数),以防止这个被调用函数的展开次数出现指数级增长。

一旦当前函数通过了1165~1166行的检查,就认为可以内联了,通过cgraph_mark_inline来更新相应的参数。作为结果,其调用者展开它的代价也就变了,所以还需要更新Heaps中节点(如果还在的话)。

在cgraph_decide_inlining_of_small_functions的最后,如果指令增长率超过了预设,那么剩下的函数一概不许内联。因为heaps以展开指令数排序,可以预见剩下的都是比较大的函数。

 

cgraph_decide_inline (continue)

 

1338     /* And finallydecide what functions are called once. */

1339

1340     for (i = nnodes - 1; i >= 0; i--)

1341     {

1342       node = order[i];

1343

1344       if(node->callers && !node->callers->next_caller &&!node->needed

1345           && node->local.inlinable&& node->callers->inline_failed

1346           && !DECL_EXTERNAL (node->decl)&& !DECL_COMDAT (node->decl))

1347      {

1348         bool ok = true;

1349         struct cgraph_node *node1;

1350

1351         /* Verify that we won't duplicate the caller.  */

1352         for (node1 = node->callers->caller;

1353             node1->callers &&!node1->callers->inline_failed

1354             && ok; node1 =node1->callers->caller)

1355           if (node1->callers->next_caller|| node1->needed)

1356             ok = false;

1357         if (ok)

1358         {

1359           const char*dummy_reason;

1360           if (cgraph_dump_file)

1361             fprintf (cgraph_dump_file,

1362                   "\nConsidering %s %i insns.\n"

1363                    "Called once from %s %i insns.\n",

1364                     cgraph_node_name (node),node->global.insns,

1365                     cgraph_node_name(node->callers->caller),

1366                     node->callers->caller->global.insns);

1367           ninlined = cgraph_inlined_into(node->callers->caller,

1368                                     inlined);

1369           old_insns = overall_insns;

1370

1371           /* Inliningfunctions once would never cause inlining warnings.  */

1372           if (cgraph_check_inline_limits

1373               (node->callers->caller, node,inlined, ninlined,

1374                &dummy_reason))

1375          {

1376             ninlined_callees =

1377                    cgraph_inlined_callees (node,inlined_callees);

1378             cgraph_mark_inline(node->callers->caller, node, inlined,

1379                               ninlined, inlined_callees,

1380                               ninlined_callees);

1381             for (y =0; y < ninlined_callees; y++)

1382               inlined_callees[y]->output =0, inlined_callees[y]->aux = 0;

1383             if(cgraph_dump_file)

1384               fprintf (cgraph_dump_file,

1385                     " Inlined into %s which now has %i insns"

1386                     " for a net change of %+i insns.\n",

1387                     cgraph_node_name (node->callers->caller),

1388                     node->callers->caller->global.insns,

1389                     overall_insns - old_insns);

1390           }

1391           else

1392          {

1393             if (cgraph_dump_file)

1394               fprintf (cgraph_dump_file,

1395                     " Inline limit reached, not inlined.\n");

1396           }

1397          for(y = 0; y < ninlined; y++)

1398            inlined[y]->output = 0,inlined[y]->aux = 0;

1399         }

1400       }

1401     }

1402   }

1403   cgraph_remove_unreachable_nodes ();

1404

1405   if (cgraph_dump_file)

1406     fprintf (cgraph_dump_file,

1407           "\nInlined %i calls, eliminated %i functions, "

1408           "%i insns turned to %i insns.\n\n",

1409           ncalls_inlined, nfunctions_inlined, initial_insns,

1410           overall_insns);

1411   free (order);

1412   free (inlined);

1413   free (inlined_callees);

1414 }

 

回到cgraph_decide_inline,对于只被调用一次,而又在上面被冤杀的小函数,编译器还是要网开一面,毕竟内联这些小函数的利益还是比较大。在1403行,还要进行可达性分析,移除不可达(不再需要)的函数。

5.13.5.3.2.3. 设置cgraph_global_info_ready

回到cgraph_optimize,在上面的处理中cgraph_node节点的global部分已经得到设置,在1596行,设置cgraph_global_info_ready来显示这一事实。