GCC-3.4.6源代码学习笔记(49)

来源:互联网 发布:惠州有什么好玩的知乎 编辑:程序博客网 时间:2024/04/25 19:46

4.2.10.2.              收集数据

完成创建伪函数上下文的任务后,回到backend_init,跟着的是init_expmed。init_expmed首先调用start_sequence来为rtl的产生做准备。

 

102  void

103  init_expmed (void)                                                                                           in expmed.c

104  {

105    rtx reg, shift_insn, shiftadd_insn, shiftsub_insn;

106    int dummy;

107    int m;

108    enum machine_mode mode, wider_mode;

109 

110     start_sequence ();

 

sequence_stack是由start_sequence保存的待定未完成指令序列形成的栈,其中每个栈元素描述了一个待定序列。主指令链保存在最后一个元素中,除非栈是空的。

 

38      struct sequence_stack GTY(())                                                                   in function.h

39      {

40        /* First and last insns in the chain of the saved sequence.  */

41        rtx first;

42        rtx last;

43        tree sequence_rtl_expr;

44        struct sequence_stack *next;

45      };

 

cfun中,所有insns(指令的rtx对象)被链作一个列表。在导出insn收集记录信息前,需要记录这个边界,在退出时,我们可以通过end_sequence来丢弃这些insn(构成函数体的指令)。

 

4996 void

4997 start_sequence (void)                                                                                 in emit-rtl.c

4998 {

4999   struct sequence_stack *tem;

5000

5001   if (free_sequence_stack != NULL)

5002   {

5003     tem = free_sequence_stack;

5004     free_sequence_stack = tem->next;

5005   }

5006   else

5007     tem = ggc_alloc (sizeof (struct sequence_stack));

5008

5009   tem->next = seq_stack;

5010   tem->first = first_insn;

5011   tem->last = last_insn;

5012   tem->sequence_rtl_expr = seq_rtl_expr;

5013

5014   seq_stack = tem;

5015

5016   first_insn = 0;

5017   last_insn = 0;

5018 }

 

接着init_expmed从特定表达式收集创建rtx对象的代价。头2个被评估的表达式是(const 0)和reg (10000) + reg (10000)。

 

init_expmed (continue)

 

112     /* This is "some random pseudo register" for purposes of calling recog

113       to see what insns exist.  */

114     reg = gen_rtx_REG (word_mode, 10000);

115  

116     zero_cost = rtx_cost (const0_rtx, 0);

117     add_cost = rtx_cost (gen_rtx_PLUS (word_mode, reg, reg), SET);

 

上面在116和117行的语句将产生如下的2个临时rtx对象。注意到word_mode在init_emit_once中初始化,对于x86机器,它是SImode的别名。

图24:整数及使用寄存器的PLUS表达式的rtx对象

 

819    int

820    rtx_cost (rtx x, enum rtx_code outer_code ATTRIBUTE_UNUSED)                    in cse.c

821    {

822      int i, j;

823      enum rtx_code code;

824      const char *fmt;

825      int total;

826   

827      if (x == 0)

828        return 0;

829   

830     /* Compute the default costs of certain things.

831        Note that targetm.rtx_costs can override the defaults.  */

832   

833      code = GET_CODE (x);

834      switch (code)

835      {

836        case MULT:

837          total = COSTS_N_INSNS (5);

838          break;

839        case DIV:

840        case UDIV:

841        case MOD:

842        case UMOD:

843          total = COSTS_N_INSNS (7);

844          break;

845        case USE:

846          /* Used in loop.c and combine.c as a marker.  */

847          total = 0;

848          break;

849        default:

850          total = COSTS_N_INSNS (1);

851      }

852   

853      switch (code)

854      {

855        case REG:

856          return 0;

857   

858        case SUBREG:

859          /* If we can't tie these modes, make this expensive. The larger

860            the mode, the more expensive it is.  */

861          if (! MODES_TIEABLE_P (GET_MODE (x), GET_MODE (SUBREG_REG (x))))

862            return COSTS_N_INSNS (2

863                             + GET_MODE_SIZE (GET_MODE (x)) / UNITS_PER_WORD);

864          break;

865   

866        default:

867          if ((*targetm.rtx_costs) (x, code, outer_code, &total))

868            return total;

869          break;

870      }

871   

872      /* Sum the costs of the sub-rtx's, plus cost of this operation,

873        which is already in total.  */

874   

875      fmt = GET_RTX_FORMAT (code);

876      for (i = GET_RTX_LENGTH (code) - 1; i >= 0; i--)

877        if (fmt[i] == 'e')

878          total += rtx_cost (XEXP (x, i), code);

879        else if (fmt[i] == 'E')

880          for (j = 0; j < XVECLEN (x, i); j++)

881            total += rtx_cost (XVECEXP (x, i, j), code);

882   

883      return total;

884    }

 

注意到在rtx_cost中,算术操作的代价根据表达式通过简单数据粗略确定,不过在866行的第二个switch块,目标机器可以通过指定的函数来得到更准确的数据。在这里这个函数是ix86_rtx_costs。

 

15067 static bool

15068 ix86_rtx_costs (rtx x, int code, int outer_code, int *total)                                          in i386.c

15069 {

15070   enum machine_mode mode = GET_MODE (x);

15071

15072   switch (code)

15073   {

15074     case CONST_INT:

15075     case CONST:

15076     case LABEL_REF:

15077     case SYMBOL_REF:

15078       if (TARGET_64BIT && !x86_64_sign_extended_value (x))

15079         *total = 3;

15080       else if (TARGET_64BIT && !x86_64_zero_extended_value (x))

15081         *total = 2;

15082       else if (flag_pic && SYMBOLIC_CONST (x)

15083        && (!TARGET_64BIT

15084        || (!GET_CODE (x) != LABEL_REF

15085            && (GET_CODE (x) != SYMBOL_REF

15086                || !SYMBOL_REF_LOCAL_P (x)))))

15087         *total = 1;

15088       else

15089         *total = 0;

15090       return true;

15091

15092     case CONST_DOUBLE:

15093       if (mode == VOIDmode)

15094         *total = 0;

15095       else

15096         switch (standard_80387_constant_p (x))

15097         {

15098           case 1: /* 0.0 */

15099             *total = 1;

15100             break;

15101           default: /* Other constants */

15102             *total = 2;

15103             break;

15104           case 0:

15105           case -1:

15106           /* Start with (MEM (SYMBOL_REF)), since that's where

15107             it'll probably end up. Add a penalty for size.  */

15108             *total = (COSTS_N_INSNS (1)

15109                   + (flag_pic != 0 && !TARGET_64BIT)

15110                   + (mode == SFmode ? 0 : mode == DFmode ? 1 : 2));

15111             break;

15112         }

15113       return true;

15114

15115     case ZERO_EXTEND:

15116       /* The zero extensions is often completely free on x86_64, so make

15117         it as cheap as possible.  */

15118       if (TARGET_64BIT && mode == DImode

15119           && GET_MODE (XEXP (x, 0)) == SImode)

15120         *total = 1;

15121       else if (TARGET_ZERO_EXTEND_WITH_AND)

15122         *total = COSTS_N_INSNS (ix86_cost->add);

15123       else

15124         *total = COSTS_N_INSNS (ix86_cost->movzx);

15125       return false;

15126

15127     case SIGN_EXTEND:

15128       *total = COSTS_N_INSNS (ix86_cost->movsx);

15129       return false;

15130

15131     case ASHIFT:

15132       if (GET_CODE (XEXP (x, 1)) == CONST_INT

15133           && (GET_MODE (XEXP (x, 0)) != DImode || TARGET_64BIT))

15134       {

15135         HOST_WIDE_INT value = INTVAL (XEXP (x, 1));

15136         if (value == 1)

15137         {

15138           *total = COSTS_N_INSNS (ix86_cost->add);

15139           return false;

15140         }

15141         if ((value == 2 || value == 3)

15142            && !TARGET_DECOMPOSE_LEA

15143            && ix86_cost->lea <= ix86_cost->shift_const)

15144         {

15145           *total = COSTS_N_INSNS (ix86_cost->lea);

15146           return false;

15147         }

15148 }

15149     /* FALLTHRU */

15150

15151     case ROTATE:

15152     case ASHIFTRT:

15153     case LSHIFTRT:

15154     case ROTATERT:

15155       if (!TARGET_64BIT && GET_MODE (XEXP (x, 0)) == DImode)

15156       {

15157         if (GET_CODE (XEXP (x, 1)) == CONST_INT)

15158         {

15159           if (INTVAL (XEXP (x, 1)) > 32)

15160             *total = COSTS_N_INSNS(ix86_cost->shift_const + 2);

15161           else

15162             *total = COSTS_N_INSNS(ix86_cost->shift_const * 2);

15163         }

15164         else

15165         {

15166           if (GET_CODE (XEXP (x, 1)) == AND)

15167             *total = COSTS_N_INSNS(ix86_cost->shift_var * 2);

15168           else

15169             *total = COSTS_N_INSNS(ix86_cost->shift_var * 6 + 2);

15170         }

15171       }

15172       else

15173       {

15174         if (GET_CODE (XEXP (x, 1)) == CONST_INT)

15175           *total = COSTS_N_INSNS (ix86_cost->shift_const);

15176         else

15177           *total = COSTS_N_INSNS (ix86_cost->shift_var);

15178       }

15179       return false;

15180

15181     case MULT:

15182       if (FLOAT_MODE_P (mode))

15183         *total = COSTS_N_INSNS (ix86_cost->fmul);

15184       else if (GET_CODE (XEXP (x, 1)) == CONST_INT)

15185       {

15186         unsigned HOST_WIDE_INT value = INTVAL (XEXP (x, 1));

15187         int nbits;

15188

15189         for (nbits = 0; value != 0; value >>= 1)

15190           nbits++;

15191

15192         *total = COSTS_N_INSNS (ix86_cost->mult_init[MODE_INDEX (mode)]

15193                      + nbits * ix86_cost->mult_bit);

15194       }

15195       else

15196       {

15197         /* This is arbitrary */

15198         *total = COSTS_N_INSNS (ix86_cost->mult_init[MODE_INDEX (mode)]

15199                      + 7 * ix86_cost->mult_bit);

15200       }

15201       return false;

15202

15203     case DIV:

15204     case UDIV:

15205     case MOD:

15206     case UMOD:

15207       if (FLOAT_MODE_P (mode))

15208         *total = COSTS_N_INSNS (ix86_cost->fdiv);

15209       else

15210         *total = COSTS_N_INSNS (ix86_cost->divide[MODE_INDEX (mode)]);

15211       return false;

15212

15213     case PLUS:

15214       if (FLOAT_MODE_P (mode))

15215         *total = COSTS_N_INSNS (ix86_cost->fadd);

15216       else if (!TARGET_DECOMPOSE_LEA

15217        && GET_MODE_CLASS (mode) == MODE_INT

15218        && GET_MODE_BITSIZE (mode) <= GET_MODE_BITSIZE (Pmode))

15219       {

15220         if (GET_CODE (XEXP (x, 0)) == PLUS

15221             && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT

15222             && GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1)) == CONST_INT

15223             && CONSTANT_P (XEXP (x, 1)))

15224         {

15225           HOST_WIDE_INT val = INTVAL (XEXP (XEXP (XEXP (x, 0), 0), 1));

15226           if (val == 2 || val == 4 || val == 8)

15227           {

15228             *total = COSTS_N_INSNS (ix86_cost->lea);

15229             *total += rtx_cost (XEXP (XEXP (x, 0), 1), outer_code);

15230             *total += rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0),

15231                            outer_code);

15232             *total += rtx_cost (XEXP (x, 1), outer_code);

15233             return true;

15234           }

15235         }

15236         else if (GET_CODE (XEXP (x, 0)) == MULT

15237             && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT)

15238         {

15239           HOST_WIDE_INT val = INTVAL (XEXP (XEXP (x, 0), 1));

15240           if (val == 2 || val == 4 || val == 8)

15241           {

15242             *total = COSTS_N_INSNS (ix86_cost->lea);

15243             *total += rtx_cost (XEXP (XEXP (x, 0), 0), outer_code);

15244             *total += rtx_cost (XEXP (x, 1), outer_code);

15245             return true;

15246           }

15247         }

15248         else if (GET_CODE (XEXP (x, 0)) == PLUS)

15249         {

15250           *total = COSTS_N_INSNS (ix86_cost->lea);

15251           *total += rtx_cost (XEXP (XEXP (x, 0), 0), outer_code);

15252           *total += rtx_cost (XEXP (XEXP (x, 0), 1), outer_code);

15253           *total += rtx_cost (XEXP (x, 1), outer_code);

15254           return true;

15255         }

15256       }

15257       /* FALLTHRU */

15258

15259     case MINUS:

15260       if (FLOAT_MODE_P (mode))

15261       {

15262         *total = COSTS_N_INSNS (ix86_cost->fadd);

15263         return false;

15264       }

15265       /* FALLTHRU */

15266

15267     case AND:

15268     case IOR:

15269     case XOR:

15270       if (!TARGET_64BIT && mode == DImode)

15271       {

15272         *total = (COSTS_N_INSNS (ix86_cost->add) * 2

15273                + (rtx_cost (XEXP (x, 0), outer_code)

15274                << (GET_MODE (XEXP (x, 0)) != DImode))

15275                + (rtx_cost (XEXP (x, 1), outer_code)

15276                << (GET_MODE (XEXP (x, 1)) != DImode)));

15277         return true;

15278       }

15279       /* FALLTHRU */

15280

15281     case NEG:

15282       if (FLOAT_MODE_P (mode))

15283       {

15284         *total = COSTS_N_INSNS (ix86_cost->fchs);

15285         return false;

15286       }

15287       /* FALLTHRU */

15288

15289     case NOT:

15290       if (!TARGET_64BIT && mode == DImode)

15291         *total = COSTS_N_INSNS (ix86_cost->add * 2);

15292       else

15293         *total = COSTS_N_INSNS (ix86_cost->add);

15294       return false;

15295

15296     case FLOAT_EXTEND:

15297       if (!TARGET_SSE_MATH

15298           || mode == XFmode

15299           || (mode == DFmode && !TARGET_SSE2))

15300         *total = 0;

15301         return false;

15302

15303     case ABS:

15304       if (FLOAT_MODE_P (mode))

15305         *total = COSTS_N_INSNS (ix86_cost->fabs);

15306       return false;

15307

15308     case SQRT:

15309       if (FLOAT_MODE_P (mode))

15310         *total = COSTS_N_INSNS (ix86_cost->fsqrt);

15311       return false;

15312

15313     case UNSPEC:

15314       if (XINT (x, 1) == UNSPEC_TP)

15315         *total = 0;

15316       return false;

15317

15318     default:

15319       return false;

15320   }

15321 }

 

对于常量0,它在rtx_cost的867行处进入ix86_rtx_costs,并且在ix86_rtx_costs中,对于32位的x86系统,它在15089行把*total设为0,并在15090行返回true。然后rtx_cost在868行返回,并将zero_cost赋为0。

对于reg (10000) + reg (10000),它也是在rtx_cost的867行处进入ix86_rtx_costs,从15213行的case PLUS进入后,一路往下(fall through)执行直到15294行。ix86_cost是预先定义的结构体,记录了特定芯片上特定操作的代价。例如pentium,加法的代价是1,经COSTS_N_INSNS (1)换算(即1 * 4)后得到总的代价为4。注意到ix86_rtx_costs在15294行返回false,因此在rtx_cost里会继续执行到875行。对于rtl编码PLUS,其格式是“ee”,表明其2个孩子均为表达式。对于我们这里的表达式,这2个孩子都是寄存器,它们在855行的REG分支返回值0。最后add_cost得到值4。

然后init_expmed继续获取关于移位(shift),移位加(shift-add)及移位减(shift-minus)表达式的代价信息。

 

init_expmed (continue)

 

119     shift_insn = emit_insn (gen_rtx_SET (VOIDmode, reg,

120                              gen_rtx_ASHIFT (word_mode, reg,

121                                            const0_rtx)));

122 

123    shiftadd_insn

124      = emit_insn (gen_rtx_SET (VOIDmode, reg,

125                      gen_rtx_PLUS (word_mode,

126                                  gen_rtx_MULT (word_mode,

127                                              reg, const0_rtx),

128                                  reg)));

129 

130    shiftsub_insn

131      = emit_insn (gen_rtx_SET (VOIDmode, reg,

132                      gen_rtx_MINUS (word_mode,

133                                   gen_rtx_MULT (word_mode,

134                                               reg, const0_rtx),

135                                   reg)));

136 

137    init_recog ();

 

gen_rtx_SET,gen_rtx_ASHIFT,gen_rtx_MULT和gen_rtx_MINUS,正如我们所期望的,都调用了gen_rtx_fmt_ee,所创建的rtx对象具有2个表达式形式的孩子。对于上面的代码,以下对象将被创建。

 

 

图25:移位操作的rtx对象

 

图26:移位加操作的rtx对象

图27:移位减操作的rtx对象

上面的rtx对象都是指令的样式。在编译过程中标准的rtx对象是insn,它是高级编程语言中的指令,语句或表达式的对等物。emit_insn从rtx对象中创建insn对象来代表函数体。所有的insn对象都通过双向链表来组织,以在源代码中出现顺序排列。

 

4656 rtx

4657 emit_insn (rtx x)                                                                                       in emit-rtl.c

4658 {

4659   rtx last = last_insn;

4660   rtx insn;

4661

4662   if (x == NULL_RTX)

4663     return last;

4664

4665   switch (GET_CODE (x))

4666   {

4667     case INSN:

4668     case JUMP_INSN:

4669     case CALL_INSN:

4670     case CODE_LABEL:

4671     case BARRIER:

4672     case NOTE:

4673       insn = x;

4674       while (insn)

4675       {

4676         rtx next = NEXT_INSN (insn);

4677         add_insn (insn);

4678         last = insn;

4679         insn = next;

4680       }

4681       break;

4682

4683 #ifdef ENABLE_RTL_CHECKING

4684     case SEQUENCE:

4685       abort ();

4686       break;

4687 #endif

4688

4689     default:

4690       last = make_insn_raw (x);

4691       add_insn (last);

4692       break;

4693   }

4694

4695   return last;

4696 }

 

对于我们的案例,make_insn_raw被调用来创建insn对象。

 

3459 rtx

3460 make_insn_raw (rtx pattern)                                                                       in emit-rtl.c

3461 {

3462   rtx insn;

3463

3464   insn = rtx_alloc (INSN);

3465

3466   INSN_UID (insn) = cur_insn_uid++;

3467   PATTERN (insn) = pattern;

3468   INSN_CODE (insn) = -1;

3469   LOG_LINKS (insn) = NULL;

3470   REG_NOTES (insn) = NULL;

3471   INSN_LOCATOR (insn) = 0;

3472   BLOCK_FOR_INSN (insn) = NULL;

3473

3474 #ifdef ENABLE_RTL_CHECKING

3475   if (insn

3476       && INSN_P (insn)

3477       && (returnjump_p (insn)

3478         || (GET_CODE (insn) == SET

3479           && SET_DEST (insn) == pc_rtx)))

3480   {

3481     warning ("ICE: emit_insn used where emit_jump_insn needed:\n");

3482     debug_rtx (insn);

3483   }

3484 #endif

3485

3486   return insn;

3487 }

 

下面是以上所用到的一些宏的定义,它们都用于insn对象上,注意到insn也是一个rtx

 

561    /* Holds a unique number for each insn.

562      These are not necessarily sequentially increasing.  */

563    #define INSN_UID(INSN)  XINT (INSN, 0)                                                   in rtl.h

564   

565    /* Chain insns together in sequence.  */

566    #define PREV_INSN(INSN)     XEXP (INSN, 1)

567    #define NEXT_INSN(INSN)     XEXP (INSN, 2)

568   

569    #define BLOCK_FOR_INSN(INSN) XBBDEF (INSN, 3)

570    #define INSN_LOCATOR(INSN) XINT (INSN, 4)

571    /* The body of an insn.  */

572    #define PATTERN(INSN)   XEXP (INSN, 5)

573   

574    /* Code number of instruction, from when it was recognized.

575      -1 means this instruction has not been recognized yet.  */

576    #define INSN_CODE(INSN) XINT (INSN, 6)

577   

578    /* Set up in flow.c; empty before then.

579      Holds a chain of INSN_LIST rtx's whose first operands point at

580      previous insns with direct data-flow connections to this one.

581      That means that those insns set variables whose next use is in this insn.

582      They are always in the same basic block as this insn.  */

583    #define LOG_LINKS(INSN)     XEXP (INSN, 7)

584 

585    /* Holds a list of notes on what this insn does to various REGs.

586      It is a chain of EXPR_LIST rtx's, where the second operand is the

587      chain pointer and the first operand is the REG being described.

588      The mode field of the EXPR_LIST contains not a real machine mode

589      but a value from enum reg_note.  */

590   

591    #define REG_NOTES(INSN)    XEXP (INSN, 8)

 

在4691行,emit_insn通过add_insn把所创建的insn对象链入cfun对象中。

 

3534 void

3535 add_insn (rtx insn)                                                                                    in emit-rtl.c

3536 {

3537   PREV_INSN (insn) = last_insn;

3538   NEXT_INSN (insn) = 0;

3539

3540   if (NULL != last_insn)

3541     NEXT_INSN (last_insn) = insn;

3542

3543   if (NULL == first_insn)

3544     first_insn = insn;

3545

3546   last_insn = insn;

3547 }

 

从上面的代码,可以看到insn对象应该看起来像下面那样。

 

图28:指令的rtx对象

在init_expmed的137行,init_recog仅把全局变量volatile_ok设为1,这个变量不为0表示允许操作数为volatile。然后init_expmed跟着来收集表达式的代价。在这以后,还需要评估取反,除法及取模的表达式。

 

init_expmed (continue)

 

139    shift_cost[0] = 0;

140    shiftadd_cost[0] = shiftsub_cost[0] = add_cost;

141 

142    for (m = 1; m < MAX_BITS_PER_WORD; m++)

143    {

144      rtx c_int = GEN_INT ((HOST_WIDE_INT) 1 << m);

145      shift_cost[m] = shiftadd_cost[m] = shiftsub_cost[m] = 32000;

146 

147      XEXP (SET_SRC (PATTERN (shift_insn)), 1) = GEN_INT (m);

148      if (recog (PATTERN (shift_insn), shift_insn, &dummy) >= 0)

149        shift_cost[m] = rtx_cost (SET_SRC (PATTERN (shift_insn)), SET);

150 

151      XEXP (XEXP (SET_SRC (PATTERN (shiftadd_insn)), 0), 1) = c_int;

152      if (recog (PATTERN (shiftadd_insn), shiftadd_insn, &dummy) >= 0)

153        shiftadd_cost[m] = rtx_cost (SET_SRC (PATTERN (shiftadd_insn)), SET);

154 

155      XEXP (XEXP (SET_SRC (PATTERN (shiftsub_insn)), 0), 1) = c_int;

156      if (recog (PATTERN (shiftsub_insn), shiftsub_insn, &dummy) >= 0)

157        shiftsub_cost[m] = rtx_cost (SET_SRC (PATTERN (shiftsub_insn)), SET);

158    }

159 

160    negate_cost = rtx_cost (gen_rtx_NEG (word_mode, reg), SET);

161 

162    sdiv_pow2_cheap

163      = (rtx_cost (gen_rtx_DIV (word_mode, reg, GEN_INT (32)), SET)

164         <= 2 * add_cost);

165    smod_pow2_cheap

166      = (rtx_cost (gen_rtx_MOD (word_mode, reg, GEN_INT (32)), SET)

167         <= 2 * add_cost);

 

SET_SRC执行检查以确保RTX_CODE是SET。

 

542    #define XCEXP(RTX, N, C) (RTL_CHECKC1 (RTX, N, C).rtx)                         in rtl.h

1245   #define SET_SRC(RTX) XCEXP(RTX, 1, SET)

 

在上面,对于32位x86系统,已经得到add_cost为4。对于移位为0的移位操作,实际上无事可做,因此其代价应该为0。因而具有0移位的移位加/减操作的代价应该等同于add_cost。这正是139~140行的目的。

上面的recog是由工具genrecog通过机器描述文件(这里是i386.md)生成的。recog的返回值是insn-code,如果为-1,则表明该insn不能被识别。

可以看到对于不能识别的insn,其代价被设为32000,一个非常大的值。如果insn被识别,在它的样式中(上面图中所显示的rtx对象),rtx对象const_0被rtx对象const_`m`所替代,并尝试评估其代价。注意到被评估的东西是rtx对象SET的第二个孩子。

在rtx_cost的帮助下,我们可以获得以下的信息(以pentium4为例):

shift_cost [1] = add_cost = 4

shift_cost [2] = shift_cost [3] = lea cost = 4

shift_cost [4] … shift_cost [31] = constant shift cost = 16

shiftadd_cost [1] (with multiplicator = 1 << 1) = lea cost = 4

shiftadd_cost [2] (with multiplicator = 1 << 2) = lea cost = 4

shiftadd_cost [3] (with multiplicator = 1 << 3) = lea cost = 4

shiftadd_cost [with other multiplicator] = mult cost + add cost = 64

shiftsub_cost [n] = add cost + mult cost = 64

在上面的160行,如下rtx对象被创建。

图29:NEG,DIV及MOD的rtx对象

并且又得到

neg_cost = add_cost = 4

div_cost = 224 > add_cost * 2, sdiv_pow2_cheap = false

mod_cost = 224 > add_cost * 2, smod_pow2_cheap = false

接下来,因为不同机器模式乘法和除法操作可能会有不同的代价,并且整数类型是最常用到的,我们把这些值保存到静态变量中。

 

init_expmed (continue)

 

169    for (mode = GET_CLASS_NARROWEST_MODE (MODE_INT);

170         mode != VOIDmode;

171         mode = GET_MODE_WIDER_MODE (mode))

172    {

173      reg = gen_rtx_REG (mode, 10000);

174      div_cost[(int) mode] = rtx_cost (gen_rtx_UDIV (mode, reg, reg), SET);

175      mul_cost[(int) mode] = rtx_cost (gen_rtx_MULT (mode, reg, reg), SET);

176      wider_mode = GET_MODE_WIDER_MODE (mode);

177      if (wider_mode != VOIDmode)

178      {

179        mul_widen_cost[(int) wider_mode]

180          = rtx_cost (gen_rtx_MULT (wider_mode,

181                             gen_rtx_ZERO_EXTEND (wider_mode, reg),

182                             gen_rtx_ZERO_EXTEND (wider_mode, reg)),

183                SET);

184        mul_highpart_cost[(int) mode]

185          = rtx_cost (gen_rtx_TRUNCATE

186                (mode,

187                gen_rtx_LSHIFTRT (wider_mode,

188                                 gen_rtx_MULT (wider_mode,

189                                            gen_rtx_ZERO_EXTEND

190                                            (wider_mode, reg),

191                                            gen_rtx_ZERO_EXTEND

192                                            (wider_mode, reg)),

193                                 GEN_INT (GET_MODE_BITSIZE (mode)))),

194                SET);

195      }

196   }

197 

198    end_sequence ();

199  }

 

上面,对于pentium4,div_cost和mul_cost对于所有的整型模式都是相同的,它们都是224。在179行,mul_widen_cost记录了有模式提升(mode promotion)的乘法操作。它们是对以下rtx对象评估的结果(以SImode为例)。

图30:SImode的乘法的rtx对象

对于pentium4,我们得到mul_widen_cost [mode]都是232;而mul_highpart_cost,对于SImode是264,其余为248。在完成所有这些操作后,init_expmed调用end_sequence来恢复之前保存的状态。

原创粉丝点击