-----BEGIN PGP SIGNED MESSAGE-----
Hash: SHA1

ifcvt.c is sometimes over-aggressive in speculating instructions from a
not-predicted path.

Given:

        if (test) goto E; // x not live
        x = big();
        goto L;
        E:
        x = b;
        goto M;


ifcvt wants to turn it into:

        x = b;
        if (test) goto M;
        x = big();
        goto L;

ie, we speculate x = b and remove a branch.

Similarly:

        if (test) goto over; // x not live
        x = a;
        goto label;
        over:

   becomes

        x = a;
        if (! test) goto label;


where again we speculate x = a and eliminate a branch.


ifcvt has tests to see if the code to speculate is cheap relative to the
cost of the branch we get to remove.  Unfortunately, that only takes
into account a static RTX cost.   We need to be looking at the branch
prediction too -- often the branch we're going to eliminate isn't going
to be executed at all!

Specifically, we should take the cost of the branch we want to eliminate
and scale that by how often we expect to reach that branch at runtime.
That allows us to compare the runtime cost of the speculative code vs
the runtime benefit of eliminating the branch.

Looking at branch & insn counts before/after that change is quite
interesting.   I've got gcc-4.6 built with/without the attached change.
 I then use that gcc-4.6 to compile a bunch of .i files under the
watchful eye of valgrind.

Using this data we can see the actual costs...  For every dynamic branch
eliminated, we had to execute an additional 300 instructions!
Again, remember these are dynamic counts, so we may have only speculated
one static instruction to eliminate one branch, but we hit that
speculated instruction far more often dynamically than the branch we
ultimately eliminated.



instructions w/o patch:1267286482371
instructions w   patch:1264140292731

branches w/o     patch: 231180206508
branches w       patch: 231190636813

Bootstrapped and regression tested on x86_64-unknown-linux-gnu.  OK for
trunk?



-----BEGIN PGP SIGNATURE-----
Version: GnuPG v1.4.11 (GNU/Linux)
Comment: Using GnuPG with Mozilla - http://enigmail.mozdev.org/

iQEcBAEBAgAGBQJOSt2sAAoJEBRtltQi2kC7UZUIAJ7fthVsCXxU3JOtIVbUSX5t
grCG73peQnBB7FhB58/jW1GJWc011mExLIJf74FDrNU+gMp3gn01L0zdjcaytmY6
sNjso7dLjW42a/wByzNlHNUy2KRMUqhobEhHYWgC0tMJFz8/ekCulI7h98pVISmT
np9G/1zRXn3uD7F3pKw7lLDS994nSUmjObPFIyFxTfVGhBTWZYY8JjKP7NsOCNli
Dr2BXFF4rahoSDUlcLHwPPBJPABLvxwvMo0dsmNkB3HEiajj7qVPGUYaGrTJ5M1g
Bvww+ozzJRT96qQ/smjVZutD2Cu/U74Ix6EX8Yj54Zp2HeX8tCJ3kXWPFI6cBpk=
=3BEA
-----END PGP SIGNATURE-----
Index: ifcvt.c
===================================================================
*** ifcvt.c     (revision 177629)
--- ifcvt.c     (working copy)
*************** static int cond_exec_changed_p;
*** 85,91 ****
  
  /* Forward references.  */
  static int count_bb_insns (const_basic_block);
! static bool cheap_bb_rtx_cost_p (const_basic_block, int);
  static rtx first_active_insn (basic_block);
  static rtx last_active_insn (basic_block, int);
  static rtx find_active_insn_before (basic_block, rtx);
--- 85,91 ----
  
  /* Forward references.  */
  static int count_bb_insns (const_basic_block);
! static bool cheap_bb_rtx_cost_p (const_basic_block, int, int);
  static rtx first_active_insn (basic_block);
  static rtx last_active_insn (basic_block, int);
  static rtx find_active_insn_before (basic_block, rtx);
*************** count_bb_insns (const_basic_block bb)
*** 131,150 ****
  
  /* Determine whether the total insn_rtx_cost on non-jump insns in
     basic block BB is less than MAX_COST.  This function returns
!    false if the cost of any instruction could not be estimated.  */
  
  static bool
! cheap_bb_rtx_cost_p (const_basic_block bb, int max_cost)
  {
    int count = 0;
    rtx insn = BB_HEAD (bb);
    bool speed = optimize_bb_for_speed_p (bb);
  
    while (1)
      {
        if (NONJUMP_INSN_P (insn))
        {
!         int cost = insn_rtx_cost (PATTERN (insn), speed);
          if (cost == 0)
            return false;
  
--- 131,161 ----
  
  /* Determine whether the total insn_rtx_cost on non-jump insns in
     basic block BB is less than MAX_COST.  This function returns
!    false if the cost of any instruction could not be estimated. 
! 
!    The cost of the non-jump insns in BB is scaled by REG_BR_PROB_BASE
!    as those insns are being speculated.  MAX_COST is scaled with SCALE
!    plus a small fudge factor.  */
  
  static bool
! cheap_bb_rtx_cost_p (const_basic_block bb, int scale, int max_cost)
  {
    int count = 0;
    rtx insn = BB_HEAD (bb);
    bool speed = optimize_bb_for_speed_p (bb);
  
+   /* Our branch probability/scaling factors are just estimates and don't
+      account for cases where we can get speculation for free and other
+      secondary benefits.  So we fudge the scale factor to make speculating
+      appear a little more profitable.  */
+   scale += REG_BR_PROB_BASE / 8;
+   max_cost *= scale;
+ 
    while (1)
      {
        if (NONJUMP_INSN_P (insn))
        {
!         int cost = insn_rtx_cost (PATTERN (insn), speed) * REG_BR_PROB_BASE;
          if (cost == 0)
            return false;
  
*************** find_if_case_1 (basic_block test_bb, edg
*** 3796,3802 ****
    basic_block then_bb = then_edge->dest;
    basic_block else_bb = else_edge->dest;
    basic_block new_bb;
!   int then_bb_index;
  
    /* If we are partitioning hot/cold basic blocks, we don't want to
       mess up unconditional or indirect jumps that cross between hot
--- 3807,3814 ----
    basic_block then_bb = then_edge->dest;
    basic_block else_bb = else_edge->dest;
    basic_block new_bb;
!   int then_bb_index, then_prob;
!   rtx note;
  
    /* If we are partitioning hot/cold basic blocks, we don't want to
       mess up unconditional or indirect jumps that cross between hot
*************** find_if_case_1 (basic_block test_bb, edg
*** 3839,3846 ****
             "\nIF-CASE-1 found, start %d, then %d\n",
             test_bb->index, then_bb->index);
  
!   /* THEN is small.  */
!   if (! cheap_bb_rtx_cost_p (then_bb,
        COSTS_N_INSNS (BRANCH_COST (optimize_bb_for_speed_p (then_edge->src),
                                    predictable_edge_p (then_edge)))))
      return FALSE;
--- 3851,3865 ----
             "\nIF-CASE-1 found, start %d, then %d\n",
             test_bb->index, then_bb->index);
  
!   note = find_reg_note (BB_END (test_bb), REG_BR_PROB, NULL_RTX);
!   if (!note)
!     then_prob = REG_BR_PROB_BASE / 2;
!   else
!     then_prob = REG_BR_PROB_BASE - INTVAL (XEXP (note, 0));
! 
!   /* We're speculating from the THEN path, we want to make sure the cost
!      of speculation is within reason.  */
!   if (! cheap_bb_rtx_cost_p (then_bb, then_prob,
        COSTS_N_INSNS (BRANCH_COST (optimize_bb_for_speed_p (then_edge->src),
                                    predictable_edge_p (then_edge)))))
      return FALSE;
*************** find_if_case_2 (basic_block test_bb, edg
*** 3899,3904 ****
--- 3918,3924 ----
    basic_block then_bb = then_edge->dest;
    basic_block else_bb = else_edge->dest;
    edge else_succ;
+   int then_prob, else_prob;
    rtx note;
  
    /* If we are partitioning hot/cold basic blocks, we don't want to
*************** find_if_case_2 (basic_block test_bb, edg
*** 3938,3946 ****
    if (then_bb->index < NUM_FIXED_BLOCKS)
      return FALSE;
  
-   /* ELSE is predicted or SUCC(ELSE) postdominates THEN.  */
    note = find_reg_note (BB_END (test_bb), REG_BR_PROB, NULL_RTX);
!   if (note && INTVAL (XEXP (note, 0)) >= REG_BR_PROB_BASE / 2)
      ;
    else if (else_succ->dest->index < NUM_FIXED_BLOCKS
           || dominated_by_p (CDI_POST_DOMINATORS, then_bb,
--- 3958,3977 ----
    if (then_bb->index < NUM_FIXED_BLOCKS)
      return FALSE;
  
    note = find_reg_note (BB_END (test_bb), REG_BR_PROB, NULL_RTX);
!   if (!note)
!     {
!       else_prob = REG_BR_PROB_BASE / 2;
!       then_prob = REG_BR_PROB_BASE / 2;
!     }
!   else
!     {
!       else_prob = INTVAL (XEXP (note, 0));
!       then_prob = REG_BR_PROB_BASE - else_prob;
!     }
! 
!   /* ELSE is predicted or SUCC(ELSE) postdominates THEN.  */
!   if (else_prob > then_prob)
      ;
    else if (else_succ->dest->index < NUM_FIXED_BLOCKS
           || dominated_by_p (CDI_POST_DOMINATORS, then_bb,
*************** find_if_case_2 (basic_block test_bb, edg
*** 3955,3962 ****
             "\nIF-CASE-2 found, start %d, else %d\n",
             test_bb->index, else_bb->index);
  
!   /* ELSE is small.  */
!   if (! cheap_bb_rtx_cost_p (else_bb,
        COSTS_N_INSNS (BRANCH_COST (optimize_bb_for_speed_p (else_edge->src),
                                    predictable_edge_p (else_edge)))))
      return FALSE;
--- 3986,3994 ----
             "\nIF-CASE-2 found, start %d, else %d\n",
             test_bb->index, else_bb->index);
  
!   /* We're speculating from the ELSE path, we want to make sure the cost
!      of speculation is within reason.  */
!   if (! cheap_bb_rtx_cost_p (else_bb, else_prob,
        COSTS_N_INSNS (BRANCH_COST (optimize_bb_for_speed_p (else_edge->src),
                                    predictable_edge_p (else_edge)))))
      return FALSE;

Reply via email to