Missed two more conditional branches created by inline expansion that should
have had branch probability notes.

2019-02-08  Aaron Sawdey  <acsaw...@linux.ibm.com>

        * config/rs6000/rs6000-string.c (expand_compare_loop,
        expand_block_compare): Insert REG_BR_PROB notes in inline expansion of
        memcmp/strncmp.

Index: gcc/config/rs6000/rs6000-string.c
===================================================================
--- gcc/config/rs6000/rs6000-string.c   (revision 268547)
+++ gcc/config/rs6000/rs6000-string.c   (working copy)
@@ -1525,6 +1525,7 @@
          else
            j = emit_jump_insn (gen_bdnztf_si (fc_loop, ctr, ctr,
                                               eqrtx, cond));
+         add_reg_br_prob_note (j, profile_probability::likely ());
          JUMP_LABEL (j) = fc_loop;
          LABEL_NUSES (fc_loop) += 1;

@@ -1897,6 +1898,7 @@
              rtx ifelse = gen_rtx_IF_THEN_ELSE (VOIDmode, ne_rtx,
                                                 cvt_ref, pc_rtx);
              rtx j = emit_jump_insn (gen_rtx_SET (pc_rtx, ifelse));
+             add_reg_br_prob_note (j, profile_probability::likely ());
              JUMP_LABEL (j) = convert_label;
              LABEL_NUSES (convert_label) += 1;
            }

Pre-approved by Segher for trunk and backport to 8; will commit after regtest
completes.

  Aaron

On 2/4/19 1:06 PM, Aaron Sawdey wrote:
> This is the second part of the fix for 89112, fixing the conditions that
> caused it to happen.
> This patch adds REG_BR_PROB notes to the branches generated by inline
> expansion of memcmp and strncmp. This prevents any of the code from being
> marked as cold and moved to the end of the function, which is what caused
> the long branches in 89112. With this patch, the test case for 89112 does
> not have any long branches within the expansion of memcmp, and the code for
> each memcmp is contiguous.
> 
> OK for trunk and 8 backport if bootstrap/regtest passes?
> 
> Thanks!
> 
>    Aaron
> 
> 2019-02-04  Aaron Sawdey  <acsaw...@linux.ibm.com>
> 
>       * config/rs6000/rs6000-string.c (do_ifelse, expand_cmp_vec_sequence,
>       expand_compare_loop, expand_block_compare_gpr,
>       expand_strncmp_align_check, expand_strncmp_gpr_sequence): Add branch
>       probability.
> 
> 
> Index: gcc/config/rs6000/rs6000-string.c
> ===================================================================
> --- gcc/config/rs6000/rs6000-string.c (revision 268522)
> +++ gcc/config/rs6000/rs6000-string.c (working copy)
> @@ -35,6 +35,8 @@
>  #include "expr.h"
>  #include "output.h"
>  #include "target.h"
> +#include "profile-count.h"
> +#include "predict.h"
> 
>  /* Expand a block clear operation, and return 1 if successful.  Return 0
>     if we should let the compiler generate normal code.
> @@ -369,6 +371,7 @@
>     B is the second thing to be compared.
>     CR is the condition code reg input, or NULL_RTX.
>     TRUE_LABEL is the label to branch to if the condition is true.
> +   P is the estimated branch probability for the branch.
> 
>     The return value is the CR used for the comparison.
>     If CR is null_rtx, then a new register of CMPMODE is generated.
> @@ -377,7 +380,7 @@
> 
>  static void
>  do_ifelse (machine_mode cmpmode, rtx_code comparison,
> -        rtx a, rtx b, rtx cr, rtx true_label)
> +        rtx a, rtx b, rtx cr, rtx true_label, profile_probability p)
>  {
>    gcc_assert ((a == NULL_RTX && b == NULL_RTX && cr != NULL_RTX)
>             || (a != NULL_RTX && b != NULL_RTX));
> @@ -395,7 +398,8 @@
>    rtx cmp_rtx = gen_rtx_fmt_ee (comparison, VOIDmode, cr, const0_rtx);
> 
>    rtx ifelse = gen_rtx_IF_THEN_ELSE (VOIDmode, cmp_rtx, label_ref, pc_rtx);
> -  rtx j = emit_jump_insn (gen_rtx_SET (pc_rtx, ifelse));
> +  rtx_insn *j = emit_jump_insn (gen_rtx_SET (pc_rtx, ifelse));
> +  add_reg_br_prob_note (j, p);
>    JUMP_LABEL (j) = true_label;
>    LABEL_NUSES (true_label) += 1;
>  }
> @@ -781,7 +785,8 @@
>        rtx lab_ref = gen_rtx_LABEL_REF (VOIDmode, dst_label);
>        rtx ifelse = gen_rtx_IF_THEN_ELSE (VOIDmode, cmp_rtx,
>                                        lab_ref, pc_rtx);
> -      rtx j2 = emit_jump_insn (gen_rtx_SET (pc_rtx, ifelse));
> +      rtx_insn *j2 = emit_jump_insn (gen_rtx_SET (pc_rtx, ifelse));
> +      add_reg_br_prob_note (j2, profile_probability::likely ());
>        JUMP_LABEL (j2) = dst_label;
>        LABEL_NUSES (dst_label) += 1;
> 
> @@ -1036,7 +1041,7 @@
> 
>    /* Difference found is stored here before jump to diff_label.  */
>    rtx diff = gen_reg_rtx (word_mode);
> -  rtx j;
> +  rtx_insn *j;
> 
>    /* Example of generated code for 35 bytes aligned 1 byte.
> 
> @@ -1120,11 +1125,11 @@
>        /* Check for > max_bytes bytes.  We want to bail out as quickly as
>        possible if we have to go over to memcmp.  */
>        do_ifelse (CCmode, GT, bytes_rtx, GEN_INT (max_bytes),
> -              NULL_RTX, library_call_label);
> +              NULL_RTX, library_call_label, profile_probability::even ());
> 
>        /* Check for < loop_bytes bytes.  */
>        do_ifelse (CCmode, LT, bytes_rtx, GEN_INT (loop_bytes),
> -              NULL_RTX, cleanup_label);
> +              NULL_RTX, cleanup_label, profile_probability::even ());
> 
>        /* Loop compare bytes and iterations if bytes>max_bytes.  */
>        rtx mb_reg = gen_reg_rtx (word_mode);
> @@ -1165,7 +1170,7 @@
>       {
>         rtx lab_after = gen_label_rtx ();
>         do_ifelse (CCmode, LE, bytes_rtx, GEN_INT (max_bytes),
> -                  NULL_RTX, lab_after);
> +                  NULL_RTX, lab_after, profile_probability::even ());
>         emit_move_insn (loop_cmp, mb_reg);
>         emit_move_insn (iter, mi_reg);
>         emit_label (lab_after);
> @@ -1236,7 +1241,7 @@
>       }
> 
>        do_ifelse (GET_MODE (dcond), NE, NULL_RTX, NULL_RTX,
> -              dcond, diff_label);
> +              dcond, diff_label, profile_probability::unlikely ());
> 
>        if (TARGET_P9_MISC)
>       {
> @@ -1260,6 +1265,7 @@
>        else
>       j = emit_jump_insn (gen_bdnztf_si (loop_top_label, ctr, ctr,
>                                          eqrtx, dcond));
> +      add_reg_br_prob_note (j, profile_probability::likely ());
>        JUMP_LABEL (j) = loop_top_label;
>        LABEL_NUSES (loop_top_label) += 1;
>      }
> @@ -1272,9 +1278,11 @@
>       code.  If we exit here with a nonzero diff, it is
>       because the second word differed.  */
>    if (TARGET_P9_MISC)
> -    do_ifelse (CCUNSmode, NE, NULL_RTX, NULL_RTX, dcond, diff_label);
> +    do_ifelse (CCUNSmode, NE, NULL_RTX, NULL_RTX, dcond,
> +            diff_label, profile_probability::unlikely ());
>    else
> -    do_ifelse (CCmode, NE, diff, const0_rtx, NULL_RTX, diff_label);
> +    do_ifelse (CCmode, NE, diff, const0_rtx, NULL_RTX,
> +            diff_label, profile_probability::unlikely ());
> 
>    if (library_call_label != NULL && bytes_is_const && bytes > max_bytes)
>      {
> @@ -1317,7 +1325,7 @@
>            loop with a branch to cleanup_label.  */
>         emit_move_insn (target, const0_rtx);
>         do_ifelse (CCmode, EQ, cmp_rem, const0_rtx,
> -                  NULL_RTX, final_label);
> +                  NULL_RTX, final_label, profile_probability::unlikely ());
>       }
> 
>        rtx final_cleanup = gen_label_rtx ();
> @@ -1327,9 +1335,12 @@
>       {
>         /* If remainder length < word length, branch to final
>            cleanup compare.  */
> +     
>         if (!bytes_is_const)
> -         do_ifelse (CCmode, LT, cmp_rem, GEN_INT (load_mode_size),
> -                    NULL_RTX, final_cleanup);
> +         {
> +           do_ifelse (CCmode, LT, cmp_rem, GEN_INT (load_mode_size),
> +                      NULL_RTX, final_cleanup, profile_probability::even ());
> +         }
> 
>         /* load and compare 8B */
>         do_load_for_compare_from_addr (load_mode, d1_1,
> @@ -1354,7 +1365,7 @@
>           }
> 
>         do_ifelse (GET_MODE (dcond), NE, NULL_RTX, NULL_RTX,
> -                  dcond, diff_label);
> +                  dcond, diff_label, profile_probability::even ());
> 
>         do_add3 (src1_addr, src1_addr, GEN_INT (load_mode_size));
>         do_add3 (src2_addr, src2_addr, GEN_INT (load_mode_size));
> @@ -1363,10 +1374,12 @@
>         if (bytes_is_const)
>           bytes_remaining -= load_mode_size;
>         else
> -         /* See if remaining length is now zero.  We previously set
> -            target to 0 so we can just jump to the end.  */
> -         do_ifelse (CCmode, EQ, cmp_rem, const0_rtx,
> -                    NULL_RTX, final_label);
> +         {
> +           /* See if remaining length is now zero.  We previously set
> +              target to 0 so we can just jump to the end.  */
> +           do_ifelse (CCmode, EQ, cmp_rem, const0_rtx, NULL_RTX,
> +                      final_label, profile_probability::unlikely ());
> +         }
> 
>       }
> 
> @@ -1450,7 +1463,7 @@
>            than one loop iteration, in which case go do the overlap
>            load compare path.  */
>         do_ifelse (CCmode, GT, bytes_rtx, GEN_INT (loop_bytes),
> -                  NULL_RTX, nonconst_overlap);
> +                  NULL_RTX, nonconst_overlap, profile_probability::even ());
> 
>         rtx rem4k = gen_reg_rtx (word_mode);
>         rtx dist1 = gen_reg_rtx (word_mode);
> @@ -1460,12 +1473,14 @@
>           emit_insn (gen_andsi3 (dist1, src1_addr, GEN_INT (0xfff)));
>         else
>           emit_insn (gen_anddi3 (dist1, src1_addr, GEN_INT (0xfff)));
> -       do_ifelse (CCmode, LE, dist1, rem4k, NULL_RTX, handle4k_label);
> +       do_ifelse (CCmode, LE, dist1, rem4k, NULL_RTX,
> +                  handle4k_label, profile_probability::very_unlikely ());
>         if (word_mode == SImode)
>           emit_insn (gen_andsi3 (dist2, src2_addr, GEN_INT (0xfff)));
>         else
>           emit_insn (gen_anddi3 (dist2, src2_addr, GEN_INT (0xfff)));
> -       do_ifelse (CCmode, LE, dist2, rem4k, NULL_RTX, handle4k_label);
> +       do_ifelse (CCmode, LE, dist2, rem4k, NULL_RTX,
> +                  handle4k_label, profile_probability::very_unlikely ());
> 
>         /* We don't have a 4k boundary to deal with, so do
>            a load/shift/compare and jump to diff.  */
> @@ -1817,7 +1832,8 @@
>             rtx ne_rtx = gen_rtx_NE (VOIDmode, cr, const0_rtx);
>             rtx ifelse = gen_rtx_IF_THEN_ELSE (VOIDmode, ne_rtx,
>                                                fin_ref, pc_rtx);
> -           rtx j = emit_jump_insn (gen_rtx_SET (pc_rtx, ifelse));
> +           rtx_insn *j = emit_jump_insn (gen_rtx_SET (pc_rtx, ifelse));
> +           add_reg_br_prob_note (j, profile_probability::unlikely ());
>             JUMP_LABEL (j) = final_label;
>             LABEL_NUSES (final_label) += 1;
>           }
> @@ -2095,7 +2111,8 @@
> 
>    rtx ifelse = gen_rtx_IF_THEN_ELSE (VOIDmode, cmp_rtx,
>                                    lab_ref, pc_rtx);
> -  rtx j = emit_jump_insn (gen_rtx_SET (pc_rtx, ifelse));
> +  rtx_insn *j = emit_jump_insn (gen_rtx_SET (pc_rtx, ifelse));
> +  add_reg_br_prob_note (j, profile_probability::unlikely ());
>    JUMP_LABEL (j) = strncmp_label;
>    LABEL_NUSES (strncmp_label) += 1;
>  }
> @@ -2265,7 +2282,8 @@
> 
>             rtx ifelse = gen_rtx_IF_THEN_ELSE (VOIDmode, cmp_rtx,
>                                                lab_ref, pc_rtx);
> -           rtx j = emit_jump_insn (gen_rtx_SET (pc_rtx, ifelse));
> +           rtx_insn *j = emit_jump_insn (gen_rtx_SET (pc_rtx, ifelse));
> +           add_reg_br_prob_note (j, profile_probability::unlikely ());
>             JUMP_LABEL (j) = final_move_label;
>             LABEL_NUSES (final_move_label) += 1;
> 
> @@ -2282,7 +2300,8 @@
> 
>             rtx ifelse0 = gen_rtx_IF_THEN_ELSE (VOIDmode, cmp0eq_rtx,
>                                                lab_ref, pc_rtx);
> -           rtx j0 = emit_jump_insn (gen_rtx_SET (pc_rtx, ifelse0));
> +           rtx_insn *j0 = emit_jump_insn (gen_rtx_SET (pc_rtx, ifelse0));
> +           add_reg_br_prob_note (j0, profile_probability::unlikely ());
>             JUMP_LABEL (j0) = final_move_label;
>             LABEL_NUSES (final_move_label) += 1;
>           }
> @@ -2325,7 +2344,8 @@
> 
>         rtx ifelse = gen_rtx_IF_THEN_ELSE (VOIDmode, cmp_rtx,
>                                            lab_ref, pc_rtx);
> -       rtx j = emit_jump_insn (gen_rtx_SET (pc_rtx, ifelse));
> +       rtx_insn *j = emit_jump_insn (gen_rtx_SET (pc_rtx, ifelse));
> +       add_reg_br_prob_note (j, profile_probability::unlikely ());
>         JUMP_LABEL (j) = dst_label;
>         LABEL_NUSES (dst_label) += 1;
>       }
> 

-- 
Aaron Sawdey, Ph.D.  acsaw...@linux.vnet.ibm.com
050-2/C113  (507) 253-7520 home: 507/263-0782
IBM Linux Technology Center - PPC Toolchain

Reply via email to