This is the second part of the fix for 89112, fixing the conditions that
caused it to happen.

This patch adds REG_BR_PROB notes to the branches generated by inline
expansion of memcmp and strncmp. This prevents any of the code from being
marked as cold and moved to the end of the function, which is what caused
the long branches in 89112. With this patch, the test case for 89112 does
not have any long branches within the expansion of memcmp, and the code
for each memcmp is contiguous.

OK for trunk and 8 backport if bootstrap/regtest passes?

Thanks!

   Aaron

2019-02-04  Aaron Sawdey  <acsaw...@linux.ibm.com>

        * config/rs6000/rs6000-string.c (do_ifelse, expand_cmp_vec_sequence,
        expand_compare_loop, expand_block_compare_gpr,
        expand_strncmp_align_check, expand_strncmp_gpr_sequence): Add branch
        probability.


Index: gcc/config/rs6000/rs6000-string.c
===================================================================
--- gcc/config/rs6000/rs6000-string.c   (revision 268522)
+++ gcc/config/rs6000/rs6000-string.c   (working copy)
@@ -35,6 +35,8 @@
 #include "expr.h"
 #include "output.h"
 #include "target.h"
+#include "profile-count.h"
+#include "predict.h"

 /* Expand a block clear operation, and return 1 if successful.  Return 0
    if we should let the compiler generate normal code.
@@ -369,6 +371,7 @@
    B is the second thing to be compared.
    CR is the condition code reg input, or NULL_RTX.
    TRUE_LABEL is the label to branch to if the condition is true.
+   P is the estimated branch probability for the branch.

    The return value is the CR used for the comparison.
    If CR is null_rtx, then a new register of CMPMODE is generated.
@@ -377,7 +380,7 @@

 static void
 do_ifelse (machine_mode cmpmode, rtx_code comparison,
-          rtx a, rtx b, rtx cr, rtx true_label)
+          rtx a, rtx b, rtx cr, rtx true_label, profile_probability p)
 {
   gcc_assert ((a == NULL_RTX && b == NULL_RTX && cr != NULL_RTX)
              || (a != NULL_RTX && b != NULL_RTX));
@@ -395,7 +398,8 @@
   rtx cmp_rtx = gen_rtx_fmt_ee (comparison, VOIDmode, cr, const0_rtx);

   rtx ifelse = gen_rtx_IF_THEN_ELSE (VOIDmode, cmp_rtx, label_ref, pc_rtx);
-  rtx j = emit_jump_insn (gen_rtx_SET (pc_rtx, ifelse));
+  rtx_insn *j = emit_jump_insn (gen_rtx_SET (pc_rtx, ifelse));
+  add_reg_br_prob_note (j, p);
   JUMP_LABEL (j) = true_label;
   LABEL_NUSES (true_label) += 1;
 }
@@ -781,7 +785,8 @@
       rtx lab_ref = gen_rtx_LABEL_REF (VOIDmode, dst_label);
       rtx ifelse = gen_rtx_IF_THEN_ELSE (VOIDmode, cmp_rtx,
                                         lab_ref, pc_rtx);
-      rtx j2 = emit_jump_insn (gen_rtx_SET (pc_rtx, ifelse));
+      rtx_insn *j2 = emit_jump_insn (gen_rtx_SET (pc_rtx, ifelse));
+      add_reg_br_prob_note (j2, profile_probability::likely ());
       JUMP_LABEL (j2) = dst_label;
       LABEL_NUSES (dst_label) += 1;

@@ -1036,7 +1041,7 @@

   /* Difference found is stored here before jump to diff_label.  */
   rtx diff = gen_reg_rtx (word_mode);
-  rtx j;
+  rtx_insn *j;

   /* Example of generated code for 35 bytes aligned 1 byte.

@@ -1120,11 +1125,11 @@
       /* Check for > max_bytes bytes.  We want to bail out as quickly as
         possible if we have to go over to memcmp.  */
       do_ifelse (CCmode, GT, bytes_rtx, GEN_INT (max_bytes),
-                NULL_RTX, library_call_label);
+                NULL_RTX, library_call_label, profile_probability::even ());

       /* Check for < loop_bytes bytes.  */
       do_ifelse (CCmode, LT, bytes_rtx, GEN_INT (loop_bytes),
-                NULL_RTX, cleanup_label);
+                NULL_RTX, cleanup_label, profile_probability::even ());

       /* Loop compare bytes and iterations if bytes>max_bytes.  */
       rtx mb_reg = gen_reg_rtx (word_mode);
@@ -1165,7 +1170,7 @@
        {
          rtx lab_after = gen_label_rtx ();
          do_ifelse (CCmode, LE, bytes_rtx, GEN_INT (max_bytes),
-                    NULL_RTX, lab_after);
+                    NULL_RTX, lab_after, profile_probability::even ());
          emit_move_insn (loop_cmp, mb_reg);
          emit_move_insn (iter, mi_reg);
          emit_label (lab_after);
@@ -1236,7 +1241,7 @@
        }

       do_ifelse (GET_MODE (dcond), NE, NULL_RTX, NULL_RTX,
-                dcond, diff_label);
+                dcond, diff_label, profile_probability::unlikely ());

       if (TARGET_P9_MISC)
        {
@@ -1260,6 +1265,7 @@
       else
        j = emit_jump_insn (gen_bdnztf_si (loop_top_label, ctr, ctr,
                                           eqrtx, dcond));
+      add_reg_br_prob_note (j, profile_probability::likely ());
       JUMP_LABEL (j) = loop_top_label;
       LABEL_NUSES (loop_top_label) += 1;
     }
@@ -1272,9 +1278,11 @@
      code.  If we exit here with a nonzero diff, it is
      because the second word differed.  */
   if (TARGET_P9_MISC)
-    do_ifelse (CCUNSmode, NE, NULL_RTX, NULL_RTX, dcond, diff_label);
+    do_ifelse (CCUNSmode, NE, NULL_RTX, NULL_RTX, dcond,
+              diff_label, profile_probability::unlikely ());
   else
-    do_ifelse (CCmode, NE, diff, const0_rtx, NULL_RTX, diff_label);
+    do_ifelse (CCmode, NE, diff, const0_rtx, NULL_RTX,
+              diff_label, profile_probability::unlikely ());

   if (library_call_label != NULL && bytes_is_const && bytes > max_bytes)
     {
@@ -1317,7 +1325,7 @@
             loop with a branch to cleanup_label.  */
          emit_move_insn (target, const0_rtx);
          do_ifelse (CCmode, EQ, cmp_rem, const0_rtx,
-                    NULL_RTX, final_label);
+                    NULL_RTX, final_label, profile_probability::unlikely ());
        }

       rtx final_cleanup = gen_label_rtx ();
@@ -1327,9 +1335,12 @@
        {
          /* If remainder length < word length, branch to final
             cleanup compare.  */
+       
          if (!bytes_is_const)
-           do_ifelse (CCmode, LT, cmp_rem, GEN_INT (load_mode_size),
-                      NULL_RTX, final_cleanup);
+           {
+             do_ifelse (CCmode, LT, cmp_rem, GEN_INT (load_mode_size),
+                        NULL_RTX, final_cleanup, profile_probability::even ());
+           }

          /* load and compare 8B */
          do_load_for_compare_from_addr (load_mode, d1_1,
@@ -1354,7 +1365,7 @@
            }

          do_ifelse (GET_MODE (dcond), NE, NULL_RTX, NULL_RTX,
-                    dcond, diff_label);
+                    dcond, diff_label, profile_probability::even ());

          do_add3 (src1_addr, src1_addr, GEN_INT (load_mode_size));
          do_add3 (src2_addr, src2_addr, GEN_INT (load_mode_size));
@@ -1363,10 +1374,12 @@
          if (bytes_is_const)
            bytes_remaining -= load_mode_size;
          else
-           /* See if remaining length is now zero.  We previously set
-              target to 0 so we can just jump to the end.  */
-           do_ifelse (CCmode, EQ, cmp_rem, const0_rtx,
-                      NULL_RTX, final_label);
+           {
+             /* See if remaining length is now zero.  We previously set
+                target to 0 so we can just jump to the end.  */
+             do_ifelse (CCmode, EQ, cmp_rem, const0_rtx, NULL_RTX,
+                        final_label, profile_probability::unlikely ());
+           }

        }

@@ -1450,7 +1463,7 @@
             than one loop iteration, in which case go do the overlap
             load compare path.  */
          do_ifelse (CCmode, GT, bytes_rtx, GEN_INT (loop_bytes),
-                    NULL_RTX, nonconst_overlap);
+                    NULL_RTX, nonconst_overlap, profile_probability::even ());

          rtx rem4k = gen_reg_rtx (word_mode);
          rtx dist1 = gen_reg_rtx (word_mode);
@@ -1460,12 +1473,14 @@
            emit_insn (gen_andsi3 (dist1, src1_addr, GEN_INT (0xfff)));
          else
            emit_insn (gen_anddi3 (dist1, src1_addr, GEN_INT (0xfff)));
-         do_ifelse (CCmode, LE, dist1, rem4k, NULL_RTX, handle4k_label);
+         do_ifelse (CCmode, LE, dist1, rem4k, NULL_RTX,
+                    handle4k_label, profile_probability::very_unlikely ());
          if (word_mode == SImode)
            emit_insn (gen_andsi3 (dist2, src2_addr, GEN_INT (0xfff)));
          else
            emit_insn (gen_anddi3 (dist2, src2_addr, GEN_INT (0xfff)));
-         do_ifelse (CCmode, LE, dist2, rem4k, NULL_RTX, handle4k_label);
+         do_ifelse (CCmode, LE, dist2, rem4k, NULL_RTX,
+                    handle4k_label, profile_probability::very_unlikely ());

          /* We don't have a 4k boundary to deal with, so do
             a load/shift/compare and jump to diff.  */
@@ -1817,7 +1832,8 @@
              rtx ne_rtx = gen_rtx_NE (VOIDmode, cr, const0_rtx);
              rtx ifelse = gen_rtx_IF_THEN_ELSE (VOIDmode, ne_rtx,
                                                 fin_ref, pc_rtx);
-             rtx j = emit_jump_insn (gen_rtx_SET (pc_rtx, ifelse));
+             rtx_insn *j = emit_jump_insn (gen_rtx_SET (pc_rtx, ifelse));
+             add_reg_br_prob_note (j, profile_probability::unlikely ());
              JUMP_LABEL (j) = final_label;
              LABEL_NUSES (final_label) += 1;
            }
@@ -2095,7 +2111,8 @@

   rtx ifelse = gen_rtx_IF_THEN_ELSE (VOIDmode, cmp_rtx,
                                     lab_ref, pc_rtx);
-  rtx j = emit_jump_insn (gen_rtx_SET (pc_rtx, ifelse));
+  rtx_insn *j = emit_jump_insn (gen_rtx_SET (pc_rtx, ifelse));
+  add_reg_br_prob_note (j, profile_probability::unlikely ());
   JUMP_LABEL (j) = strncmp_label;
   LABEL_NUSES (strncmp_label) += 1;
 }
@@ -2265,7 +2282,8 @@

              rtx ifelse = gen_rtx_IF_THEN_ELSE (VOIDmode, cmp_rtx,
                                                 lab_ref, pc_rtx);
-             rtx j = emit_jump_insn (gen_rtx_SET (pc_rtx, ifelse));
+             rtx_insn *j = emit_jump_insn (gen_rtx_SET (pc_rtx, ifelse));
+             add_reg_br_prob_note (j, profile_probability::unlikely ());
              JUMP_LABEL (j) = final_move_label;
              LABEL_NUSES (final_move_label) += 1;

@@ -2282,7 +2300,8 @@

              rtx ifelse0 = gen_rtx_IF_THEN_ELSE (VOIDmode, cmp0eq_rtx,
                                                 lab_ref, pc_rtx);
-             rtx j0 = emit_jump_insn (gen_rtx_SET (pc_rtx, ifelse0));
+             rtx_insn *j0 = emit_jump_insn (gen_rtx_SET (pc_rtx, ifelse0));
+             add_reg_br_prob_note (j0, profile_probability::unlikely ());
              JUMP_LABEL (j0) = final_move_label;
              LABEL_NUSES (final_move_label) += 1;
            }
@@ -2325,7 +2344,8 @@

          rtx ifelse = gen_rtx_IF_THEN_ELSE (VOIDmode, cmp_rtx,
                                             lab_ref, pc_rtx);
-         rtx j = emit_jump_insn (gen_rtx_SET (pc_rtx, ifelse));
+         rtx_insn *j = emit_jump_insn (gen_rtx_SET (pc_rtx, ifelse));
+         add_reg_br_prob_note (j, profile_probability::unlikely ());
          JUMP_LABEL (j) = dst_label;
          LABEL_NUSES (dst_label) += 1;
        }

-- 
Aaron Sawdey, Ph.D.  acsaw...@linux.vnet.ibm.com
050-2/C113  (507) 253-7520 home: 507/263-0782
IBM Linux Technology Center - PPC Toolchain

Reply via email to