I missed two more conditional branches created by inline expansion that should have had branch probability notes; this follow-up patch adds them.
2019-02-08 Aaron Sawdey <acsaw...@linux.ibm.com> * config/rs6000/rs6000-string.c (expand_compare_loop, expand_block_compare): Insert REG_BR_PROB notes in inline expansion of memcmp/strncmp. Index: gcc/config/rs6000/rs6000-string.c =================================================================== --- gcc/config/rs6000/rs6000-string.c (revision 268547) +++ gcc/config/rs6000/rs6000-string.c (working copy) @@ -1525,6 +1525,7 @@ else j = emit_jump_insn (gen_bdnztf_si (fc_loop, ctr, ctr, eqrtx, cond)); + add_reg_br_prob_note (j, profile_probability::likely ()); JUMP_LABEL (j) = fc_loop; LABEL_NUSES (fc_loop) += 1; @@ -1897,6 +1898,7 @@ rtx ifelse = gen_rtx_IF_THEN_ELSE (VOIDmode, ne_rtx, cvt_ref, pc_rtx); rtx j = emit_jump_insn (gen_rtx_SET (pc_rtx, ifelse)); + add_reg_br_prob_note (j, profile_probability::likely ()); JUMP_LABEL (j) = convert_label; LABEL_NUSES (convert_label) += 1; } Pre-approved by Segher for trunk and backport to 8, will commit after regtest completes. Aaron On 2/4/19 1:06 PM, Aaron Sawdey wrote: > This is the second part of the fix for 89112, fixing the conditions that > caused it to happen. > This patch adds REG_BR_PROB notes to the branches generated by inline > expansion of memcmp > and strncmp. This prevents any of the code from being marked as cold and > moved to the end > of the function, which is what caused the long branches in 89112. With this > patch, the test > case for 89112 does not have any long branches within the expansion of > memcmp, and the code > for each memcmp is contiguous. > > OK for trunk and 8 backport if bootstrap/regtest passes? > > Thanks! > > Aaron > > 2019-02-04 Aaron Sawdey <acsaw...@linux.ibm.com> > > * config/rs6000/rs6000-string.c (do_ifelse, expand_cmp_vec_sequence, > expand_compare_loop, expand_block_compare_gpr, > expand_strncmp_align_check, expand_strncmp_gpr_sequence): add branch > probability. 
> > > Index: gcc/config/rs6000/rs6000-string.c > =================================================================== > --- gcc/config/rs6000/rs6000-string.c (revision 268522) > +++ gcc/config/rs6000/rs6000-string.c (working copy) > @@ -35,6 +35,8 @@ > #include "expr.h" > #include "output.h" > #include "target.h" > +#include "profile-count.h" > +#include "predict.h" > > /* Expand a block clear operation, and return 1 if successful. Return 0 > if we should let the compiler generate normal code. > @@ -369,6 +371,7 @@ > B is the second thing to be compared. > CR is the condition code reg input, or NULL_RTX. > TRUE_LABEL is the label to branch to if the condition is true. > + P is the estimated branch probability for the branch. > > The return value is the CR used for the comparison. > If CR is null_rtx, then a new register of CMPMODE is generated. > @@ -377,7 +380,7 @@ > > static void > do_ifelse (machine_mode cmpmode, rtx_code comparison, > - rtx a, rtx b, rtx cr, rtx true_label) > + rtx a, rtx b, rtx cr, rtx true_label, profile_probability p) > { > gcc_assert ((a == NULL_RTX && b == NULL_RTX && cr != NULL_RTX) > || (a != NULL_RTX && b != NULL_RTX)); > @@ -395,7 +398,8 @@ > rtx cmp_rtx = gen_rtx_fmt_ee (comparison, VOIDmode, cr, const0_rtx); > > rtx ifelse = gen_rtx_IF_THEN_ELSE (VOIDmode, cmp_rtx, label_ref, pc_rtx); > - rtx j = emit_jump_insn (gen_rtx_SET (pc_rtx, ifelse)); > + rtx_insn *j = emit_jump_insn (gen_rtx_SET (pc_rtx, ifelse)); > + add_reg_br_prob_note (j, p); > JUMP_LABEL (j) = true_label; > LABEL_NUSES (true_label) += 1; > } > @@ -781,7 +785,8 @@ > rtx lab_ref = gen_rtx_LABEL_REF (VOIDmode, dst_label); > rtx ifelse = gen_rtx_IF_THEN_ELSE (VOIDmode, cmp_rtx, > lab_ref, pc_rtx); > - rtx j2 = emit_jump_insn (gen_rtx_SET (pc_rtx, ifelse)); > + rtx_insn *j2 = emit_jump_insn (gen_rtx_SET (pc_rtx, ifelse)); > + add_reg_br_prob_note (j2, profile_probability::likely ()); > JUMP_LABEL (j2) = dst_label; > LABEL_NUSES (dst_label) += 1; > > @@ -1036,7 +1041,7 @@ > > 
/* Difference found is stored here before jump to diff_label. */ > rtx diff = gen_reg_rtx (word_mode); > - rtx j; > + rtx_insn *j; > > /* Example of generated code for 35 bytes aligned 1 byte. > > @@ -1120,11 +1125,11 @@ > /* Check for > max_bytes bytes. We want to bail out as quickly as > possible if we have to go over to memcmp. */ > do_ifelse (CCmode, GT, bytes_rtx, GEN_INT (max_bytes), > - NULL_RTX, library_call_label); > + NULL_RTX, library_call_label, profile_probability::even ()); > > /* Check for < loop_bytes bytes. */ > do_ifelse (CCmode, LT, bytes_rtx, GEN_INT (loop_bytes), > - NULL_RTX, cleanup_label); > + NULL_RTX, cleanup_label, profile_probability::even ()); > > /* Loop compare bytes and iterations if bytes>max_bytes. */ > rtx mb_reg = gen_reg_rtx (word_mode); > @@ -1165,7 +1170,7 @@ > { > rtx lab_after = gen_label_rtx (); > do_ifelse (CCmode, LE, bytes_rtx, GEN_INT (max_bytes), > - NULL_RTX, lab_after); > + NULL_RTX, lab_after, profile_probability::even ()); > emit_move_insn (loop_cmp, mb_reg); > emit_move_insn (iter, mi_reg); > emit_label (lab_after); > @@ -1236,7 +1241,7 @@ > } > > do_ifelse (GET_MODE (dcond), NE, NULL_RTX, NULL_RTX, > - dcond, diff_label); > + dcond, diff_label, profile_probability::unlikely ()); > > if (TARGET_P9_MISC) > { > @@ -1260,6 +1265,7 @@ > else > j = emit_jump_insn (gen_bdnztf_si (loop_top_label, ctr, ctr, > eqrtx, dcond)); > + add_reg_br_prob_note (j, profile_probability::likely ()); > JUMP_LABEL (j) = loop_top_label; > LABEL_NUSES (loop_top_label) += 1; > } > @@ -1272,9 +1278,11 @@ > code. If we exit here with a nonzero diff, it is > because the second word differed. 
*/ > if (TARGET_P9_MISC) > - do_ifelse (CCUNSmode, NE, NULL_RTX, NULL_RTX, dcond, diff_label); > + do_ifelse (CCUNSmode, NE, NULL_RTX, NULL_RTX, dcond, > + diff_label, profile_probability::unlikely ()); > else > - do_ifelse (CCmode, NE, diff, const0_rtx, NULL_RTX, diff_label); > + do_ifelse (CCmode, NE, diff, const0_rtx, NULL_RTX, > + diff_label, profile_probability::unlikely ()); > > if (library_call_label != NULL && bytes_is_const && bytes > max_bytes) > { > @@ -1317,7 +1325,7 @@ > loop with a branch to cleanup_label. */ > emit_move_insn (target, const0_rtx); > do_ifelse (CCmode, EQ, cmp_rem, const0_rtx, > - NULL_RTX, final_label); > + NULL_RTX, final_label, profile_probability::unlikely ()); > } > > rtx final_cleanup = gen_label_rtx (); > @@ -1327,9 +1335,12 @@ > { > /* If remainder length < word length, branch to final > cleanup compare. */ > + > if (!bytes_is_const) > - do_ifelse (CCmode, LT, cmp_rem, GEN_INT (load_mode_size), > - NULL_RTX, final_cleanup); > + { > + do_ifelse (CCmode, LT, cmp_rem, GEN_INT (load_mode_size), > + NULL_RTX, final_cleanup, profile_probability::even ()); > + } > > /* load and compare 8B */ > do_load_for_compare_from_addr (load_mode, d1_1, > @@ -1354,7 +1365,7 @@ > } > > do_ifelse (GET_MODE (dcond), NE, NULL_RTX, NULL_RTX, > - dcond, diff_label); > + dcond, diff_label, profile_probability::even ()); > > do_add3 (src1_addr, src1_addr, GEN_INT (load_mode_size)); > do_add3 (src2_addr, src2_addr, GEN_INT (load_mode_size)); > @@ -1363,10 +1374,12 @@ > if (bytes_is_const) > bytes_remaining -= load_mode_size; > else > - /* See if remaining length is now zero. We previously set > - target to 0 so we can just jump to the end. */ > - do_ifelse (CCmode, EQ, cmp_rem, const0_rtx, > - NULL_RTX, final_label); > + { > + /* See if remaining length is now zero. We previously set > + target to 0 so we can just jump to the end. 
*/ > + do_ifelse (CCmode, EQ, cmp_rem, const0_rtx, NULL_RTX, > + final_label, profile_probability::unlikely ()); > + } > > } > > @@ -1450,7 +1463,7 @@ > than one loop iteration, in which case go do the overlap > load compare path. */ > do_ifelse (CCmode, GT, bytes_rtx, GEN_INT (loop_bytes), > - NULL_RTX, nonconst_overlap); > + NULL_RTX, nonconst_overlap, profile_probability::even ()); > > rtx rem4k = gen_reg_rtx (word_mode); > rtx dist1 = gen_reg_rtx (word_mode); > @@ -1460,12 +1473,14 @@ > emit_insn (gen_andsi3 (dist1, src1_addr, GEN_INT (0xfff))); > else > emit_insn (gen_anddi3 (dist1, src1_addr, GEN_INT (0xfff))); > - do_ifelse (CCmode, LE, dist1, rem4k, NULL_RTX, handle4k_label); > + do_ifelse (CCmode, LE, dist1, rem4k, NULL_RTX, > + handle4k_label, profile_probability::very_unlikely ()); > if (word_mode == SImode) > emit_insn (gen_andsi3 (dist2, src2_addr, GEN_INT (0xfff))); > else > emit_insn (gen_anddi3 (dist2, src2_addr, GEN_INT (0xfff))); > - do_ifelse (CCmode, LE, dist2, rem4k, NULL_RTX, handle4k_label); > + do_ifelse (CCmode, LE, dist2, rem4k, NULL_RTX, > + handle4k_label, profile_probability::very_unlikely ()); > > /* We don't have a 4k boundary to deal with, so do > a load/shift/compare and jump to diff. 
*/ > @@ -1817,7 +1832,8 @@ > rtx ne_rtx = gen_rtx_NE (VOIDmode, cr, const0_rtx); > rtx ifelse = gen_rtx_IF_THEN_ELSE (VOIDmode, ne_rtx, > fin_ref, pc_rtx); > - rtx j = emit_jump_insn (gen_rtx_SET (pc_rtx, ifelse)); > + rtx_insn *j = emit_jump_insn (gen_rtx_SET (pc_rtx, ifelse)); > + add_reg_br_prob_note (j, profile_probability::unlikely ()); > JUMP_LABEL (j) = final_label; > LABEL_NUSES (final_label) += 1; > } > @@ -2095,7 +2111,8 @@ > > rtx ifelse = gen_rtx_IF_THEN_ELSE (VOIDmode, cmp_rtx, > lab_ref, pc_rtx); > - rtx j = emit_jump_insn (gen_rtx_SET (pc_rtx, ifelse)); > + rtx_insn *j = emit_jump_insn (gen_rtx_SET (pc_rtx, ifelse)); > + add_reg_br_prob_note (j, profile_probability::unlikely ()); > JUMP_LABEL (j) = strncmp_label; > LABEL_NUSES (strncmp_label) += 1; > } > @@ -2265,7 +2282,8 @@ > > rtx ifelse = gen_rtx_IF_THEN_ELSE (VOIDmode, cmp_rtx, > lab_ref, pc_rtx); > - rtx j = emit_jump_insn (gen_rtx_SET (pc_rtx, ifelse)); > + rtx_insn *j = emit_jump_insn (gen_rtx_SET (pc_rtx, ifelse)); > + add_reg_br_prob_note (j, profile_probability::unlikely ()); > JUMP_LABEL (j) = final_move_label; > LABEL_NUSES (final_move_label) += 1; > > @@ -2282,7 +2300,8 @@ > > rtx ifelse0 = gen_rtx_IF_THEN_ELSE (VOIDmode, cmp0eq_rtx, > lab_ref, pc_rtx); > - rtx j0 = emit_jump_insn (gen_rtx_SET (pc_rtx, ifelse0)); > + rtx_insn *j0 = emit_jump_insn (gen_rtx_SET (pc_rtx, ifelse0)); > + add_reg_br_prob_note (j0, profile_probability::unlikely ()); > JUMP_LABEL (j0) = final_move_label; > LABEL_NUSES (final_move_label) += 1; > } > @@ -2325,7 +2344,8 @@ > > rtx ifelse = gen_rtx_IF_THEN_ELSE (VOIDmode, cmp_rtx, > lab_ref, pc_rtx); > - rtx j = emit_jump_insn (gen_rtx_SET (pc_rtx, ifelse)); > + rtx_insn *j = emit_jump_insn (gen_rtx_SET (pc_rtx, ifelse)); > + add_reg_br_prob_note (j, profile_probability::unlikely ()); > JUMP_LABEL (j) = dst_label; > LABEL_NUSES (dst_label) += 1; > } > -- Aaron Sawdey, Ph.D. 
acsaw...@linux.vnet.ibm.com 050-2/C113 (507) 253-7520 home: 507/263-0782 IBM Linux Technology Center - PPC Toolchain