v1: https://gcc.gnu.org/pipermail/gcc-patches/2021-January/563800.html
v1 -> v2: Allow (mem) -> (subreg) propagation only for single uses. Boostrapped and regtested on x86_64-redhat-linux, ppc64le-redhat-linux and s390x-redhat-linux. Ok for master? Suppose we have: (set (reg/v:TF 63) (mem/c:TF (reg/v:DI 62))) (set (reg:FPRX2 66) (subreg:FPRX2 (reg/v:TF 63) 0)) It is clearly profitable to propagate the first insn into the second one and get: (set (reg:FPRX2 66) (mem/c:FPRX2 (reg/v:DI 62))) fwprop actually manages to perform this, but doesn't think the result is worth it, which results in unnecessary store/load sequences on s390. Improve the situation by classifying SUBREG -> MEM changes as profitable. gcc/ChangeLog: 2021-01-15 Ilya Leoshkevich <i...@linux.ibm.com> * fwprop.c (fwprop_propagation::classify_result): Allow (subreg (mem)) simplifications. --- gcc/fwprop.c | 22 +++++++++++++++++----- 1 file changed, 17 insertions(+), 5 deletions(-) diff --git a/gcc/fwprop.c b/gcc/fwprop.c index eff8f7cc141..02d3d507cbc 100644 --- a/gcc/fwprop.c +++ b/gcc/fwprop.c @@ -176,7 +176,7 @@ namespace static const uint16_t CONSTANT = FIRST_SPARE_RESULT << 1; static const uint16_t PROFITABLE = FIRST_SPARE_RESULT << 2; - fwprop_propagation (rtx_insn *, rtx, rtx); + fwprop_propagation (rtx_insn *, insn_info *, rtx, rtx); bool changed_mem_p () const { return result_flags & CHANGED_MEM; } bool folded_to_constants_p () const; @@ -185,13 +185,18 @@ namespace bool check_mem (int, rtx) final override; void note_simplification (int, uint16_t, rtx, rtx) final override; uint16_t classify_result (rtx, rtx); + + private: + const bool single_use_p; }; } /* Prepare to replace FROM with TO in INSN. */ -fwprop_propagation::fwprop_propagation (rtx_insn *insn, rtx from, rtx to) - : insn_propagation (insn, from, to) +fwprop_propagation::fwprop_propagation (rtx_insn *insn, insn_info *def_insn, + rtx from, rtx to) + : insn_propagation (insn, from, to), + single_use_p (def_insn->num_uses () == 1) { should_check_mems = true; should_note_simplifications = true; @@ -262,6 +267,13 @@ fwprop_propagation::classify_result (rtx old_rtx, rtx new_rtx) && GET_MODE (new_rtx) == GET_MODE_INNER (GET_MODE (from))) return PROFITABLE; + /* Allow (subreg (mem)) -> (mem) simplifications. Do not allow propagation + of (mem)s into multiple uses, since those are not profitable, as well as + creating new (mem/v)s, since DCE will not remove the old ones. */ + if (single_use_p && SUBREG_P (old_rtx) && MEM_P (new_rtx) + && !MEM_VOLATILE_P (new_rtx)) + return PROFITABLE; + return 0; } @@ -363,7 +375,7 @@ try_fwprop_subst_note (insn_info *use_insn, insn_info *def_insn, rtx_insn *use_rtl = use_insn->rtl (); insn_change_watermark watermark; - fwprop_propagation prop (use_rtl, dest, src); + fwprop_propagation prop (use_rtl, def_insn, dest, src); if (!prop.apply_to_rvalue (&XEXP (note, 0))) { if (dump_file && (dump_flags & TDF_DETAILS)) @@ -426,7 +438,7 @@ try_fwprop_subst_pattern (obstack_watermark &attempt, insn_change &use_change, rtx_insn *use_rtl = use_insn->rtl (); insn_change_watermark watermark; - fwprop_propagation prop (use_rtl, dest, src); + fwprop_propagation prop (use_rtl, def_insn, dest, src); if (!prop.apply_to_pattern (loc)) { if (dump_file && (dump_flags & TDF_DETAILS)) -- 2.26.2