On Wed, Jun 1, 2022 at 12:20 AM Richard Sandiford <richard.sandif...@arm.com> wrote: > > "H.J. Lu" <hjl.to...@gmail.com> writes: > > On Mon, May 30, 2022 at 09:35:43AM +0100, Richard Sandiford wrote: > >> "H.J. Lu" <hjl.to...@gmail.com> writes: > >> > --- > >> > RTL DSE tracks redundant constant stores within a basic block. When RTL > >> > loop invariant motion hoists a constant initialization out of the loop > >> > into a separate basic block, the constant store value becomes unknown > >> > within the original basic block. When recording store for RTL DSE, check > >> > if the source register is set only once to a constant by a non-partial > >> > unconditional load. If yes, record the constant as the constant store > >> > source. It eliminates unrolled zero stores after memset 0 in a loop > >> > where a vector register is used as the zero store source. > >> > > >> > Extract find_single_def_src from loop-iv.cc and move it to df-core.cc: > >> > > >> > 1. Rename to df_find_single_def_src. > >> > 2. Change the argument to rtx and use rtx_equal_p. > >> > 3. Return null for partial or conditional defs. > >> > > >> > gcc/ > >> > > >> > PR rtl-optimization/105638 > >> > * df-core.cc (df_find_single_def_src): Moved and renamed from > >> > find_single_def_src in loop-iv.cc. Change the argument to rtx > >> > and use rtx_equal_p. Return null for partial or conditional > >> > defs. > >> > * df.h (df_find_single_def_src): New prototype. > >> > * dse.cc (record_store): Use the constant source if the source > >> > register is set only once. > >> > * loop-iv.cc (find_single_def_src): Moved to df-core.cc. > >> > (replace_single_def_regs): Replace find_single_def_src with > >> > df_find_single_def_src. > >> > > >> > gcc/testsuite/ > >> > > >> > PR rtl-optimization/105638 > >> > * g++.target/i386/pr105638.C: New test. 
> >> > --- > >> > gcc/df-core.cc | 44 +++++++++++++++++++++++ > >> > gcc/df.h | 1 + > >> > gcc/dse.cc | 14 ++++++++ > >> > gcc/loop-iv.cc | 45 +----------------------- > >> > gcc/testsuite/g++.target/i386/pr105638.C | 44 +++++++++++++++++++++++ > >> > 5 files changed, 104 insertions(+), 44 deletions(-) > >> > create mode 100644 gcc/testsuite/g++.target/i386/pr105638.C > >> > > >> > diff --git a/gcc/df-core.cc b/gcc/df-core.cc > >> > index a901b84878f..f9b4de8eb7a 100644 > >> > --- a/gcc/df-core.cc > >> > +++ b/gcc/df-core.cc > >> > @@ -2009,6 +2009,50 @@ df_reg_used (rtx_insn *insn, rtx reg) > >> > return df_find_use (insn, reg) != NULL; > >> > } > >> > > >> > +/* If REG has a single definition, return its known value, otherwise > >> > return > >> > + null. */ > >> > + > >> > +rtx > >> > +df_find_single_def_src (rtx reg) > >> > +{ > >> > + rtx src = NULL_RTX; > >> > + > >> > + /* Don't look through unbounded number of single definition REG > >> > copies, > >> > + there might be loops for sources with uninitialized variables. */ > >> > + for (int cnt = 0; cnt < 128; cnt++) > >> > + { > >> > + df_ref adef = DF_REG_DEF_CHAIN (REGNO (reg)); > >> > + if (adef == NULL || DF_REF_NEXT_REG (adef) != NULL > >> > + || DF_REF_IS_ARTIFICIAL (adef) > >> > + || (DF_REF_FLAGS (adef) > >> > + & (DF_REF_PARTIAL | DF_REF_CONDITIONAL))) > >> > + return NULL_RTX; > >> > + > >> > + rtx set = single_set (DF_REF_INSN (adef)); > >> > + if (set == NULL || !rtx_equal_p (SET_DEST (set), reg)) > >> > + return NULL_RTX; > >> > + > >> > + rtx note = find_reg_equal_equiv_note (DF_REF_INSN (adef)); > >> > + if (note && function_invariant_p (XEXP (note, 0))) > >> > + { > >> > + src = XEXP (note, 0); > >> > + break; > >> > + } > >> > >> Seems simpler to return this directly, rather than break and then > >> check function_invariant_p again. > > > > Fixed. 
> > > >> > >> > + src = SET_SRC (set); > >> > + > >> > + if (REG_P (src)) > >> > + { > >> > + reg = src; > >> > + continue; > >> > + } > >> > + break; > >> > + } > >> > + if (!function_invariant_p (src)) > >> > + return NULL_RTX; > >> > + > >> > + return src; > >> > +} > >> > + > >> > > >> > > >> > /*---------------------------------------------------------------------------- > >> > Debugging and printing functions. > >> > diff --git a/gcc/df.h b/gcc/df.h > >> > index bd329205d08..71e249ad20a 100644 > >> > --- a/gcc/df.h > >> > +++ b/gcc/df.h > >> > @@ -991,6 +991,7 @@ extern df_ref df_find_def (rtx_insn *, rtx); > >> > extern bool df_reg_defined (rtx_insn *, rtx); > >> > extern df_ref df_find_use (rtx_insn *, rtx); > >> > extern bool df_reg_used (rtx_insn *, rtx); > >> > +extern rtx df_find_single_def_src (rtx); > >> > extern void df_worklist_dataflow (struct dataflow *,bitmap, int *, int); > >> > extern void df_print_regset (FILE *file, const_bitmap r); > >> > extern void df_print_word_regset (FILE *file, const_bitmap r); > >> > diff --git a/gcc/dse.cc b/gcc/dse.cc > >> > index 30c11cee034..c915266f025 100644 > >> > --- a/gcc/dse.cc > >> > +++ b/gcc/dse.cc > >> > @@ -1508,6 +1508,20 @@ record_store (rtx body, bb_info_t bb_info) > >> > > >> > if (tem && CONSTANT_P (tem)) > >> > const_rhs = tem; > >> > + else > >> > + { > >> > + /* If RHS is set only once to a constant, set CONST_RHS > >> > + to the constant. */ > >> > + rtx def_src = df_find_single_def_src (rhs); > >> > + if (def_src != nullptr && CONSTANT_P (def_src)) > >> > + { > >> > + df_ref def = DF_REG_DEF_CHAIN (REGNO (rhs)); > >> > + rtx_insn *def_insn = DF_REF_INSN (def); > >> > + rtx def_body = single_set (def_insn); > >> > + if (rhs == SET_DEST (def_body)) > >> > + const_rhs = def_src; > >> > + } > >> > + } > >> > >> This shouldn't be necessary now: we can rely on def_src being correct. > >> > >> I.e. 
I think this can just be: > >> > >> rtx def_src = df_find_single_def_src (rhs); > >> if (def_src != nullptr && CONSTANT_P (def_src)) > >> const_rhs = def_src; > > > > Fixed. > > > >> > >> Thanks, > >> Richard > >> > > > > Here is the v4 patch. OK for master? > > > > Thanks. > > > > > > H.J. > > --- > > RTL DSE tracks redundant constant stores within a basic block. When RTL > > loop invariant motion hoists a constant initialization out of the loop > > into a separate basic block, the constant store value becomes unknown > > within the original basic block. When recording store for RTL DSE, check > > if the source register is set only once to a constant by a non-partial > > unconditional load. If yes, record the constant as the constant store > > source. It eliminates unrolled zero stores after memset 0 in a loop > > where a vector register is used as the zero store source. > > > > gcc/ > > > > PR rtl-optimization/105638 > > * df-core.cc (df_find_single_def_src): Moved and renamed from > > find_single_def_src in loop-iv.cc. Change the argument to rtx > > and use rtx_equal_p. Return null for partial or conditional > > defs. > > * df.h (df_find_single_def_src): New prototype. > > * dse.cc (record_store): Use the constant source if the source > > register is set only once. > > * loop-iv.cc (find_single_def_src): Moved to df-core.cc. > > (replace_single_def_regs): Replace find_single_def_src with > > df_find_single_def_src. > > > > gcc/testsuite/ > > > > PR rtl-optimization/105638 > > * g++.target/i386/pr105638.C: New test. 
> > --- > > gcc/df-core.cc | 44 +++++++++++++++++++++++ > > gcc/df.h | 1 + > > gcc/dse.cc | 8 +++++ > > gcc/loop-iv.cc | 45 +----------------------- > > gcc/testsuite/g++.target/i386/pr105638.C | 44 +++++++++++++++++++++++ > > 5 files changed, 98 insertions(+), 44 deletions(-) > > create mode 100644 gcc/testsuite/g++.target/i386/pr105638.C > > > > diff --git a/gcc/df-core.cc b/gcc/df-core.cc > > index a901b84878f..e56d9bcf760 100644 > > --- a/gcc/df-core.cc > > +++ b/gcc/df-core.cc > > @@ -2009,6 +2009,50 @@ df_reg_used (rtx_insn *insn, rtx reg) > > return df_find_use (insn, reg) != NULL; > > } > > > > +/* If REG has a single definition, return its known value, otherwise return > > + null. */ > > + > > +rtx > > +df_find_single_def_src (rtx reg) > > +{ > > + rtx src = NULL_RTX; > > + > > + /* Don't look through unbounded number of single definition REG copies, > > + there might be loops for sources with uninitialized variables. */ > > + for (int cnt = 0; cnt < 128; cnt++) > > + { > > + df_ref adef = DF_REG_DEF_CHAIN (REGNO (reg)); > > + if (adef == NULL || DF_REF_NEXT_REG (adef) != NULL > > + || DF_REF_IS_ARTIFICIAL (adef) > > + || (DF_REF_FLAGS (adef) > > + & (DF_REF_PARTIAL | DF_REF_CONDITIONAL))) > > + return NULL_RTX; > > + > > + rtx set = single_set (DF_REF_INSN (adef)); > > + if (set == NULL || !rtx_equal_p (SET_DEST (set), reg)) > > + return NULL_RTX; > > + > > + rtx note = find_reg_equal_equiv_note (DF_REF_INSN (adef)); > > + if (note && function_invariant_p (XEXP (note, 0))) > > + { > > + src = XEXP (note, 0); > > + return src; > > Seems simpler without the assignment: > > return XEXP (note, 0); > > OK with that change, thanks.
Fixed. > Richard > > This is the patch I am checking in. Thanks. -- H.J.
From 99d1fe3790275e0eaa588637f90f0ff1e5b8d117 Mon Sep 17 00:00:00 2001 From: "H.J. Lu" <hjl.to...@gmail.com> Date: Wed, 18 May 2022 13:00:47 -0700 Subject: [PATCH] DSE: Use the constant store source if possible RTL DSE tracks redundant constant stores within a basic block. When RTL loop invariant motion hoists a constant initialization out of the loop into a separate basic block, the constant store value becomes unknown within the original basic block. When recording store for RTL DSE, check if the source register is set only once to a constant by a non-partial unconditional load. If yes, record the constant as the constant store source. It eliminates unrolled zero stores after memset 0 in a loop where a vector register is used as the zero store source. gcc/ PR rtl-optimization/105638 * df-core.cc (df_find_single_def_src): Moved and renamed from find_single_def_src in loop-iv.cc. Change the argument to rtx and use rtx_equal_p. Return null for partial or conditional defs. * df.h (df_find_single_def_src): New prototype. * dse.cc (record_store): Use the constant source if the source register is set only once. * loop-iv.cc (find_single_def_src): Moved to df-core.cc. (replace_single_def_regs): Replace find_single_def_src with df_find_single_def_src. gcc/testsuite/ PR rtl-optimization/105638 * g++.target/i386/pr105638.C: New test. --- gcc/df-core.cc | 41 +++++++++++++++++++++ gcc/df.h | 1 + gcc/dse.cc | 8 +++++ gcc/loop-iv.cc | 45 +----------------------- gcc/testsuite/g++.target/i386/pr105638.C | 44 +++++++++++++++++++++++ 5 files changed, 95 insertions(+), 44 deletions(-) create mode 100644 gcc/testsuite/g++.target/i386/pr105638.C diff --git a/gcc/df-core.cc b/gcc/df-core.cc index a901b84878f..e3a56bf6a9f 100644 --- a/gcc/df-core.cc +++ b/gcc/df-core.cc @@ -2009,6 +2009,47 @@ df_reg_used (rtx_insn *insn, rtx reg) return df_find_use (insn, reg) != NULL; } +/* If REG has a single definition, return its known value, otherwise return + null. 
*/ + +rtx +df_find_single_def_src (rtx reg) +{ + rtx src = NULL_RTX; + + /* Don't look through unbounded number of single definition REG copies, + there might be loops for sources with uninitialized variables. */ + for (int cnt = 0; cnt < 128; cnt++) + { + df_ref adef = DF_REG_DEF_CHAIN (REGNO (reg)); + if (adef == NULL || DF_REF_NEXT_REG (adef) != NULL + || DF_REF_IS_ARTIFICIAL (adef) + || (DF_REF_FLAGS (adef) + & (DF_REF_PARTIAL | DF_REF_CONDITIONAL))) + return NULL_RTX; + + rtx set = single_set (DF_REF_INSN (adef)); + if (set == NULL || !rtx_equal_p (SET_DEST (set), reg)) + return NULL_RTX; + + rtx note = find_reg_equal_equiv_note (DF_REF_INSN (adef)); + if (note && function_invariant_p (XEXP (note, 0))) + return XEXP (note, 0); + src = SET_SRC (set); + + if (REG_P (src)) + { + reg = src; + continue; + } + break; + } + if (!function_invariant_p (src)) + return NULL_RTX; + + return src; +} + /*---------------------------------------------------------------------------- Debugging and printing functions. diff --git a/gcc/df.h b/gcc/df.h index bd329205d08..71e249ad20a 100644 --- a/gcc/df.h +++ b/gcc/df.h @@ -991,6 +991,7 @@ extern df_ref df_find_def (rtx_insn *, rtx); extern bool df_reg_defined (rtx_insn *, rtx); extern df_ref df_find_use (rtx_insn *, rtx); extern bool df_reg_used (rtx_insn *, rtx); +extern rtx df_find_single_def_src (rtx); extern void df_worklist_dataflow (struct dataflow *,bitmap, int *, int); extern void df_print_regset (FILE *file, const_bitmap r); extern void df_print_word_regset (FILE *file, const_bitmap r); diff --git a/gcc/dse.cc b/gcc/dse.cc index 30c11cee034..994c60dc189 100644 --- a/gcc/dse.cc +++ b/gcc/dse.cc @@ -1508,6 +1508,14 @@ record_store (rtx body, bb_info_t bb_info) if (tem && CONSTANT_P (tem)) const_rhs = tem; + else + { + /* If RHS is set only once to a constant, set CONST_RHS + to the constant. 
*/ + rtx def_src = df_find_single_def_src (rhs); + if (def_src != nullptr && CONSTANT_P (def_src)) + const_rhs = def_src; + } } } diff --git a/gcc/loop-iv.cc b/gcc/loop-iv.cc index 0eafe7d2362..d639336445a 100644 --- a/gcc/loop-iv.cc +++ b/gcc/loop-iv.cc @@ -1378,49 +1378,6 @@ simple_rhs_p (rtx rhs) } } -/* If REGNO has a single definition, return its known value, otherwise return - null. */ - -static rtx -find_single_def_src (unsigned int regno) -{ - rtx src = NULL_RTX; - - /* Don't look through unbounded number of single definition REG copies, - there might be loops for sources with uninitialized variables. */ - for (int cnt = 0; cnt < 128; cnt++) - { - df_ref adef = DF_REG_DEF_CHAIN (regno); - if (adef == NULL || DF_REF_NEXT_REG (adef) != NULL - || DF_REF_IS_ARTIFICIAL (adef)) - return NULL_RTX; - - rtx set = single_set (DF_REF_INSN (adef)); - if (set == NULL || !REG_P (SET_DEST (set)) - || REGNO (SET_DEST (set)) != regno) - return NULL_RTX; - - rtx note = find_reg_equal_equiv_note (DF_REF_INSN (adef)); - if (note && function_invariant_p (XEXP (note, 0))) - { - src = XEXP (note, 0); - break; - } - src = SET_SRC (set); - - if (REG_P (src)) - { - regno = REGNO (src); - continue; - } - break; - } - if (!function_invariant_p (src)) - return NULL_RTX; - - return src; -} - /* If any registers in *EXPR that have a single definition, try to replace them with the known-equivalent values. */ @@ -1433,7 +1390,7 @@ replace_single_def_regs (rtx *expr) { rtx x = *iter; if (REG_P (x)) - if (rtx new_x = find_single_def_src (REGNO (x))) + if (rtx new_x = df_find_single_def_src (x)) { *expr = simplify_replace_rtx (*expr, x, new_x); goto repeat; diff --git a/gcc/testsuite/g++.target/i386/pr105638.C b/gcc/testsuite/g++.target/i386/pr105638.C new file mode 100644 index 00000000000..ff40a459de1 --- /dev/null +++ b/gcc/testsuite/g++.target/i386/pr105638.C @@ -0,0 +1,44 @@ +/* { dg-do compile { target { ! 
ia32 } } } */ +/* { dg-options "-std=gnu++20 -O2 -march=skylake" } */ +/* { dg-final { scan-assembler-not "vpxor" } } */ + +#include <stdint.h> +#include <vector> +#include <tr1/array> + +class FastBoard { +public: + typedef std::pair<int, int> movescore_t; + typedef std::tr1::array<movescore_t, 24> scoredlist_t; + +protected: + std::vector<int> m_critical; + + int m_boardsize; +}; + +class FastState { +public: + FastBoard board; + + int movenum; +protected: + FastBoard::scoredlist_t scoredmoves; +}; + +class KoState : public FastState { +private: + std::vector<uint64_t> ko_hash_history; + std::vector<uint64_t> hash_history; +}; + +class GameState : public KoState { +public: + void foo (); +private: + std::vector<KoState> game_history; +}; + +void GameState::foo() { + game_history.resize(movenum); +} -- 2.36.1