> -----Original Message----- > From: H.J. Lu <hjl.to...@gmail.com> > Sent: Wednesday, September 15, 2021 10:14 PM > To: Cui, Lili <lili....@intel.com> > Cc: Uros Bizjak <ubiz...@gmail.com>; GCC Patches <gcc- > patc...@gcc.gnu.org>; Liu, Hongtao <hongtao....@intel.com> > Subject: Re: [PATCH 4/4] [PATCH 4/4] x86: Add > TARGET_SSE_PARTIAL_REG_[FP_]CONVERTS_DEPENDENCY > > There is no need to add [PATCH N/4] in the first line of the git commit > message. "git format-patch" or "git send-email" will add them automatically. > Thanks for the reminder, I didn't notice it before.
> On Wed, Sep 15, 2021 at 1:10 AM <lili....@intel.com> wrote: > > > > From: "H.J. Lu" <hjl.to...@gmail.com> > > > > 1. Replace TARGET_SSE_PARTIAL_REG_DEPENDENCY with > > TARGET_SSE_PARTIAL_REG_FP_CONVERTS_DEPENDENCY in SSE FP to FP > splitters. > > 2. Replace TARGET_SSE_PARTIAL_REG_DEPENDENCY with > > TARGET_SSE_PARTIAL_REG_CONVERTS_DEPENDENCY in SSE INT to FP > splitters. > > 3. Also check TARGET_SSE_PARTIAL_REG_FP_CONVERTS_DEPENDENCY > and > > TARGET_SSE_PARTIAL_REG_CONVERTS_DEPENDENCY when handling > avx_partial_xmm_update > > attribute. Don't convert AVX partial XMM register update if there is > > no partial SSE register dependency for SSE conversion. > > > > gcc/ > > > > * config/i386/i386-features.c (remove_partial_avx_dependency): > > Also check TARGET_SSE_PARTIAL_REG_FP_CONVERTS_DEPENDENCY > > and TARGET_SSE_PARTIAL_REG_CONVERTS_DEPENDENCY before > generating > > vxorps. > > * config/i386/i386.h > (TARGET_SSE_PARTIAL_REG_FP_CONVERTS_DEPENDENCY): > > New. > > (TARGET_SSE_PARTIAL_REG_CONVERTS_DEPENDENCY): Likewise. > > * config/i386/i386.md (SSE FP to FP splitters): Replace > > TARGET_SSE_PARTIAL_REG_DEPENDENCY with > > TARGET_SSE_PARTIAL_REG_FP_CONVERTS_DEPENDENCY. > > (SSE INT to FP splitter): Replace > TARGET_SSE_PARTIAL_REG_DEPENDENCY > > with TARGET_SSE_PARTIAL_REG_CONVERTS_DEPENDENCY. > > * config/i386/x86-tune.def > > (X86_TUNE_SSE_PARTIAL_REG_FP_CONVERTS_DEPENDENCY): New. > > (X86_TUNE_SSE_PARTIAL_REG_CONVERTS_DEPENDENCY): Likewise. > > > > gcc/testsuite/ > > > > * gcc.target/i386/avx-covert-1.c: New file. > > * gcc.target/i386/avx-fp-covert-1.c: Likewise. > > * gcc.target/i386/avx-int-covert-1.c: Likewise. > > * gcc.target/i386/sse-covert-1.c: Likewise. > > * gcc.target/i386/sse-fp-covert-1.c: Likewise. > > * gcc.target/i386/sse-int-covert-1.c: Likewise. 
> > --- > > gcc/config/i386/i386-features.c | 6 ++++-- > > gcc/config/i386/i386.h | 4 ++++ > > gcc/config/i386/i386.md | 9 ++++++--- > > gcc/config/i386/x86-tune.def | 15 +++++++++++++++ > > gcc/testsuite/gcc.target/i386/avx-covert-1.c | 19 +++++++++++++++++++ > > .../gcc.target/i386/avx-fp-covert-1.c | 15 +++++++++++++++ > > .../gcc.target/i386/avx-int-covert-1.c | 14 ++++++++++++++ > > gcc/testsuite/gcc.target/i386/sse-covert-1.c | 19 +++++++++++++++++++ > > .../gcc.target/i386/sse-fp-covert-1.c | 15 +++++++++++++++ > > .../gcc.target/i386/sse-int-covert-1.c | 14 ++++++++++++++ > > 10 files changed, 125 insertions(+), 5 deletions(-) create mode > > 100644 gcc/testsuite/gcc.target/i386/avx-covert-1.c > > create mode 100644 gcc/testsuite/gcc.target/i386/avx-fp-covert-1.c > > create mode 100644 gcc/testsuite/gcc.target/i386/avx-int-covert-1.c > > create mode 100644 gcc/testsuite/gcc.target/i386/sse-covert-1.c > > create mode 100644 gcc/testsuite/gcc.target/i386/sse-fp-covert-1.c > > create mode 100644 gcc/testsuite/gcc.target/i386/sse-int-covert-1.c > > > > diff --git a/gcc/config/i386/i386-features.c > > b/gcc/config/i386/i386-features.c index ae5ea02a002..91bfa06d4bf > > 100644 > > --- a/gcc/config/i386/i386-features.c > > +++ b/gcc/config/i386/i386-features.c > > @@ -2218,14 +2218,16 @@ remove_partial_avx_dependency (void) > > machine_mode dest_mode = GET_MODE (dest); > > machine_mode src_mode; > > > > - if (TARGET_USE_VECTOR_FP_CONVERTS) > > + if (TARGET_USE_VECTOR_FP_CONVERTS > > + || !TARGET_SSE_PARTIAL_REG_FP_CONVERTS_DEPENDENCY) > > { > > src_mode = GET_MODE (XEXP (src, 0)); > > if (src_mode == E_SFmode || src_mode == E_DFmode) > > continue; > > } > > > > - if (TARGET_USE_VECTOR_CONVERTS) > > + if (TARGET_USE_VECTOR_CONVERTS > > + || !TARGET_SSE_PARTIAL_REG_CONVERTS_DEPENDENCY) > > { > > src_mode = GET_MODE (XEXP (src, 0)); > > if (src_mode == E_SImode || src_mode == E_DImode) diff > > --git a/gcc/config/i386/i386.h b/gcc/config/i386/i386.h index > > 
e76bb55c080..ec60b89753e 100644 > > --- a/gcc/config/i386/i386.h > > +++ b/gcc/config/i386/i386.h > > @@ -334,6 +334,10 @@ extern unsigned char > ix86_tune_features[X86_TUNE_LAST]; > > ix86_tune_features[X86_TUNE_PARTIAL_REG_DEPENDENCY] > > #define TARGET_SSE_PARTIAL_REG_DEPENDENCY \ > > ix86_tune_features[X86_TUNE_SSE_PARTIAL_REG_DEPENDENCY] > > +#define TARGET_SSE_PARTIAL_REG_FP_CONVERTS_DEPENDENCY \ > > + > > > +ix86_tune_features[X86_TUNE_SSE_PARTIAL_REG_FP_CONVERTS_DEPENDE > NCY] > > +#define TARGET_SSE_PARTIAL_REG_CONVERTS_DEPENDENCY \ > > + > > > +ix86_tune_features[X86_TUNE_SSE_PARTIAL_REG_CONVERTS_DEPENDENC > Y] > > #define TARGET_SSE_UNALIGNED_LOAD_OPTIMAL \ > > ix86_tune_features[X86_TUNE_SSE_UNALIGNED_LOAD_OPTIMAL] > > #define TARGET_SSE_UNALIGNED_STORE_OPTIMAL \ diff --git > > a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md index > > 13f6f57cdcc..c82a9dc1f67 100644 > > --- a/gcc/config/i386/i386.md > > +++ b/gcc/config/i386/i386.md > > @@ -4535,7 +4535,8 @@ > > (float_extend:DF > > (match_operand:SF 1 "nonimmediate_operand")))] > > "!TARGET_AVX > > - && TARGET_SSE_PARTIAL_REG_DEPENDENCY && epilogue_completed > > + && TARGET_SSE_PARTIAL_REG_FP_CONVERTS_DEPENDENCY > > + && epilogue_completed > > && optimize_function_for_speed_p (cfun) > > && (!REG_P (operands[1]) > > || (!TARGET_AVX && REGNO (operands[0]) != REGNO > > (operands[1]))) @@ -4708,7 +4709,8 @@ > > (float_truncate:SF > > (match_operand:DF 1 "nonimmediate_operand")))] > > "!TARGET_AVX > > - && TARGET_SSE_PARTIAL_REG_DEPENDENCY && epilogue_completed > > + && TARGET_SSE_PARTIAL_REG_FP_CONVERTS_DEPENDENCY > > + && epilogue_completed > > && optimize_function_for_speed_p (cfun) > > && (!REG_P (operands[1]) > > || (!TARGET_AVX && REGNO (operands[0]) != REGNO > > (operands[1]))) @@ -5243,7 +5245,8 @@ > > [(set (match_operand:MODEF 0 "sse_reg_operand") > > (float:MODEF (match_operand:SWI48 1 "nonimmediate_operand")))] > > "!TARGET_AVX > > - && TARGET_SSE_PARTIAL_REG_DEPENDENCY && epilogue_completed 
> > + && TARGET_SSE_PARTIAL_REG_CONVERTS_DEPENDENCY > > + && epilogue_completed > > && optimize_function_for_speed_p (cfun) > > && (!EXT_REX_SSE_REG_P (operands[0]) > > || TARGET_AVX512VL)" > > diff --git a/gcc/config/i386/x86-tune.def > > b/gcc/config/i386/x86-tune.def index 088edb6c4ca..58e8ead56b4 100644 > > --- a/gcc/config/i386/x86-tune.def > > +++ b/gcc/config/i386/x86-tune.def > > @@ -64,6 +64,21 @@ DEF_TUNE > (X86_TUNE_SSE_PARTIAL_REG_DEPENDENCY, "sse_partial_reg_dependency", > > m_PPRO | m_P4_NOCONA | m_CORE_ALL | m_BONNELL | > m_AMDFAM10 > > | m_BDVER | m_ZNVER | m_TREMONT | m_GENERIC) > > > > +/* X86_TUNE_SSE_PARTIAL_REG_FP_CONVERTS_DEPENDENCY: This knob > avoids > > + partial write to the destination in scalar SSE conversion from FP > > + to FP. */ > > +DEF_TUNE (X86_TUNE_SSE_PARTIAL_REG_FP_CONVERTS_DEPENDENCY, > > + "sse_partial_reg_fp_converts_dependency", > > + m_PPRO | m_P4_NOCONA | m_CORE_ALL | m_BONNELL | > m_AMDFAM10 > > + | m_BDVER | m_ZNVER | m_GENERIC) > > I thought we wanted to enable this for Tremont. > In the latest tests, enabling this for Tremont causes a 2.8% regression in 538.imagick_r. Thanks, Lili. > > + > > +/* X86_TUNE_SSE_PARTIAL_REG_CONVERTS_DEPENDENCY: This knob > avoids partial > > + write to the destination in scalar SSE conversion from integer to > > +FP. */ DEF_TUNE > (X86_TUNE_SSE_PARTIAL_REG_CONVERTS_DEPENDENCY, > > + "sse_partial_reg_converts_dependency", > > + m_PPRO | m_P4_NOCONA | m_CORE_ALL | m_BONNELL | > m_AMDFAM10 > > + | m_BDVER | m_ZNVER | m_GENERIC) > > + > > /* X86_TUNE_SSE_SPLIT_REGS: Set for machines where the type and > dependencies > > are resolved on SSE register parts instead of whole registers, so we may > > maintain just lower part of scalar values in proper format leaving > > the > > -- > H.J.