On Fri, Sep 17, 2021 at 5:15 AM Cui, Lili <lili....@intel.com> wrote: > > > > -----Original Message----- > > From: Uros Bizjak <ubiz...@gmail.com> > > Sent: Thursday, September 16, 2021 2:28 PM > > To: Cui, Lili <lili....@intel.com> > > Cc: gcc-patches@gcc.gnu.org; Liu, Hongtao <hongtao....@intel.com>; H. J. Lu > > <hjl.to...@gmail.com> > > Subject: Re: [PATCH 3/4] x86: Properly handle > > USE_VECTOR_FP_CONVERTS/USE_VECTOR_CONVERTS > > > > On Wed, Sep 15, 2021 at 10:10 AM <lili....@intel.com> wrote: > > > > > > From: "H.J. Lu" <hjl.to...@gmail.com> > > > > > > Check TARGET_USE_VECTOR_FP_CONVERTS or > > TARGET_USE_VECTOR_CONVERTS when > > > handling avx_partial_xmm_update attribute. Don't convert AVX partial > > > XMM register update if vector packed SSE conversion should be used. > > > > > > gcc/ > > > > > > PR target/101900 > > > * config/i386/i386-features.c (remove_partial_avx_dependency): > > > Check TARGET_USE_VECTOR_FP_CONVERTS and > > TARGET_USE_VECTOR_CONVERTS > > > before generating vxorps. > > > > > > gcc/ > > > > > > PR target/101900 > > > * testsuite/gcc.target/i386/pr101900-1.c: New test. > > > * testsuite/gcc.target/i386/pr101900-2.c: Likewise. > > > * testsuite/gcc.target/i386/pr101900-3.c: Likewise. 
> > > --- > > > gcc/config/i386/i386-features.c | 21 ++++++++++++++++++--- > > > gcc/testsuite/gcc.target/i386/pr101900-1.c | 18 ++++++++++++++++++ > > > gcc/testsuite/gcc.target/i386/pr101900-2.c | 18 ++++++++++++++++++ > > > gcc/testsuite/gcc.target/i386/pr101900-3.c | 19 +++++++++++++++++++ > > > 4 files changed, 73 insertions(+), 3 deletions(-) create mode 100644 > > > gcc/testsuite/gcc.target/i386/pr101900-1.c > > > create mode 100644 gcc/testsuite/gcc.target/i386/pr101900-2.c > > > create mode 100644 gcc/testsuite/gcc.target/i386/pr101900-3.c > > > > > > diff --git a/gcc/config/i386/i386-features.c > > > b/gcc/config/i386/i386-features.c index 5a99ea7c046..ae5ea02a002 > > > 100644 > > > --- a/gcc/config/i386/i386-features.c > > > +++ b/gcc/config/i386/i386-features.c > > > @@ -2210,15 +2210,30 @@ remove_partial_avx_dependency (void) > > > != AVX_PARTIAL_XMM_UPDATE_TRUE) > > > continue; > > > > > > - if (!v4sf_const0) > > > - v4sf_const0 = gen_reg_rtx (V4SFmode); > > > - > > > /* Convert PARTIAL_XMM_UPDATE_TRUE insns, DF -> SF, SF -> DF, > > > SI -> SF, SI -> DF, DI -> SF, DI -> DF, to vec_dup and > > > vec_merge with subreg. 
*/ > > > rtx src = SET_SRC (set); > > > rtx dest = SET_DEST (set); > > > machine_mode dest_mode = GET_MODE (dest); > > > + machine_mode src_mode; > > > + > > > + if (TARGET_USE_VECTOR_FP_CONVERTS) > > > + { > > > + src_mode = GET_MODE (XEXP (src, 0)); > > > + if (src_mode == E_SFmode || src_mode == E_DFmode) > > > + continue; > > > + } > > > + > > > + if (TARGET_USE_VECTOR_CONVERTS) > > > + { > > > + src_mode = GET_MODE (XEXP (src, 0)); > > > + if (src_mode == E_SImode || src_mode == E_DImode) > > > + continue; > > > + } > > > + > > > + if (!v4sf_const0) > > > + v4sf_const0 = gen_reg_rtx (V4SFmode); > > > > It would be better to move the initialization of src_mode to the top of the new hunk, > > like: > > > > machine_mode src_mode = GET_MODE (XEXP (src, 0)); switch (src_mode) { > > case E_SFmode: > > case E_DFmode: > > if (TARGET_USE_VECTOR_FP_CONVERTS) > > continue; > > break; > > case E_SImode: > > case E_DImode: > > if (TARGET_USE_VECTOR_CONVERTS) > > continue; > > break; > > default: > > break; > > } > > > > or something like the above. > > Done, thanks for your good advice. I also rebased patch 4/4, since it is > based on patch 3/4.
OK. Thanks, Uros. > > Changed it to: > > + machine_mode src_mode = GET_MODE (XEXP (src, 0)); > + > + switch (src_mode) > + { > + case E_SFmode: > + case E_DFmode: > + if (TARGET_USE_VECTOR_FP_CONVERTS) > + continue; > + break; > + case E_SImode: > + case E_DImode: > + if (TARGET_USE_VECTOR_CONVERTS) > + continue; > + break; > + default: > + break; > + } > + if (!v4sf_const0) > + v4sf_const0 = gen_reg_rtx (V4SFmode); > > Thanks, > Lili. > > > > > Uros. > > > > > > > > rtx zero; > > > machine_mode dest_vecmode; > > > diff --git a/gcc/testsuite/gcc.target/i386/pr101900-1.c > > > b/gcc/testsuite/gcc.target/i386/pr101900-1.c > > > new file mode 100644 > > > index 00000000000..0a45f8e340a > > > --- /dev/null > > > +++ b/gcc/testsuite/gcc.target/i386/pr101900-1.c > > > @@ -0,0 +1,18 @@ > > > +/* { dg-do compile } */ > > > +/* { dg-options "-O2 -march=skylake -mfpmath=sse > > > +-mtune-ctrl=use_vector_fp_converts" } */ > > > + > > > +extern float f; > > > +extern double d; > > > +extern int i; > > > + > > > +void > > > +foo (void) > > > +{ > > > + d = f; > > > + f = i; > > > +} > > > + > > > +/* { dg-final { scan-assembler "vcvtps2pd" } } */ > > > +/* { dg-final { scan-assembler "vcvtsi2ssl" } } */ > > > +/* { dg-final { scan-assembler-not "vcvtss2sd" } } */ > > > +/* { dg-final { scan-assembler-times "vxorps\[^\n\r\]*xmm\[0-9\]" 1 } > > > +} */ > > > diff --git a/gcc/testsuite/gcc.target/i386/pr101900-2.c > > > b/gcc/testsuite/gcc.target/i386/pr101900-2.c > > > new file mode 100644 > > > index 00000000000..c8b2d1da5ae > > > --- /dev/null > > > +++ b/gcc/testsuite/gcc.target/i386/pr101900-2.c > > > @@ -0,0 +1,18 @@ > > > +/* { dg-do compile } */ > > > +/* { dg-options "-O2 -march=skylake -mfpmath=sse > > > +-mtune-ctrl=use_vector_converts" } */ > > > + > > > +extern float f; > > > +extern double d; > > > +extern int i; > > > + > > > +void > > > +foo (void) > > > +{ > > > + d = f; > > > + f = i; > > > +} > > > + > > > +/* { dg-final { scan-assembler "vcvtss2sd" } } */ > > > 
+/* { dg-final { scan-assembler "vcvtdq2ps" } } */ > > > +/* { dg-final { scan-assembler-not "vcvtsi2ssl" } } */ > > > +/* { dg-final { scan-assembler-times "vxorps\[^\n\r\]*xmm\[0-9\]" 1 } > > > +} */ > > > diff --git a/gcc/testsuite/gcc.target/i386/pr101900-3.c > > > b/gcc/testsuite/gcc.target/i386/pr101900-3.c > > > new file mode 100644 > > > index 00000000000..6ee565b5bd4 > > > --- /dev/null > > > +++ b/gcc/testsuite/gcc.target/i386/pr101900-3.c > > > @@ -0,0 +1,19 @@ > > > +/* { dg-do compile } */ > > > +/* { dg-options "-O2 -march=skylake -mfpmath=sse > > > +-mtune-ctrl=use_vector_fp_converts,use_vector_converts" } */ > > > + > > > +extern float f; > > > +extern double d; > > > +extern int i; > > > + > > > +void > > > +foo (void) > > > +{ > > > + d = f; > > > + f = i; > > > +} > > > + > > > +/* { dg-final { scan-assembler "vcvtps2pd" } } */ > > > +/* { dg-final { scan-assembler "vcvtdq2ps" } } */ > > > +/* { dg-final { scan-assembler-not "vcvtss2sd" } } */ > > > +/* { dg-final { scan-assembler-not "vcvtsi2ssl" } } */ > > > +/* { dg-final { scan-assembler-not "vxorps" } } */ > > > -- > > > 2.17.1 > > >