On Thu, Dec 2, 2021 at 4:27 PM liuhongt <hongtao....@intel.com> wrote: > > The patch helps reload to choose GENERAL_REGS alternatives for > SSE_FLOAT_MODE and enables optimizations like > > - vmovd %xmm0, -4(%rsp) > - movl $1, %eax > - addl -4(%rsp), %eax > + movd %xmm0, %eax > + addl $1, %eax > > Bootstrapped and regtested on x86_64-pc-linux-gnu{-m32,} and > x86_64-pc-linux-gnu{-m32\ -march=cascadelake,\ -march=cascadelake}. > > No big performance impact is observed for SPEC2017 on ICX/CLX with both > -Ofast -march=native -flto -funroll-loops and -O2 -mtune=generic options. > > Ok for trunk? > > gcc/ChangeLog: > > PR target/95740 > * config/i386/i386.c (ix86_preferred_reload_class): Prefer > INT_SSE_REGS for SSE_FLOAT_MODE_P. > * config/i386/i386.h (INT_SSE_CLASS_P): New. > > gcc/testsuite/ChangeLog: > > * gcc.target/i386/pr95740.c: New test. > --- > gcc/config/i386/i386.c | 5 +++-- > gcc/config/i386/i386.h | 2 ++ > gcc/testsuite/gcc.target/i386/pr95740.c | 26 +++++++++++++++++++++++++ > 3 files changed, 31 insertions(+), 2 deletions(-) > create mode 100644 gcc/testsuite/gcc.target/i386/pr95740.c > > diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c > index 80fee627358..977af1c31a7 100644 > --- a/gcc/config/i386/i386.c > +++ b/gcc/config/i386/i386.c > @@ -19194,9 +19194,10 @@ ix86_preferred_reload_class (rtx x, reg_class_t > regclass) > return NO_REGS; > } > > - /* Prefer SSE regs only, if we can use them for math. */ > + /* Prefer INT_SSE_REGS, enable reload from SSE register to GENERAL_REGS, > + refer to PR95740. */ > if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH) > - return SSE_CLASS_P (regclass) ? regclass : NO_REGS; > + return INT_SSE_CLASS_P (regclass) ? regclass : NO_REGS; > > /* Generally when we see PLUS here, it's the function invariant > (plus soft-fp const_int). 
Which can only be computed into general > diff --git a/gcc/config/i386/i386.h b/gcc/config/i386/i386.h > index 2fda1e0686e..ec90e47904b 100644 > --- a/gcc/config/i386/i386.h > +++ b/gcc/config/i386/i386.h > @@ -1283,6 +1283,8 @@ enum reg_class > reg_class_subset_p ((CLASS), FLOAT_REGS) > #define SSE_CLASS_P(CLASS) \ > reg_class_subset_p ((CLASS), ALL_SSE_REGS) > +#define INT_SSE_CLASS_P(CLASS) \ > + reg_class_subset_p ((CLASS), INT_SSE_REGS) > #define MMX_CLASS_P(CLASS) \ > ((CLASS) == MMX_REGS) > #define MASK_CLASS_P(CLASS) \ > diff --git a/gcc/testsuite/gcc.target/i386/pr95740.c > b/gcc/testsuite/gcc.target/i386/pr95740.c > new file mode 100644 > index 00000000000..9bc7b862787 > --- /dev/null > +++ b/gcc/testsuite/gcc.target/i386/pr95740.c > @@ -0,0 +1,26 @@ > +/* { dg-do compile { target { ! ia32 } } } */ > +/* { dg-options "-msse2 -O2 -mtune-ctrl=use_incdec -masm=att -mfpmath=sse" } > */ > +/* { dg-final { scan-assembler-times {(?n)movd[\t ]*%xmm0.*%eax} 1 } } */ > +/* { dg-final { scan-assembler-times {(?n)incl[\t ]*%eax} 1 } } */ > +/* { dg-final { scan-assembler-times {(?n)movq[\t ]*%xmm0.*%rax} 1 } } */ > +/* { dg-final { scan-assembler-times {(?n)incq[\t ]*%rax} 1 } } */ > + > +int > +foo (float a) > +{ > + union{ > + int b; > + float a;}u; > + u.a = a; > + return u.b + 1; > +} > + > +long long > +foo1 (double a) > +{ > + union{ > + long long b; > + double a;}u; > + u.a = a; > + return u.b + 1; > +} > -- > 2.18.1 >
-- BR, Hongtao