On Fri, Aug 11, 2023 at 8:38 AM liuhongt <hongtao....@intel.com> wrote: > > For more details of GDS (Gather Data Sampling), refer to > https://www.intel.com/content/www/us/en/developer/articles/technical/software-security-guidance/advisory-guidance/gather-data-sampling.html > > After the microcode update, there's a performance regression. To avoid that, > the patch disables gather generation in autovectorization but uses > gather scalar emulation instead. > > Ready to push to trunk and backport. > Any comments? Pushed to trunk and backported to releases/gcc-{11,12,13}. > > gcc/ChangeLog: > > * config/i386/i386-options.cc (m_GDS): New macro. > * config/i386/x86-tune.def (X86_TUNE_USE_GATHER_2PARTS): Don't > enable for m_GDS. > (X86_TUNE_USE_GATHER_4PARTS): Ditto. > (X86_TUNE_USE_GATHER): Ditto. > > gcc/testsuite/ChangeLog: > > * gcc.target/i386/avx2-gather-2.c: Adjust options to keep > gather vectorization. > * gcc.target/i386/avx2-gather-6.c: Ditto. > * gcc.target/i386/avx512f-pr88464-1.c: Ditto. > * gcc.target/i386/avx512f-pr88464-5.c: Ditto. > * gcc.target/i386/avx512vl-pr88464-1.c: Ditto. > * gcc.target/i386/avx512vl-pr88464-11.c: Ditto. > * gcc.target/i386/avx512vl-pr88464-3.c: Ditto. > * gcc.target/i386/avx512vl-pr88464-9.c: Ditto. > * gcc.target/i386/pr88531-1b.c: Ditto. > * gcc.target/i386/pr88531-1c.c: Ditto. 
> --- > gcc/config/i386/i386-options.cc | 5 +++++ > gcc/config/i386/x86-tune.def | 6 +++--- > gcc/testsuite/gcc.target/i386/avx2-gather-2.c | 2 +- > gcc/testsuite/gcc.target/i386/avx2-gather-6.c | 2 +- > gcc/testsuite/gcc.target/i386/avx512f-pr88464-1.c | 2 +- > gcc/testsuite/gcc.target/i386/avx512f-pr88464-5.c | 2 +- > gcc/testsuite/gcc.target/i386/avx512vl-pr88464-1.c | 2 +- > gcc/testsuite/gcc.target/i386/avx512vl-pr88464-11.c | 2 +- > gcc/testsuite/gcc.target/i386/avx512vl-pr88464-3.c | 2 +- > gcc/testsuite/gcc.target/i386/avx512vl-pr88464-9.c | 2 +- > gcc/testsuite/gcc.target/i386/pr88531-1b.c | 2 +- > gcc/testsuite/gcc.target/i386/pr88531-1c.c | 2 +- > 12 files changed, 18 insertions(+), 13 deletions(-) > > diff --git a/gcc/config/i386/i386-options.cc b/gcc/config/i386/i386-options.cc > index 127ee24203c..e6ba33c370d 100644 > --- a/gcc/config/i386/i386-options.cc > +++ b/gcc/config/i386/i386-options.cc > @@ -141,6 +141,11 @@ along with GCC; see the file COPYING3. If not see > #define m_ARROWLAKE (HOST_WIDE_INT_1U<<PROCESSOR_ARROWLAKE) > #define m_CORE_ATOM (m_SIERRAFOREST | m_GRANDRIDGE) > #define m_INTEL (HOST_WIDE_INT_1U<<PROCESSOR_INTEL) > +/* Gather Data Sampling / CVE-2022-40982 / INTEL-SA-00828. > + Software mitigation. */ > +#define m_GDS (m_SKYLAKE | m_SKYLAKE_AVX512 | m_CANNONLAKE \ > + | m_ICELAKE_CLIENT | m_ICELAKE_SERVER | m_CASCADELAKE \ > + | m_TIGERLAKE | m_COOPERLAKE | m_ROCKETLAKE) > > #define m_LUJIAZUI (HOST_WIDE_INT_1U<<PROCESSOR_LUJIAZUI) > > diff --git a/gcc/config/i386/x86-tune.def b/gcc/config/i386/x86-tune.def > index 40e04ecddbf..22d26bb0030 100644 > --- a/gcc/config/i386/x86-tune.def > +++ b/gcc/config/i386/x86-tune.def > @@ -491,7 +491,7 @@ DEF_TUNE (X86_TUNE_AVOID_4BYTE_PREFIXES, > "avoid_4byte_prefixes", > elements. 
*/ > DEF_TUNE (X86_TUNE_USE_GATHER_2PARTS, "use_gather_2parts", > ~(m_ZNVER1 | m_ZNVER2 | m_ZNVER3 | m_ZNVER4 | m_ALDERLAKE > - | m_ARROWLAKE | m_CORE_ATOM | m_GENERIC)) > + | m_ARROWLAKE | m_CORE_ATOM | m_GENERIC | m_GDS)) > > /* X86_TUNE_USE_SCATTER_2PARTS: Use scater instructions for vectors with 2 > elements. */ > @@ -502,7 +502,7 @@ DEF_TUNE (X86_TUNE_USE_SCATTER_2PARTS, > "use_scatter_2parts", > elements. */ > DEF_TUNE (X86_TUNE_USE_GATHER_4PARTS, "use_gather_4parts", > ~(m_ZNVER1 | m_ZNVER2 | m_ZNVER3 | m_ZNVER4 | m_ALDERLAKE > - | m_ARROWLAKE | m_CORE_ATOM | m_GENERIC)) > + | m_ARROWLAKE | m_CORE_ATOM | m_GENERIC | m_GDS)) > > /* X86_TUNE_USE_SCATTER_4PARTS: Use scater instructions for vectors with 4 > elements. */ > @@ -513,7 +513,7 @@ DEF_TUNE (X86_TUNE_USE_SCATTER_4PARTS, > "use_scatter_4parts", > elements. */ > DEF_TUNE (X86_TUNE_USE_GATHER, "use_gather", > ~(m_ZNVER1 | m_ZNVER2 | m_ZNVER4 | m_ALDERLAKE | m_ARROWLAKE > - | m_CORE_ATOM | m_GENERIC)) > + | m_CORE_ATOM | m_GENERIC | m_GDS)) > > /* X86_TUNE_USE_SCATTER: Use scater instructions for vectors with 8 or more > elements. 
*/ > diff --git a/gcc/testsuite/gcc.target/i386/avx2-gather-2.c > b/gcc/testsuite/gcc.target/i386/avx2-gather-2.c > index ad5ef73107c..978924b0f57 100644 > --- a/gcc/testsuite/gcc.target/i386/avx2-gather-2.c > +++ b/gcc/testsuite/gcc.target/i386/avx2-gather-2.c > @@ -1,5 +1,5 @@ > /* { dg-do compile } */ > -/* { dg-options "-O3 -fdump-tree-vect-details -march=skylake" } */ > +/* { dg-options "-O3 -fdump-tree-vect-details -march=skylake -mtune=haswell" > } */ > > #include "avx2-gather-1.c" > > diff --git a/gcc/testsuite/gcc.target/i386/avx2-gather-6.c > b/gcc/testsuite/gcc.target/i386/avx2-gather-6.c > index 47a95dbe989..0bb40ac14eb 100644 > --- a/gcc/testsuite/gcc.target/i386/avx2-gather-6.c > +++ b/gcc/testsuite/gcc.target/i386/avx2-gather-6.c > @@ -1,5 +1,5 @@ > /* { dg-do compile } */ > -/* { dg-options "-O3 -mavx2 -fno-common -fdump-tree-vect-details > -mtune=skylake -fno-split-loops" } */ > +/* { dg-options "-O3 -mavx2 -fno-common -fdump-tree-vect-details > -mtune=haswell -fno-split-loops" } */ > > #include "avx2-gather-5.c" > > diff --git a/gcc/testsuite/gcc.target/i386/avx512f-pr88464-1.c > b/gcc/testsuite/gcc.target/i386/avx512f-pr88464-1.c > index 06d21bb0129..d1a2298618e 100644 > --- a/gcc/testsuite/gcc.target/i386/avx512f-pr88464-1.c > +++ b/gcc/testsuite/gcc.target/i386/avx512f-pr88464-1.c > @@ -1,6 +1,6 @@ > /* PR tree-optimization/88464 */ > /* { dg-do compile } */ > -/* { dg-options "-O3 -mavx512f -mprefer-vector-width=512 > -mtune=skylake-avx512 -fdump-tree-vect-details" } */ > +/* { dg-options "-O3 -mavx512f -mprefer-vector-width=512 -mtune=haswell > -fdump-tree-vect-details" } */ > /* { dg-final { scan-tree-dump-times "loop vectorized using 64 byte vectors" > 4 "vect" } } */ > /* { dg-final { scan-tree-dump-times "vectorized 1 loops in function" 4 > "vect" } } */ > > diff --git a/gcc/testsuite/gcc.target/i386/avx512f-pr88464-5.c > b/gcc/testsuite/gcc.target/i386/avx512f-pr88464-5.c > index 462e951fdc1..d7b0b2b28cb 100644 > --- 
a/gcc/testsuite/gcc.target/i386/avx512f-pr88464-5.c > +++ b/gcc/testsuite/gcc.target/i386/avx512f-pr88464-5.c > @@ -1,6 +1,6 @@ > /* PR tree-optimization/88464 */ > /* { dg-do compile } */ > -/* { dg-options "-O3 -mavx512f -mprefer-vector-width=512 > -mtune=skylake-avx512 -fdump-tree-vect-details" } */ > +/* { dg-options "-O3 -mavx512f -mprefer-vector-width=512 -mtune=haswell > -fdump-tree-vect-details" } */ > /* { dg-final { scan-tree-dump-times "loop vectorized using 64 byte vectors" > 4 "vect" } } */ > /* { dg-final { scan-tree-dump-times "vectorized 1 loops in function" 4 > "vect" } } */ > > diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-pr88464-1.c > b/gcc/testsuite/gcc.target/i386/avx512vl-pr88464-1.c > index 55a28dddbf8..07439185ec1 100644 > --- a/gcc/testsuite/gcc.target/i386/avx512vl-pr88464-1.c > +++ b/gcc/testsuite/gcc.target/i386/avx512vl-pr88464-1.c > @@ -1,6 +1,6 @@ > /* PR tree-optimization/88464 */ > /* { dg-do compile } */ > -/* { dg-options "-O3 -mavx512vl -mprefer-vector-width=256 > -mtune=skylake-avx512 -fdump-tree-vect-details" } */ > +/* { dg-options "-O3 -mavx512vl -mprefer-vector-width=256 -mtune=haswell > -fdump-tree-vect-details" } */ > /* { dg-final { scan-tree-dump-times "loop vectorized using 32 byte vectors" > 4 "vect" } } */ > /* { dg-final { scan-tree-dump-times "vectorized 1 loops in function" 4 > "vect" } } */ > > diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-pr88464-11.c > b/gcc/testsuite/gcc.target/i386/avx512vl-pr88464-11.c > index 9696008855d..3a98108279a 100644 > --- a/gcc/testsuite/gcc.target/i386/avx512vl-pr88464-11.c > +++ b/gcc/testsuite/gcc.target/i386/avx512vl-pr88464-11.c > @@ -1,6 +1,6 @@ > /* PR tree-optimization/88464 */ > /* { dg-do compile } */ > -/* { dg-options "-O3 -mavx512vl -mprefer-vector-width=128 > -mtune=skylake-avx512 -fdump-tree-vect-details" } */ > +/* { dg-options "-O3 -mavx512vl -mprefer-vector-width=128 -mtune=haswell > -fdump-tree-vect-details" } */ > /* { dg-final { scan-tree-dump-times 
"loop vectorized using 16 byte vectors" > 4 "vect" } } */ > /* { dg-final { scan-tree-dump-times "vectorized 1 loops in function" 4 > "vect" } } */ > > diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-pr88464-3.c > b/gcc/testsuite/gcc.target/i386/avx512vl-pr88464-3.c > index 6b0c8a85957..ac669e04812 100644 > --- a/gcc/testsuite/gcc.target/i386/avx512vl-pr88464-3.c > +++ b/gcc/testsuite/gcc.target/i386/avx512vl-pr88464-3.c > @@ -1,6 +1,6 @@ > /* PR tree-optimization/88464 */ > /* { dg-do compile } */ > -/* { dg-options "-O3 -mavx512vl -mprefer-vector-width=128 > -mtune=skylake-avx512 -fdump-tree-vect-details" } */ > +/* { dg-options "-O3 -mavx512vl -mprefer-vector-width=128 -mtune=haswell > -fdump-tree-vect-details" } */ > /* { dg-final { scan-tree-dump-times "loop vectorized using 16 byte vectors" > 4 "vect" } } */ > /* { dg-final { scan-tree-dump-times "vectorized 1 loops in function" 4 > "vect" } } */ > > diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-pr88464-9.c > b/gcc/testsuite/gcc.target/i386/avx512vl-pr88464-9.c > index 3af568ab323..14a1083b6d1 100644 > --- a/gcc/testsuite/gcc.target/i386/avx512vl-pr88464-9.c > +++ b/gcc/testsuite/gcc.target/i386/avx512vl-pr88464-9.c > @@ -1,6 +1,6 @@ > /* PR tree-optimization/88464 */ > /* { dg-do compile } */ > -/* { dg-options "-O3 -mavx512vl -mprefer-vector-width=256 > -mtune=skylake-avx512 -fdump-tree-vect-details" } */ > +/* { dg-options "-O3 -mavx512vl -mprefer-vector-width=256 -mtune=haswell > -fdump-tree-vect-details" } */ > /* { dg-final { scan-tree-dump-times "loop vectorized using 32 byte vectors" > 4 "vect" } } */ > /* { dg-final { scan-tree-dump-times "vectorized 1 loops in function" 4 > "vect" } } */ > > diff --git a/gcc/testsuite/gcc.target/i386/pr88531-1b.c > b/gcc/testsuite/gcc.target/i386/pr88531-1b.c > index 812c8a10fab..e6df789de90 100644 > --- a/gcc/testsuite/gcc.target/i386/pr88531-1b.c > +++ b/gcc/testsuite/gcc.target/i386/pr88531-1b.c > @@ -1,5 +1,5 @@ > /* { dg-do compile } */ > -/* { 
dg-options "-O3 -march=skylake -mfpmath=sse" } */ > +/* { dg-options "-O3 -march=skylake -mfpmath=sse -mtune=haswell" } */ > > #include "pr88531-1a.c" > > diff --git a/gcc/testsuite/gcc.target/i386/pr88531-1c.c > b/gcc/testsuite/gcc.target/i386/pr88531-1c.c > index 43fc5913ed3..a093c87c01f 100644 > --- a/gcc/testsuite/gcc.target/i386/pr88531-1c.c > +++ b/gcc/testsuite/gcc.target/i386/pr88531-1c.c > @@ -1,5 +1,5 @@ > /* { dg-do compile } */ > -/* { dg-options "-O3 -march=skylake-avx512 -mfpmath=sse" } */ > +/* { dg-options "-O3 -march=skylake-avx512 -mfpmath=sse -mtune=haswell" } */ > > #include "pr88531-1a.c" > > -- > 2.31.1 >
-- BR, Hongtao