When none of -mprefer-vector-width, avx256_optimal/avx128_optimal, avx256_store_by_pieces/avx512_store_by_pieces is specified, GCC sets ix86_{move_max,store_max} to the maximum available vector length, except in the AVX case:
if (TARGET_AVX512F_P (opts->x_ix86_isa_flags) && TARGET_EVEX512_P (opts->x_ix86_isa_flags2)) opts->x_ix86_move_max = PVW_AVX512; else opts->x_ix86_move_max = PVW_AVX128; So for -mavx2, the vectorizer chooses 256-bit vectors, but struct copies use 128-bit moves; this width mismatch can cause a store-to-load forwarding (STLF) stall. This patch fixes that and improves 538.imagick_r by ~30% for -march=x86-64-v3 -O2. Bootstrapped and regtested on x86_64-pc-linux-gnu{-m32,}. Any comments? gcc/ChangeLog: * config/i386/i386-options.cc (ix86_option_override_internal): Set ix86_{move_max,store_max} to PVW_AVX256 when TARGET_AVX instead of PVW_AVX128. gcc/testsuite/ChangeLog: * gcc.target/i386/pieces-memcpy-10.c: Add -mprefer-vector-width=128. * gcc.target/i386/pieces-memcpy-6.c: Ditto. * gcc.target/i386/pieces-memset-38.c: Ditto. * gcc.target/i386/pieces-memset-40.c: Ditto. * gcc.target/i386/pieces-memset-41.c: Ditto. * gcc.target/i386/pieces-memset-42.c: Ditto. * gcc.target/i386/pieces-memset-43.c: Ditto. * gcc.target/i386/pieces-strcpy-2.c: Ditto. * gcc.target/i386/pieces-memcpy-22.c: New test. * gcc.target/i386/pieces-memset-51.c: New test. * gcc.target/i386/pieces-strcpy-3.c: New test. 
--- gcc/config/i386/i386-options.cc | 6 ++++++ gcc/testsuite/gcc.target/i386/pieces-memcpy-10.c | 2 +- gcc/testsuite/gcc.target/i386/pieces-memcpy-22.c | 12 ++++++++++++ gcc/testsuite/gcc.target/i386/pieces-memcpy-6.c | 2 +- gcc/testsuite/gcc.target/i386/pieces-memset-38.c | 2 +- gcc/testsuite/gcc.target/i386/pieces-memset-40.c | 2 +- gcc/testsuite/gcc.target/i386/pieces-memset-41.c | 2 +- gcc/testsuite/gcc.target/i386/pieces-memset-42.c | 2 +- gcc/testsuite/gcc.target/i386/pieces-memset-43.c | 2 +- gcc/testsuite/gcc.target/i386/pieces-memset-51.c | 12 ++++++++++++ gcc/testsuite/gcc.target/i386/pieces-strcpy-2.c | 2 +- gcc/testsuite/gcc.target/i386/pieces-strcpy-3.c | 15 +++++++++++++++ 12 files changed, 53 insertions(+), 8 deletions(-) create mode 100644 gcc/testsuite/gcc.target/i386/pieces-memcpy-22.c create mode 100644 gcc/testsuite/gcc.target/i386/pieces-memset-51.c create mode 100644 gcc/testsuite/gcc.target/i386/pieces-strcpy-3.c diff --git a/gcc/config/i386/i386-options.cc b/gcc/config/i386/i386-options.cc index f423455b363..f79257cc764 100644 --- a/gcc/config/i386/i386-options.cc +++ b/gcc/config/i386/i386-options.cc @@ -3023,6 +3023,9 @@ ix86_option_override_internal (bool main_args_p, if (TARGET_AVX512F_P (opts->x_ix86_isa_flags) && TARGET_EVEX512_P (opts->x_ix86_isa_flags2)) opts->x_ix86_move_max = PVW_AVX512; + /* Align with vectorizer to avoid potential STLF issue. */ + else if (TARGET_AVX_P (opts->x_ix86_isa_flags)) + opts->x_ix86_move_max = PVW_AVX256; else opts->x_ix86_move_max = PVW_AVX128; } @@ -3047,6 +3050,9 @@ ix86_option_override_internal (bool main_args_p, if (TARGET_AVX512F_P (opts->x_ix86_isa_flags) && TARGET_EVEX512_P (opts->x_ix86_isa_flags2)) opts->x_ix86_store_max = PVW_AVX512; + /* Align with vectorizer to avoid potential STLF issue. 
*/ + else if (TARGET_AVX_P (opts->x_ix86_isa_flags)) + opts->x_ix86_store_max = PVW_AVX256; else opts->x_ix86_store_max = PVW_AVX128; } diff --git a/gcc/testsuite/gcc.target/i386/pieces-memcpy-10.c b/gcc/testsuite/gcc.target/i386/pieces-memcpy-10.c index 5faee21f9b9..53ad0b3be44 100644 --- a/gcc/testsuite/gcc.target/i386/pieces-memcpy-10.c +++ b/gcc/testsuite/gcc.target/i386/pieces-memcpy-10.c @@ -1,5 +1,5 @@ /* { dg-do compile } */ -/* { dg-options "-O2 -mno-avx2 -mavx -mtune=sandybridge" } */ +/* { dg-options "-O2 -mno-avx2 -mavx -mprefer-vector-width=128 -mtune=sandybridge" } */ extern char *dst, *src; diff --git a/gcc/testsuite/gcc.target/i386/pieces-memcpy-22.c b/gcc/testsuite/gcc.target/i386/pieces-memcpy-22.c new file mode 100644 index 00000000000..605b3623ffc --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pieces-memcpy-22.c @@ -0,0 +1,12 @@ +/* { dg-do compile { target { ! ia32 } } } */ +/* { dg-options "-O2 -mno-avx2 -mavx -mtune=generic" } */ + +extern char *dst, *src; + +void +foo (void) +{ + __builtin_memcpy (dst, src, 33); +} + +/* { dg-final { scan-assembler-times "vmovdqu\[ \\t\]+\[^\n\]*%ymm" 2 } } */ diff --git a/gcc/testsuite/gcc.target/i386/pieces-memcpy-6.c b/gcc/testsuite/gcc.target/i386/pieces-memcpy-6.c index 5f99cc98c47..cfd2a86cf33 100644 --- a/gcc/testsuite/gcc.target/i386/pieces-memcpy-6.c +++ b/gcc/testsuite/gcc.target/i386/pieces-memcpy-6.c @@ -1,5 +1,5 @@ /* { dg-do compile { target { ! 
ia32 } } } */ -/* { dg-options "-O2 -mno-avx2 -mavx -mtune=sandybridge" } */ +/* { dg-options "-O2 -mno-avx2 -mavx -mprefer-vector-width=128 -mtune=sandybridge" } */ extern char *dst, *src; diff --git a/gcc/testsuite/gcc.target/i386/pieces-memset-38.c b/gcc/testsuite/gcc.target/i386/pieces-memset-38.c index ed4a24a54fd..ddd194debd5 100644 --- a/gcc/testsuite/gcc.target/i386/pieces-memset-38.c +++ b/gcc/testsuite/gcc.target/i386/pieces-memset-38.c @@ -1,5 +1,5 @@ /* { dg-do compile } */ -/* { dg-options "-O2 -mno-avx512f -mavx2 -mtune=sandybridge" } */ +/* { dg-options "-O2 -mno-avx512f -mavx2 -mprefer-vector-width=128 -mtune=sandybridge" } */ extern char *dst; diff --git a/gcc/testsuite/gcc.target/i386/pieces-memset-40.c b/gcc/testsuite/gcc.target/i386/pieces-memset-40.c index 86358c99a83..5878876550c 100644 --- a/gcc/testsuite/gcc.target/i386/pieces-memset-40.c +++ b/gcc/testsuite/gcc.target/i386/pieces-memset-40.c @@ -1,5 +1,5 @@ /* { dg-do compile } */ -/* { dg-options "-O2 -mno-avx512f -mavx2 -mtune=sandybridge" } */ +/* { dg-options "-O2 -mno-avx512f -mavx2 -mprefer-vector-width=128 -mtune=sandybridge" } */ /* Cope with --enable-frame-pointer, Solaris/x86 -mstackrealign default. */ /* { dg-additional-options "-fomit-frame-pointer -mno-stackrealign" } */ diff --git a/gcc/testsuite/gcc.target/i386/pieces-memset-41.c b/gcc/testsuite/gcc.target/i386/pieces-memset-41.c index d7a27f52983..27a6c8ad139 100644 --- a/gcc/testsuite/gcc.target/i386/pieces-memset-41.c +++ b/gcc/testsuite/gcc.target/i386/pieces-memset-41.c @@ -1,5 +1,5 @@ /* { dg-do compile } */ -/* { dg-options "-O2 -mno-avx2 -mavx -mtune=sandybridge -mno-stackrealign" } */ +/* { dg-options "-O2 -mno-avx2 -mavx -mprefer-vector-width=128 -mtune=sandybridge -mno-stackrealign" } */ /* Cope with --enable-frame-pointer. 
*/ /* { dg-additional-options "-fomit-frame-pointer" } */ diff --git a/gcc/testsuite/gcc.target/i386/pieces-memset-42.c b/gcc/testsuite/gcc.target/i386/pieces-memset-42.c index df0c122aae7..103da699ae5 100644 --- a/gcc/testsuite/gcc.target/i386/pieces-memset-42.c +++ b/gcc/testsuite/gcc.target/i386/pieces-memset-42.c @@ -1,5 +1,5 @@ /* { dg-do compile } */ -/* { dg-options "-O2 -mno-avx2 -mavx -mtune=sandybridge" } */ +/* { dg-options "-O2 -mno-avx2 -mavx -mprefer-vector-width=128 -mtune=sandybridge" } */ extern char *dst; diff --git a/gcc/testsuite/gcc.target/i386/pieces-memset-43.c b/gcc/testsuite/gcc.target/i386/pieces-memset-43.c index 2f2179c2df9..f1494e17610 100644 --- a/gcc/testsuite/gcc.target/i386/pieces-memset-43.c +++ b/gcc/testsuite/gcc.target/i386/pieces-memset-43.c @@ -1,5 +1,5 @@ /* { dg-do compile } */ -/* { dg-options "-O2 -mno-avx2 -mavx -mtune=sandybridge" } */ +/* { dg-options "-O2 -mno-avx2 -mavx -mprefer-vector-width=128 -mtune=sandybridge" } */ extern char *dst; diff --git a/gcc/testsuite/gcc.target/i386/pieces-memset-51.c b/gcc/testsuite/gcc.target/i386/pieces-memset-51.c new file mode 100644 index 00000000000..192ec0d1647 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pieces-memset-51.c @@ -0,0 +1,12 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -mno-avx2 -mavx -mtune=generic" } */ + +extern char *dst; + +void +foo (int x) +{ + __builtin_memset (dst, x, 64); +} + +/* { dg-final { scan-assembler-times "vmovdqu\[ \\t\]+\[^\n\]*%ymm" 2 } } */ diff --git a/gcc/testsuite/gcc.target/i386/pieces-strcpy-2.c b/gcc/testsuite/gcc.target/i386/pieces-strcpy-2.c index 90446edb4f3..9bb94b7419b 100644 --- a/gcc/testsuite/gcc.target/i386/pieces-strcpy-2.c +++ b/gcc/testsuite/gcc.target/i386/pieces-strcpy-2.c @@ -1,5 +1,5 @@ /* { dg-do compile { target { ! 
ia32 } } } */ -/* { dg-options "-O2 -mno-avx2 -mavx -mtune=sandybridge" } */ +/* { dg-options "-O2 -mno-avx2 -mavx -mprefer-vector-width=128 -mtune=sandybridge" } */ extern char *strcpy (char *, const char *); diff --git a/gcc/testsuite/gcc.target/i386/pieces-strcpy-3.c b/gcc/testsuite/gcc.target/i386/pieces-strcpy-3.c new file mode 100644 index 00000000000..df7571b547f --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pieces-strcpy-3.c @@ -0,0 +1,15 @@ +/* { dg-do compile { target { ! ia32 } } } */ +/* { dg-options "-O2 -mno-avx2 -mavx -mtune=generic" } */ + +extern char *strcpy (char *, const char *); + +void +foo (char *s) +{ + strcpy (s, + "1234567890abcdef123456abcdef5678123456abcdef567abcdef678" + "1234567"); +} + +/* { dg-final { scan-assembler-times "vmovdqa\[ \\t\]+\[^\n\]*%ymm" 2 } } */ +/* { dg-final { scan-assembler-times "vmovdqu\[ \\t\]+\[^\n\]*%ymm" 2 } } */ -- 2.31.1