On Fri, Aug 21, 2020 at 9:45 AM H.J. Lu <hjl.to...@gmail.com> wrote: > > On Fri, Aug 21, 2020 at 9:35 AM H.J. Lu <hjl.to...@gmail.com> wrote: > > > > On Fri, Aug 21, 2020 at 9:29 AM Hongtao Liu <crazy...@gmail.com> wrote: > > > > > > On Fri, Aug 21, 2020 at 11:50 PM Uros Bizjak <ubiz...@gmail.com> wrote: > > > > > > > > On Fri, Aug 21, 2020 at 5:41 PM Hongtao Liu <crazy...@gmail.com> wrote: > > > > > > > > > > On Fri, Aug 21, 2020 at 9:15 PM Uros Bizjak <ubiz...@gmail.com> wrote: > > > > > > > > > > > > > > > gcc/ > > > > > > > > > PR target/88808 > > > > > > > > > * config/i386/i386.c (ix86_preferred_reload_class): > > > > > > > > > Allow > > > > > > > > > QImode data go into mask registers. > > > > > > > > > * config/i386/i386.md: (*movhi_internal): Adjust > > > > > > > > > constraints > > > > > > > > > for mask registers. > > > > > > > > > (*movqi_internal): Ditto. > > > > > > > > > (*anddi_1): Support mask register operations > > > > > > > > > (*and<mode>_1): Ditto. > > > > > > > > > (*andqi_1): Ditto. > > > > > > > > > (*andn<mode>_1): Ditto. > > > > > > > > > (*<code><mode>_1): Ditto. > > > > > > > > > (*<code>qi_1): Ditto. > > > > > > > > > (*one_cmpl<mode>2_1): Ditto. > > > > > > > > > (*one_cmplsi2_1_zext): Ditto. > > > > > > > > > (*one_cmplqi2_1): Ditto. > > > > > > > > > (define_peephole2): Move constant 0/-1 directly into > > > > > > > > > mask > > > > > > > > > registers. > > > > > > > > > * config/i386/predicates.md (mask_reg_operand): New > > > > > > > > > predicate. > > > > > > > > > * config/i386/sse.md (define_split): Add post-reload > > > > > > > > > splitters > > > > > > > > > that would convert "generic" patterns to mask > > > > > > > > > patterns. > > > > > > > > > (*knotsi_1_zext): New define_insn. > > > > > > > > > > > > > > > > > > gcc/testsuite/ > > > > > > > > > * gcc.target/i386/bitwise_mask_op-1.c: New test. > > > > > > > > > * gcc.target/i386/bitwise_mask_op-2.c: New test. > > > > > > > > > * gcc.target/i386/bitwise_mask_op-3.c: New test. > > > > > > > > > * gcc.target/i386/avx512bw-pr88465.c: New testcase. > > > > > > > > > * gcc.target/i386/avx512bw-kunpckwd-1.c: Adjust > > > > > > > > > testcase. > > > > > > > > > * gcc.target/i386/avx512bw-kunpckwd-3.c: Ditto. > > > > > > > > > * gcc.target/i386/avx512dq-kmovb-5.c: Ditto. > > > > > > > > > * gcc.target/i386/avx512f-kmovw-5.c: Ditto. > > > > > > > > > > > > > > > > A little nit, please put new splitters after the instruction > > > > > > > > pattern. > > > > > > > > > > > > > > > > OK for the whole patch set with the above change, > > > > > > > > > > > > > > > > > > > > > > Yes, thanks for the review. > > > > > > > > > > > > Please note that your patch introduces several testsuite fails with > > > > > > -m32: > > > > > > > > > > > > gcc -O2 -mavx512bitalg -mavx512bw -m32 -g avx512bitalg-vpopcntb-1.c > > > > > > > > > > > > > > > > I can't reproduce this failure. > > > > > > > > Because you are running it on AVX512 enabled target. > > > > > > > > > > Program received signal SIGILL, Illegal instruction. > > > > > > 0x080490ac in __get_cpuid_count (__edx=<synthetic pointer>, > > > > > > __ecx=<synthetic pointer>, __ebx=<synthetic pointer>, > > > > > > __eax=<synthetic > > > > > > pointer>, > > > > > > __subleaf=0, __leaf=7) at > > > > > > /hdd/uros/gcc-build-fast/gcc/include/cpuid.h:316 > > > > > > 316 __cpuid_count (__leaf, __subleaf, *__eax, *__ebx, *__ecx, > > > > > > *__edx); > > > > > > > > > > > > 0x080490a3 <+51>: cpuid > > > > > > 0x080490a5 <+53>: mov $0x1,%eax > > > > > > 0x080490aa <+58>: mov %ecx,%esi > > > > > > => 0x080490ac <+60>: kmovd %ebx,%k0 > > > > > > 0x080490b0 <+64>: mov %edi,%ecx > > > > > > 0x080490b2 <+66>: mov %edi,%ebx > > > > > > > > > > > > kmov insn is generated for __cpuid_count function, where the binary > > > > > > determines, if the new instructions are supported. The binary will > > > > > > crash in the detection code if the processor lacks AVX512 > > > > > > instructions. > > > > > > > > > > > > > > > > IMHO, the testcase shouldn't be run on processors without AVX512BW. > > > > > > > > No, it could run, because it checks for AVX512BW at runtime. > > > > > > > > > > Got it. > > > > > > > > Because in avx512bitalg-vpopcntb-1.c, there's /* > > > > > dg-require-effective-target avx512bw } */. > > > > > > > > This is to check the toolchain for support. > > > > > > > > > what's the version of your assembler? > > > > > > > > GNU assembler version 2.34-4.fc32 > > > > > > > > > > If assembler supports avx512bw, but processor not, the test would pass > > > condition `dg-require-effective-target avx512bw` and be runned. > > > then crashed for illegal instruction. > > > > > > > Please add something like > > > > X86_TUNE_INTER_UNIT_MOVES_FROM_MASK/X86_TUNE_INTER_UNIT_MOVES_TO_MASK > > > > and enable them only for m_CORE_AVX512 (or perhaps m_INTEL). > > > > > > > > Handle this in inline_secondary_memory_needed to reject direct moves > > > > for all other targets. This should disable direct moves for generic > > > > targets. > > > > > > > > > > Yes, I'll add it. > > > > > > > > > (define_insn "*movsi_internal" > > [(set (match_operand:SI 0 "nonimmediate_operand" > > "=r,m ,*y,*y,?*y,?m,?r,?*y,*v,*v,*v,m ,?r,?*v,*k,*k ,*rm,*k") > > (match_operand:SI 1 "general_operand" > > "g ,re,C ,*y,m ,*y,*y,r ,C ,*v,m ,*v,*v,r ,*r,*km,*k ,CBC"))] > > "!(MEM_P (operands[0]) && MEM_P (operands[1]))" > > ... > > [(set (attr "isa") > > (cond [(eq_attr "alternative" "12,13") > > (const_string "sse2") > > ] > > (const_string "*"))) > > > > is wrong. mask register alternatives should be marked with avx512f. > > Please fix it. Other integer move patterns may have the same issue. > > Once these are fixed, > > > > diff --git a/gcc/testsuite/gcc.target/i386/avx512-check.h > > b/gcc/testsuite/gcc.target/i386/avx512-check.h > > index 0a377dba1d5..576e9b390c6 100644 > > --- a/gcc/testsuite/gcc.target/i386/avx512-check.h > > +++ b/gcc/testsuite/gcc.target/i386/avx512-check.h > > @@ -25,6 +25,7 @@ do_test (void) > > } > > #endif > > > > +__attribute__((target ("no-avx512f"))) > > static int > > check_osxsave (void) > > { > > @@ -34,6 +35,7 @@ check_osxsave (void) > > return (ecx & bit_OSXSAVE) != 0; > > } > > > > +__attribute__((target ("no-avx512f"))) > > int > > main () > > { > > > > should work. > > > > Like this. You need to check all integer patterns with mskmov and msklog.
Compile CPUID check with "-mno-sse -mfpmath=387" to disable SSE, AVX and AVX512 during CPUID check to avoid vector and mask register operations. Note: -mfpmath=387 is needed to override -mfpmath=sse. OK for master branch? -- H.J.
From 7e1ccab16bfbea9c6836296936e87e783290db24 Mon Sep 17 00:00:00 2001 From: "H.J. Lu" <hjl.to...@gmail.com> Date: Fri, 21 Aug 2020 09:42:49 -0700 Subject: [PATCH] x86: Disable SSE, AVX and AVX512 during CPUID check Compile CPUID check with "-mno-sse -mfpmath=387" to disable SSE, AVX and AVX512 during CPUID check to avoid vector and mask register operations. Note: -mfpmath=387 is needed to override -mfpmath=sse. gcc/ PR target/96744 * config/i386/cpuid.h: Add #pragma GCC target("no-sse,fpmath=387") to disable SSE, AVX and AVX512. gcc/testsuite/ PR target/96744 * gcc.target/i386/adx-check.h: Add #pragma GCC target("no-sse,fpmath=387") to disable SSE, AVX and AVX512. * gcc.target/i386/avx2-check.h: Likewise. * gcc.target/i386/avx512-check.h: Likewise. * gcc.target/i386/bmi-check.h: Likewise. * gcc.target/i386/bmi2-check.h: Likewise. * gcc.target/i386/pr77756.c: Likewise. * gcc.target/i386/pr95973.c: Likewise. * gcc.target/i386/rtm-check.h: Likewise. * gcc.target/i386/sha-check.h: Likewise. --- gcc/config/i386/cpuid.h | 5 +++++ gcc/testsuite/gcc.target/i386/adx-check.h | 4 ++++ gcc/testsuite/gcc.target/i386/avx2-check.h | 5 +++++ gcc/testsuite/gcc.target/i386/avx512-check.h | 5 +++++ gcc/testsuite/gcc.target/i386/bmi-check.h | 5 +++++ gcc/testsuite/gcc.target/i386/bmi2-check.h | 5 +++++ gcc/testsuite/gcc.target/i386/pr77756.c | 5 +++++ gcc/testsuite/gcc.target/i386/pr95973.c | 5 +++++ gcc/testsuite/gcc.target/i386/rtm-check.h | 5 +++++ gcc/testsuite/gcc.target/i386/sha-check.h | 5 +++++ 10 files changed, 49 insertions(+) diff --git a/gcc/config/i386/cpuid.h b/gcc/config/i386/cpuid.h index bca61d620db..01aba002bcf 100644 --- a/gcc/config/i386/cpuid.h +++ b/gcc/config/i386/cpuid.h @@ -24,6 +24,9 @@ #ifndef _CPUID_H_INCLUDED #define _CPUID_H_INCLUDED +#pragma GCC push_options +#pragma GCC target("no-sse,fpmath=387") + /* %eax */ #define bit_AVX512BF16 (1 << 5) @@ -324,4 +327,6 @@ __cpuidex (int __cpuid_info[4], int __leaf, int __subleaf) __cpuid_info[2], __cpuid_info[3]); } +#pragma GCC pop_options + #endif /* _CPUID_H_INCLUDED */ diff --git a/gcc/testsuite/gcc.target/i386/adx-check.h b/gcc/testsuite/gcc.target/i386/adx-check.h index cfed1a38483..942f248df43 100644 --- a/gcc/testsuite/gcc.target/i386/adx-check.h +++ b/gcc/testsuite/gcc.target/i386/adx-check.h @@ -8,6 +8,9 @@ static void __attribute__ ((noinline)) do_test (void) adx_test (); } +#pragma GCC push_options +#pragma GCC target("no-sse,fpmath=387") + int main () { @@ -32,3 +35,4 @@ main () return 0; } +#pragma GCC pop_options diff --git a/gcc/testsuite/gcc.target/i386/avx2-check.h b/gcc/testsuite/gcc.target/i386/avx2-check.h index 25bed5e0da6..861308ceb5c 100644 --- a/gcc/testsuite/gcc.target/i386/avx2-check.h +++ b/gcc/testsuite/gcc.target/i386/avx2-check.h @@ -10,6 +10,9 @@ static void __attribute__ ((noinline)) do_test (void) avx2_test (); } +#pragma GCC push_options +#pragma GCC target("no-sse,fpmath=387") + static int check_osxsave (void) { @@ -42,3 +45,5 @@ main () #endif return 0; } + +#pragma GCC pop_options diff --git a/gcc/testsuite/gcc.target/i386/avx512-check.h b/gcc/testsuite/gcc.target/i386/avx512-check.h index 0a377dba1d5..74e1cce16c3 100644 --- a/gcc/testsuite/gcc.target/i386/avx512-check.h +++ b/gcc/testsuite/gcc.target/i386/avx512-check.h @@ -25,6 +25,9 @@ do_test (void) } #endif +#pragma GCC push_options +#pragma GCC target("no-sse,fpmath=387") + static int check_osxsave (void) { @@ -110,3 +113,5 @@ main () #endif return 0; } + +#pragma GCC pop_options diff --git a/gcc/testsuite/gcc.target/i386/bmi-check.h b/gcc/testsuite/gcc.target/i386/bmi-check.h index 1973f3b6468..35b46528d8c 100644 --- a/gcc/testsuite/gcc.target/i386/bmi-check.h +++ b/gcc/testsuite/gcc.target/i386/bmi-check.h @@ -12,6 +12,9 @@ do_test (void) bmi_test (); } +#pragma GCC push_options +#pragma GCC target("no-sse,fpmath=387") + int main () { @@ -35,3 +38,5 @@ main () #endif return 0; } + +#pragma GCC pop_options diff --git a/gcc/testsuite/gcc.target/i386/bmi2-check.h b/gcc/testsuite/gcc.target/i386/bmi2-check.h index ba91ef9b780..06e738e704d 100644 --- a/gcc/testsuite/gcc.target/i386/bmi2-check.h +++ b/gcc/testsuite/gcc.target/i386/bmi2-check.h @@ -11,6 +11,9 @@ do_test (void) bmi2_test (); } +#pragma GCC push_options +#pragma GCC target("no-sse,fpmath=387") + int main () { @@ -34,3 +37,5 @@ main () #endif return 0; } + +#pragma GCC pop_options diff --git a/gcc/testsuite/gcc.target/i386/pr77756.c b/gcc/testsuite/gcc.target/i386/pr77756.c index 1eee7cd5a00..3d2f371d1f0 100644 --- a/gcc/testsuite/gcc.target/i386/pr77756.c +++ b/gcc/testsuite/gcc.target/i386/pr77756.c @@ -2,6 +2,9 @@ #include "cpuid.h" +#pragma GCC push_options +#pragma GCC target("no-sse,fpmath=387") + int main () { @@ -20,3 +23,5 @@ main () return 0; } + +#pragma GCC pop_options diff --git a/gcc/testsuite/gcc.target/i386/pr95973.c b/gcc/testsuite/gcc.target/i386/pr95973.c index 08c7dba8f46..f1d3b7870a3 100644 --- a/gcc/testsuite/gcc.target/i386/pr95973.c +++ b/gcc/testsuite/gcc.target/i386/pr95973.c @@ -4,6 +4,9 @@ #include <cpuid.h> #include <cpuid.h> +#pragma GCC push_options +#pragma GCC target("no-sse,fpmath=387") + int main () { @@ -23,3 +26,5 @@ main () return 0; } + +#pragma GCC pop_options diff --git a/gcc/testsuite/gcc.target/i386/rtm-check.h b/gcc/testsuite/gcc.target/i386/rtm-check.h index bdb5a6dc0bf..f131a4135ca 100644 --- a/gcc/testsuite/gcc.target/i386/rtm-check.h +++ b/gcc/testsuite/gcc.target/i386/rtm-check.h @@ -8,6 +8,9 @@ static void __attribute__ ((noinline)) do_test (void) rtm_test (); } +#pragma GCC push_options +#pragma GCC target("no-sse,fpmath=387") + int main () { @@ -31,3 +34,5 @@ main () #endif return 0; } + +#pragma GCC pop_options diff --git a/gcc/testsuite/gcc.target/i386/sha-check.h b/gcc/testsuite/gcc.target/i386/sha-check.h index 5bc5a59ab80..2c2c4e94403 100644 --- a/gcc/testsuite/gcc.target/i386/sha-check.h +++ b/gcc/testsuite/gcc.target/i386/sha-check.h @@ -10,6 +10,9 @@ do_test (void) sha_test (); } +#pragma GCC push_options +#pragma GCC target("no-sse,fpmath=387") + int main () { @@ -33,3 +36,5 @@ main () #endif return 0; } + +#pragma GCC pop_options -- 2.26.2