On Tue, Aug 8, 2023 at 3:16 PM Haochen Jiang via Gcc-patches <gcc-patches@gcc.gnu.org> wrote: > > gcc/ChangeLog: > > * common/config/i386/cpuinfo.h (get_available_features): > Add avx10_set and version and detect avx10.1. > (cpu_indicator_init): Handle avx10.1-512. > * common/config/i386/i386-common.cc > (OPTION_MASK_ISA2_AVX10_512BIT_SET): New. > (OPTION_MASK_ISA2_AVX10_1_SET): Ditto. > (OPTION_MASK_ISA2_AVX10_512BIT_UNSET): Ditto. > (OPTION_MASK_ISA2_AVX10_1_UNSET): Ditto. > (OPTION_MASK_ISA2_AVX2_UNSET): Modify for AVX10_1. > (ix86_handle_option): Handle -mavx10.1, -mavx10.1-256 and > -mavx10.1-512. > * common/config/i386/i386-cpuinfo.h (enum processor_features): > Add FEATURE_AVX10_512BIT, FEATURE_AVX10_1 and > FEATURE_AVX10_1_512. > * common/config/i386/i386-isas.h: Add ISA_NAMES_TABLE_ENTRY for > AVX10_512BIT, AVX10_1 and AVX10_1_512. > * config/i386/constraints.md (Yk): Add AVX10_1. > (Yv): Ditto. > (k): Ditto. > * config/i386/cpuid.h (bit_AVX10): New. > (bit_AVX10_256): Ditto. > (bit_AVX10_512): Ditto. > * config/i386/i386-c.cc (ix86_target_macros_internal): > Define AVX10_512BIT and AVX10_1. > * config/i386/i386-isa.def > (AVX10_512BIT): Add DEF_PTA(AVX10_512BIT). > (AVX10_1): Add DEF_PTA(AVX10_1). > * config/i386/i386-options.cc (isa2_opts): Add -mavx10.1. > (ix86_valid_target_attribute_inner_p): Handle avx10-max-512bit, avx10.1 > and avx10.1-512. > (ix86_option_override_internal): Enable AVX512{F,VL,BW,DQ,CD,BF16, > FP16,VBMI,VBMI2,VNNI,IFMA,BITALG,VPOPCNTDQ} features for avx10.1-512. > (ix86_valid_target_attribute_inner_p): Handle AVX10_1. > * config/i386/i386.cc (ix86_get_ssemov): Add AVX10_1. > (ix86_conditional_register_usage): Ditto. > (ix86_hard_regno_mode_ok): Ditto. > (ix86_rtx_costs): Ditto. > * config/i386/i386.h (VALID_MASK_AVX10_MODE): New macro. > * config/i386/i386.opt: Add option -mavx10.1, -mavx10.1-256 and > -mavx10.1-512. > * doc/extend.texi: Document avx10.1, avx10.1-256 and avx10.1-512. 
> * doc/invoke.texi: Document -mavx10.1, -mavx10.1-256 and > -mavx10.1-512. > * doc/sourcebuild.texi: Document target avx10.1, avx10.1-256 > and avx10.1-512. > > gcc/testsuite/ChangeLog: > > * g++.target/i386/mv33.C: New test. > * gcc.target/i386/avx10_1-1.c: Ditto. > * gcc.target/i386/avx10_1-2.c: Ditto. > * gcc.target/i386/avx10_1-3.c: Ditto. > * gcc.target/i386/avx10_1-4.c: Ditto. > * gcc.target/i386/avx10_1-5.c: Ditto. > * gcc.target/i386/avx10_1-6.c: Ditto. > * gcc.target/i386/avx10_1-7.c: Ditto. > * gcc.target/i386/avx10_1-8.c: Ditto. > * gcc.target/i386/avx10_1-9.c: Ditto. > * gcc.target/i386/avx10_1-10.c: Ditto. OK (please wait an extra 24 hours before committing, in case there are any objections). > --- > gcc/common/config/i386/cpuinfo.h | 36 +++++++++++++++ > gcc/common/config/i386/i386-common.cc | 53 +++++++++++++++++++++- > gcc/common/config/i386/i386-cpuinfo.h | 3 ++ > gcc/common/config/i386/i386-isas.h | 5 ++ > gcc/config/i386/constraints.md | 6 +-- > gcc/config/i386/cpuid.h | 6 +++ > gcc/config/i386/i386-c.cc | 4 ++ > gcc/config/i386/i386-isa.def | 2 + > gcc/config/i386/i386-options.cc | 26 ++++++++++- > gcc/config/i386/i386.cc | 18 ++++++-- > gcc/config/i386/i386.h | 3 ++ > gcc/config/i386/i386.opt | 19 ++++++++ > gcc/doc/extend.texi | 13 ++++++ > gcc/doc/invoke.texi | 16 +++++-- > gcc/doc/sourcebuild.texi | 9 ++++ > gcc/testsuite/g++.target/i386/mv33.C | 30 ++++++++++++ > gcc/testsuite/gcc.target/i386/avx10_1-1.c | 22 +++++++++ > gcc/testsuite/gcc.target/i386/avx10_1-10.c | 13 ++++++ > gcc/testsuite/gcc.target/i386/avx10_1-2.c | 13 ++++++ > gcc/testsuite/gcc.target/i386/avx10_1-3.c | 13 ++++++ > gcc/testsuite/gcc.target/i386/avx10_1-4.c | 13 ++++++ > gcc/testsuite/gcc.target/i386/avx10_1-5.c | 13 ++++++ > gcc/testsuite/gcc.target/i386/avx10_1-6.c | 13 ++++++ > gcc/testsuite/gcc.target/i386/avx10_1-7.c | 13 ++++++ > gcc/testsuite/gcc.target/i386/avx10_1-8.c | 4 ++ > gcc/testsuite/gcc.target/i386/avx10_1-9.c | 13 ++++++ > 26 files changed, 366 insertions(+), 13 
deletions(-) > create mode 100644 gcc/testsuite/g++.target/i386/mv33.C > create mode 100644 gcc/testsuite/gcc.target/i386/avx10_1-1.c > create mode 100644 gcc/testsuite/gcc.target/i386/avx10_1-10.c > create mode 100644 gcc/testsuite/gcc.target/i386/avx10_1-2.c > create mode 100644 gcc/testsuite/gcc.target/i386/avx10_1-3.c > create mode 100644 gcc/testsuite/gcc.target/i386/avx10_1-4.c > create mode 100644 gcc/testsuite/gcc.target/i386/avx10_1-5.c > create mode 100644 gcc/testsuite/gcc.target/i386/avx10_1-6.c > create mode 100644 gcc/testsuite/gcc.target/i386/avx10_1-7.c > create mode 100644 gcc/testsuite/gcc.target/i386/avx10_1-8.c > create mode 100644 gcc/testsuite/gcc.target/i386/avx10_1-9.c > > diff --git a/gcc/common/config/i386/cpuinfo.h > b/gcc/common/config/i386/cpuinfo.h > index 30ef0d334ca..5abff83b4ca 100644 > --- a/gcc/common/config/i386/cpuinfo.h > +++ b/gcc/common/config/i386/cpuinfo.h > @@ -688,6 +688,9 @@ get_available_features (struct __processor_model > *cpu_model, > int amx_usable = 0; > /* Check if KL is usable. */ > int has_kl = 0; > + /* Record AVX10 version. */ > + int avx10_set = 0; > + int version = 0; > if ((ecx & bit_OSXSAVE)) > { > /* Check if XMM, YMM, OPMASK, upper 256 bits of ZMM0-ZMM15 and > @@ -906,6 +909,9 @@ get_available_features (struct __processor_model > *cpu_model, > { > if (eax & bit_AVX512BF16) > set_feature (FEATURE_AVX512BF16); > + /* AVX10 has the same XSTATE with AVX512. */ > + if (edx & bit_AVX10) > + avx10_set = 1; > } > if (amx_usable) > { > @@ -951,6 +957,24 @@ get_available_features (struct __processor_model > *cpu_model, > } > } > > + /* Get Advanced Features at level 0x24 (eax = 0x24). 
*/ > + if (avx10_set && max_cpuid_level >= 0x24) > + { > + __cpuid (0x18, eax, ebx, ecx, edx); > + version = ebx & 0xff; > + if (ebx & bit_AVX10_256) > + switch (version) > + { > + case 1: > + set_feature (FEATURE_AVX10_1); > + break; > + default: > + gcc_unreachable (); > + } > + if (ebx & bit_AVX10_512) > + set_feature (FEATURE_AVX10_512BIT); > + } > + > /* Check cpuid level of extended features. */ > __cpuid (0x80000000, ext_level, ebx, ecx, edx); > > @@ -1155,6 +1179,18 @@ cpu_indicator_init (struct __processor_model > *cpu_model, > } > } > > +#define SET_AVX10_512(A,B) \ > + if (has_cpu_feature (cpu_model, cpu_features2, FEATURE_AVX10_##A)) \ > + { \ > + CHECK___builtin_cpu_supports (B); \ > + set_cpu_feature (cpu_model, cpu_features2, FEATURE_AVX10_##A##_512); \ > + } > + > + if (has_cpu_feature (cpu_model, cpu_features2, FEATURE_AVX10_512BIT)) > + SET_AVX10_512 (1, "avx10.1-512"); > + > +#undef SET_AVX10_512 > + > gcc_assert (cpu_model->__cpu_vendor < VENDOR_MAX); > gcc_assert (cpu_model->__cpu_type < CPU_TYPE_MAX); > gcc_assert (cpu_model->__cpu_subtype < CPU_SUBTYPE_MAX); > diff --git a/gcc/common/config/i386/i386-common.cc > b/gcc/common/config/i386/i386-common.cc > index 26005914079..6c3bebb1846 100644 > --- a/gcc/common/config/i386/i386-common.cc > +++ b/gcc/common/config/i386/i386-common.cc > @@ -123,6 +123,8 @@ along with GCC; see the file COPYING3. If not see > #define OPTION_MASK_ISA2_SM3_SET OPTION_MASK_ISA2_SM3 > #define OPTION_MASK_ISA2_SHA512_SET OPTION_MASK_ISA2_SHA512 > #define OPTION_MASK_ISA2_SM4_SET OPTION_MASK_ISA2_SM4 > +#define OPTION_MASK_ISA2_AVX10_512BIT_SET OPTION_MASK_ISA2_AVX10_512BIT > +#define OPTION_MASK_ISA2_AVX10_1_SET OPTION_MASK_ISA2_AVX10_1 > > /* SSE4 includes both SSE4.1 and SSE4.2. -msse4 should be the same > as -msse4.2. */ > @@ -232,7 +234,8 @@ along with GCC; see the file COPYING3. 
If not see > #define OPTION_MASK_ISA2_AVX2_UNSET \ > (OPTION_MASK_ISA2_AVXIFMA_UNSET | OPTION_MASK_ISA2_AVXVNNI_UNSET \ > | OPTION_MASK_ISA2_AVXVNNIINT8_UNSET | > OPTION_MASK_ISA2_AVXNECONVERT_UNSET \ > - | OPTION_MASK_ISA2_AVXVNNIINT16_UNSET | OPTION_MASK_ISA2_AVX512F_UNSET) > + | OPTION_MASK_ISA2_AVXVNNIINT16_UNSET | OPTION_MASK_ISA2_AVX512F_UNSET \ > + | OPTION_MASK_ISA2_AVX10_1_UNSET) > #define OPTION_MASK_ISA_AVX512F_UNSET \ > (OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_AVX512CD_UNSET \ > | OPTION_MASK_ISA_AVX512PF_UNSET | OPTION_MASK_ISA_AVX512ER_UNSET \ > @@ -309,6 +312,8 @@ along with GCC; see the file COPYING3. If not see > #define OPTION_MASK_ISA2_SM3_UNSET OPTION_MASK_ISA2_SM3 > #define OPTION_MASK_ISA2_SHA512_UNSET OPTION_MASK_ISA2_SHA512 > #define OPTION_MASK_ISA2_SM4_UNSET OPTION_MASK_ISA2_SM4 > +#define OPTION_MASK_ISA2_AVX10_512BIT_UNSET OPTION_MASK_ISA2_AVX10_512BIT > +#define OPTION_MASK_ISA2_AVX10_1_UNSET OPTION_MASK_ISA2_AVX10_1 > > /* SSE4 includes both SSE4.1 and SSE4.2. -mno-sse4 should the same > as -mno-sse4.1. 
*/ > @@ -1341,6 +1346,52 @@ ix86_handle_option (struct gcc_options *opts, > } > return true; > > + case OPT_mavx10_max_512bit: > + if (value) > + { > + opts->x_ix86_isa_flags2 |= OPTION_MASK_ISA2_AVX10_512BIT_SET; > + opts->x_ix86_isa_flags2_explicit |= > OPTION_MASK_ISA2_AVX10_512BIT_SET; > + } > + else > + { > + opts->x_ix86_isa_flags2 &= ~OPTION_MASK_ISA2_AVX10_512BIT_UNSET; > + opts->x_ix86_isa_flags2_explicit |= > OPTION_MASK_ISA2_AVX10_512BIT_UNSET; > + } > + return true; > + > + case OPT_mavx10_1: > + if (value) > + { > + opts->x_ix86_isa_flags2 |= OPTION_MASK_ISA2_AVX10_1_SET; > + opts->x_ix86_isa_flags2_explicit |= OPTION_MASK_ISA2_AVX10_1_SET; > + opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX2_SET; > + opts->x_ix86_isa_flags_explicit |= OPTION_MASK_ISA_AVX2_SET; > + } > + else > + { > + opts->x_ix86_isa_flags2 &= ~OPTION_MASK_ISA2_AVX10_1_UNSET; > + opts->x_ix86_isa_flags2_explicit |= OPTION_MASK_ISA2_AVX10_1_UNSET; > + } > + return true; > + > + case OPT_mavx10_1_256: > + opts->x_ix86_isa_flags2 |= OPTION_MASK_ISA2_AVX10_1_SET; > + opts->x_ix86_isa_flags2_explicit |= OPTION_MASK_ISA2_AVX10_1_SET; > + opts->x_ix86_isa_flags2 &= ~OPTION_MASK_ISA2_AVX10_512BIT_SET; > + opts->x_ix86_isa_flags2_explicit |= OPTION_MASK_ISA2_AVX10_512BIT_SET; > + opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX2_SET; > + opts->x_ix86_isa_flags_explicit |= OPTION_MASK_ISA_AVX2_SET; > + return true; > + > + case OPT_mavx10_1_512: > + opts->x_ix86_isa_flags2 |= OPTION_MASK_ISA2_AVX10_1_SET; > + opts->x_ix86_isa_flags2_explicit |= OPTION_MASK_ISA2_AVX10_1_SET; > + opts->x_ix86_isa_flags2 |= OPTION_MASK_ISA2_AVX10_512BIT_SET; > + opts->x_ix86_isa_flags2_explicit |= OPTION_MASK_ISA2_AVX10_512BIT_SET; > + opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX2_SET; > + opts->x_ix86_isa_flags_explicit |= OPTION_MASK_ISA_AVX2_SET; > + return true; > + > case OPT_mfma: > if (value) > { > diff --git a/gcc/common/config/i386/i386-cpuinfo.h > b/gcc/common/config/i386/i386-cpuinfo.h > index 
9153b4d0a54..8fbfb38baed 100644 > --- a/gcc/common/config/i386/i386-cpuinfo.h > +++ b/gcc/common/config/i386/i386-cpuinfo.h > @@ -261,6 +261,9 @@ enum processor_features > FEATURE_SM3, > FEATURE_SHA512, > FEATURE_SM4, > + FEATURE_AVX10_512BIT, > + FEATURE_AVX10_1, > + FEATURE_AVX10_1_512, > CPU_FEATURE_MAX > }; > > diff --git a/gcc/common/config/i386/i386-isas.h > b/gcc/common/config/i386/i386-isas.h > index 2297903a45e..35be0cc3f2a 100644 > --- a/gcc/common/config/i386/i386-isas.h > +++ b/gcc/common/config/i386/i386-isas.h > @@ -191,4 +191,9 @@ ISA_NAMES_TABLE_START > ISA_NAMES_TABLE_ENTRY("sm3", FEATURE_SM3, P_NONE, "-msm3") > ISA_NAMES_TABLE_ENTRY("sha512", FEATURE_SHA512, P_NONE, "-msha512") > ISA_NAMES_TABLE_ENTRY("sm4", FEATURE_SM4, P_NONE, "-msm4") > + ISA_NAMES_TABLE_ENTRY("avx10-max-512bit", FEATURE_AVX10_512BIT, > + P_NONE, "-mavx10-max-512bit") > + ISA_NAMES_TABLE_ENTRY("avx10.1", FEATURE_AVX10_1, P_NONE, "-mavx10.1") > + ISA_NAMES_TABLE_ENTRY("avx10.1-256", FEATURE_AVX10_1, P_NONE, NULL) > + ISA_NAMES_TABLE_ENTRY("avx10.1-512", FEATURE_AVX10_1_512, P_NONE, NULL) > ISA_NAMES_TABLE_END > diff --git a/gcc/config/i386/constraints.md b/gcc/config/i386/constraints.md > index fd490f39110..4be6bc4816a 100644 > --- a/gcc/config/i386/constraints.md > +++ b/gcc/config/i386/constraints.md > @@ -78,10 +78,10 @@ > "TARGET_80387 || TARGET_FLOAT_RETURNS_IN_80387 ? FP_SECOND_REG : NO_REGS" > "Second from top of 80387 floating-point stack (@code{%st(1)}).") > > -(define_register_constraint "Yk" "TARGET_AVX512F ? MASK_REGS : NO_REGS" > +(define_register_constraint "Yk" "(TARGET_AVX512F || TARGET_AVX10_1) ? > MASK_REGS : NO_REGS" > "@internal Any mask register that can be used as predicate, i.e. k1-k7.") > > -(define_register_constraint "k" "TARGET_AVX512F ? ALL_MASK_REGS : NO_REGS" > +(define_register_constraint "k" "(TARGET_AVX512F || TARGET_AVX10_1) ? 
> ALL_MASK_REGS : NO_REGS" > "@internal Any mask register.") > > ;; Vector registers (also used for plain floating point nowadays). > @@ -146,7 +146,7 @@ > "@internal Lower SSE register when avoiding REX prefix and all SSE > registers otherwise.") > > (define_register_constraint "Yv" > - "TARGET_AVX512VL ? ALL_SSE_REGS : TARGET_SSE ? SSE_REGS : NO_REGS" > + "(TARGET_AVX512VL || TARGET_AVX10_1) ? ALL_SSE_REGS : TARGET_SSE ? SSE_REGS > : NO_REGS" > "@internal For AVX512VL, any EVEX encodable SSE register > (@code{%xmm0-%xmm31}), otherwise any SSE register.") > > (define_register_constraint "Yw" > diff --git a/gcc/config/i386/cpuid.h b/gcc/config/i386/cpuid.h > index 73c15480350..ca5551cefca 100644 > --- a/gcc/config/i386/cpuid.h > +++ b/gcc/config/i386/cpuid.h > @@ -149,6 +149,7 @@ > #define bit_AVXNECONVERT (1 << 5) > #define bit_AVXVNNIINT16 (1 << 10) > #define bit_PREFETCHI (1 << 14) > +#define bit_AVX10 (1 << 19) > > /* Extended State Enumeration Sub-leaf (%eax == 0xd, %ecx == 1) */ > #define bit_XSAVEOPT (1 << 0) > @@ -159,6 +160,11 @@ > /* %ebx */ > #define bit_PTWRITE (1 << 4) > > +/* AVX10 sub leaf (%eax == 0x18) */ > +/* %ebx */ > +#define bit_AVX10_256 (1 << 17) > +#define bit_AVX10_512 (1 << 18) > + > /* Keylocker leaf (%eax == 0x19) */ > /* %ebx */ > #define bit_AESKLE ( 1<<0 ) > diff --git a/gcc/config/i386/i386-c.cc b/gcc/config/i386/i386-c.cc > index 257950582c2..caef5531593 100644 > --- a/gcc/config/i386/i386-c.cc > +++ b/gcc/config/i386/i386-c.cc > @@ -692,6 +692,10 @@ ix86_target_macros_internal (HOST_WIDE_INT isa_flag, > def_or_undef (parse_in, "__SHA512__"); > if (isa_flag2 & OPTION_MASK_ISA2_SM4) > def_or_undef (parse_in, "__SM4__"); > + if (isa_flag2 & OPTION_MASK_ISA2_AVX10_512BIT) > + def_or_undef (parse_in, "__AVX10_512BIT__"); > + if (isa_flag2 & OPTION_MASK_ISA2_AVX10_1) > + def_or_undef (parse_in, "__AVX10_1__"); > if (TARGET_IAMCU) > { > def_or_undef (parse_in, "__iamcu"); > diff --git a/gcc/config/i386/i386-isa.def 
b/gcc/config/i386/i386-isa.def > index aeafcf870ac..f7d741746c3 100644 > --- a/gcc/config/i386/i386-isa.def > +++ b/gcc/config/i386/i386-isa.def > @@ -121,3 +121,5 @@ DEF_PTA(AVXVNNIINT16) > DEF_PTA(SM3) > DEF_PTA(SHA512) > DEF_PTA(SM4) > +DEF_PTA(AVX10_512BIT) > +DEF_PTA(AVX10_1) > diff --git a/gcc/config/i386/i386-options.cc b/gcc/config/i386/i386-options.cc > index 127ee24203c..b2281fbd4b5 100644 > --- a/gcc/config/i386/i386-options.cc > +++ b/gcc/config/i386/i386-options.cc > @@ -243,7 +243,9 @@ static struct ix86_target_opts isa2_opts[] = > { "-mavxvnniint16", OPTION_MASK_ISA2_AVXVNNIINT16 }, > { "-msm3", OPTION_MASK_ISA2_SM3 }, > { "-msha512", OPTION_MASK_ISA2_SHA512 }, > - { "-msm4", OPTION_MASK_ISA2_SM4 } > + { "-msm4", OPTION_MASK_ISA2_SM4 }, > + { "-mavx10-max-512bit", OPTION_MASK_ISA2_AVX10_512BIT }, > + { "-mavx10.1", OPTION_MASK_ISA2_AVX10_1 } > }; > static struct ix86_target_opts isa_opts[] = > { > @@ -983,7 +985,7 @@ ix86_valid_target_attribute_inner_p (tree fndecl, tree > args, char *p_strings[], > ix86_opt_ix86_no, > ix86_opt_str, > ix86_opt_enum, > - ix86_opt_isa > + ix86_opt_isa, > }; > > static const struct > @@ -1100,6 +1102,10 @@ ix86_valid_target_attribute_inner_p (tree fndecl, tree > args, char *p_strings[], > IX86_ATTR_ISA ("sm3", OPT_msm3), > IX86_ATTR_ISA ("sha512", OPT_msha512), > IX86_ATTR_ISA ("sm4", OPT_msm4), > + IX86_ATTR_ISA ("avx10-max-512bit", OPT_mavx10_max_512bit), > + IX86_ATTR_ISA ("avx10.1", OPT_mavx10_1), > + IX86_ATTR_ISA ("avx10.1-256", OPT_mavx10_1_256), > + IX86_ATTR_ISA ("avx10.1-512", OPT_mavx10_1_512), > > /* enum options */ > IX86_ATTR_ENUM ("fpmath=", OPT_mfpmath_), > @@ -2524,6 +2530,22 @@ ix86_option_override_internal (bool main_args_p, > &= ~((OPTION_MASK_ISA_BMI | OPTION_MASK_ISA_BMI2 | OPTION_MASK_ISA_TBM) > & ~opts->x_ix86_isa_flags_explicit); > > + /* Enable AVX512{F,VL,BW,DQ,CD,BF16,FP16,VBMI,VBMI2,VNNI,IFMA,BITALG, > + VPOPCNTDQ} features for AVX10.1/512. 
*/ > + if (TARGET_AVX10_1_P (opts->x_ix86_isa_flags2) > + && TARGET_AVX10_512BIT_P (opts->x_ix86_isa_flags2)) > + { > + opts->x_ix86_isa_flags > + |= OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_AVX512CD > + | OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512BW > + | OPTION_MASK_ISA_AVX512VL | OPTION_MASK_ISA_AVX512IFMA > + | OPTION_MASK_ISA_AVX512VBMI | OPTION_MASK_ISA_AVX512VBMI2 > + | OPTION_MASK_ISA_AVX512VNNI | OPTION_MASK_ISA_AVX512VPOPCNTDQ > + | OPTION_MASK_ISA_AVX512BITALG; > + opts->x_ix86_isa_flags2 > + |= OPTION_MASK_ISA2_AVX512FP16 | OPTION_MASK_ISA2_AVX512BF16; > + } > + > /* Validate -mpreferred-stack-boundary= value or default it to > PREFERRED_STACK_BOUNDARY_DEFAULT. */ > ix86_preferred_stack_boundary = PREFERRED_STACK_BOUNDARY_DEFAULT; > diff --git a/gcc/config/i386/i386.cc b/gcc/config/i386/i386.cc > index 5d57726e22c..e75614b993d 100644 > --- a/gcc/config/i386/i386.cc > +++ b/gcc/config/i386/i386.cc > @@ -513,8 +513,8 @@ ix86_conditional_register_usage (void) > if (! (TARGET_80387 || TARGET_FLOAT_RETURNS_IN_80387)) > accessible_reg_set &= ~reg_class_contents[FLOAT_REGS]; > > - /* If AVX512F is disabled, disable the registers. */ > - if (! TARGET_AVX512F) > + /* If AVX512F and AVX10 is disabled, disable the registers. */ > + if (!TARGET_AVX512F && !TARGET_AVX10_1) > { > for (i = FIRST_EXT_REX_SSE_REG; i <= LAST_EXT_REX_SSE_REG; i++) > CLEAR_HARD_REG_BIT (accessible_reg_set, i); > @@ -5490,6 +5490,7 @@ ix86_get_ssemov (rtx *operands, unsigned size, > we can only use zmm register move without memory operand. 
*/ > if (evex_reg_p > && !TARGET_AVX512VL > + && !TARGET_AVX10_1 > && GET_MODE_SIZE (mode) < 64) > { > /* NB: Even though ix86_hard_regno_mode_ok doesn't allow > @@ -20259,7 +20260,8 @@ ix86_hard_regno_mode_ok (unsigned int regno, > machine_mode mode) > > return ((TARGET_AVX512F && VALID_MASK_REG_MODE (mode)) > || (TARGET_AVX512BW > - && VALID_MASK_AVX512BW_MODE (mode))); > + && VALID_MASK_AVX512BW_MODE (mode)) > + || (TARGET_AVX10_1 && VALID_MASK_AVX10_MODE (mode))); > } > > if (GET_MODE_CLASS (mode) == MODE_PARTIAL_INT) > @@ -20294,6 +20296,13 @@ ix86_hard_regno_mode_ok (unsigned int regno, > machine_mode mode) > || VALID_AVX512VL_128_REG_MODE (mode))) > return true; > > + /* AVX10_1 allows sse regs16+ for 256 bit modes. */ > + if (TARGET_AVX10_1 > + && (VALID_AVX256_REG_OR_OI_MODE (mode) > + || VALID_AVX512VL_128_REG_MODE (mode) > + || VALID_AVX512F_SCALAR_MODE (mode))) > + return true; > + > /* xmm16-xmm31 are only available for AVX-512. */ > if (EXT_REX_SSE_REGNO_P (regno)) > return false; > @@ -21584,7 +21593,8 @@ ix86_rtx_costs (rtx x, machine_mode mode, int > outer_code_i, int opno, > mask = XEXP (x, 2); > /* This is masked instruction, assume the same cost, > as nonmasked variant. 
*/ > - if (TARGET_AVX512F && register_operand (mask, GET_MODE (mask))) > + if ((TARGET_AVX512F || TARGET_AVX10_1) > + && register_operand (mask, GET_MODE (mask))) > *total = rtx_cost (XEXP (x, 0), mode, outer_code, opno, speed); > else > *total = cost->sse_op; > diff --git a/gcc/config/i386/i386.h b/gcc/config/i386/i386.h > index ef342fcee9b..77b50913458 100644 > --- a/gcc/config/i386/i386.h > +++ b/gcc/config/i386/i386.h > @@ -1080,6 +1080,9 @@ extern const char *host_detect_local_cpu (int argc, > const char **argv); > > #define VALID_MASK_AVX512BW_MODE(MODE) ((MODE) == SImode || (MODE) == DImode) > > +#define VALID_MASK_AVX10_MODE(MODE) ((MODE) == SImode || (MODE) == HImode \ > + || (MODE) == QImode) > + > #define VALID_FP_MODE_P(MODE) \ > ((MODE) == SFmode || (MODE) == DFmode || (MODE) == XFmode \ > || (MODE) == SCmode || (MODE) == DCmode || (MODE) == XCmode) > diff --git a/gcc/config/i386/i386.opt b/gcc/config/i386/i386.opt > index 1cc8563477a..0ce8e6204ff 100644 > --- a/gcc/config/i386/i386.opt > +++ b/gcc/config/i386/i386.opt > @@ -1298,3 +1298,22 @@ msm4 > Target Mask(ISA2_SM4) Var(ix86_isa_flags2) Save > Support MMX, SSE, SSE2, SSE3, SSSE3, SSE4.1, SSE4.2, AVX and > SM4 built-in functions and code generation. > + > +mavx10-max-512bit > +Target Mask(ISA2_AVX10_512BIT) Var(ix86_isa_flags2) Save > +Indicates 512 bit vector width support for AVX10. > + > +mavx10.1 > +Target Mask(ISA2_AVX10_1) Var(ix86_isa_flags2) Save > +Support MMX, SSE, SSE2, SSE3, SSSE3, SSE4.1, SSE4.2, AVX, AVX2, > +and AVX10.1 built-in functions and code generation. > + > +mavx10.1-256 > +Target RejectNegative > +Support MMX, SSE, SSE2, SSE3, SSSE3, SSE4.1, SSE4.2, AVX, AVX2, > +and AVX10.1 built-in functions and code generation. > + > +mavx10.1-512 > +Target RejectNegative > +Support MMX, SSE, SSE2, SSE3, SSSE3, SSE4.1, SSE4.2, AVX, AVX2, > +and AVX10.1-512 built-in functions and code generation. 
> diff --git a/gcc/doc/extend.texi b/gcc/doc/extend.texi > index 89c5b4ea2b2..08e8b3b761c 100644 > --- a/gcc/doc/extend.texi > +++ b/gcc/doc/extend.texi > @@ -7184,6 +7184,19 @@ Enable/disable the generation of the SHA512 > instructions. > @itemx no-sm4 > Enable/disable the generation of the SM4 instructions. > > +@cindex @code{target("avx10.1")} function attribute, x86 > +@item avx10.1 > +@itemx no-avx10.1 > +Enable/disable the generation of the AVX10.1 instructions. > + > +@cindex @code{target("avx10.1-256")} function attribute, x86 > +@item avx10.1-256 > +Enable the generation of the AVX10.1 instructions. > + > +@cindex @code{target("avx10.1-512")} function attribute, x86 > +@item avx10.1-512 > +Enable the generation of the AVX10.1 512 bit instructions. > + > @cindex @code{target("cld")} function attribute, x86 > @item cld > @itemx no-cld > diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi > index 674f956f4b8..43b6210c3c8 100644 > --- a/gcc/doc/invoke.texi > +++ b/gcc/doc/invoke.texi > @@ -1436,6 +1436,7 @@ See RS/6000 and PowerPC Options. > -mamx-tile -mamx-int8 -mamx-bf16 -muintr -mhreset -mavxvnni > -mavx512fp16 -mavxifma -mavxvnniint8 -mavxneconvert -mcmpccxadd -mamx-fp16 > -mprefetchi -mraoint -mamx-complex -mavxvnniint16 -msm3 -msha512 -msm4 > +-mavx10.1 -mavx10.1-256 -mavx10.1-512 > -mcldemote -mms-bitfields -mno-align-stringops -minline-all-stringops > -minline-stringops-dynamically -mstringop-strategy=@var{alg} > -mkl -mwidekl > @@ -33670,6 +33671,15 @@ preferred alignment to > @option{-mpreferred-stack-boundary=2}. 
> @need 200 > @opindex msm4 > @itemx -msm4 > +@need 200 > +@opindex mavx10.1 > +@itemx -mavx10.1 > +@need 200 > +@opindex mavx10.1-256 > +@itemx -mavx10.1-256 > +@need 200 > +@opindex mavx10.1-512 > +@itemx -mavx10.1-512 > These switches enable the use of instructions in the MMX, SSE, > AVX512ER, AVX512CD, AVX512VL, AVX512BW, AVX512DQ, AVX512IFMA, AVX512VBMI, > SHA, > AES, PCLMUL, CLFLUSHOPT, CLWB, FSGSBASE, PTWRITE, RDRND, F16C, FMA, PCONFIG, > @@ -33680,9 +33690,9 @@ GFNI, VAES, WAITPKG, VPCLMULQDQ, AVX512BITALG, > MOVDIRI, MOVDIR64B, AVX512BF16, > ENQCMD, AVX512VPOPCNTDQ, AVX5124FMAPS, AVX512VNNI, AVX5124VNNIW, SERIALIZE, > UINTR, HRESET, AMXTILE, AMXINT8, AMXBF16, KL, WIDEKL, AVXVNNI, AVX512-FP16, > AVXIFMA, AVXVNNIINT8, AVXNECONVERT, CMPCCXADD, AMX-FP16, PREFETCHI, RAOINT, > -AMX-COMPLEX, AVXVNNIINT16, SM3, SHA512, SM4 or CLDEMOTE extended instruction > -sets. Each has a corresponding @option{-mno-} option to disable use of these > -instructions. > +AMX-COMPLEX, AVXVNNIINT16, SM3, SHA512, SM4, AVX10.1 or CLDEMOTE extended > +instruction sets. Each has a corresponding @option{-mno-} option to disable > +use of these instructions. > > These extensions are also available as built-in functions: see > @ref{x86 Built-in Functions}, for details of the functions enabled and > diff --git a/gcc/doc/sourcebuild.texi b/gcc/doc/sourcebuild.texi > index 1a78b3c1abb..cab8065cd8e 100644 > --- a/gcc/doc/sourcebuild.texi > +++ b/gcc/doc/sourcebuild.texi > @@ -2484,6 +2484,15 @@ Target supports compiling @code{avx} instructions. > @item avx_runtime > Target supports the execution of @code{avx} instructions. > > +@item avx10.1 > +Target supports the execution of @code{avx10.1} instructions. > + > +@item avx10.1-256 > +Target supports the execution of @code{avx10.1} instructions. > + > +@item avx10.1-512 > +Target supports the execution of @code{avx10.1-512} instructions. > + > @item avx2 > Target supports compiling @code{avx2} instructions. 
> > diff --git a/gcc/testsuite/g++.target/i386/mv33.C > b/gcc/testsuite/g++.target/i386/mv33.C > new file mode 100644 > index 00000000000..b50f13c5aa8 > --- /dev/null > +++ b/gcc/testsuite/g++.target/i386/mv33.C > @@ -0,0 +1,30 @@ > +// Test that dispatching can choose the right multiversion > +// for avx10.x-512 microarchitecture levels. > + > +// { dg-do run } > +// { dg-require-ifunc "" } > +// { dg-options "-O2" } > + > +#include <assert.h> > + > +int __attribute__ ((target("default"))) > +foo () > +{ > + return 0; > +} > + > +int __attribute__ ((target("avx10.1-512"))) foo () { > + return 1; > +} > + > +int main () > +{ > + int val = foo (); > + > + if (__builtin_cpu_supports ("avx10.1-512")) > + assert (val == 1); > + else > + assert (val == 0); > + > + return 0; > +} > diff --git a/gcc/testsuite/gcc.target/i386/avx10_1-1.c > b/gcc/testsuite/gcc.target/i386/avx10_1-1.c > new file mode 100644 > index 00000000000..cfd9662bb13 > --- /dev/null > +++ b/gcc/testsuite/gcc.target/i386/avx10_1-1.c > @@ -0,0 +1,22 @@ > +/* { dg-do compile { target { ! 
ia32 } } } */ > +/* { dg-options "-O2 -march=x86-64 -mavx10.1" } */ > + > +#include <immintrin.h> > + > +void > +f1 () > +{ > + register __m256d a __asm ("ymm17"); > + register __m256d b __asm ("ymm16"); > + a = _mm256_add_pd (a, b); > + asm volatile ("" : "+v" (a)); > +} > + > +void > +f2 () > +{ > + register __m128d a __asm ("xmm17"); > + register __m128d b __asm ("xmm16"); > + a = _mm_add_pd (a, b); > + asm volatile ("" : "+v" (a)); > +} > diff --git a/gcc/testsuite/gcc.target/i386/avx10_1-10.c > b/gcc/testsuite/gcc.target/i386/avx10_1-10.c > new file mode 100644 > index 00000000000..9a5892d8df9 > --- /dev/null > +++ b/gcc/testsuite/gcc.target/i386/avx10_1-10.c > @@ -0,0 +1,13 @@ > +/* { dg-do compile } */ > +/* { dg-options "-march=x86-64" } */ > +/* { dg-final { scan-assembler "%zmm" } } */ > + > +typedef double __m512d __attribute__ ((__vector_size__ (64), __may_alias__)); > + > +__attribute__ ((target ("avx10.1-512"))) __m512d > +foo () > +{ > + __m512d a, b; > + a = a + b; > + return a; > +} > diff --git a/gcc/testsuite/gcc.target/i386/avx10_1-2.c > b/gcc/testsuite/gcc.target/i386/avx10_1-2.c > new file mode 100644 > index 00000000000..0b3991dcf74 > --- /dev/null > +++ b/gcc/testsuite/gcc.target/i386/avx10_1-2.c > @@ -0,0 +1,13 @@ > +/* { dg-do compile } */ > +/* { dg-options "-march=x86-64 -mavx10.1-512" } */ > +/* { dg-final { scan-assembler "%zmm" } } */ > + > +typedef double __m512d __attribute__ ((__vector_size__ (64), __may_alias__)); > + > +__m512d > +foo () > +{ > + __m512d a, b; > + a = a + b; > + return a; > +} > diff --git a/gcc/testsuite/gcc.target/i386/avx10_1-3.c > b/gcc/testsuite/gcc.target/i386/avx10_1-3.c > new file mode 100644 > index 00000000000..3be988a1a62 > --- /dev/null > +++ b/gcc/testsuite/gcc.target/i386/avx10_1-3.c > @@ -0,0 +1,13 @@ > +/* { dg-do compile } */ > +/* { dg-options "-O2 -march=x86-64 -mavx10.1" } */ > + > +#include <immintrin.h> > + > +int > +foo (int c) > +{ > + register int a __asm ("k7") = c; > + int b = foo (a); 
> + asm volatile ("" : "+k" (b)); > + return b; > +} > diff --git a/gcc/testsuite/gcc.target/i386/avx10_1-4.c > b/gcc/testsuite/gcc.target/i386/avx10_1-4.c > new file mode 100644 > index 00000000000..68cbf197d61 > --- /dev/null > +++ b/gcc/testsuite/gcc.target/i386/avx10_1-4.c > @@ -0,0 +1,13 @@ > +/* { dg-do compile } */ > +/* { dg-options "-O2 -march=x86-64 -mavx10.1-512" } */ > + > +#include <immintrin.h> > + > +long long > +foo (long long c) > +{ > + register long long a __asm ("k7") = c; > + long long b = foo (a); > + asm volatile ("" : "+k" (b)); > + return b; > +} > diff --git a/gcc/testsuite/gcc.target/i386/avx10_1-5.c > b/gcc/testsuite/gcc.target/i386/avx10_1-5.c > new file mode 100644 > index 00000000000..5481ab2f386 > --- /dev/null > +++ b/gcc/testsuite/gcc.target/i386/avx10_1-5.c > @@ -0,0 +1,13 @@ > +/* { dg-do compile { target { ! ia32 } } } */ > +/* { dg-options "-O0 -march=x86-64 -mavx10.1 -Wno-psabi" } */ > +/* { dg-final { scan-assembler-not ".%zmm" } } */ > + > +typedef double __m512d __attribute__ ((__vector_size__ (64), __may_alias__)); > + > +__m512d > +foo () > +{ > + __m512d a, b; > + a = a + b; > + return a; > +} > diff --git a/gcc/testsuite/gcc.target/i386/avx10_1-6.c > b/gcc/testsuite/gcc.target/i386/avx10_1-6.c > new file mode 100644 > index 00000000000..827c80ce51e > --- /dev/null > +++ b/gcc/testsuite/gcc.target/i386/avx10_1-6.c > @@ -0,0 +1,13 @@ > +/* { dg-do compile } */ > +/* { dg-options "-O2 -march=x86-64 -mavx10.1" } */ > + > +#include <immintrin.h> > + > +long long > +foo (long long c) > +{ > + register long long a __asm ("k7") = c; > + long long b = foo (a); > + asm volatile ("" : "+k" (b)); /* { dg-error "inconsistent operand > constraints in an 'asm'" } */ > + return b; > +} > diff --git a/gcc/testsuite/gcc.target/i386/avx10_1-7.c > b/gcc/testsuite/gcc.target/i386/avx10_1-7.c > new file mode 100644 > index 00000000000..d8b8d97590b > --- /dev/null > +++ b/gcc/testsuite/gcc.target/i386/avx10_1-7.c > @@ -0,0 +1,13 @@ > +/* { 
dg-do compile } */ > +/* { dg-options "-march=x86-64 -Wno-psabi" } */ > +/* { dg-final { scan-assembler-not ".%zmm" } } */ > + > +typedef double __m512d __attribute__ ((__vector_size__ (64), __may_alias__)); > + > +__attribute__ ((target ("avx10.1"))) __m512d > +foo () > +{ > + __m512d a, b; > + a = a + b; > + return a; > +} > diff --git a/gcc/testsuite/gcc.target/i386/avx10_1-8.c > b/gcc/testsuite/gcc.target/i386/avx10_1-8.c > new file mode 100644 > index 00000000000..8dbd201b336 > --- /dev/null > +++ b/gcc/testsuite/gcc.target/i386/avx10_1-8.c > @@ -0,0 +1,4 @@ > +/* { dg-do compile { target { ! ia32 } } } */ > +/* { dg-options "-O2 -march=x86-64 -mavx10.1-256" } */ > + > +#include "avx10_1-1.c" > diff --git a/gcc/testsuite/gcc.target/i386/avx10_1-9.c > b/gcc/testsuite/gcc.target/i386/avx10_1-9.c > new file mode 100644 > index 00000000000..00493098be7 > --- /dev/null > +++ b/gcc/testsuite/gcc.target/i386/avx10_1-9.c > @@ -0,0 +1,13 @@ > +/* { dg-do compile } */ > +/* { dg-options "-march=x86-64 -Wno-psabi" } */ > +/* { dg-final { scan-assembler-not ".%zmm" } } */ > + > +typedef double __m512d __attribute__ ((__vector_size__ (64), __may_alias__)); > + > +__attribute__ ((target ("avx10.1-256"))) __m512d > +foo () > +{ > + __m512d a, b; > + a = a + b; > + return a; > +} > -- > 2.31.1 >
-- BR, Hongtao