From: "Hu, Lin1" <lin1...@intel.com> gcc/ChangeLog:
* common/config/i386/cpuinfo.h (get_available_features): Detect AMX-MOVRS. * common/config/i386/i386-common.cc (OPTION_MASK_ISA2_AMX_MOVRS_SET): New. (OPTION_MASK_ISA2_AMX_MOVRS_UNSET): Ditto. (ix86_handle_option): Handle -mamx-movrs. * common/config/i386/i386-cpuinfo.h (enum processor_features): Add FEATURE_AMX_MOVRS. * common/config/i386/i386-isas.h: Add ISA_NAMES_TABLE_ENTRY for amx-movrs. * config.gcc: Add amxmovrsintrin.h. * config/i386/cpuid.h (bit_AMX_MOVRS): New. * config/i386/i386-c.cc (ix86_target_macros_internal): Define __AMX_MOVRS__. * config/i386/i386-isa.def (AMX_MOVRS): Add DEF_PTA(AMX_MOVRS). * config/i386/i386-options.cc (isa2_opts): Add -mamx-movrs. (ix86_valid_target_attribute_inner_p): Handle amx-movrs. * config/i386/i386.opt: Add option -mamx-movrs. * config/i386/i386.opt.urls: Regenerated. * config/i386/immintrin.h: Include amxmovrsintrin.h. * doc/extend.texi: Document amx-movrs. * doc/invoke.texi: Document -mamx-movrs. * doc/sourcebuild.texi: Document target amx-movrs. * config/i386/amxmovrsintrin.h: New file. gcc/testsuite/ChangeLog: * g++.dg/other/i386-2.C: Add -mamx-movrs. * g++.dg/other/i386-3.C: Ditto. * gcc.target/i386/amx-check.h: Add new check for amx-movrs. * gcc.target/i386/funcspec-56.inc: Add new target attribute. * gcc.target/i386/sse-12.c: Add -mamx-movrs. * gcc.target/i386/sse-13.c: Ditto. * gcc.target/i386/sse-14.c: Ditto. * gcc.target/i386/sse-22.c: Add amx-movrs. * gcc.target/i386/sse-23.c: Ditto. * lib/target-supports.exp (check_effective_target_amx_movrs): New. * gcc.target/i386/amxmovrs-asmatt-1.c: New test. * gcc.target/i386/amxmovrs-asmintel-1.c: Ditto. * gcc.target/i386/amxmovrs-t2rpntlvw-2.c: Ditto. * gcc.target/i386/amxmovrs-tileloaddrs-2.c: Ditto. 
--- gcc/common/config/i386/cpuinfo.h | 2 + gcc/common/config/i386/i386-common.cc | 19 ++- gcc/common/config/i386/i386-cpuinfo.h | 1 + gcc/common/config/i386/i386-isas.h | 1 + gcc/config.gcc | 3 +- gcc/config/i386/amxmovrsintrin.h | 111 ++++++++++++++++++ gcc/config/i386/cpuid.h | 1 + gcc/config/i386/i386-c.cc | 2 + gcc/config/i386/i386-isa.def | 1 + gcc/config/i386/i386-options.cc | 4 +- gcc/config/i386/i386.opt | 4 + gcc/config/i386/i386.opt.urls | 3 + gcc/config/i386/immintrin.h | 1 + gcc/doc/extend.texi | 5 + gcc/doc/invoke.texi | 9 +- gcc/doc/sourcebuild.texi | 3 + gcc/testsuite/g++.dg/other/i386-2.C | 2 +- gcc/testsuite/g++.dg/other/i386-3.C | 2 +- gcc/testsuite/gcc.target/i386/amx-check.h | 3 + .../gcc.target/i386/amxmovrs-asmatt-1.c | 27 +++++ .../gcc.target/i386/amxmovrs-asmintel-1.c | 28 +++++ .../gcc.target/i386/amxmovrs-t2rpntlvw-2.c | 58 +++++++++ .../gcc.target/i386/amxmovrs-tileloaddrs-2.c | 33 ++++++ gcc/testsuite/gcc.target/i386/funcspec-56.inc | 2 + gcc/testsuite/gcc.target/i386/sse-12.c | 2 +- gcc/testsuite/gcc.target/i386/sse-13.c | 2 +- gcc/testsuite/gcc.target/i386/sse-14.c | 2 +- gcc/testsuite/gcc.target/i386/sse-22.c | 4 +- gcc/testsuite/gcc.target/i386/sse-23.c | 2 +- gcc/testsuite/lib/target-supports.exp | 11 ++ 30 files changed, 334 insertions(+), 14 deletions(-) create mode 100644 gcc/config/i386/amxmovrsintrin.h create mode 100644 gcc/testsuite/gcc.target/i386/amxmovrs-asmatt-1.c create mode 100644 gcc/testsuite/gcc.target/i386/amxmovrs-asmintel-1.c create mode 100644 gcc/testsuite/gcc.target/i386/amxmovrs-t2rpntlvw-2.c create mode 100644 gcc/testsuite/gcc.target/i386/amxmovrs-tileloaddrs-2.c diff --git a/gcc/common/config/i386/cpuinfo.h b/gcc/common/config/i386/cpuinfo.h index 14af8bf2294..0dcdaafeca5 100644 --- a/gcc/common/config/i386/cpuinfo.h +++ b/gcc/common/config/i386/cpuinfo.h @@ -1011,6 +1011,8 @@ get_available_features (struct __processor_model *cpu_model, set_feature (FEATURE_AMX_TRANSPOSE); if (eax & bit_AMX_FP8) set_feature 
(FEATURE_AMX_FP8); + if (eax & bit_AMX_MOVRS) + set_feature (FEATURE_AMX_MOVRS); } } diff --git a/gcc/common/config/i386/i386-common.cc b/gcc/common/config/i386/i386-common.cc index f7e99e3c070..7ad9f571527 100644 --- a/gcc/common/config/i386/i386-common.cc +++ b/gcc/common/config/i386/i386-common.cc @@ -137,6 +137,8 @@ along with GCC; see the file COPYING3. If not see #define OPTION_MASK_ISA2_AMX_FP8_SET \ (OPTION_MASK_ISA2_AMX_TILE_SET | OPTION_MASK_ISA2_AMX_FP8) #define OPTION_MASK_ISA2_MOVRS_SET OPTION_MASK_ISA2_MOVRS +#define OPTION_MASK_ISA2_AMX_MOVRS_SET \ + (OPTION_MASK_ISA2_AMX_TILE_SET | OPTION_MASK_ISA2_AMX_MOVRS) /* SSE4 includes both SSE4.1 and SSE4.2. -msse4 should be the same as -msse4.2. */ @@ -301,7 +303,7 @@ along with GCC; see the file COPYING3. If not see | OPTION_MASK_ISA2_AMX_BF16_UNSET | OPTION_MASK_ISA2_AMX_FP16_UNSET \ | OPTION_MASK_ISA2_AMX_COMPLEX_UNSET | OPTION_MASK_ISA2_AMX_AVX512_UNSET \ | OPTION_MASK_ISA2_AMX_TF32_UNSET | OPTION_MASK_ISA2_AMX_TRANSPOSE_UNSET \ - | OPTION_MASK_ISA2_AMX_FP8_UNSET) + | OPTION_MASK_ISA2_AMX_FP8_UNSET | OPTION_MASK_ISA2_AMX_MOVRS_UNSET) #define OPTION_MASK_ISA2_AMX_INT8_UNSET OPTION_MASK_ISA2_AMX_INT8 #define OPTION_MASK_ISA2_AMX_BF16_UNSET OPTION_MASK_ISA2_AMX_BF16 #define OPTION_MASK_ISA2_UINTR_UNSET OPTION_MASK_ISA2_UINTR @@ -336,6 +338,7 @@ along with GCC; see the file COPYING3. If not see #define OPTION_MASK_ISA2_AMX_TRANSPOSE_UNSET OPTION_MASK_ISA2_AMX_TRANSPOSE #define OPTION_MASK_ISA2_AMX_FP8_UNSET OPTION_MASK_ISA2_AMX_FP8 #define OPTION_MASK_ISA2_MOVRS_UNSET OPTION_MASK_ISA2_MOVRS +#define OPTION_MASK_ISA2_AMX_MOVRS_UNSET OPTION_MASK_ISA2_AMX_MOVRS /* SSE4 includes both SSE4.1 and SSE4.2. -mno-sse4 should the same as -mno-sse4.1. 
*/ @@ -1496,6 +1499,20 @@ ix86_handle_option (struct gcc_options *opts, } return true; + case OPT_mamx_movrs: + if (value) + { + opts->x_ix86_isa_flags2 |= OPTION_MASK_ISA2_AMX_MOVRS_SET; + opts->x_ix86_isa_flags2_explicit |= OPTION_MASK_ISA2_AMX_MOVRS_SET; + } + else + { + opts->x_ix86_isa_flags2 &= ~OPTION_MASK_ISA2_AMX_MOVRS_UNSET; + opts->x_ix86_isa_flags2_explicit |= + OPTION_MASK_ISA2_AMX_MOVRS_UNSET; + } + return true; + case OPT_mfma: if (value) { diff --git a/gcc/common/config/i386/i386-cpuinfo.h b/gcc/common/config/i386/i386-cpuinfo.h index f170aafb63c..23c995037ed 100644 --- a/gcc/common/config/i386/i386-cpuinfo.h +++ b/gcc/common/config/i386/i386-cpuinfo.h @@ -274,6 +274,7 @@ enum processor_features FEATURE_AMX_TRANSPOSE, FEATURE_AMX_FP8, FEATURE_MOVRS, + FEATURE_AMX_MOVRS, CPU_FEATURE_MAX }; diff --git a/gcc/common/config/i386/i386-isas.h b/gcc/common/config/i386/i386-isas.h index d454173fa0b..bf75e41da39 100644 --- a/gcc/common/config/i386/i386-isas.h +++ b/gcc/common/config/i386/i386-isas.h @@ -196,4 +196,5 @@ ISA_NAMES_TABLE_START P_NONE, "-mamx-transpose") ISA_NAMES_TABLE_ENTRY("amx-fp8", FEATURE_AMX_FP8, P_NONE, "-mamx-fp8") ISA_NAMES_TABLE_ENTRY("movrs", FEATURE_MOVRS, P_NONE, "-mmovrs") + ISA_NAMES_TABLE_ENTRY("amx-movrs", FEATURE_AMX_MOVRS, P_NONE, "-mamx-movrs") ISA_NAMES_TABLE_END diff --git a/gcc/config.gcc b/gcc/config.gcc index 1b2fb9d61a0..732ec0eb27e 100644 --- a/gcc/config.gcc +++ b/gcc/config.gcc @@ -459,7 +459,8 @@ i[34567]86-*-* | x86_64-*-*) avx10_2satcvtintrin.h avx10_2-512satcvtintrin.h avx10_2minmaxintrin.h avx10_2-512minmaxintrin.h avx10_2copyintrin.h amxavx512intrin.h amxtf32intrin.h - amxtransposeintrin.h amxfp8intrin.h movrsintrin.h" + amxtransposeintrin.h amxfp8intrin.h movrsintrin.h + amxmovrsintrin.h" ;; ia64-*-*) extra_headers=ia64intrin.h diff --git a/gcc/config/i386/amxmovrsintrin.h b/gcc/config/i386/amxmovrsintrin.h new file mode 100644 index 00000000000..47c086856c4 --- /dev/null +++ b/gcc/config/i386/amxmovrsintrin.h 
@@ -0,0 +1,111 @@ +/* Copyright (C) 2024 Free Software Foundation, Inc. + + This file is part of GCC. + + GCC is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3, or (at your option) + any later version. + + GCC is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + Under Section 7 of GPL version 3, you are granted additional + permissions described in the GCC Runtime Library Exception, version + 3.1, as published by the Free Software Foundation. + + You should have received a copy of the GNU General Public License and + a copy of the GCC Runtime Library Exception along with this program; + see the files COPYING3 and COPYING.RUNTIME respectively. If not, see + <http://www.gnu.org/licenses/>. */ + +#ifndef _IMMINTRIN_H_INCLUDED +#error "Never use <amxmovrsintrin.h> directly; include <immintrin.h> instead." 
+#endif + +#ifndef _AMX_MOVRSINTRIN_H_INCLUDED +#define _AMX_MOVRSINTRIN_H_INCLUDED + +#if defined(__x86_64__) + +#if !defined(__AMX_MOVRS__) +#pragma GCC push_options +#pragma GCC target("amx-movrs") +#define __DISABLE_AMX_MOVRS__ +#endif /* __AMX_MOVRS__ */ + +#define _tile_loaddrs_internal(tdst, base, stride) \ +__asm__ volatile \ + ("{tileloaddrs\t(%0,%1,1), %%tmm"#tdst \ + "|tileloaddrs\t%%tmm"#tdst", [%0+%1*1]}" \ + :: "r" ((const void*) (base)), "r" ((long) (stride))) + +#define _tile_loaddrst1_internal(tdst, base, stride) \ +__asm__ volatile \ + ("{tileloaddrst1\t(%0,%1,1), %%tmm"#tdst \ + "|tileloaddrst1\t%%tmm"#tdst", [%0+%1*1]}" \ + :: "r" ((const void*) (base)), "r" ((long) (stride))) + +#define _tile_loaddrs(tdst, base, stride) \ + _tile_loaddrs_internal(tdst, base, stride) + +#define _tile_loaddrst1(tdst, base, stride) \ + _tile_loaddrst1_internal(tdst, base, stride) + +#ifdef __DISABLE_AMX_MOVRS__ +#undef __DISABLE_AMX_MOVRS__ +#pragma GCC pop_options +#endif /* __DISABLE_AMX_MOVRS__ */ + +#if !defined(__AMX_MOVRS__) || !defined (__AMX_TRANSPOSE__) +#pragma GCC push_options +#pragma GCC target("amx-movrs,amx-transpose") +#define __DISABLE_AMX_MOVRS_TRANSPOSE__ +#endif /* __AMX_MOVRS_TRANSPOSE__ */ + +#define _tile_2rpntlvwz0rs_internal(tdst, base, stride) \ + __asm__ volatile \ + ("{t2rpntlvwz0rs\t(%0,%1,1), %%tmm"#tdst \ + "|t2rpntlvwz0rs\t%%tmm"#tdst", [%0+%1*1]}" \ + :: "r" ((const void*) (base)), "r" ((long) (stride))) + +#define _tile_2rpntlvwz0rst1_internal(tdst, base, stride) \ + __asm__ volatile \ + ("{t2rpntlvwz0rst1\t(%0,%1,1), %%tmm"#tdst \ + "|t2rpntlvwz0rst1\t%%tmm"#tdst", [%0+%1*1]}" \ + :: "r" ((const void*) (base)), "r" ((long) (stride))) + +#define _tile_2rpntlvwz1rs_internal(tdst, base, stride) \ + __asm__ volatile \ + ("{t2rpntlvwz1rs\t(%0,%1,1), %%tmm"#tdst \ + "|t2rpntlvwz1rs\t%%tmm"#tdst", [%0+%1*1]}" \ + :: "r" ((const void*) (base)), "r" ((long) (stride))) + +#define _tile_2rpntlvwz1rst1_internal(tdst, base, stride) \ + 
__asm__ volatile \ + ("{t2rpntlvwz1rst1\t(%0,%1,1), %%tmm"#tdst \ + "|t2rpntlvwz1rst1\t%%tmm"#tdst", [%0+%1*1]}" \ + :: "r" ((const void*) (base)), "r" ((long) (stride))) + +#define _tile_2rpntlvwz0rs(tdst, base, stride) \ + _tile_2rpntlvwz0rs_internal(tdst, base, stride) + +#define _tile_2rpntlvwz0rst1(tdst, base, stride) \ + _tile_2rpntlvwz0rst1_internal(tdst, base, stride) + +#define _tile_2rpntlvwz1rs(tdst, base, stride) \ + _tile_2rpntlvwz1rs_internal(tdst, base, stride) + +#define _tile_2rpntlvwz1rst1(tdst, base, stride) \ + _tile_2rpntlvwz1rst1_internal(tdst, base, stride) + +#ifdef __DISABLE_AMX_MOVRS_TRANSPOSE__ +#undef __DISABLE_AMX_MOVRS_TRANSPOSE__ +#pragma GCC pop_options +#endif /* __DISABLE_AMX_MOVRS_TRANSPOSE__ */ + +#endif /* __x86_64__ */ + +#endif /* _AMX_MOVRSINTRIN_H_INCLUDED */ diff --git a/gcc/config/i386/cpuid.h b/gcc/config/i386/cpuid.h index 32ff8715263..3ee99da93cb 100644 --- a/gcc/config/i386/cpuid.h +++ b/gcc/config/i386/cpuid.h @@ -169,6 +169,7 @@ #define bit_AMX_TRANSPOSE (1 << 5) #define bit_AMX_TF32 (1 << 6) #define bit_AMX_AVX512 (1 << 7) +#define bit_AMX_MOVRS (1 << 8) /* AVX10 sub leaf (%eax == 0x24) */ /* %ebx */ diff --git a/gcc/config/i386/i386-c.cc b/gcc/config/i386/i386-c.cc index 31f7e6fffc3..da60da40205 100644 --- a/gcc/config/i386/i386-c.cc +++ b/gcc/config/i386/i386-c.cc @@ -751,6 +751,8 @@ ix86_target_macros_internal (HOST_WIDE_INT isa_flag, def_or_undef (parse_in, "__AMX_FP8__"); if (isa_flag2 & OPTION_MASK_ISA2_MOVRS) def_or_undef (parse_in, "__MOVRS__"); + if (isa_flag2 & OPTION_MASK_ISA2_AMX_MOVRS) + def_or_undef (parse_in, "__AMX_MOVRS__"); if (TARGET_IAMCU) { def_or_undef (parse_in, "__iamcu"); diff --git a/gcc/config/i386/i386-isa.def b/gcc/config/i386/i386-isa.def index 643cc3e6c10..6cff3a21416 100644 --- a/gcc/config/i386/i386-isa.def +++ b/gcc/config/i386/i386-isa.def @@ -128,3 +128,4 @@ DEF_PTA(AMX_TF32) DEF_PTA(AMX_TRANSPOSE) DEF_PTA(AMX_FP8) DEF_PTA(MOVRS) +DEF_PTA(AMX_MOVRS) diff --git 
a/gcc/config/i386/i386-options.cc b/gcc/config/i386/i386-options.cc index 8ce9ea0cd8f..acd65efca4b 100644 --- a/gcc/config/i386/i386-options.cc +++ b/gcc/config/i386/i386-options.cc @@ -267,7 +267,8 @@ static struct ix86_target_opts isa2_opts[] = { "-mamx-tf32", OPTION_MASK_ISA2_AMX_TF32 }, { "-mamx-transpose", OPTION_MASK_ISA2_AMX_TRANSPOSE }, { "-mamx-fp8", OPTION_MASK_ISA2_AMX_FP8 }, - { "-mmovrs", OPTION_MASK_ISA2_MOVRS } + { "-mmovrs", OPTION_MASK_ISA2_MOVRS }, + { "-mamx-movrs", OPTION_MASK_ISA2_AMX_MOVRS } }; static struct ix86_target_opts isa_opts[] = { @@ -1141,6 +1142,7 @@ ix86_valid_target_attribute_inner_p (tree fndecl, tree args, char *p_strings[], IX86_ATTR_ISA ("amx-transpose", OPT_mamx_transpose), IX86_ATTR_ISA ("amx-fp8", OPT_mamx_fp8), IX86_ATTR_ISA ("movrs", OPT_mmovrs), + IX86_ATTR_ISA ("amx-movrs", OPT_mamx_movrs), /* enum options */ IX86_ATTR_ENUM ("fpmath=", OPT_mfpmath_), diff --git a/gcc/config/i386/i386.opt b/gcc/config/i386/i386.opt index 18f69c40ffb..76dff198a12 100644 --- a/gcc/config/i386/i386.opt +++ b/gcc/config/i386/i386.opt @@ -1413,3 +1413,7 @@ Support AMX-FP8 built-in functions and code generation. mmovrs Target Mask(ISA2_MOVRS) Var(ix86_isa_flags2) Save Support MOVRS built-in functions and code generation. + +mamx-movrs +Target Mask(ISA2_AMX_MOVRS) Var(ix86_isa_flags2) Save +Support AMX-MOVRS built-in functions and code generation. 
diff --git a/gcc/config/i386/i386.opt.urls b/gcc/config/i386/i386.opt.urls index 4a30a87edc1..dbd59ecd738 100644 --- a/gcc/config/i386/i386.opt.urls +++ b/gcc/config/i386/i386.opt.urls @@ -628,3 +628,6 @@ UrlSuffix(gcc/x86-Options.html#index-mamx-fp8) mmovrs UrlSuffix(gcc/x86-Options.html#index-mmovrs) +mamx-movrs +UrlSuffix(gcc/x86-Options.html#index-mamx-movrs) + diff --git a/gcc/config/i386/immintrin.h b/gcc/config/i386/immintrin.h index ebe0443d451..7e957b88641 100644 --- a/gcc/config/i386/immintrin.h +++ b/gcc/config/i386/immintrin.h @@ -172,4 +172,5 @@ #include <movrsintrin.h> +#include <amxmovrsintrin.h> #endif /* _IMMINTRIN_H_INCLUDED */ diff --git a/gcc/doc/extend.texi b/gcc/doc/extend.texi index 9c913a14e67..8abcd9ba4a4 100644 --- a/gcc/doc/extend.texi +++ b/gcc/doc/extend.texi @@ -7569,6 +7569,11 @@ Enable/disable the generation of the AMX-FP8 instructions. @itemx no-movrs Enable/disable the generation of the MOVRS instructions. +@cindex @code{target("amx-movrs")} function attribute, x86 +@item amx-movrs +@itemx no-amx-movrs +Enable/disable the generation of the AMX-MOVRS instructions. + @cindex @code{target("cld")} function attribute, x86 @item cld @itemx no-cld diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi index c9f95184b5b..95a7b5c4f5c 100644 --- a/gcc/doc/invoke.texi +++ b/gcc/doc/invoke.texi @@ -1483,7 +1483,7 @@ See RS/6000 and PowerPC Options. -mavx512fp16 -mavxifma -mavxvnniint8 -mavxneconvert -mcmpccxadd -mamx-fp16 -mprefetchi -mraoint -mamx-complex -mavxvnniint16 -msm3 -msha512 -msm4 -mapxf -musermsr -mavx10.1 -mavx10.1-256 -mavx10.1-512 -mevex512 -mavx10.2 -mavx10.2-256 --mavx10.2-512 -mamx-avx512 -mamx-tf32 -mamx-transpose -mmovrs +-mavx10.2-512 -mamx-avx512 -mamx-tf32 -mamx-transpose -mmovrs -mamx-movrs -mcldemote -mms-bitfields -mno-align-stringops -minline-all-stringops -minline-stringops-dynamically -mstringop-strategy=@var{alg} -mkl -mwidekl @@ -35571,6 +35571,9 @@ preferred alignment to @option{-mpreferred-stack-boundary=2}. 
@need 200 @opindex mmovrs @itemx -mmovrs +@need 200 +@opindex mamx-movrs +@itemx -mamx-movrs These switches enable the use of instructions in the MMX, SSE, AVX512CD, AVX512VL, AVX512BW, AVX512DQ, AVX512IFMA, AVX512VBMI, SHA, AES, PCLMUL, CLFLUSHOPT, CLWB, FSGSBASE, PTWRITE, RDRND, F16C, FMA, PCONFIG, @@ -35582,8 +35585,8 @@ AVX512VPOPCNTDQ, AVX512VNNI, SERIALIZE, UINTR, HRESET, AMXTILE, AMXINT8, AMXBF16, KL, WIDEKL, AVXVNNI, AVX512-FP16, AVXIFMA, AVXVNNIINT8, AVXNECONVERT, CMPCCXADD, AMX-FP16, PREFETCHI, RAOINT, AMX-COMPLEX, AVXVNNIINT16, SM3, SHA512, SM4, APX_F, USER_MSR, AVX10.1, AVX10.2, AMX-AVX512, AMX-TF32, AMX-TRANSPOSE, -AMX-FP8, MOVRS or CLDEMOTE extended instruction sets. Each has a corresponding -@option{-mno-} option to disable use of these instructions. +AMX-FP8, MOVRS, AMX-MOVRS or CLDEMOTE extended instruction sets. Each has a +corresponding @option{-mno-} option to disable use of these instructions. These extensions are also available as built-in functions: see @ref{x86 Built-in Functions}, for details of the functions enabled and diff --git a/gcc/doc/sourcebuild.texi b/gcc/doc/sourcebuild.texi index 10fd1b9c8a7..b7167b39bb8 100644 --- a/gcc/doc/sourcebuild.texi +++ b/gcc/doc/sourcebuild.texi @@ -2653,6 +2653,9 @@ Target supports the execution of @code{amx-complex} instructions. @item amx_fp16 Target supports the execution of @code{amx-fp16} instructions. +@item amx_movrs +Target supports the execution of @code{amx-movrs} instructions. + @item amx_tf32 Target supports the execution of @code{amx-tf32} instructions. 
diff --git a/gcc/testsuite/g++.dg/other/i386-2.C b/gcc/testsuite/g++.dg/other/i386-2.C index ddfdab41b38..00cdc50fe1b 100644 --- a/gcc/testsuite/g++.dg/other/i386-2.C +++ b/gcc/testsuite/g++.dg/other/i386-2.C @@ -1,5 +1,5 @@ /* { dg-do compile { target i?86-*-* x86_64-*-* } } */ -/* { dg-options "-O -pedantic-errors -march=k8 -msse4a -m3dnow -mavx -mavx2 -mfma4 -mxop -maes -mpclmul -mpopcnt -mabm -mlzcnt -mbmi -mbmi2 -mtbm -mlwp -mfsgsbase -mrdrnd -mf16c -mfma -mrtm -mrdseed -mprfchw -madx -mfxsr -mxsaveopt -msha -mxsavec -mxsaves -mclflushopt -mclwb -mmwaitx -mclzero -mpku -msgx -mrdpid -mgfni -mpconfig -mwbnoinvd -menqcmd -mavx512vp2intersect -mserialize -mtsxldtrk -mamx-tile -mamx-int8 -mamx-bf16 -mkl -mwidekl -mavxvnni -mavxifma -mavxvnniint8 -mavxneconvert -mcmpccxadd -mamx-fp16 -mprefetchi -mraoint -mamx-complex -mavxvnniint16 -msm3 -msha512 -msm4 -mavx10.2-512 -mamx-avx512 -mamx-tf32 -mamx-transpose -mamx-fp8 -mmovrs" } */ +/* { dg-options "-O -pedantic-errors -march=k8 -msse4a -m3dnow -mavx -mavx2 -mfma4 -mxop -maes -mpclmul -mpopcnt -mabm -mlzcnt -mbmi -mbmi2 -mtbm -mlwp -mfsgsbase -mrdrnd -mf16c -mfma -mrtm -mrdseed -mprfchw -madx -mfxsr -mxsaveopt -msha -mxsavec -mxsaves -mclflushopt -mclwb -mmwaitx -mclzero -mpku -msgx -mrdpid -mgfni -mpconfig -mwbnoinvd -menqcmd -mavx512vp2intersect -mserialize -mtsxldtrk -mamx-tile -mamx-int8 -mamx-bf16 -mkl -mwidekl -mavxvnni -mavxifma -mavxvnniint8 -mavxneconvert -mcmpccxadd -mamx-fp16 -mprefetchi -mraoint -mamx-complex -mavxvnniint16 -msm3 -msha512 -msm4 -mavx10.2-512 -mamx-avx512 -mamx-tf32 -mamx-transpose -mamx-fp8 -mmovrs -mamx-movrs" } */ /* { dg-skip-if "requires hosted libstdc++ for cstdlib malloc" { ! 
hostedlib } } */ /* Test that {,x,e,p,t,s,w,a,b,i}mmintrin.h, mm3dnow.h, fma4intrin.h, diff --git a/gcc/testsuite/g++.dg/other/i386-3.C b/gcc/testsuite/g++.dg/other/i386-3.C index ad77fc737b6..8319d8cc3ca 100644 --- a/gcc/testsuite/g++.dg/other/i386-3.C +++ b/gcc/testsuite/g++.dg/other/i386-3.C @@ -1,5 +1,5 @@ /* { dg-do compile { target i?86-*-* x86_64-*-* } } */ -/* { dg-options "-O -fkeep-inline-functions -march=k8 -msse4a -m3dnow -mavx -mavx2 -mfma4 -mxop -maes -mpclmul -mpopcnt -mabm -mlzcnt -mbmi -mbmi2 -mtbm -mlwp -mfsgsbase -mrdrnd -mf16c -mfma -mrtm -mrdseed -mprfchw -madx -mfxsr -mxsaveopt -msha -mxsavec -mxsaves -mclflushopt -mclwb -mmwaitx -mclzero -mpku -msgx -mrdpid -mgfni -mpconfig -mwbnoinvd -menqcmd -mavx512vp2intersect -mserialize -mtsxldtrk -mamx-tile -mamx-int8 -mamx-bf16 -mkl -mwidekl -mavxvnni -mavxifma -mavxvnniint8 -mavxneconvert -mcmpccxadd -mamx-fp16 -mprefetchi -mraoint -mamx-complex -mavxvnniint16 -msm3 -msha512 -msm4 -mavx10.2-512 -mamx-avx512 -mamx-tf32 -mamx-transpose -mamx-fp8 -mmovrs" } */ +/* { dg-options "-O -fkeep-inline-functions -march=k8 -msse4a -m3dnow -mavx -mavx2 -mfma4 -mxop -maes -mpclmul -mpopcnt -mabm -mlzcnt -mbmi -mbmi2 -mtbm -mlwp -mfsgsbase -mrdrnd -mf16c -mfma -mrtm -mrdseed -mprfchw -madx -mfxsr -mxsaveopt -msha -mxsavec -mxsaves -mclflushopt -mclwb -mmwaitx -mclzero -mpku -msgx -mrdpid -mgfni -mpconfig -mwbnoinvd -menqcmd -mavx512vp2intersect -mserialize -mtsxldtrk -mamx-tile -mamx-int8 -mamx-bf16 -mkl -mwidekl -mavxvnni -mavxifma -mavxvnniint8 -mavxneconvert -mcmpccxadd -mamx-fp16 -mprefetchi -mraoint -mamx-complex -mavxvnniint16 -msm3 -msha512 -msm4 -mavx10.2-512 -mamx-avx512 -mamx-tf32 -mamx-transpose -mamx-fp8 -mmovrs -mamx-movrs" } */ /* { dg-skip-if "requires hosted libstdc++ for cstdlib malloc" { ! 
hostedlib } } */ /* Test that {,x,e,p,t,s,w,a,b,i}mmintrin.h, mm3dnow.h, fma4intrin.h, diff --git a/gcc/testsuite/gcc.target/i386/amx-check.h b/gcc/testsuite/gcc.target/i386/amx-check.h index b765f30d458..0addb5b5d8f 100644 --- a/gcc/testsuite/gcc.target/i386/amx-check.h +++ b/gcc/testsuite/gcc.target/i386/amx-check.h @@ -266,6 +266,9 @@ main () #ifdef AMX_FP8 && __builtin_cpu_supports ("amx-fp8") #endif +#ifdef AMX_MOVRS + && __builtin_cpu_supports ("amx-movrs") +#endif #ifdef __linux__ && request_perm_xtile_data () #endif diff --git a/gcc/testsuite/gcc.target/i386/amxmovrs-asmatt-1.c b/gcc/testsuite/gcc.target/i386/amxmovrs-asmatt-1.c new file mode 100644 index 00000000000..d99a97ff6ce --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/amxmovrs-asmatt-1.c @@ -0,0 +1,27 @@ +/* { dg-do compile { target { ! ia32 } } } */ +/* { dg-options "-O2 -mamx-movrs -mamx-transpose" } */ +/* { dg-final { scan-assembler "tileloaddrs\[ \\t]+\[^\n\]*\(%\[a-z0-9\]*\,%\[a-z0-9\]*\,\[124\]\)+\[^\n\]*%tmm\[0-9\]" } } */ +/* { dg-final { scan-assembler "tileloaddrst1\[ \\t]+\[^\n\]*\(%\[a-z0-9]*\,%\[a-z0-9\]*\,\[124\]\)+\[^\n\]*%tmm\[0-9\]" } } */ +/* { dg-final { scan-assembler "t2rpntlvwz0rs\[ \\t]+\[^\n\]*\(%\[a-z0-9\]*\,%\[a-z0-9\]*\,\[124\]\)+\[^\n\]*%tmm\[0-9\]" } } */ +/* { dg-final { scan-assembler "t2rpntlvwz0rst1\[ \\t]+\[^\n\]*\(%\[a-z0-9\]*\,%\[a-z0-9\]*\,\[124\]\)+\[^\n\]*%tmm\[0-9\]" } } */ +/* { dg-final { scan-assembler "t2rpntlvwz1rs\[ \\t]+\[^\n\]*\(%\[a-z0-9\]*\,%\[a-z0-9\]*\,\[124\]\)+\[^\n\]*%tmm\[0-9\]" } } */ +/* { dg-final { scan-assembler "t2rpntlvwz1rst1\[ \\t]+\[^\n\]*\(%\[a-z0-9\]*\,%\[a-z0-9\]*\,\[124\]\)+\[^\n\]*%tmm\[0-9\]" } } */ +#include <immintrin.h> + +extern const void* base; +extern const int stride; + +#define TMM0 0 +#define TMM1 1 +#define TMM2 2 +#define TMM3 3 + +void TEST() +{ + _tile_loaddrs (TMM1, base, stride); + _tile_loaddrst1 (TMM1, base, stride); + _tile_2rpntlvwz0rs (TMM0, base, stride); + _tile_2rpntlvwz0rst1 (TMM1, base, stride); + 
_tile_2rpntlvwz1rs (TMM2, base, stride); + _tile_2rpntlvwz1rst1 (TMM3, base, stride); +} diff --git a/gcc/testsuite/gcc.target/i386/amxmovrs-asmintel-1.c b/gcc/testsuite/gcc.target/i386/amxmovrs-asmintel-1.c new file mode 100644 index 00000000000..6a339866930 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/amxmovrs-asmintel-1.c @@ -0,0 +1,28 @@ +/* { dg-do compile { target { ! ia32 } } } */ +/* { dg-require-effective-target masm_intel } */ +/* { dg-options "-O2 -mamx-movrs -mamx-transpose -masm=intel" } */ +/* { dg-final { scan-assembler-times "tileloaddrs\[ \\t]%tmm\[0-9\]" 1 } } */ +/* { dg-final { scan-assembler-times "tileloaddrst1\[ \\t]%tmm\[0-9\]" 1 } } */ +/* { dg-final { scan-assembler "t2rpntlvwz0rs\[ \\t]%tmm\[0-9\]" } } */ +/* { dg-final { scan-assembler "t2rpntlvwz0rst1\[ \\t]%tmm\[0-9\]" } } */ +/* { dg-final { scan-assembler "t2rpntlvwz1rs\[ \\t]%tmm\[0-9\]" } } */ +/* { dg-final { scan-assembler "t2rpntlvwz1rst1\[ \\t]%tmm\[0-9\]" } } */ +#include <immintrin.h> + +extern const void* base; +extern const int stride; + +#define TMM0 0 +#define TMM1 1 +#define TMM2 2 +#define TMM3 3 + +void TEST() +{ + _tile_loaddrs (TMM1, base, stride); + _tile_loaddrst1 (TMM1, base, stride); + _tile_2rpntlvwz0rs (TMM0, base, stride); + _tile_2rpntlvwz0rst1 (TMM1, base, stride); + _tile_2rpntlvwz1rs (TMM2, base, stride); + _tile_2rpntlvwz1rst1 (TMM3, base, stride); +} diff --git a/gcc/testsuite/gcc.target/i386/amxmovrs-t2rpntlvw-2.c b/gcc/testsuite/gcc.target/i386/amxmovrs-t2rpntlvw-2.c new file mode 100644 index 00000000000..e38c6ea277a --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/amxmovrs-t2rpntlvw-2.c @@ -0,0 +1,58 @@ +/* { dg-do run { target { ! 
ia32 } } } */ +/* { dg-require-effective-target amx_movrs } */ +/* { dg-require-effective-target amx_transpose } */ +/* { dg-require-effective-target avx512fp16 } */ +/* { dg-options "-O2 -mamx-movrs -mamx-transpose -mavx512fp16 -mavx512bf16" } */ +#define AMX_MOVRS +#define AMX_TRANSPOSE +#define DO_TEST test_amx_movrs_t2rpntlvw +void test_amx_movrs_t2rpntlvw (); +#include "amx-helper.h" + +#define init_pair_tile_reg_and_src_z_t1(tmm_num, src, buffer, ztype, wtype)\ +{ \ + init_pair_tile_src (tmm_num, &src, buffer, ztype); \ + _tile_2rpntlvwz##ztype##rs##wtype (tmm_num, buffer, _STRIDE);\ +} + +void test_amx_movrs_t2rpntlvw () +{ + __tilecfg_u cfg; + __tilepair src; + __tile ref_0, ref_1; + uint8_t buffer[2048]; + int i; + + init_tile_config (&cfg); + + for (i = 0; i < 2048; i++) + buffer[i] = i % 256; + + /* Check t2rpntlvwz0rs. */ + init_pair_tile_reg_and_src_z_t1 (0, src, buffer, 0,); + _tile_stored (0, ref_0.buf, _STRIDE); + _tile_stored (1, ref_1.buf, _STRIDE); + if (!check_pair_tile_register (&ref_0, &ref_1, &src)) + abort (); + + /* Check t2rpntlvwz1rs. */ + init_pair_tile_reg_and_src_z_t1 (1, src, buffer, 1,); + _tile_stored (0, ref_0.buf, _STRIDE); + _tile_stored (1, ref_1.buf, _STRIDE); + if (!check_pair_tile_register (&ref_0, &ref_1, &src)) + abort (); + + /* Check t2rpntlvwz0rst1. */ + init_pair_tile_reg_and_src_z_t1 (0, src, buffer, 0, t1); + _tile_stored (0, ref_0.buf, _STRIDE); + _tile_stored (1, ref_1.buf, _STRIDE); + if (!check_pair_tile_register (&ref_0, &ref_1, &src)) + abort (); + + /* Check t2rpntlvwz1rst1. 
 */ + init_pair_tile_reg_and_src_z_t1 (1, src, buffer, 1, t1); + _tile_stored (0, ref_0.buf, _STRIDE); + _tile_stored (1, ref_1.buf, _STRIDE); + if (!check_pair_tile_register (&ref_0, &ref_1, &src)) + abort (); +} diff --git a/gcc/testsuite/gcc.target/i386/amxmovrs-tileloaddrs-2.c b/gcc/testsuite/gcc.target/i386/amxmovrs-tileloaddrs-2.c new file mode 100644 index 00000000000..d247c66b06b --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/amxmovrs-tileloaddrs-2.c @@ -0,0 +1,33 @@ +/* { dg-do run { target { ! ia32 } } } */ +/* { dg-require-effective-target amx_movrs } */ +/* { dg-options "-O2 -mamx-movrs" } */ +#define AMX_MOVRS +#define DO_TEST test_amx_movrs_tileloaddrs +void test_amx_movrs_tileloaddrs (); +#include "amx-helper.h" + +void test_amx_movrs_tileloaddrs () +{ + __tilecfg_u cfg; + __tile reg_src0, reg_src1, reg_ref0, reg_ref1; + uint8_t buffer[1024]; + int i; + + for (i = 0; i < 1024; i++) + buffer[i] = i % 256; + + + init_tile_config (&cfg); + + init_tile_src (0, &reg_src0, buffer); + _tile_loaddrs (0, reg_src0.buf, _STRIDE); + _tile_stored (0, reg_ref0.buf, _STRIDE); + if (!check_tile_register (&reg_ref0, &reg_src0)) + abort(); + + init_tile_src (1, &reg_src1, buffer); + _tile_loaddrst1 (1, reg_src1.buf, _STRIDE); + _tile_stored (1, reg_ref1.buf, _STRIDE); + if (!check_tile_register (&reg_ref1, &reg_src1)) + abort(); +} diff --git a/gcc/testsuite/gcc.target/i386/funcspec-56.inc b/gcc/testsuite/gcc.target/i386/funcspec-56.inc index 5fdc754bb8f..09276ad45d9 100644 --- a/gcc/testsuite/gcc.target/i386/funcspec-56.inc +++ b/gcc/testsuite/gcc.target/i386/funcspec-56.inc @@ -94,6 +94,7 @@ extern void test_amx_tf32 (void) __attribute__((__target__("amx-tf32"))); extern void test_amx_transpose (void) __attribute__((__target__("amx-transpose"))); extern void test_amx_fp8 (void) __attribute__((__target__("amx-fp8"))); extern void test_movrs (void) __attribute__((__target__("movrs"))); +extern void test_amx_movrs (void) __attribute__((__target__("amx-movrs"))); extern void 
test_no_sgx (void) __attribute__((__target__("no-sgx"))); extern void test_no_avx512vpopcntdq(void) __attribute__((__target__("no-avx512vpopcntdq"))); @@ -189,6 +190,7 @@ extern void test_no_amx_tf32 (void) __attribute__((__target__("no-amx-tf32"))); extern void test_no_amx_transpose (void) __attribute__((__target__("no-amx-transpose"))); extern void test_no_amx_fp8 (void) __attribute__((__target__("no-amx-fp8"))); extern void test_no_movrs (void) __attribute__((__target__("no-movrs"))); +extern void test_no_amx_movrs (void) __attribute__((__target__("no-amx-movrs"))); extern void test_arch_nocona (void) __attribute__((__target__("arch=nocona"))); extern void test_arch_core2 (void) __attribute__((__target__("arch=core2"))); diff --git a/gcc/testsuite/gcc.target/i386/sse-12.c b/gcc/testsuite/gcc.target/i386/sse-12.c index 53480d6ca8b..c9907fcc922 100644 --- a/gcc/testsuite/gcc.target/i386/sse-12.c +++ b/gcc/testsuite/gcc.target/i386/sse-12.c @@ -3,7 +3,7 @@ popcntintrin.h gfniintrin.h and mm_malloc.h are usable with -O -std=c89 -pedantic-errors. 
*/ /* { dg-do compile } */ -/* { dg-options "-O -std=c89 -pedantic-errors -march=k8 -msse4a -m3dnow -mavx -mavx2 -mfma4 -mxop -maes -mpclmul -mpopcnt -mabm -mlzcnt -mbmi -mbmi2 -mtbm -mlwp -mfsgsbase -mrdrnd -mf16c -mfma -mrtm -mrdseed -mprfchw -madx -mfxsr -mxsaveopt -msha -mxsavec -mxsaves -mclflushopt -mclwb -mmwaitx -mclzero -mpku -msgx -mrdpid -mgfni -mpconfig -mwbnoinvd -menqcmd -mavx512vp2intersect -mserialize -mtsxldtrk -mamx-tile -mamx-int8 -mamx-bf16 -mkl -mwidekl -mavxvnni -mavxifma -mavxvnniint8 -mavxneconvert -mamx-fp16 -mraoint -mamx-complex -mavxvnniint16 -msm3 -msha512 -msm4 -mavx10.2-512 -mamx-avx512 -mamx-tf32 -mamx-transpose -mamx-fp8 -mmovrs" } */ +/* { dg-options "-O -std=c89 -pedantic-errors -march=k8 -msse4a -m3dnow -mavx -mavx2 -mfma4 -mxop -maes -mpclmul -mpopcnt -mabm -mlzcnt -mbmi -mbmi2 -mtbm -mlwp -mfsgsbase -mrdrnd -mf16c -mfma -mrtm -mrdseed -mprfchw -madx -mfxsr -mxsaveopt -msha -mxsavec -mxsaves -mclflushopt -mclwb -mmwaitx -mclzero -mpku -msgx -mrdpid -mgfni -mpconfig -mwbnoinvd -menqcmd -mavx512vp2intersect -mserialize -mtsxldtrk -mamx-tile -mamx-int8 -mamx-bf16 -mkl -mwidekl -mavxvnni -mavxifma -mavxvnniint8 -mavxneconvert -mamx-fp16 -mraoint -mamx-complex -mavxvnniint16 -msm3 -msha512 -msm4 -mavx10.2-512 -mamx-avx512 -mamx-tf32 -mamx-transpose -mamx-fp8 -mmovrs -mamx-movrs" } */ #include <x86intrin.h> diff --git a/gcc/testsuite/gcc.target/i386/sse-13.c b/gcc/testsuite/gcc.target/i386/sse-13.c index f0acb380dbb..04df3977e32 100644 --- a/gcc/testsuite/gcc.target/i386/sse-13.c +++ b/gcc/testsuite/gcc.target/i386/sse-13.c @@ -1,5 +1,5 @@ /* { dg-do compile } */ -/* { dg-options "-O2 -Werror-implicit-function-declaration -march=k8 -msse4a -m3dnow -mavx -mavx2 -mfma4 -mxop -maes -mpclmul -mpopcnt -mabm -mlzcnt -mbmi -mbmi2 -mtbm -mlwp -mfsgsbase -mrdrnd -mf16c -mfma -mrtm -mrdseed -mprfchw -madx -mfxsr -mxsaveopt -msha -mxsavec -mxsaves -mclflushopt -mavx512vp2intersect -mclwb -mmwaitx -mclzero -mpku -msgx -mrdpid -mgfni -mpconfig 
-mwbnoinvd -menqcmd -mserialize -mtsxldtrk -mamx-tile -mamx-int8 -mamx-bf16 -mkl -mwidekl -mavxvnni -mavxifma -mavxvnniint8 -mavxneconvert -mcmpccxadd -mamx-fp16 -mprefetchi -mraoint -mamx-complex -mavxvnniint16 -msm3 -msha512 -msm4 -mavx10.2-512 -mamx-avx512 -mamx-tf32 -mamx-transpose -mamx-fp8 -mmovrs" } */ +/* { dg-options "-O2 -Werror-implicit-function-declaration -march=k8 -msse4a -m3dnow -mavx -mavx2 -mfma4 -mxop -maes -mpclmul -mpopcnt -mabm -mlzcnt -mbmi -mbmi2 -mtbm -mlwp -mfsgsbase -mrdrnd -mf16c -mfma -mrtm -mrdseed -mprfchw -madx -mfxsr -mxsaveopt -msha -mxsavec -mxsaves -mclflushopt -mavx512vp2intersect -mclwb -mmwaitx -mclzero -mpku -msgx -mrdpid -mgfni -mpconfig -mwbnoinvd -menqcmd -mserialize -mtsxldtrk -mamx-tile -mamx-int8 -mamx-bf16 -mkl -mwidekl -mavxvnni -mavxifma -mavxvnniint8 -mavxneconvert -mcmpccxadd -mamx-fp16 -mprefetchi -mraoint -mamx-complex -mavxvnniint16 -msm3 -msha512 -msm4 -mavx10.2-512 -mamx-avx512 -mamx-tf32 -mamx-transpose -mamx-fp8 -mmovrs -mamx-movrs" } */ /* { dg-add-options bind_pic_locally } */ #include <mm_malloc.h> diff --git a/gcc/testsuite/gcc.target/i386/sse-14.c b/gcc/testsuite/gcc.target/i386/sse-14.c index 96f14123caf..4910f1a4d0b 100644 --- a/gcc/testsuite/gcc.target/i386/sse-14.c +++ b/gcc/testsuite/gcc.target/i386/sse-14.c @@ -1,5 +1,5 @@ /* { dg-do compile } */ -/* { dg-options "-O0 -Werror-implicit-function-declaration -march=k8 -msse4a -m3dnow -mavx -mavx2 -mfma4 -mxop -maes -mpclmul -mpopcnt -mabm -mlzcnt -mbmi -mbmi2 -mtbm -mlwp -mfsgsbase -mrdrnd -mf16c -mfma -mrtm -mrdseed -mprfchw -madx -mfxsr -mxsaveopt -msha -mxsavec -mxsaves -mclflushopt -mclwb -mmwaitx -mclzero -mpku -msgx -mrdpid -mgfni -mpconfig -mwbnoinvd -menqcmd -mavx512vp2intersect -mserialize -mtsxldtrk -mamx-tile -mamx-int8 -mamx-bf16 -mkl -mwidekl -mavxvnni -mavxifma -mavxvnniint8 -mavxneconvert -mamx-fp16 -mraoint -mamx-complex -mavxvnniint16 -msm3 -msha512 -msm4 -mavx10.2-512 -mamx-avx512 -mamx-tf32 -mamx-transpose -mamx-fp8 -mmovrs" } */ 
+/* { dg-options "-O0 -Werror-implicit-function-declaration -march=k8 -msse4a -m3dnow -mavx -mavx2 -mfma4 -mxop -maes -mpclmul -mpopcnt -mabm -mlzcnt -mbmi -mbmi2 -mtbm -mlwp -mfsgsbase -mrdrnd -mf16c -mfma -mrtm -mrdseed -mprfchw -madx -mfxsr -mxsaveopt -msha -mxsavec -mxsaves -mclflushopt -mclwb -mmwaitx -mclzero -mpku -msgx -mrdpid -mgfni -mpconfig -mwbnoinvd -menqcmd -mavx512vp2intersect -mserialize -mtsxldtrk -mamx-tile -mamx-int8 -mamx-bf16 -mkl -mwidekl -mavxvnni -mavxifma -mavxvnniint8 -mavxneconvert -mamx-fp16 -mraoint -mamx-complex -mavxvnniint16 -msm3 -msha512 -msm4 -mavx10.2-512 -mamx-avx512 -mamx-tf32 -mamx-transpose -mamx-fp8 -mmovrs -mamx-movrs" } */ /* { dg-add-options bind_pic_locally } */ #include <mm_malloc.h> diff --git a/gcc/testsuite/gcc.target/i386/sse-22.c b/gcc/testsuite/gcc.target/i386/sse-22.c index 6a4f48f111a..6ca0d139df5 100644 --- a/gcc/testsuite/gcc.target/i386/sse-22.c +++ b/gcc/testsuite/gcc.target/i386/sse-22.c @@ -103,7 +103,7 @@ #ifndef DIFFERENT_PRAGMAS -#pragma GCC target ("sse4a,3dnow,avx,avx2,fma4,xop,aes,pclmul,popcnt,abm,lzcnt,bmi,bmi2,tbm,lwp,fsgsbase,rdrnd,f16c,rtm,rdseed,prfchw,adx,fxsr,xsaveopt,sha,gfni,avx512vp2intersect,serialize,tsxldtrk,amx-tile,amx-int8,amx-bf16,kl,widekl,avxvnni,avxifma,avxvnniint8,avxneconvert,amx-fp16,raoint,amx-complex,avxvnniint16,sm3,sha512,sm4,avx10.2-512,amx-avx512,amx-tf32,amx-transpose,amx-fp8,movrs") +#pragma GCC target ("sse4a,3dnow,avx,avx2,fma4,xop,aes,pclmul,popcnt,abm,lzcnt,bmi,bmi2,tbm,lwp,fsgsbase,rdrnd,f16c,rtm,rdseed,prfchw,adx,fxsr,xsaveopt,sha,gfni,avx512vp2intersect,serialize,tsxldtrk,amx-tile,amx-int8,amx-bf16,kl,widekl,avxvnni,avxifma,avxvnniint8,avxneconvert,amx-fp16,raoint,amx-complex,avxvnniint16,sm3,sha512,sm4,avx10.2-512,amx-avx512,amx-tf32,amx-transpose,amx-fp8,movrs,amx-movrs") #endif /* Following intrinsics require immediate arguments. 
They @@ -220,7 +220,7 @@ test_4 (_mm_cmpestrz, int, __m128i, int, __m128i, int, 1) /* immintrin.h (AVX/AVX2/RDRND/FSGSBASE/F16C/RTM/AVX512F/SHA) */ #ifdef DIFFERENT_PRAGMAS -#pragma GCC target ("avx,avx2,rdrnd,fsgsbase,f16c,rtm,sha,gfni,avx512vp2intersect,serialize,tsxldtrk,amx-tile,amx-int8,amx-bf16,kl,widekl,avxvnni,avxifma,avxvnniint8,avxneconvert,amx-fp16,raoint,amx-complex,avxvnniint16,sm3,sha512,sm4,avx10.2-512,amx-avx512,amx-tf32,amx-transpose,amx-fp8,movrs") +#pragma GCC target ("avx,avx2,rdrnd,fsgsbase,f16c,rtm,sha,gfni,avx512vp2intersect,serialize,tsxldtrk,amx-tile,amx-int8,amx-bf16,kl,widekl,avxvnni,avxifma,avxvnniint8,avxneconvert,amx-fp16,raoint,amx-complex,avxvnniint16,sm3,sha512,sm4,avx10.2-512,amx-avx512,amx-tf32,amx-transpose,amx-fp8,movrs,amx-movrs") #endif #include <immintrin.h> test_1 (_cvtss_sh, unsigned short, float, 1) diff --git a/gcc/testsuite/gcc.target/i386/sse-23.c b/gcc/testsuite/gcc.target/i386/sse-23.c index 8fffacbd3ca..0f836d9fa5a 100644 --- a/gcc/testsuite/gcc.target/i386/sse-23.c +++ b/gcc/testsuite/gcc.target/i386/sse-23.c @@ -1082,6 +1082,6 @@ #define __builtin_ia32_minmaxps128_mask(A, B, C, D, E) __builtin_ia32_minmaxps128_mask (A, B, 100, D, E) #define __builtin_ia32_minmaxps256_mask_round(A, B, C, D, E, F) __builtin_ia32_minmaxps256_mask_round (A, B, 100, D, E, 4) -#pragma GCC target ("sse4a,3dnow,avx,avx2,fma4,xop,aes,pclmul,popcnt,abm,lzcnt,bmi,bmi2,tbm,lwp,fsgsbase,rdrnd,f16c,fma,rtm,rdseed,prfchw,adx,fxsr,xsaveopt,sha,xsavec,xsaves,clflushopt,clwb,mwaitx,clzero,pku,sgx,rdpid,gfni,vpclmulqdq,pconfig,wbnoinvd,enqcmd,avx512vp2intersect,serialize,tsxldtrk,amx-tile,amx-int8,amx-bf16,kl,widekl,avxvnni,avxifma,avxvnniint8,avxneconvert,cmpccxadd,amx-fp16,prefetchi,raoint,amx-complex,avxvnniint16,sm3,sha512,sm4,avx10.2-512,amx-avx512,amx-tf32,amx-transpose,amx-fp8,movrs") +#pragma GCC target 
("sse4a,3dnow,avx,avx2,fma4,xop,aes,pclmul,popcnt,abm,lzcnt,bmi,bmi2,tbm,lwp,fsgsbase,rdrnd,f16c,fma,rtm,rdseed,prfchw,adx,fxsr,xsaveopt,sha,xsavec,xsaves,clflushopt,clwb,mwaitx,clzero,pku,sgx,rdpid,gfni,vpclmulqdq,pconfig,wbnoinvd,enqcmd,avx512vp2intersect,serialize,tsxldtrk,amx-tile,amx-int8,amx-bf16,kl,widekl,avxvnni,avxifma,avxvnniint8,avxneconvert,cmpccxadd,amx-fp16,prefetchi,raoint,amx-complex,avxvnniint16,sm3,sha512,sm4,avx10.2-512,amx-avx512,amx-tf32,amx-transpose,amx-fp8,movrs,amx-movrs") #include <x86intrin.h> diff --git a/gcc/testsuite/lib/target-supports.exp b/gcc/testsuite/lib/target-supports.exp index dccc3866cca..c0aba8863a1 100644 --- a/gcc/testsuite/lib/target-supports.exp +++ b/gcc/testsuite/lib/target-supports.exp @@ -10825,6 +10825,17 @@ proc check_effective_target_amx_fp8 { } { } "-mamx-fp8" ] } +# Return 1 if amx-movrs instructions can be compiled. +proc check_effective_target_amx_movrs { } { + return [check_no_compiler_messages amx_movrs object { + void + foo () + { + __asm__ volatile ("tileloaddrs\t(%%r9), %%tmm3" ::); + } + } "-mamx-movrs" ] +} + # Return 1 if sse instructions can be compiled. proc check_effective_target_sse { } { return [check_no_compiler_messages sse object { -- 2.31.1