> -----Original Message-----
> From: Jiang, Haochen <haochen.ji...@intel.com>
> Sent: Friday, January 3, 2025 4:55 PM
> To: gcc-patches@gcc.gnu.org
> Cc: Liu, Hongtao <hongtao....@intel.com>; ubiz...@gmail.com
> Subject: [PATCH] i386: Change mnemonics from TCVTROWPS2PBF16[H,L] to
> TCVTROWPS2BF16[H,L]
>
> Hi all,
>
> The mnemonics for TCVTROWPS2PBF16[H,L] have been changed to
> TCVTROWPS2BF16[H,L] in ISE056. There will also be some more BF16
> mnemonic changes upcoming, which will fix the regression in PR118270.
Please add PR target/118270 to changelog, otherwise LGTM.
>
> Bootstrapped and tested on x86_64-pc-linux-gnu. Ok for trunk?
>
> Ref: https://cdrdv2.intel.com/v1/dl/getContent/671368
>
> Thx,
> Haochen
>
> ---
>
> In ISE056, the mnemonics for TCVTROWPS2PBF16[H,L] have been changed to
> TCVTROWPS2BF16[H,L].
>
> gcc/ChangeLog:
>
> * config/i386/amxavx512intrin.h
> (_tile_cvtrowps2pbf16h_internal): Rename to...
> (_tile_cvtrowps2bf16h_internal): ...this.
> (_tile_cvtrowps2pbf16hi_internal): Rename to...
> (_tile_cvtrowps2bf16hi_internal): ...this.
> (_tile_cvtrowps2pbf16l_internal): Rename to...
> (_tile_cvtrowps2bf16l_internal): ...this.
> (_tile_cvtrowps2pbf16li_internal): Rename to...
> (_tile_cvtrowps2bf16li_internal): ...this.
> (_tile_cvtrowps2pbf16h): Rename to...
> (_tile_cvtrowps2bf16h): ...this.
> (_tile_cvtrowps2pbf16hi): Rename to...
> (_tile_cvtrowps2bf16hi): ...this.
> (_tile_cvtrowps2pbf16l): Rename to...
> (_tile_cvtrowps2bf16l): ...this.
> (_tile_cvtrowps2pbf16li): Rename to...
> (_tile_cvtrowps2bf16li): ...this.
>
> gcc/testsuite/ChangeLog:
>
> * gcc.target/i386/amxavx512-asmatt-1.c: Adjust intrin call.
> * gcc.target/i386/amxavx512-asmintel-1.c: Ditto.
> * gcc.target/i386/amxavx512-cvtrowps2pbf16-2.c: Rename to...
> * gcc.target/i386/amxavx512-cvtrowps2bf16-2.c: ...this. Rename
> test functions.
> ---
> gcc/config/i386/amxavx512intrin.h | 32 +++++++++----------
> .../gcc.target/i386/amxavx512-asmatt-1.c | 12 +++----
> .../gcc.target/i386/amxavx512-asmintel-1.c | 12 +++----
> ...2pbf16-2.c => amxavx512-cvtrowps2bf16-2.c} | 30 ++++++++---------
> 4 files changed, 43 insertions(+), 43 deletions(-) rename
> gcc/testsuite/gcc.target/i386/{amxavx512-cvtrowps2pbf16-2.c =>
> amxavx512-cvtrowps2bf16-2.c} (67%)
>
> diff --git a/gcc/config/i386/amxavx512intrin.h
> b/gcc/config/i386/amxavx512intrin.h
> index 59d142948fb..ab5362571d1 100644
> --- a/gcc/config/i386/amxavx512intrin.h
> +++ b/gcc/config/i386/amxavx512intrin.h
> @@ -53,38 +53,38 @@
> dst;
> \
> })
>
> -#define _tile_cvtrowps2pbf16h_internal(src,A)
> \
> +#define _tile_cvtrowps2bf16h_internal(src,A) \
> ({ \
> __m512bh dst;
> \
> __asm__ volatile \
> -
> ("{tcvtrowps2pbf16h\t%1, %%tmm"#src", %0|tcvtrowps2pbf16h\t%0, %%t
> mm"#src", %1}" \
> +
> ("{tcvtrowps2bf16h\t%1, %%tmm"#src", %0|tcvtrowps2bf16h\t%0, %%tm
> m"#src", %1}" \
> : "=v" (dst) : "r" ((unsigned) (A))); \
> dst;
> \
> })
>
> -#define _tile_cvtrowps2pbf16hi_internal(src,imm) \
> +#define _tile_cvtrowps2bf16hi_internal(src,imm) \
> ({ \
> __m512bh dst;
> \
> __asm__ volatile \
> -
> ("{tcvtrowps2pbf16h\t$"#imm", %%tmm"#src", %0|tcvtrowps2pbf16h\t%0,
> %%tmm"#src", "#imm"}" \
> +
> ("{tcvtrowps2bf16h\t$"#imm", %%tmm"#src", %0|tcvtrowps2bf16h\t%0, %
> %tmm"#src", "#imm"}" \
> : "=v" (dst) :); \
> dst;
> \
> })
>
> -#define _tile_cvtrowps2pbf16l_internal(src,A)
> \
> +#define _tile_cvtrowps2bf16l_internal(src,A) \
> ({ \
> __m512bh dst;
> \
> __asm__ volatile \
> -
> ("{tcvtrowps2pbf16l\t%1, %%tmm"#src", %0|tcvtrowps2pbf16l\t%0, %%tm
> m"#src", %1}" \
> +
> ("{tcvtrowps2bf16l\t%1, %%tmm"#src", %0|tcvtrowps2bf16l\t%0, %%tmm"
> #src", %1}" \
> : "=v" (dst) : "r" ((unsigned) (A))); \
> dst;
> \
> })
>
> -#define _tile_cvtrowps2pbf16li_internal(src,imm) \
> +#define _tile_cvtrowps2bf16li_internal(src,imm) \
> ({ \
> __m512bh dst;
> \
> __asm__ volatile \
> -
> ("{tcvtrowps2pbf16l\t$"#imm", %%tmm"#src", %0|tcvtrowps2pbf16l\t%0,
> %%tmm"#src", "#imm"}" \
> +
> ("{tcvtrowps2bf16l\t$"#imm", %%tmm"#src", %0|tcvtrowps2bf16l\t%0, %%
> tmm"#src", "#imm"}" \
> : "=v" (dst) :); \
> dst;
> \
> })
> @@ -149,17 +149,17 @@
> #define _tile_cvtrowd2psi(src,imm) \
> _tile_cvtrowd2psi_internal (src,imm)
>
> -#define _tile_cvtrowps2pbf16h(src,A) \
> - _tile_cvtrowps2pbf16h_internal (src,A)
> +#define _tile_cvtrowps2bf16h(src,A) \
> + _tile_cvtrowps2bf16h_internal (src,A)
>
> -#define _tile_cvtrowps2pbf16hi(src,imm) \
> - _tile_cvtrowps2pbf16hi_internal (src,imm)
> +#define _tile_cvtrowps2bf16hi(src,imm) \
> + _tile_cvtrowps2bf16hi_internal (src,imm)
>
> -#define _tile_cvtrowps2pbf16l(src,A) \
> - _tile_cvtrowps2pbf16l_internal (src,A)
> +#define _tile_cvtrowps2bf16l(src,A) \
> + _tile_cvtrowps2bf16l_internal (src,A)
>
> -#define _tile_cvtrowps2pbf16li(src,imm) \
> - _tile_cvtrowps2pbf16li_internal (src,imm)
> +#define _tile_cvtrowps2bf16li(src,imm) \
> + _tile_cvtrowps2bf16li_internal (src,imm)
>
> #define _tile_cvtrowps2phh(src,A) \
> _tile_cvtrowps2phh_internal (src,A)
> diff --git a/gcc/testsuite/gcc.target/i386/amxavx512-asmatt-1.c
> b/gcc/testsuite/gcc.target/i386/amxavx512-asmatt-1.c
> index 497218d12ba..885f864d930 100644
> --- a/gcc/testsuite/gcc.target/i386/amxavx512-asmatt-1.c
> +++ b/gcc/testsuite/gcc.target/i386/amxavx512-asmatt-1.c
> @@ -1,8 +1,8 @@
> /* { dg-do compile { target { ! ia32 } } } */
> /* { dg-options "-O2 -march=x86-64-v3 -mamx-avx512" } */
> /* { dg-final { scan-assembler-times "tcvtrowd2ps\[ \\t]" 2 } } */
> -/* { dg-final { scan-assembler-times "tcvtrowps2pbf16h\[ \\t]" 2 } } */
> -/* { dg-final { scan-assembler-times "tcvtrowps2pbf16l\[ \\t]" 2 } } */
> +/* { dg-final { scan-assembler-times "tcvtrowps2bf16h\[ \\t]" 2 } } */
> +/* { dg-final { scan-assembler-times "tcvtrowps2bf16l\[ \\t]" 2 } } */
> /* { dg-final { scan-assembler-times "tcvtrowps2phh\[ \\t]" 2 } } */
> /* { dg-final { scan-assembler-times "tcvtrowps2phl\[ \\t]" 2 } } */
> /* { dg-final { scan-assembler-times "tilemovrow\[ \\t]" 2 } } */ @@ -18,10
> +18,10 @@ void TEST () {
> a = _tile_cvtrowd2ps (TMM1, 1);
> a = _tile_cvtrowd2psi (TMM1, 2);
> - b = _tile_cvtrowps2pbf16h (TMM1, 3);
> - b = _tile_cvtrowps2pbf16hi (TMM1, 4);
> - b = _tile_cvtrowps2pbf16l (TMM1, 5);
> - b = _tile_cvtrowps2pbf16li (TMM1, 6);
> + b = _tile_cvtrowps2bf16h (TMM1, 3);
> + b = _tile_cvtrowps2bf16hi (TMM1, 4);
> + b = _tile_cvtrowps2bf16l (TMM1, 5);
> + b = _tile_cvtrowps2bf16li (TMM1, 6);
> c = _tile_cvtrowps2phh (TMM1, 7);
> c = _tile_cvtrowps2phhi (TMM1, 8);
> c = _tile_cvtrowps2phl (TMM1, 9);
> diff --git a/gcc/testsuite/gcc.target/i386/amxavx512-asmintel-1.c
> b/gcc/testsuite/gcc.target/i386/amxavx512-asmintel-1.c
> index 40110438609..57c37050f70 100644
> --- a/gcc/testsuite/gcc.target/i386/amxavx512-asmintel-1.c
> +++ b/gcc/testsuite/gcc.target/i386/amxavx512-asmintel-1.c
> @@ -2,8 +2,8 @@
> /* { dg-require-effective-target masm_intel } */
> /* { dg-options "-O2 -march=x86-64-v3 -mamx-avx512 -masm=intel" } */
> /* { dg-final { scan-assembler-times "tcvtrowd2ps\[ \\t]+\[^\n\]*zmm\[0-
> 9\]+\[^\n\]*tmm1+\[^\n\]*" 2 } } */
> -/* { dg-final { scan-assembler-times
> "tcvtrowps2pbf16h\[ \\t]+\[^\n\]*zmm\[0-9\]+\[^\n\]*tmm1+\[^\n\]*" 2 } }
> */
> -/* { dg-final { scan-assembler-times
> "tcvtrowps2pbf16l\[ \\t]+\[^\n\]*zmm\[0-9\]+\[^\n\]*tmm1+\[^\n\]*" 2 } }
> */
> +/* { dg-final { scan-assembler-times "tcvtrowps2bf16h\[
> +\\t]+\[^\n\]*zmm\[0-9\]+\[^\n\]*tmm1+\[^\n\]*" 2 } } */
> +/* { dg-final { scan-assembler-times "tcvtrowps2bf16l\[
> +\\t]+\[^\n\]*zmm\[0-9\]+\[^\n\]*tmm1+\[^\n\]*" 2 } } */
> /* { dg-final { scan-assembler-times "tcvtrowps2phh\[ \\t]+\[^\n\]*zmm\[0-
> 9\]+\[^\n\]*tmm1+\[^\n\]*" 2 } } */
> /* { dg-final { scan-assembler-times "tcvtrowps2phl\[ \\t]+\[^\n\]*zmm\[0-
> 9\]+\[^\n\]*tmm1+\[^\n\]*" 2 } } */
> /* { dg-final { scan-assembler-times "tilemovrow\[ \\t]+\[^\n\]*zmm\[0-
> 9\]+\[^\n\]*tmm1+\[^\n\]*" 2 } } */ @@ -17,10 +17,10 @@ void TEST () {
> a = _tile_cvtrowd2ps (1, 1);
> a = _tile_cvtrowd2psi (1, 2);
> - b = _tile_cvtrowps2pbf16h (1, 3);
> - b = _tile_cvtrowps2pbf16hi (1, 4);
> - b = _tile_cvtrowps2pbf16l (1, 5);
> - b = _tile_cvtrowps2pbf16li (1, 6);
> + b = _tile_cvtrowps2bf16h (1, 3);
> + b = _tile_cvtrowps2bf16hi (1, 4);
> + b = _tile_cvtrowps2bf16l (1, 5);
> + b = _tile_cvtrowps2bf16li (1, 6);
> c = _tile_cvtrowps2phh (1, 7);
> c = _tile_cvtrowps2phhi (1, 8);
> c = _tile_cvtrowps2phl (1, 9);
> diff --git a/gcc/testsuite/gcc.target/i386/amxavx512-cvtrowps2pbf16-2.c
> b/gcc/testsuite/gcc.target/i386/amxavx512-cvtrowps2bf16-2.c
> similarity index 67%
> rename from gcc/testsuite/gcc.target/i386/amxavx512-cvtrowps2pbf16-2.c
> rename to gcc/testsuite/gcc.target/i386/amxavx512-cvtrowps2bf16-2.c
> index dfd1d6ad57c..acd5f76c96c 100644
> --- a/gcc/testsuite/gcc.target/i386/amxavx512-cvtrowps2pbf16-2.c
> +++ b/gcc/testsuite/gcc.target/i386/amxavx512-cvtrowps2bf16-2.c
> @@ -2,16 +2,16 @@
> /* { dg-require-effective-target amx_avx512 } */
> /* { dg-options "-O2 -march=x86-64-v3 -mamx-avx512" } */ #define
> AMX_AVX512 -#define DO_TEST test_amx_avx512_cvtrowps2pbf16 -void
> test_amx_avx512_cvtrowps2pbf16();
> +#define DO_TEST test_amx_avx512_cvtrowps2bf16 void
> +test_amx_avx512_cvtrowps2bf16();
> #include "amx-helper.h"
>
> volatile __m512bh cal_dst, cmp_dst;
>
> -#define DEFINE_TEST_CVTROWPS2PBF16(HL, EI, T) \
> +#define DEFINE_TEST_CVTROWPS2BF16(HL, EI, T) \
> __m512bh \
> __attribute__((noinline, noclone, __target__("no-amx-avx512"))) \
> -calc_cvtrowps2pbf16##HL##EI (__tile *src, T __A) \
> +calc_cvtrowps2bf16##HL##EI (__tile *src, T __A) \
> { \
> float *src_buf = (float *) src->buf; \
> int N = src->colsb / 4; \
> @@ -53,17 +53,17 @@ calc_cvtrowps2pbf16##HL##EI (__tile *src, T __A)
> \
> return res; \
> }
>
> -DEFINE_TEST_CVTROWPS2PBF16(h, e, unsigned) -
> DEFINE_TEST_CVTROWPS2PBF16(l, e, unsigned) -
> DEFINE_TEST_CVTROWPS2PBF16(h, i, const unsigned) -
> DEFINE_TEST_CVTROWPS2PBF16(l, i, const unsigned)
> +DEFINE_TEST_CVTROWPS2BF16(h, e, unsigned)
> DEFINE_TEST_CVTROWPS2BF16(l,
> +e, unsigned) DEFINE_TEST_CVTROWPS2BF16(h, i, const unsigned)
> +DEFINE_TEST_CVTROWPS2BF16(l, i, const unsigned)
>
> -#define TEST_CVTROWPS2PBF16(X, Y, HL, EI, T, INTRIN) \
> -cal_dst = calc_cvtrowps2pbf16##HL##EI (X, Y); \
> +#define TEST_CVTROWPS2BF16(X, Y, HL, EI, T, INTRIN) \
> +cal_dst = calc_cvtrowps2bf16##HL##EI (X, Y); \
> cmp_dst = _tile_##INTRIN (1, Y); \
> COMPARE_ZMM_BF16(cal_dst, cmp_dst);
>
> -void test_amx_avx512_cvtrowps2pbf16 ()
> +void test_amx_avx512_cvtrowps2bf16 ()
> {
> __tilecfg_u cfg;
> __tile src;
> @@ -75,8 +75,8 @@ void test_amx_avx512_cvtrowps2pbf16 ()
> init_tile_config (&cfg);
> init_tile_reg_and_src_with_buffer (1, src, tmp_dst_buf);
>
> - TEST_CVTROWPS2PBF16 (&src, a, h, e, unsigned, cvtrowps2pbf16h);
> - TEST_CVTROWPS2PBF16 (&src, a, l, e, unsigned, cvtrowps2pbf16l);
> - TEST_CVTROWPS2PBF16 (&src, 1, h, i, const unsigned, cvtrowps2pbf16hi);
> - TEST_CVTROWPS2PBF16 (&src, 1, l, i, const unsigned, cvtrowps2pbf16li);
> + TEST_CVTROWPS2BF16 (&src, a, h, e, unsigned, cvtrowps2bf16h);
> + TEST_CVTROWPS2BF16 (&src, a, l, e, unsigned, cvtrowps2bf16l);
> + TEST_CVTROWPS2BF16 (&src, 1, h, i, const unsigned, cvtrowps2bf16hi);
> + TEST_CVTROWPS2BF16 (&src, 1, l, i, const unsigned, cvtrowps2bf16li);
> }
> --
> 2.31.1