Hi all, The mnemonics for TCVTROWPS2PBF16[H,L] have been changed to TCVTROWPS2BF16[H,L] in ISE056. There will also be more BF16 mnemonic changes upcoming, which will fix the regression in PR118270.
Bootstrapped and tested on x86_64-pc-linux-gnu. Ok for trunk? Ref: https://cdrdv2.intel.com/v1/dl/getContent/671368 Thx, Haochen --- In ISE056, the mnemonics for TCVTROWPS2PBF16[H,L] have been changed to TCVTROWPS2BF16[H,L]. gcc/ChangeLog: * config/i386/amxavx512intrin.h (_tile_cvtrowps2pbf16h_internal): Rename to... (_tile_cvtrowps2bf16h_internal): ...this. (_tile_cvtrowps2pbf16hi_internal): Rename to... (_tile_cvtrowps2bf16hi_internal): ...this. (_tile_cvtrowps2pbf16l_internal): Rename to... (_tile_cvtrowps2bf16l_internal): ...this. (_tile_cvtrowps2pbf16li_internal): Rename to... (_tile_cvtrowps2bf16li_internal): ...this. (_tile_cvtrowps2pbf16h): Rename to... (_tile_cvtrowps2bf16h): ...this. (_tile_cvtrowps2pbf16hi): Rename to... (_tile_cvtrowps2bf16hi): ...this. (_tile_cvtrowps2pbf16l): Rename to... (_tile_cvtrowps2bf16l): ...this. (_tile_cvtrowps2pbf16li): Rename to... (_tile_cvtrowps2bf16li): ...this. gcc/testsuite/ChangeLog: * gcc.target/i386/amxavx512-asmatt-1.c: Adjust intrin call. * gcc.target/i386/amxavx512-asmintel-1.c: Ditto. * gcc.target/i386/amxavx512-cvtrowps2pbf16-2.c: Rename to... * gcc.target/i386/amxavx512-cvtrowps2bf16-2.c: ...this. Rename test functions. 
--- gcc/config/i386/amxavx512intrin.h | 32 +++++++++---------- .../gcc.target/i386/amxavx512-asmatt-1.c | 12 +++---- .../gcc.target/i386/amxavx512-asmintel-1.c | 12 +++---- ...2pbf16-2.c => amxavx512-cvtrowps2bf16-2.c} | 30 ++++++++--------- 4 files changed, 43 insertions(+), 43 deletions(-) rename gcc/testsuite/gcc.target/i386/{amxavx512-cvtrowps2pbf16-2.c => amxavx512-cvtrowps2bf16-2.c} (67%) diff --git a/gcc/config/i386/amxavx512intrin.h b/gcc/config/i386/amxavx512intrin.h index 59d142948fb..ab5362571d1 100644 --- a/gcc/config/i386/amxavx512intrin.h +++ b/gcc/config/i386/amxavx512intrin.h @@ -53,38 +53,38 @@ dst; \ }) -#define _tile_cvtrowps2pbf16h_internal(src,A) \ +#define _tile_cvtrowps2bf16h_internal(src,A) \ ({ \ __m512bh dst; \ __asm__ volatile \ - ("{tcvtrowps2pbf16h\t%1, %%tmm"#src", %0|tcvtrowps2pbf16h\t%0, %%tmm"#src", %1}" \ + ("{tcvtrowps2bf16h\t%1, %%tmm"#src", %0|tcvtrowps2bf16h\t%0, %%tmm"#src", %1}" \ : "=v" (dst) : "r" ((unsigned) (A))); \ dst; \ }) -#define _tile_cvtrowps2pbf16hi_internal(src,imm) \ +#define _tile_cvtrowps2bf16hi_internal(src,imm) \ ({ \ __m512bh dst; \ __asm__ volatile \ - ("{tcvtrowps2pbf16h\t$"#imm", %%tmm"#src", %0|tcvtrowps2pbf16h\t%0, %%tmm"#src", "#imm"}" \ + ("{tcvtrowps2bf16h\t$"#imm", %%tmm"#src", %0|tcvtrowps2bf16h\t%0, %%tmm"#src", "#imm"}" \ : "=v" (dst) :); \ dst; \ }) -#define _tile_cvtrowps2pbf16l_internal(src,A) \ +#define _tile_cvtrowps2bf16l_internal(src,A) \ ({ \ __m512bh dst; \ __asm__ volatile \ - ("{tcvtrowps2pbf16l\t%1, %%tmm"#src", %0|tcvtrowps2pbf16l\t%0, %%tmm"#src", %1}" \ + ("{tcvtrowps2bf16l\t%1, %%tmm"#src", %0|tcvtrowps2bf16l\t%0, %%tmm"#src", %1}" \ : "=v" (dst) : "r" ((unsigned) (A))); \ dst; \ }) -#define _tile_cvtrowps2pbf16li_internal(src,imm) \ +#define _tile_cvtrowps2bf16li_internal(src,imm) \ ({ \ __m512bh dst; \ __asm__ volatile \ - ("{tcvtrowps2pbf16l\t$"#imm", %%tmm"#src", %0|tcvtrowps2pbf16l\t%0, %%tmm"#src", "#imm"}" \ + ("{tcvtrowps2bf16l\t$"#imm", %%tmm"#src", 
%0|tcvtrowps2bf16l\t%0, %%tmm"#src", "#imm"}" \ : "=v" (dst) :); \ dst; \ }) @@ -149,17 +149,17 @@ #define _tile_cvtrowd2psi(src,imm) \ _tile_cvtrowd2psi_internal (src,imm) -#define _tile_cvtrowps2pbf16h(src,A) \ - _tile_cvtrowps2pbf16h_internal (src,A) +#define _tile_cvtrowps2bf16h(src,A) \ + _tile_cvtrowps2bf16h_internal (src,A) -#define _tile_cvtrowps2pbf16hi(src,imm) \ - _tile_cvtrowps2pbf16hi_internal (src,imm) +#define _tile_cvtrowps2bf16hi(src,imm) \ + _tile_cvtrowps2bf16hi_internal (src,imm) -#define _tile_cvtrowps2pbf16l(src,A) \ - _tile_cvtrowps2pbf16l_internal (src,A) +#define _tile_cvtrowps2bf16l(src,A) \ + _tile_cvtrowps2bf16l_internal (src,A) -#define _tile_cvtrowps2pbf16li(src,imm) \ - _tile_cvtrowps2pbf16li_internal (src,imm) +#define _tile_cvtrowps2bf16li(src,imm) \ + _tile_cvtrowps2bf16li_internal (src,imm) #define _tile_cvtrowps2phh(src,A) \ _tile_cvtrowps2phh_internal (src,A) diff --git a/gcc/testsuite/gcc.target/i386/amxavx512-asmatt-1.c b/gcc/testsuite/gcc.target/i386/amxavx512-asmatt-1.c index 497218d12ba..885f864d930 100644 --- a/gcc/testsuite/gcc.target/i386/amxavx512-asmatt-1.c +++ b/gcc/testsuite/gcc.target/i386/amxavx512-asmatt-1.c @@ -1,8 +1,8 @@ /* { dg-do compile { target { ! 
ia32 } } } */ /* { dg-options "-O2 -march=x86-64-v3 -mamx-avx512" } */ /* { dg-final { scan-assembler-times "tcvtrowd2ps\[ \\t]" 2 } } */ -/* { dg-final { scan-assembler-times "tcvtrowps2pbf16h\[ \\t]" 2 } } */ -/* { dg-final { scan-assembler-times "tcvtrowps2pbf16l\[ \\t]" 2 } } */ +/* { dg-final { scan-assembler-times "tcvtrowps2bf16h\[ \\t]" 2 } } */ +/* { dg-final { scan-assembler-times "tcvtrowps2bf16l\[ \\t]" 2 } } */ /* { dg-final { scan-assembler-times "tcvtrowps2phh\[ \\t]" 2 } } */ /* { dg-final { scan-assembler-times "tcvtrowps2phl\[ \\t]" 2 } } */ /* { dg-final { scan-assembler-times "tilemovrow\[ \\t]" 2 } } */ @@ -18,10 +18,10 @@ void TEST () { a = _tile_cvtrowd2ps (TMM1, 1); a = _tile_cvtrowd2psi (TMM1, 2); - b = _tile_cvtrowps2pbf16h (TMM1, 3); - b = _tile_cvtrowps2pbf16hi (TMM1, 4); - b = _tile_cvtrowps2pbf16l (TMM1, 5); - b = _tile_cvtrowps2pbf16li (TMM1, 6); + b = _tile_cvtrowps2bf16h (TMM1, 3); + b = _tile_cvtrowps2bf16hi (TMM1, 4); + b = _tile_cvtrowps2bf16l (TMM1, 5); + b = _tile_cvtrowps2bf16li (TMM1, 6); c = _tile_cvtrowps2phh (TMM1, 7); c = _tile_cvtrowps2phhi (TMM1, 8); c = _tile_cvtrowps2phl (TMM1, 9); diff --git a/gcc/testsuite/gcc.target/i386/amxavx512-asmintel-1.c b/gcc/testsuite/gcc.target/i386/amxavx512-asmintel-1.c index 40110438609..57c37050f70 100644 --- a/gcc/testsuite/gcc.target/i386/amxavx512-asmintel-1.c +++ b/gcc/testsuite/gcc.target/i386/amxavx512-asmintel-1.c @@ -2,8 +2,8 @@ /* { dg-require-effective-target masm_intel } */ /* { dg-options "-O2 -march=x86-64-v3 -mamx-avx512 -masm=intel" } */ /* { dg-final { scan-assembler-times "tcvtrowd2ps\[ \\t]+\[^\n\]*zmm\[0-9\]+\[^\n\]*tmm1+\[^\n\]*" 2 } } */ -/* { dg-final { scan-assembler-times "tcvtrowps2pbf16h\[ \\t]+\[^\n\]*zmm\[0-9\]+\[^\n\]*tmm1+\[^\n\]*" 2 } } */ -/* { dg-final { scan-assembler-times "tcvtrowps2pbf16l\[ \\t]+\[^\n\]*zmm\[0-9\]+\[^\n\]*tmm1+\[^\n\]*" 2 } } */ +/* { dg-final { scan-assembler-times "tcvtrowps2bf16h\[ \\t]+\[^\n\]*zmm\[0-9\]+\[^\n\]*tmm1+\[^\n\]*" 2 
} } */ +/* { dg-final { scan-assembler-times "tcvtrowps2bf16l\[ \\t]+\[^\n\]*zmm\[0-9\]+\[^\n\]*tmm1+\[^\n\]*" 2 } } */ /* { dg-final { scan-assembler-times "tcvtrowps2phh\[ \\t]+\[^\n\]*zmm\[0-9\]+\[^\n\]*tmm1+\[^\n\]*" 2 } } */ /* { dg-final { scan-assembler-times "tcvtrowps2phl\[ \\t]+\[^\n\]*zmm\[0-9\]+\[^\n\]*tmm1+\[^\n\]*" 2 } } */ /* { dg-final { scan-assembler-times "tilemovrow\[ \\t]+\[^\n\]*zmm\[0-9\]+\[^\n\]*tmm1+\[^\n\]*" 2 } } */ @@ -17,10 +17,10 @@ void TEST () { a = _tile_cvtrowd2ps (1, 1); a = _tile_cvtrowd2psi (1, 2); - b = _tile_cvtrowps2pbf16h (1, 3); - b = _tile_cvtrowps2pbf16hi (1, 4); - b = _tile_cvtrowps2pbf16l (1, 5); - b = _tile_cvtrowps2pbf16li (1, 6); + b = _tile_cvtrowps2bf16h (1, 3); + b = _tile_cvtrowps2bf16hi (1, 4); + b = _tile_cvtrowps2bf16l (1, 5); + b = _tile_cvtrowps2bf16li (1, 6); c = _tile_cvtrowps2phh (1, 7); c = _tile_cvtrowps2phhi (1, 8); c = _tile_cvtrowps2phl (1, 9); diff --git a/gcc/testsuite/gcc.target/i386/amxavx512-cvtrowps2pbf16-2.c b/gcc/testsuite/gcc.target/i386/amxavx512-cvtrowps2bf16-2.c similarity index 67% rename from gcc/testsuite/gcc.target/i386/amxavx512-cvtrowps2pbf16-2.c rename to gcc/testsuite/gcc.target/i386/amxavx512-cvtrowps2bf16-2.c index dfd1d6ad57c..acd5f76c96c 100644 --- a/gcc/testsuite/gcc.target/i386/amxavx512-cvtrowps2pbf16-2.c +++ b/gcc/testsuite/gcc.target/i386/amxavx512-cvtrowps2bf16-2.c @@ -2,16 +2,16 @@ /* { dg-require-effective-target amx_avx512 } */ /* { dg-options "-O2 -march=x86-64-v3 -mamx-avx512" } */ #define AMX_AVX512 -#define DO_TEST test_amx_avx512_cvtrowps2pbf16 -void test_amx_avx512_cvtrowps2pbf16(); +#define DO_TEST test_amx_avx512_cvtrowps2bf16 +void test_amx_avx512_cvtrowps2bf16(); #include "amx-helper.h" volatile __m512bh cal_dst, cmp_dst; -#define DEFINE_TEST_CVTROWPS2PBF16(HL, EI, T) \ +#define DEFINE_TEST_CVTROWPS2BF16(HL, EI, T) \ __m512bh \ __attribute__((noinline, noclone, __target__("no-amx-avx512"))) \ -calc_cvtrowps2pbf16##HL##EI (__tile *src, T __A) \ 
+calc_cvtrowps2bf16##HL##EI (__tile *src, T __A) \ { \ float *src_buf = (float *) src->buf; \ int N = src->colsb / 4; \ @@ -53,17 +53,17 @@ calc_cvtrowps2pbf16##HL##EI (__tile *src, T __A) \ return res; \ } -DEFINE_TEST_CVTROWPS2PBF16(h, e, unsigned) -DEFINE_TEST_CVTROWPS2PBF16(l, e, unsigned) -DEFINE_TEST_CVTROWPS2PBF16(h, i, const unsigned) -DEFINE_TEST_CVTROWPS2PBF16(l, i, const unsigned) +DEFINE_TEST_CVTROWPS2BF16(h, e, unsigned) +DEFINE_TEST_CVTROWPS2BF16(l, e, unsigned) +DEFINE_TEST_CVTROWPS2BF16(h, i, const unsigned) +DEFINE_TEST_CVTROWPS2BF16(l, i, const unsigned) -#define TEST_CVTROWPS2PBF16(X, Y, HL, EI, T, INTRIN) \ -cal_dst = calc_cvtrowps2pbf16##HL##EI (X, Y); \ +#define TEST_CVTROWPS2BF16(X, Y, HL, EI, T, INTRIN) \ +cal_dst = calc_cvtrowps2bf16##HL##EI (X, Y); \ cmp_dst = _tile_##INTRIN (1, Y); \ COMPARE_ZMM_BF16(cal_dst, cmp_dst); -void test_amx_avx512_cvtrowps2pbf16 () +void test_amx_avx512_cvtrowps2bf16 () { __tilecfg_u cfg; __tile src; @@ -75,8 +75,8 @@ void test_amx_avx512_cvtrowps2pbf16 () init_tile_config (&cfg); init_tile_reg_and_src_with_buffer (1, src, tmp_dst_buf); - TEST_CVTROWPS2PBF16 (&src, a, h, e, unsigned, cvtrowps2pbf16h); - TEST_CVTROWPS2PBF16 (&src, a, l, e, unsigned, cvtrowps2pbf16l); - TEST_CVTROWPS2PBF16 (&src, 1, h, i, const unsigned, cvtrowps2pbf16hi); - TEST_CVTROWPS2PBF16 (&src, 1, l, i, const unsigned, cvtrowps2pbf16li); + TEST_CVTROWPS2BF16 (&src, a, h, e, unsigned, cvtrowps2bf16h); + TEST_CVTROWPS2BF16 (&src, a, l, e, unsigned, cvtrowps2bf16l); + TEST_CVTROWPS2BF16 (&src, 1, h, i, const unsigned, cvtrowps2bf16hi); + TEST_CVTROWPS2BF16 (&src, 1, l, i, const unsigned, cvtrowps2bf16li); } -- 2.31.1