Besides the mnemonic change, this patch also uses the compare pattern instead of UNSPEC.
gcc/ChangeLog: PR target/118270 * config/i386/avx10_2bf16intrin.h: Change intrin and builtin name according to new mnemonics. * config/i386/i386-builtin.def (BDESC): Ditto. * config/i386/i386-expand.cc (ix86_expand_fp_compare): Adjust comments. (ix86_expand_builtin): Adjust switch case. * config/i386/i386.md (cmpibf): Change instruction name output. * config/i386/sse.md (UNSPEC_VCOMSBF16): Removed. (avx10_2_comisbf16_v8bf): New. (avx10_2_comsbf16_v8bf): Removed. gcc/testsuite/ChangeLog: PR target/118270 * gcc.target/i386/avx10_2-comibf-1.c: Adjust asm check. * gcc.target/i386/avx10_2-comibf-3.c: Ditto. * gcc.target/i386/avx10_2-vcomsbf16-1.c: Move to... * gcc.target/i386/avx10_2-vcomisbf16-1.c: ...here. Adjust output and intrin call. * gcc.target/i386/avx10_2-vcomsbf16-2.c: Move to... * gcc.target/i386/avx10_2-vcomisbf16-2.c: ...here. Adjust intrin call. * gcc.target/i386/pr117495.c: Adjust asm check. --- gcc/config/i386/avx10_2bf16intrin.h | 26 ++++++++--------- gcc/config/i386/i386-builtin.def | 12 ++++---- gcc/config/i386/i386-expand.cc | 14 ++++----- gcc/config/i386/i386.md | 2 +- gcc/config/i386/sse.md | 29 +++++++++---------- .../gcc.target/i386/avx10_2-comibf-1.c | 2 +- .../gcc.target/i386/avx10_2-comibf-3.c | 2 +- .../gcc.target/i386/avx10_2-vcomisbf16-1.c | 19 ++++++++++++ ...2-vcomsbf16-2.c => avx10_2-vcomisbf16-2.c} | 2 +- .../gcc.target/i386/avx10_2-vcomsbf16-1.c | 19 ------------ gcc/testsuite/gcc.target/i386/pr117495.c | 2 +- 11 files changed, 64 insertions(+), 65 deletions(-) create mode 100644 gcc/testsuite/gcc.target/i386/avx10_2-vcomisbf16-1.c rename gcc/testsuite/gcc.target/i386/{avx10_2-vcomsbf16-2.c => avx10_2-vcomisbf16-2.c} (95%) delete mode 100644 gcc/testsuite/gcc.target/i386/avx10_2-vcomsbf16-1.c diff --git a/gcc/config/i386/avx10_2bf16intrin.h b/gcc/config/i386/avx10_2bf16intrin.h index e3fa71f27c0..af3b4afe17f 100644 --- a/gcc/config/i386/avx10_2bf16intrin.h +++ b/gcc/config/i386/avx10_2bf16intrin.h @@ -1284,47 +1284,47 @@ 
_mm_cmp_pbh_mask (__m128bh __A, __m128bh __B, const int __imm) #endif /* __OPIMTIZE__ */ -/* Intrinsics vcomsbf16. */ +/* Intrinsics vcomisbf16. */ extern __inline int __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm_comeq_sbh (__m128bh __A, __m128bh __B) +_mm_comieq_sbh (__m128bh __A, __m128bh __B) { - return __builtin_ia32_vcomsbf16eq (__A, __B); + return __builtin_ia32_vcomisbf16eq (__A, __B); } extern __inline int __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm_comlt_sbh (__m128bh __A, __m128bh __B) +_mm_comilt_sbh (__m128bh __A, __m128bh __B) { - return __builtin_ia32_vcomsbf16lt (__A, __B); + return __builtin_ia32_vcomisbf16lt (__A, __B); } extern __inline int __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm_comle_sbh (__m128bh __A, __m128bh __B) +_mm_comile_sbh (__m128bh __A, __m128bh __B) { - return __builtin_ia32_vcomsbf16le (__A, __B); + return __builtin_ia32_vcomisbf16le (__A, __B); } extern __inline int __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm_comgt_sbh (__m128bh __A, __m128bh __B) +_mm_comigt_sbh (__m128bh __A, __m128bh __B) { - return __builtin_ia32_vcomsbf16gt (__A, __B); + return __builtin_ia32_vcomisbf16gt (__A, __B); } extern __inline int __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm_comge_sbh (__m128bh __A, __m128bh __B) +_mm_comige_sbh (__m128bh __A, __m128bh __B) { - return __builtin_ia32_vcomsbf16ge (__A, __B); + return __builtin_ia32_vcomisbf16ge (__A, __B); } extern __inline int __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm_comneq_sbh (__m128bh __A, __m128bh __B) +_mm_comineq_sbh (__m128bh __A, __m128bh __B) { - return __builtin_ia32_vcomsbf16neq (__A, __B); + return __builtin_ia32_vcomisbf16neq (__A, __B); } #ifdef __DISABLE_AVX10_2_256__ diff --git a/gcc/config/i386/i386-builtin.def b/gcc/config/i386/i386-builtin.def index a546cdcaed9..7e1dad2615e 100644 --- 
a/gcc/config/i386/i386-builtin.def +++ b/gcc/config/i386/i386-builtin.def @@ -3284,12 +3284,12 @@ BDESC (0, OPTION_MASK_ISA2_AVX10_2_256, CODE_FOR_avx10_2_fpclassbf16_v8bf_mask, BDESC (0, OPTION_MASK_ISA2_AVX10_2_512, CODE_FOR_avx10_2_cmpbf16_v32bf_mask, "__builtin_ia32_cmpbf16512_mask", IX86_BUILTIN_CMPBF16512_MASK, UNKNOWN, (int) USI_FTYPE_V32BF_V32BF_INT_USI) BDESC (0, OPTION_MASK_ISA2_AVX10_2_256, CODE_FOR_avx10_2_cmpbf16_v16bf_mask, "__builtin_ia32_cmpbf16256_mask", IX86_BUILTIN_CMPBF16256_MASK, UNKNOWN, (int) UHI_FTYPE_V16BF_V16BF_INT_UHI) BDESC (0, OPTION_MASK_ISA2_AVX10_2_256, CODE_FOR_avx10_2_cmpbf16_v8bf_mask, "__builtin_ia32_cmpbf16128_mask", IX86_BUILTIN_CMPBF16128_MASK, UNKNOWN, (int) UQI_FTYPE_V8BF_V8BF_INT_UQI) -BDESC (0, OPTION_MASK_ISA2_AVX10_2_256, CODE_FOR_avx10_2_comsbf16_v8bf, "__builtin_ia32_vcomsbf16eq", IX86_BUILTIN_VCOMSBF16EQ, EQ, (int) INT_FTYPE_V8BF_V8BF) -BDESC (0, OPTION_MASK_ISA2_AVX10_2_256, CODE_FOR_avx10_2_comsbf16_v8bf, "__builtin_ia32_vcomsbf16gt", IX86_BUILTIN_VCOMSBF16GT, GT, (int) INT_FTYPE_V8BF_V8BF) -BDESC (0, OPTION_MASK_ISA2_AVX10_2_256, CODE_FOR_avx10_2_comsbf16_v8bf, "__builtin_ia32_vcomsbf16ge", IX86_BUILTIN_VCOMSBF16GE, GE, (int) INT_FTYPE_V8BF_V8BF) -BDESC (0, OPTION_MASK_ISA2_AVX10_2_256, CODE_FOR_avx10_2_comsbf16_v8bf, "__builtin_ia32_vcomsbf16le", IX86_BUILTIN_VCOMSBF16LE, LE, (int) INT_FTYPE_V8BF_V8BF) -BDESC (0, OPTION_MASK_ISA2_AVX10_2_256, CODE_FOR_avx10_2_comsbf16_v8bf, "__builtin_ia32_vcomsbf16lt", IX86_BUILTIN_VCOMSBF16LT, LT, (int) INT_FTYPE_V8BF_V8BF) -BDESC (0, OPTION_MASK_ISA2_AVX10_2_256, CODE_FOR_avx10_2_comsbf16_v8bf, "__builtin_ia32_vcomsbf16neq", IX86_BUILTIN_VCOMSBF16NE, NE, (int) INT_FTYPE_V8BF_V8BF) +BDESC (0, OPTION_MASK_ISA2_AVX10_2_256, CODE_FOR_avx10_2_comisbf16_v8bf, "__builtin_ia32_vcomisbf16eq", IX86_BUILTIN_VCOMISBF16EQ, EQ, (int) INT_FTYPE_V8BF_V8BF) +BDESC (0, OPTION_MASK_ISA2_AVX10_2_256, CODE_FOR_avx10_2_comisbf16_v8bf, "__builtin_ia32_vcomisbf16gt", IX86_BUILTIN_VCOMISBF16GT, GT, 
(int) INT_FTYPE_V8BF_V8BF) +BDESC (0, OPTION_MASK_ISA2_AVX10_2_256, CODE_FOR_avx10_2_comisbf16_v8bf, "__builtin_ia32_vcomisbf16ge", IX86_BUILTIN_VCOMISBF16GE, GE, (int) INT_FTYPE_V8BF_V8BF) +BDESC (0, OPTION_MASK_ISA2_AVX10_2_256, CODE_FOR_avx10_2_comisbf16_v8bf, "__builtin_ia32_vcomisbf16le", IX86_BUILTIN_VCOMISBF16LE, LE, (int) INT_FTYPE_V8BF_V8BF) +BDESC (0, OPTION_MASK_ISA2_AVX10_2_256, CODE_FOR_avx10_2_comisbf16_v8bf, "__builtin_ia32_vcomisbf16lt", IX86_BUILTIN_VCOMISBF16LT, LT, (int) INT_FTYPE_V8BF_V8BF) +BDESC (0, OPTION_MASK_ISA2_AVX10_2_256, CODE_FOR_avx10_2_comisbf16_v8bf, "__builtin_ia32_vcomisbf16neq", IX86_BUILTIN_VCOMISBF16NE, NE, (int) INT_FTYPE_V8BF_V8BF) BDESC (0, OPTION_MASK_ISA2_AVX10_2_256, CODE_FOR_avx10_2_cvtnebf162ibsv8bf_mask, "__builtin_ia32_cvtnebf162ibs128_mask", IX86_BUILTIN_CVTNEBF162IBS128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8BF_V8HI_UQI) BDESC (0, OPTION_MASK_ISA2_AVX10_2_256, CODE_FOR_avx10_2_cvtnebf162ibsv16bf_mask, "__builtin_ia32_cvtnebf162ibs256_mask", IX86_BUILTIN_CVTNEBF162IBS256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16BF_V16HI_UHI) BDESC (0, OPTION_MASK_ISA2_AVX10_2_512, CODE_FOR_avx10_2_cvtnebf162ibsv32bf_mask, "__builtin_ia32_cvtnebf162ibs512_mask", IX86_BUILTIN_CVTNEBF162IBS512_MASK, UNKNOWN, (int) V32HI_FTYPE_V32BF_V32HI_USI) diff --git a/gcc/config/i386/i386-expand.cc b/gcc/config/i386/i386-expand.cc index da030832bba..117f6f6f7eb 100644 --- a/gcc/config/i386/i386-expand.cc +++ b/gcc/config/i386/i386-expand.cc @@ -2921,7 +2921,7 @@ ix86_expand_fp_compare (enum rtx_code code, rtx op0, rtx op1) { case IX86_FPCMP_COMI: tmp = gen_rtx_COMPARE (CCFPmode, op0, op1); - /* We only have vcomsbf16, No vcomubf16 nor vcomxbf16 */ + /* We only have vcomisbf16, No vcomubf16 nor vcomxbf16 */ if (GET_MODE (op0) != E_BFmode) { if (TARGET_AVX10_2_256 && (code == EQ || code == NE)) @@ -15945,12 +15945,12 @@ rdseed_step: case IX86_BUILTIN_RDPID: return ix86_expand_special_args_builtin (bdesc_args + i, exp, target); - case IX86_BUILTIN_VCOMSBF16EQ: - 
case IX86_BUILTIN_VCOMSBF16NE: - case IX86_BUILTIN_VCOMSBF16GT: - case IX86_BUILTIN_VCOMSBF16GE: - case IX86_BUILTIN_VCOMSBF16LT: - case IX86_BUILTIN_VCOMSBF16LE: + case IX86_BUILTIN_VCOMISBF16EQ: + case IX86_BUILTIN_VCOMISBF16NE: + case IX86_BUILTIN_VCOMISBF16GT: + case IX86_BUILTIN_VCOMISBF16GE: + case IX86_BUILTIN_VCOMISBF16LT: + case IX86_BUILTIN_VCOMISBF16LE: return ix86_expand_sse_comi (bdesc_args + i, exp, target, false); case IX86_BUILTIN_FABSQ: case IX86_BUILTIN_COPYSIGNQ: diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md index c977e86b72e..52c02b6351a 100644 --- a/gcc/config/i386/i386.md +++ b/gcc/config/i386/i386.md @@ -2115,7 +2115,7 @@ (match_operand:BF 0 "register_operand" "v") (match_operand:BF 1 "nonimmediate_operand" "vm")))] "TARGET_AVX10_2_256" - "vcomsbf16\t{%1, %0|%0, %1}" + "vcomisbf16\t{%1, %0|%0, %1}" [(set_attr "type" "ssecomi") (set_attr "prefix" "evex") (set_attr "mode" "BF")]) diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md index 0bca52848a1..c239f4102ae 100644 --- a/gcc/config/i386/sse.md +++ b/gcc/config/i386/sse.md @@ -234,7 +234,6 @@ UNSPEC_VREDUCEBF16 UNSPEC_VGETMANTBF16 UNSPEC_VFPCLASSBF16 - UNSPEC_VCOMSBF16 UNSPEC_VCVTNEBF162IBS UNSPEC_VCVTNEBF162IUBS UNSPEC_VCVTPH2IBS @@ -4877,6 +4876,20 @@ (const_string "0"))) (set_attr "mode" "<MODE>")]) +(define_insn "avx10_2_comisbf16_v8bf" + [(set (reg:CCFP FLAGS_REG) + (compare:CCFP + (vec_select:BF + (match_operand:V8BF 0 "register_operand" "v") + (parallel [(const_int 0)])) + (vec_select:BF + (match_operand:V8BF 1 "nonimmediate_operand" "vm") + (parallel [(const_int 0)]))))] + "TARGET_AVX10_2_256" + "vcomisbf16\t{%1, %0|%0, %1}" + [(set_attr "prefix" "evex") + (set_attr "type" "ssecomi")]) + (define_expand "vec_cmp<mode><avx512fmaskmodelower>" [(set (match_operand:<avx512fmaskmode> 0 "register_operand") (match_operator:<avx512fmaskmode> 1 "" @@ -32447,20 +32460,6 @@ "vcmpbf16\t{%3, %2, %1, %0<mask_scalar_merge_operand4>|%0<mask_scalar_merge_operand4>, %1, %2, 
%3}" [(set_attr "prefix" "evex")]) -(define_insn "avx10_2_comsbf16_v8bf" - [(set (reg:CCFP FLAGS_REG) - (unspec:CCFP - [(vec_select:BF - (match_operand:V8BF 0 "register_operand" "v") - (parallel [(const_int 0)])) - (vec_select:BF - (match_operand:V8BF 1 "nonimmediate_operand" "vm") - (parallel [(const_int 0)]))] - UNSPEC_VCOMSBF16))] - "TARGET_AVX10_2_256" - "vcomsbf16\t{%1, %0|%0, %1}" - [(set_attr "prefix" "evex")]) - (define_int_iterator UNSPEC_CVTNE_BF16_IBS_ITER [UNSPEC_VCVTNEBF162IBS UNSPEC_VCVTNEBF162IUBS diff --git a/gcc/testsuite/gcc.target/i386/avx10_2-comibf-1.c b/gcc/testsuite/gcc.target/i386/avx10_2-comibf-1.c index 85b773b89f2..3862f1e0d90 100644 --- a/gcc/testsuite/gcc.target/i386/avx10_2-comibf-1.c +++ b/gcc/testsuite/gcc.target/i386/avx10_2-comibf-1.c @@ -1,6 +1,6 @@ /* { dg-do compile } */ /* { dg-options "-march=x86-64-v3 -mavx10.2 -O2 -fno-trapping-math" } */ -/* { dg-final { scan-assembler-times "vcomsbf16\[ \\t\]+\[^{}\n\]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 6 } } */ +/* { dg-final { scan-assembler-times "vcomisbf16\[ \\t\]+\[^{}\n\]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 6 } } */ /* { dg-final { scan-assembler-times {j[a-z]+\s} 6 } } */ __bf16 diff --git a/gcc/testsuite/gcc.target/i386/avx10_2-comibf-3.c b/gcc/testsuite/gcc.target/i386/avx10_2-comibf-3.c index afa41a3f071..28b2ad35975 100644 --- a/gcc/testsuite/gcc.target/i386/avx10_2-comibf-3.c +++ b/gcc/testsuite/gcc.target/i386/avx10_2-comibf-3.c @@ -1,7 +1,7 @@ /* { dg-do compile } */ /* { dg-options "-march=x86-64-v3 -O2" } */ -/* { dg-final { scan-assembler-times "vcomsbf16\[ \\t\]+\[^{}\n\]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 6 } } */ +/* { dg-final { scan-assembler-times "vcomisbf16\[ \\t\]+\[^{}\n\]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 6 } } */ /* { dg-final { scan-assembler-times "set\[aeglnb\]+" 6 } } */ #define AVX10_ATTR \ diff --git a/gcc/testsuite/gcc.target/i386/avx10_2-vcomisbf16-1.c b/gcc/testsuite/gcc.target/i386/avx10_2-vcomisbf16-1.c new file mode 100644 index 00000000000..3f08ff51088 --- 
/dev/null +++ b/gcc/testsuite/gcc.target/i386/avx10_2-vcomisbf16-1.c @@ -0,0 +1,19 @@ +/* { dg-do compile } */ +/* { dg-options "-march=x86-64-v3 -mavx10.2 -O2" } */ +/* { dg-final { scan-assembler-times "vcomisbf16\[ \\t\]+\[^{}\n\]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 6 } } */ +/* { dg-final { scan-assembler-times "jp" 2 } } */ +#include <immintrin.h> + +volatile __m128bh x1, x2; +volatile int res; + +void extern +avx10_2_vcomi_test (void) +{ + res = _mm_comieq_sbh (x1, x2); + res = _mm_comilt_sbh (x1, x2); + res = _mm_comile_sbh (x1, x2); + res = _mm_comigt_sbh (x1, x2); + res = _mm_comige_sbh (x1, x2); + res = _mm_comineq_sbh (x1, x2); +} diff --git a/gcc/testsuite/gcc.target/i386/avx10_2-vcomsbf16-2.c b/gcc/testsuite/gcc.target/i386/avx10_2-vcomisbf16-2.c similarity index 95% rename from gcc/testsuite/gcc.target/i386/avx10_2-vcomsbf16-2.c rename to gcc/testsuite/gcc.target/i386/avx10_2-vcomisbf16-2.c index de98043dcf3..7266e3a7b18 100644 --- a/gcc/testsuite/gcc.target/i386/avx10_2-vcomsbf16-2.c +++ b/gcc/testsuite/gcc.target/i386/avx10_2-vcomisbf16-2.c @@ -9,7 +9,7 @@ #define CMP(PRED, IMM) \ exp = _mm_comi_round_ss (__A, __B, IMM, _MM_FROUND_NO_EXC); \ - res1 = _mm_com##PRED##_sbh (src1.x, src2.x); \ + res1 = _mm_comi##PRED##_sbh (src1.x, src2.x); \ if (exp != res1) \ abort (); diff --git a/gcc/testsuite/gcc.target/i386/avx10_2-vcomsbf16-1.c b/gcc/testsuite/gcc.target/i386/avx10_2-vcomsbf16-1.c deleted file mode 100644 index 5c1e7050d9c..00000000000 --- a/gcc/testsuite/gcc.target/i386/avx10_2-vcomsbf16-1.c +++ /dev/null @@ -1,19 +0,0 @@ -/* { dg-do compile } */ -/* { dg-options "-march=x86-64-v3 -mavx10.2 -O2" } */ -/* { dg-final { scan-assembler-times "vcomsbf16\[ \\t\]+\[^{}\n\]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 6 } } */ -/* { dg-final { scan-assembler-times "jp" 2 } } */ -#include <immintrin.h> - -volatile __m128bh x1, x2; -volatile int res; - -void extern -avx10_2_vcom_test (void) -{ - res = _mm_comeq_sbh (x1, x2); - res = _mm_comlt_sbh (x1, x2); - res = 
_mm_comle_sbh (x1, x2); - res = _mm_comgt_sbh (x1, x2); - res = _mm_comge_sbh (x1, x2); - res = _mm_comneq_sbh (x1, x2); -} diff --git a/gcc/testsuite/gcc.target/i386/pr117495.c b/gcc/testsuite/gcc.target/i386/pr117495.c index 274b6cef361..90f35610499 100644 --- a/gcc/testsuite/gcc.target/i386/pr117495.c +++ b/gcc/testsuite/gcc.target/i386/pr117495.c @@ -1,7 +1,7 @@ /* PR target/117495 */ /* { dg-do compile } */ /* { dg-options "-march=x86-64-v3 -fno-trapping-math" } */ -/* { dg-final { scan-assembler-times "vcomsbf16" 2 } } */ +/* { dg-final { scan-assembler-times "vcomisbf16" 2 } } */ __attribute__((target("avx10.2"))) int foo (int b, int x) -- 2.31.1