Re: [GCC][PATCH][AArch64] ACLE intrinsics for BFCVTN, BFCVTN2 (AArch64 AdvSIMD) and BFCVT (AArch64 FP)

Delia Burduv Fri, 31 Jan 2020 06:51:43 -0800

Sorry for the confusion; what I meant to say was:

This patch adds the Armv8.6-a ACLE intrinsics for bfcvtn, bfcvtn2 and 
bfcvt as part of the BFloat16 extension
(https://developer.arm.com/architectures/instruction-sets/simd-isas/neon/intrinsics).
The intrinsics are declared in arm_bf16.h and arm_neon.h and the RTL 
patterns are defined in aarch64-simd.md.


Tested for regression on aarch64-none-elf and aarch64_be-none-elf. I 
don't have commit rights, so if this is ok can someone please commit it 
for me?

Here is the updated patch.

Thank you,
Delia


gcc/ChangeLog:

2019-11-06  Delia Burduv  <delia.bur...@arm.com>

         * config/aarch64/aarch64-simd-builtins.def
         (bfcvtn): New built-in function.
         (bfcvtn_q): New built-in function.
         (bfcvtn2): New built-in function.
         (bfcvt): New built-in function.
         * config/aarch64/aarch64-simd.md
         (aarch64_bfcvtn<q><mode>): New pattern.
         (aarch64_bfcvtn2v8bf): New pattern.
         (aarch64_bfcvtbf): New pattern.
         * config/aarch64/arm_bf16.h (float32_t): New typedef.
         (vcvth_bf16_f32): New intrinsic.
         * config/aarch64/arm_neon.h (vcvt_bf16_f32): New intrinsic.
         (vcvtq_low_bf16_f32): New intrinsic.
         (vcvtq_high_bf16_f32): New intrinsic.
         * config/aarch64/iterators.md (V4SF_TO_BF): New mode iterator.
         (UNSPEC_BFCVTN): New UNSPEC.
         (UNSPEC_BFCVTN2): New UNSPEC.
         (UNSPEC_BFCVT): New UNSPEC.
         * config/arm/types.md (bf_cvt): New type.


gcc/testsuite/ChangeLog:

2020-01-31  Delia Burduv  <delia.bur...@arm.com>

         * gcc.target/aarch64/advsimd-intrinsics/bfcvt-compile.c: New test.
         * gcc.target/aarch64/advsimd-intrinsics/bfcvt-nobf16.c: New test.
         * gcc.target/aarch64/advsimd-intrinsics/bfcvt-nosimd.c: New test.
         * gcc.target/aarch64/advsimd-intrinsics/bfcvtnq2-untied.c: New test.

On 12/23/19 6:30 PM, Richard Sandiford wrote:
> Some of the comments on the BFMMLA/BFMLAL[BT] patch apply here too.
> 
> Delia Burduv <delia.bur...@arm.com> writes:
>> This patch adds the Armv8.6-a ACLE intrinsics for bfmmla, bfmlalb and
>> bfmlalt as part of the BFloat16 extension.
> 
> That's the other patch :-)
> 
>> [...]
>> diff --git a/gcc/config/aarch64/aarch64-simd.md 
>> b/gcc/config/aarch64/aarch64-simd.md
>> index 
>> 55660ae248f4fa75d35ba2949cd4b9d5139ff5f5..ff7a1f5f34a19b05eba48dba96c736dfdfdf7bac
>>  100644
>> --- a/gcc/config/aarch64/aarch64-simd.md
>> +++ b/gcc/config/aarch64/aarch64-simd.md
>> @@ -7027,3 +7027,32 @@
>>     "xtn\t%0.<Vntype>, %1.<Vtype>"
>>     [(set_attr "type" "neon_shift_imm_narrow_q")]
>>   )
>> +
>> +;; bfcvtn
>> +(define_insn "aarch64_bfcvtn<q><mode>"
>> +  [(set (match_operand:V4SF_TO_BF 0 "register_operand" "=w")
>> +        (unspec:V4SF_TO_BF [(match_operand:V4SF 1 "register_operand" "w")]
>> +                            UNSPEC_BFCVTN))]
>> +  "TARGET_BF16_SIMD"
>> +  "bfcvtn\\t%0.4h, %1.4s"
>> +  [(set_attr "type" "f_cvt")]
>> +)
>> +
> 
> If I've understood the naming convention correctly, the closest type
> seems to be "neon_fp_cvt_narrow_s_q".
> 
>> +(define_insn "aarch64_bfcvtn2v8bf"
>> +  [(set (match_operand:V8BF 0 "register_operand" "=w")
>> +        (unspec:V8BF [(match_operand:V8BF 1 "register_operand" "w")
>> +                      (match_operand:V4SF 2 "register_operand" "w")]
>> +                      UNSPEC_BFCVTN2))]
>> +  "TARGET_BF16_SIMD"
>> +  "bfcvtn2\\t%0.8h, %2.4s"
>> +  [(set_attr "type" "f_cvt")]
>> +)
> 
> Same here.
> 
> The constraint on operand 1 needs to be "0", otherwise operands 1 and 0
> could end up in different registers.  You could test for this using
> something like:
> 
> bfloat16x8_t test_bfcvtnq2_untied (bfloat16x8_t unused, bfloat16x8_t inactive,
>                                  float32x4_t a)
> {
>    return vcvtq_high_bf16_f32 (inactive, a);
> }
> 
> which when compiled at -O should produce something like:
> 
> /*
> **test_bfcvtnq2_untied:
> **    mov     v0\.8h, v1\.8h
> **    bfcvtn2 v0\.8h, v2\.4s
> **    ret
> */
> 
> (Completely untested, the code above is probably wrong.)
> 
>> +
>> +(define_insn "aarch64_bfcvtbf"
>> +  [(set (match_operand:BF 0 "register_operand" "=w")
>> +        (unspec:BF [(match_operand:SF 1 "register_operand" "w")]
>> +                    UNSPEC_BFCVT))]
>> +  "TARGET_BF16_SIMD"
> 
> I think this just needs the scalar macro rather than *_SIMD.
> 
>> +  "bfcvt\\t%h0, %s1"
>> +  [(set_attr "type" "f_cvt")]
>> +)
>> diff --git a/gcc/config/aarch64/arm_bf16.h b/gcc/config/aarch64/arm_bf16.h
>> index 
>> aedb0972735ce549fac1870bacd1ef3101e8fd26..1b9ab3690d35e153cd4f24b9e3bbb5b4cc4b4f4d
>>  100644
>> --- a/gcc/config/aarch64/arm_bf16.h
>> +++ b/gcc/config/aarch64/arm_bf16.h
>> @@ -34,7 +34,15 @@
>>   #ifdef __ARM_FEATURE_BF16_SCALAR_ARITHMETIC
>>   
>>   typedef __bf16 bfloat16_t;
>> -
>> +typedef float float32_t;
>> +
>> +__extension__ extern __inline bfloat16_t
>> +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
>> +vcvth_bf16_f32 \
>> +      (float32_t __a)
> 
> No need for the line break here.
> 
>> +{
>> +  return __builtin_aarch64_bfcvtbf (__a);
>> +}
>>   
>>   #endif
>>   #pragma GCC pop_options
>> diff --git a/gcc/config/aarch64/arm_neon.h b/gcc/config/aarch64/arm_neon.h
>> index 
>> 6cdbf381f0156ed993f03b847228b36ebbdd14f8..120f4b7d8827aee51834e75aeaa6ab8f8451980e
>>  100644
>> --- a/gcc/config/aarch64/arm_neon.h
>> +++ b/gcc/config/aarch64/arm_neon.h
>> @@ -34610,6 +34610,35 @@ vrnd64xq_f64 (float64x2_t __a)
>>   
>>   #include "arm_bf16.h"
>>   
>> +#pragma GCC push_options
>> +#pragma GCC target ("arch=armv8.2-a+bf16")
>> +#ifdef __ARM_FEATURE_BF16_VECTOR_ARITHMETIC
>> +
>> +__extension__ extern __inline bfloat16x4_t
>> +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
>> +vcvt_bf16_f32 (float32x4_t __a)
>> +{
>> +  return __builtin_aarch64_bfcvtnv4bf (__a);
>> +
>> +}
> 
> Nit: extra blank line.
> 
>> +
>> +__extension__ extern __inline bfloat16x8_t
>> +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
>> +vcvtq_low_bf16_f32 (float32x4_t __a)
>> +{
>> +  return __builtin_aarch64_bfcvtn_qv8bf (__a);
>> +}
>> +
>> +__extension__ extern __inline bfloat16x8_t
>> +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
>> +vcvtq_high_bf16_f32 (bfloat16x8_t __inactive, float32x4_t __a)
>> +{
>> +  return __builtin_aarch64_bfcvtn2v8bf (__inactive, __a);
>> +}
>> +
>> +#endif
>> +#pragma GCC pop_options
>> +
>>   #pragma GCC pop_options
>>   
>>   #undef __aarch64_vget_lane_any
>> diff --git a/gcc/config/aarch64/iterators.md 
>> b/gcc/config/aarch64/iterators.md
>> index 
>> 931166da5e47302afe810498eea9c8c2ab89b9de..f9f0bafb1eca4da42e564224fca1fd43d89f6ed1
>>  100644
>> --- a/gcc/config/aarch64/iterators.md
>> +++ b/gcc/config/aarch64/iterators.md
>> @@ -431,6 +431,9 @@
>>   ;; SVE predicate modes that control 16-bit, 32-bit or 64-bit elements.
>>   (define_mode_iterator PRED_HSD [VNx8BI VNx4BI VNx2BI])
>>   
>> +;; Bfloat16 modes to which V4SF can be converted
>> +(define_mode_iterator V4SF_TO_BF [V4BF V8BF])
>> +
>>   ;; ------------------------------------------------------------------
>>   ;; Unspec enumerations for Advance SIMD. These could well go into
>>   ;; aarch64.md but for their use in int_iterators here.
>> @@ -673,6 +676,9 @@
>>       UNSPEC_UMULHS  ; Used in aarch64-sve2.md.
>>       UNSPEC_UMULHRS ; Used in aarch64-sve2.md.
>>       UNSPEC_ASRD            ; Used in aarch64-sve.md.
>> +    UNSPEC_BFCVTN   ; Used in aarch64-simd.md.
>> +    UNSPEC_BFCVTN2  ; Used in aarch64-simd.md.
>> +    UNSPEC_BFCVT    ; Used in aarch64-simd.md.
>>   ])
>>   
>>   ;; ------------------------------------------------------------------
>> diff --git a/gcc/config/arm/types.md b/gcc/config/arm/types.md
>> index 
>> df39522f2ad63a52c910b1a6bcc7aa13aaf5d021..dbcb4d58798d7f51b1b8310cd446c58317d7b50d
>>  100644
>> --- a/gcc/config/arm/types.md
>> +++ b/gcc/config/arm/types.md
>> @@ -1097,7 +1097,8 @@
>>     crypto_sm4,\
>>     coproc,\
>>     tme,\
>> -  memtag"
>> +  memtag,\
>> +  bf_cvt"
> 
> This doesn't seem to be used.
> 
>> diff --git 
>> a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/bfcvt-compile.c 
>> b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/bfcvt-compile.c
>> new file mode 100644
>> index 
>> 0000000000000000000000000000000000000000..ebe5b578c1fa82a6f2a166d55c7dc7e905b87135
>> --- /dev/null
>> +++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/bfcvt-compile.c
>> @@ -0,0 +1,56 @@
>> +/* { dg-do assemble { target { aarch64*-*-* } } } */
>> +/* { dg-require-effective-target arm_v8_2a_bf16_neon_ok } */
>> +/* { dg-add-options arm_v8_2a_bf16_neon } */
>> +/* { dg-additional-options "-save-temps" } */
>> +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
>> +
>> +#include <arm_neon.h>
>> +
>> +/*
>> +**test_bfcvtn:
>> +**  ...
>> +**  bfcvtn\tv[0-9]+.4h, v[0-9]+.4s
>> +**  ...
>> +*/
>> +bfloat16x4_t test_bfcvtn (float32x4_t a)
>> +{
>> +  return vcvt_bf16_f32 (a);
>> +}
>> +
>> +/*
>> +**test_bfcvtnq:
>> +**  ...
>> +**  bfcvtn  v[0-9]+.4h, v[0-9]+.4s
>> +**  ...
>> +*/
>> +bfloat16x8_t test_bfcvtnq (float32x4_t a)
>> +{
>> +  return vcvtq_low_bf16_f32 (a);
>> +}
>> +
>> +/*
>> +**test_bfcvtnq2:
>> +**  ...
>> +**  bfcvtn  v[0-9]+.4h, v[0-9]+.4s
>> +**  ...
>> +*/
>> +bfloat16x8_t test_bfcvtnq2 (bfloat16x8_t inactive, float32x4_t a)
>> +{
>> +  return vcvtq_high_bf16_f32 (inactive, a);
>> +}
>> +
>> +/*
>> +**test_bfcvt:
>> +**  ...
>> +**  bfcvt   h[0-9]+, s[0-9]+
>> +**  ...
>> +*/
>> +bfloat16_t test_bfcvt (float32_t a)
>> +{
>> +  return vcvth_bf16_f32 (a);
>> +}
>> +
>> +/* { dg-final { scan-assembler {bfcvtn\tv[0-9]+.4h, v[0-9]+.4s} } } */
>> +/* { dg-final { scan-assembler {bfcvtn\tv[0-9]+.4h, v[0-9]+.4s} } } */
>> +/* { dg-final { scan-assembler {bfcvtn\tv[0-9]+.4h, v[0-9]+.4s} } } */
>> +/* { dg-final { scan-assembler {bfcvt\th[0-9]+, s[0-9]+} } } */
> 
> Same comments as for the BFMMLA/BFMLAL[BT] tests.
> 
> As well as testing all these combinations for the SIMD case,
> it would be good to have a direct arm_bf16.h-only test for:
> 
> #pragma GCC target "arch=armv8.2-a+bf16+nosimd"
> 
> test_bfcvt should still work in that case.
> 
> It would also be good to have a test that test_bfcvt reports
> an appropriate error if compiled after:
> 
> #pragma GCC target "arch=armv8.2-a+nobf16"
> 
> Thanks,
> Richard
>

diff --git a/gcc/config/aarch64/aarch64-simd-builtins.def b/gcc/config/aarch64/aarch64-simd-builtins.def
index a118f4f121de067c0a80f691b852247b0ab27f7a..c1e364b4d1cb7a207c1de5a409a08e18a405a107 100644
--- a/gcc/config/aarch64/aarch64-simd-builtins.def
+++ b/gcc/config/aarch64/aarch64-simd-builtins.def
@@ -692,3 +692,9 @@
   VAR2 (TERNOP, bfdot, 0, v2sf, v4sf)
   VAR2 (QUADOP_LANE_PAIR, bfdot_lane, 0, v2sf, v4sf)
   VAR2 (QUADOP_LANE_PAIR, bfdot_laneq, 0, v2sf, v4sf)
+
+  /* Implemented by aarch64_bfcvtn{q}{2}<mode>  */
+  VAR1 (UNOP, bfcvtn, 0, v4bf)
+  VAR1 (UNOP, bfcvtn_q, 0, v8bf)
+  VAR1 (BINOP, bfcvtn2, 0, v8bf)
+  VAR1 (UNOP, bfcvt, 0, bf)
diff --git a/gcc/config/aarch64/aarch64-simd.md b/gcc/config/aarch64/aarch64-simd.md
index 97f46f96968a6bc2f93bbc812931537b819b3b19..111e48ea6b70548158ba696d997a2f2fc3cb2769 100644
--- a/gcc/config/aarch64/aarch64-simd.md
+++ b/gcc/config/aarch64/aarch64-simd.md
@@ -7091,3 +7091,32 @@
 }
   [(set_attr "type" "neon_dot<VDQSF:q>")]
 )
+
+;; bfcvtn
+(define_insn "aarch64_bfcvtn<q><mode>"
+  [(set (match_operand:V4SF_TO_BF 0 "register_operand" "=w")
+        (unspec:V4SF_TO_BF [(match_operand:V4SF 1 "register_operand" "w")]
+                            UNSPEC_BFCVTN))]
+  "TARGET_BF16_SIMD"
+  "bfcvtn\\t%0.4h, %1.4s"
+  [(set_attr "type" "neon_fp_cvt_narrow_s_q")]
+)
+
+(define_insn "aarch64_bfcvtn2v8bf"
+  [(set (match_operand:V8BF 0 "register_operand" "=w")
+        (unspec:V8BF [(match_operand:V8BF 1 "register_operand" "0")
+                      (match_operand:V4SF 2 "register_operand" "w")]
+                      UNSPEC_BFCVTN2))]
+  "TARGET_BF16_SIMD"
+  "bfcvtn2\\t%0.8h, %2.4s"
+  [(set_attr "type" "neon_fp_cvt_narrow_s_q")]
+)
+
+(define_insn "aarch64_bfcvtbf"
+  [(set (match_operand:BF 0 "register_operand" "=w")
+        (unspec:BF [(match_operand:SF 1 "register_operand" "w")]
+                    UNSPEC_BFCVT))]
+  "TARGET_BF16_FP"
+  "bfcvt\\t%h0, %s1"
+  [(set_attr "type" "f_cvt")]
+)
diff --git a/gcc/config/aarch64/arm_bf16.h b/gcc/config/aarch64/arm_bf16.h
index 3759c0d1cb449a7f0125cc2a1433127564d66622..fb2150e1d60a590046e2c034422021aafc721e23 100644
--- a/gcc/config/aarch64/arm_bf16.h
+++ b/gcc/config/aarch64/arm_bf16.h
@@ -28,5 +28,13 @@
 #define _AARCH64_BF16_H_
 
 typedef __bf16 bfloat16_t;
+typedef float float32_t;
+
+__extension__ extern __inline bfloat16_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vcvth_bf16_f32 (float32_t __a)
+{
+  return __builtin_aarch64_bfcvtbf (__a);
+}
 
 #endif
diff --git a/gcc/config/aarch64/arm_neon.h b/gcc/config/aarch64/arm_neon.h
index 7f05c3f9eca844b0e7b824a191223a4906c825b1..36f82743231a7160050695267e75a08e0cd73e03 100644
--- a/gcc/config/aarch64/arm_neon.h
+++ b/gcc/config/aarch64/arm_neon.h
@@ -34660,6 +34660,27 @@ vbfdotq_laneq_f32 (float32x4_t __r, bfloat16x8_t __a, bfloat16x8_t __b,
   return __builtin_aarch64_bfdot_laneqv4sf (__r, __a, __b, __index);
 }
 
+__extension__ extern __inline bfloat16x4_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vcvt_bf16_f32 (float32x4_t __a)
+{
+  return __builtin_aarch64_bfcvtnv4bf (__a);
+}
+
+__extension__ extern __inline bfloat16x8_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vcvtq_low_bf16_f32 (float32x4_t __a)
+{
+  return __builtin_aarch64_bfcvtn_qv8bf (__a);
+}
+
+__extension__ extern __inline bfloat16x8_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vcvtq_high_bf16_f32 (bfloat16x8_t __inactive, float32x4_t __a)
+{
+  return __builtin_aarch64_bfcvtn2v8bf (__inactive, __a);
+}
+
 #pragma GCC pop_options
 
 /* AdvSIMD 8-bit Integer Matrix Multiply (I8MM) intrinsics.  */
diff --git a/gcc/config/aarch64/iterators.md b/gcc/config/aarch64/iterators.md
index fc973086cb91ae0dc54eeeb0b832d522539d7982..9f25d4805d2d78025885f573176296e71f74f447 100644
--- a/gcc/config/aarch64/iterators.md
+++ b/gcc/config/aarch64/iterators.md
@@ -455,6 +455,9 @@
 ;; SVE predicate modes that control 16-bit, 32-bit or 64-bit elements.
 (define_mode_iterator PRED_HSD [VNx8BI VNx4BI VNx2BI])
 
+;; Bfloat16 modes to which V4SF can be converted
+(define_mode_iterator V4SF_TO_BF [V4BF V8BF])
+
 ;; ------------------------------------------------------------------
 ;; Unspec enumerations for Advance SIMD. These could well go into
 ;; aarch64.md but for their use in int_iterators here.
@@ -808,6 +811,9 @@
     UNSPEC_USDOT	; Used in aarch64-simd.md.
     UNSPEC_SUDOT	; Used in aarch64-simd.md.
     UNSPEC_BFDOT	; Used in aarch64-simd.md.
+    UNSPEC_BFCVTN      ; Used in aarch64-simd.md.
+    UNSPEC_BFCVTN2     ; Used in aarch64-simd.md.
+    UNSPEC_BFCVT       ; Used in aarch64-simd.md.
 ])
 
 ;; ------------------------------------------------------------------
diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/bfcvt-compile.c b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/bfcvt-compile.c
new file mode 100644
index 0000000000000000000000000000000000000000..ffb5305e2e5ea1aadae07e82fd8ed6f9f247c1a9
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/bfcvt-compile.c
@@ -0,0 +1,48 @@
+/* { dg-do assemble { target { aarch64*-*-* } } } */
+/* { dg-require-effective-target arm_v8_2a_bf16_neon_ok } */
+/* { dg-add-options arm_v8_2a_bf16_neon } */
+/* { dg-additional-options "-save-temps" } */
+/* { dg-final { check-function-bodies "**" "" {-O[^0]} } } */
+/* { dg-skip-if "" { *-*-* } { "-fno-fat-lto-objects" } } */
+
+#include <arm_neon.h>
+
+/*
+**test_bfcvtn:
+**     bfcvtn\tv0.4h, v0.4s
+**     ret
+*/
+bfloat16x4_t test_bfcvtn (float32x4_t a)
+{
+  return vcvt_bf16_f32 (a);
+}
+
+/*
+**test_bfcvtnq:
+**     bfcvtn\tv0.4h, v0.4s
+**     ret
+*/
+bfloat16x8_t test_bfcvtnq (float32x4_t a)
+{
+  return vcvtq_low_bf16_f32 (a);
+}
+
+/*
+**test_bfcvtnq2:
+**     bfcvtn2\tv0.8h, v1.4s
+**     ret
+*/
+bfloat16x8_t test_bfcvtnq2 (bfloat16x8_t inactive, float32x4_t a)
+{
+  return vcvtq_high_bf16_f32 (inactive, a);
+}
+
+/*
+**test_bfcvt:
+**     bfcvt\th0, s0
+**     ret
+*/
+bfloat16_t test_bfcvt (float32_t a)
+{
+  return vcvth_bf16_f32 (a);
+}
diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/bfcvt-nobf16.c b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/bfcvt-nobf16.c
new file mode 100644
index 0000000000000000000000000000000000000000..b3b6db12358677b2f34169214241318e5d0d764f
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/bfcvt-nobf16.c
@@ -0,0 +1,10 @@
+/* { dg-do assemble { target { aarch64*-*-* } } } */
+/* { dg-additional-options "-march=armv8.2-a+nobf16" } */
+
+#include <arm_neon.h>
+
+bfloat16x4_t test_bfcvtn (float32x4_t a)
+{
+  /* { dg-error "inlining failed .* 'vcvt_bf16_f32" "" { target *-*-* } 0 } */
+  return vcvt_bf16_f32 (a);
+}
diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/bfcvt-nosimd.c b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/bfcvt-nosimd.c
new file mode 100644
index 0000000000000000000000000000000000000000..8d7dffe16275de60e884c449afa0fea0b1af6081
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/bfcvt-nosimd.c
@@ -0,0 +1,15 @@
+/* { dg-do assemble { target { aarch64*-*-* } } } */
+/* { dg-additional-options "-save-temps -march=armv8.2-a+bf16+nosimd" } */
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include <arm_neon.h>
+
+/*
+**test_bfcvt:
+**     bfcvt\th0, s0
+**     ret
+*/
+bfloat16_t test_bfcvt (float32_t a)
+{
+  return vcvth_bf16_f32 (a);
+}
diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/bfcvtnq2-untied.c b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/bfcvtnq2-untied.c
new file mode 100644
index 0000000000000000000000000000000000000000..7d2cef68438da83e386fa02753d018242a74a452
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/bfcvtnq2-untied.c
@@ -0,0 +1,20 @@
+/* { dg-do assemble { target { aarch64*-*-* } } } */
+/* { dg-require-effective-target arm_v8_2a_bf16_neon_ok } */
+/* { dg-add-options arm_v8_2a_bf16_neon } */
+/* { dg-additional-options "-save-temps" } */
+/* { dg-final { check-function-bodies "**" "" {-O[^0]} } } */
+/* { dg-skip-if "" { *-*-* } { "-fno-fat-lto-objects" } } */
+
+#include <arm_neon.h>
+
+/*
+**test_bfcvtnq2_untied:
+**     mov\tv0.16b, v1.16b
+**     bfcvtn2\tv0.8h, v2.4s
+**     ret
+*/
+bfloat16x8_t test_bfcvtnq2_untied (bfloat16x8_t unused, bfloat16x8_t inactive,
+                                  float32x4_t a)
+{
+  return vcvtq_high_bf16_f32 (inactive, a);
+}

Re: [GCC][PATCH][AArch64] ACLE intrinsics for BFCVTN, BFCVTN2 (AArch64 AdvSIMD) and BFCVT (AArch64 FP)

Reply via email to