Sorry for the confusion; what I meant to say was: This patch adds the Armv8.6-a ACLE intrinsics for bfcvtn, bfcvtn2 and bfcvt as part of the BFloat16 extension (https://developer.arm.com/architectures/instruction-sets/simd-isas/neon/intrinsics). The intrinsics are declared in arm_bf16.h and arm_neon.h, and the RTL patterns are defined in aarch64-simd.md.
Tested for regression on aarch64-none-elf and aarch64_be-none-elf. I don't have commit rights, so if this is ok, can someone please commit it for me? Here is the updated patch. Thank you, Delia gcc/ChangeLog: 2019-11-06 Delia Burduv <delia.bur...@arm.com> * config/aarch64/aarch64-simd-builtins.def (bfcvtn): New built-in function. (bfcvtn_q): New built-in function. (bfcvtn2): New built-in function. (bfcvt): New built-in function. * config/aarch64/aarch64-simd.md (aarch64_bfcvtn<q><mode>): New pattern. (aarch64_bfcvtn2v8bf): New pattern. (aarch64_bfcvtbf): New pattern. * config/aarch64/arm_bf16.h (float32_t): New typedef. (vcvth_bf16_f32): New intrinsic. * config/aarch64/arm_neon.h (vcvt_bf16_f32): New intrinsic. (vcvtq_low_bf16_f32): New intrinsic. (vcvtq_high_bf16_f32): New intrinsic. * config/aarch64/iterators.md (V4SF_TO_BF): New mode iterator. (UNSPEC_BFCVTN): New UNSPEC. (UNSPEC_BFCVTN2): New UNSPEC. (UNSPEC_BFCVT): New UNSPEC. * config/arm/types.md (bf_cvt): New type. gcc/testsuite/ChangeLog: 2020-01-31 Delia Burduv <delia.bur...@arm.com> * gcc.target/aarch64/advsimd-intrinsics/bfcvt-compile.c: New test. * gcc.target/aarch64/advsimd-intrinsics/bfcvt-nobf16.c: New test. * gcc.target/aarch64/advsimd-intrinsics/bfcvt-nosimd.c: New test. * gcc.target/aarch64/advsimd-intrinsics/bfcvtnq2-untied.c: New test. On 12/23/19 6:30 PM, Richard Sandiford wrote: > Some of the comments on the BFMMLA/BFMLA[LT] patch apply here too. > > Delia Burduv <delia.bur...@arm.com> writes: >> This patch adds the Armv8.6-a ACLE intrinsics for bfmmla, bfmlalb and >> bfmlalt as part of the BFloat16 extension. > > That's the other patch :-) > >> [...] 
>> diff --git a/gcc/config/aarch64/aarch64-simd.md >> b/gcc/config/aarch64/aarch64-simd.md >> index >> 55660ae248f4fa75d35ba2949cd4b9d5139ff5f5..ff7a1f5f34a19b05eba48dba96c736dfdfdf7bac >> 100644 >> --- a/gcc/config/aarch64/aarch64-simd.md >> +++ b/gcc/config/aarch64/aarch64-simd.md >> @@ -7027,3 +7027,32 @@ >> "xtn\t%0.<Vntype>, %1.<Vtype>" >> [(set_attr "type" "neon_shift_imm_narrow_q")] >> ) >> + >> +;; bfcvtn >> +(define_insn "aarch64_bfcvtn<q><mode>" >> + [(set (match_operand:V4SF_TO_BF 0 "register_operand" "=w") >> + (unspec:V4SF_TO_BF [(match_operand:V4SF 1 "register_operand" "w")] >> + UNSPEC_BFCVTN))] >> + "TARGET_BF16_SIMD" >> + "bfcvtn\\t%0.4h, %1.4s" >> + [(set_attr "type" "f_cvt")] >> +) >> + > > If I've understood the naming convention correctly, the closest type > seems to be "neon_fp_cvt_narrow_s_q". > >> +(define_insn "aarch64_bfcvtn2v8bf" >> + [(set (match_operand:V8BF 0 "register_operand" "=w") >> + (unspec:V8BF [(match_operand:V8BF 1 "register_operand" "w") >> + (match_operand:V4SF 2 "register_operand" "w")] >> + UNSPEC_BFCVTN2))] >> + "TARGET_BF16_SIMD" >> + "bfcvtn2\\t%0.8h, %2.4s" >> + [(set_attr "type" "f_cvt")] >> +) > > Same here. > > The constraint on operand 1 needs to be "0", otherwise operands 1 and 0 > could end up in different registers. You could test for this using > something like: > > bfloat16x8_t test_bfcvtnq2_untied (bfloat16x8_t unused, bfloat16x8_t inactive, > float32x4_t a) > { > return vcvtq_high_bf16_f32 (inactive, a); > } > > which when compiled at -O should produce something like: > > /* > **test_bfcvtnq2_untied: > ** mov v0\.8h, v1\.8h > ** bfcvtn2 v0\.8h, v2\.4s > ** ret > */ > > (Completely untested, the code above is probably wrong.) > >> + >> +(define_insn "aarch64_bfcvtbf" >> + [(set (match_operand:BF 0 "register_operand" "=w") >> + (unspec:BF [(match_operand:SF 1 "register_operand" "w")] >> + UNSPEC_BFCVT))] >> + "TARGET_BF16_SIMD" > > I think this just needs the scalar macro rather than *_SIMD. 
> >> + "bfcvt\\t%h0, %s1" >> + [(set_attr "type" "f_cvt")] >> +) >> diff --git a/gcc/config/aarch64/arm_bf16.h b/gcc/config/aarch64/arm_bf16.h >> index >> aedb0972735ce549fac1870bacd1ef3101e8fd26..1b9ab3690d35e153cd4f24b9e3bbb5b4cc4b4f4d >> 100644 >> --- a/gcc/config/aarch64/arm_bf16.h >> +++ b/gcc/config/aarch64/arm_bf16.h >> @@ -34,7 +34,15 @@ >> #ifdef __ARM_FEATURE_BF16_SCALAR_ARITHMETIC >> >> typedef __bf16 bfloat16_t; >> - >> +typedef float float32_t; >> + >> +__extension__ extern __inline bfloat16_t >> +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) >> +vcvth_bf16_f32 \ >> + (float32_t __a) > > No need for the line break here. > >> +{ >> + return __builtin_aarch64_bfcvtbf (__a); >> +} >> >> #endif >> #pragma GCC pop_options >> diff --git a/gcc/config/aarch64/arm_neon.h b/gcc/config/aarch64/arm_neon.h >> index >> 6cdbf381f0156ed993f03b847228b36ebbdd14f8..120f4b7d8827aee51834e75aeaa6ab8f8451980e >> 100644 >> --- a/gcc/config/aarch64/arm_neon.h >> +++ b/gcc/config/aarch64/arm_neon.h >> @@ -34610,6 +34610,35 @@ vrnd64xq_f64 (float64x2_t __a) >> >> #include "arm_bf16.h" >> >> +#pragma GCC push_options >> +#pragma GCC target ("arch=armv8.2-a+bf16") >> +#ifdef __ARM_FEATURE_BF16_VECTOR_ARITHMETIC >> + >> +__extension__ extern __inline bfloat16x4_t >> +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) >> +vcvt_bf16_f32 (float32x4_t __a) >> +{ >> + return __builtin_aarch64_bfcvtnv4bf (__a); >> + >> +} > > Nit: extra blank line. 
> >> + >> +__extension__ extern __inline bfloat16x8_t >> +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) >> +vcvtq_low_bf16_f32 (float32x4_t __a) >> +{ >> + return __builtin_aarch64_bfcvtn_qv8bf (__a); >> +} >> + >> +__extension__ extern __inline bfloat16x8_t >> +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) >> +vcvtq_high_bf16_f32 (bfloat16x8_t __inactive, float32x4_t __a) >> +{ >> + return __builtin_aarch64_bfcvtn2v8bf (__inactive, __a); >> +} >> + >> +#endif >> +#pragma GCC pop_options >> + >> #pragma GCC pop_options >> >> #undef __aarch64_vget_lane_any >> diff --git a/gcc/config/aarch64/iterators.md >> b/gcc/config/aarch64/iterators.md >> index >> 931166da5e47302afe810498eea9c8c2ab89b9de..f9f0bafb1eca4da42e564224fca1fd43d89f6ed1 >> 100644 >> --- a/gcc/config/aarch64/iterators.md >> +++ b/gcc/config/aarch64/iterators.md >> @@ -431,6 +431,9 @@ >> ;; SVE predicate modes that control 16-bit, 32-bit or 64-bit elements. >> (define_mode_iterator PRED_HSD [VNx8BI VNx4BI VNx2BI]) >> >> +;; Bfloat16 modes to which V4SF can be converted >> +(define_mode_iterator V4SF_TO_BF [V4BF V8BF]) >> + >> ;; ------------------------------------------------------------------ >> ;; Unspec enumerations for Advance SIMD. These could well go into >> ;; aarch64.md but for their use in int_iterators here. >> @@ -673,6 +676,9 @@ >> UNSPEC_UMULHS ; Used in aarch64-sve2.md. >> UNSPEC_UMULHRS ; Used in aarch64-sve2.md. >> UNSPEC_ASRD ; Used in aarch64-sve.md. >> + UNSPEC_BFCVTN ; Used in aarch64-simd.md. >> + UNSPEC_BFCVTN2 ; Used in aarch64-simd.md. >> + UNSPEC_BFCVT ; Used in aarch64-simd.md. 
>> ]) >> >> ;; ------------------------------------------------------------------ >> diff --git a/gcc/config/arm/types.md b/gcc/config/arm/types.md >> index >> df39522f2ad63a52c910b1a6bcc7aa13aaf5d021..dbcb4d58798d7f51b1b8310cd446c58317d7b50d >> 100644 >> --- a/gcc/config/arm/types.md >> +++ b/gcc/config/arm/types.md >> @@ -1097,7 +1097,8 @@ >> crypto_sm4,\ >> coproc,\ >> tme,\ >> - memtag" >> + memtag,\ >> + bf_cvt" > > This doesn't seem to be used. > >> diff --git >> a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/bfcvt-compile.c >> b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/bfcvt-compile.c >> new file mode 100644 >> index >> 0000000000000000000000000000000000000000..ebe5b578c1fa82a6f2a166d55c7dc7e905b87135 >> --- /dev/null >> +++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/bfcvt-compile.c >> @@ -0,0 +1,56 @@ >> +/* { dg-do assemble { target { aarch64*-*-* } } } */ >> +/* { dg-require-effective-target arm_v8_2a_bf16_neon_ok } */ >> +/* { dg-add-options arm_v8_2a_bf16_neon } */ >> +/* { dg-additional-options "-save-temps" } */ >> +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ >> + >> +#include <arm_neon.h> >> + >> +/* >> +**test_bfcvtn: >> +** ... >> +** bfcvtn\tv[0-9]+.4h, v[0-9]+.4s >> +** ... >> +*/ >> +bfloat16x4_t test_bfcvtn (float32x4_t a) >> +{ >> + return vcvt_bf16_f32 (a); >> +} >> + >> +/* >> +**test_bfcvtnq: >> +** ... >> +** bfcvtn v[0-9]+.4h, v[0-9]+.4s >> +** ... >> +*/ >> +bfloat16x8_t test_bfcvtnq (float32x4_t a) >> +{ >> + return vcvtq_low_bf16_f32 (a); >> +} >> + >> +/* >> +**test_bfcvtnq2: >> +** ... >> +** bfcvtn v[0-9]+.4h, v[0-9]+.4s >> +** ... >> +*/ >> +bfloat16x8_t test_bfcvtnq2 (bfloat16x8_t inactive, float32x4_t a) >> +{ >> + return vcvtq_high_bf16_f32 (inactive, a); >> +} >> + >> +/* >> +**test_bfcvt: >> +** ... >> +** bfcvt h[0-9]+, s[0-9]+ >> +** ... 
>> +*/ >> +bfloat16_t test_bfcvt (float32_t a) >> +{ >> + return vcvth_bf16_f32 (a); >> +} >> + >> +/* { dg-final { scan-assembler {bfcvtn\tv[0-9]+.4h, v[0-9]+.4s} } } */ >> +/* { dg-final { scan-assembler {bfcvtn\tv[0-9]+.4h, v[0-9]+.4s} } } */ >> +/* { dg-final { scan-assembler {bfcvtn\tv[0-9]+.4h, v[0-9]+.4s} } } */ >> +/* { dg-final { scan-assembler {bfcvt\th[0-9]+, s[0-9]+} } } */ > > Same comments as for the BFMMLA/BFMLA[BT] tests. > > As well as testing all these combinations for the SIMD case, > it would be good to have a direct arm_bf16.h-only test for: > > #pragma GCC target "arch=armv8.2-a+bf16+nosimd" > > test_bfcvt should still work in that case. > > It would also be good to have a test that test_bfcvt reports > an appropriate error if compiled after: > > #pragma GCC target "arch=armv8.2-a+nobf16" > > Thanks, > Richard >
diff --git a/gcc/config/aarch64/aarch64-simd-builtins.def b/gcc/config/aarch64/aarch64-simd-builtins.def index a118f4f121de067c0a80f691b852247b0ab27f7a..c1e364b4d1cb7a207c1de5a409a08e18a405a107 100644 --- a/gcc/config/aarch64/aarch64-simd-builtins.def +++ b/gcc/config/aarch64/aarch64-simd-builtins.def @@ -692,3 +692,9 @@ VAR2 (TERNOP, bfdot, 0, v2sf, v4sf) VAR2 (QUADOP_LANE_PAIR, bfdot_lane, 0, v2sf, v4sf) VAR2 (QUADOP_LANE_PAIR, bfdot_laneq, 0, v2sf, v4sf) + + /* Implemented by aarch64_bfcvtn{q}{2}<mode> */ + VAR1 (UNOP, bfcvtn, 0, v4bf) + VAR1 (UNOP, bfcvtn_q, 0, v8bf) + VAR1 (BINOP, bfcvtn2, 0, v8bf) + VAR1 (UNOP, bfcvt, 0, bf) diff --git a/gcc/config/aarch64/aarch64-simd.md b/gcc/config/aarch64/aarch64-simd.md index 97f46f96968a6bc2f93bbc812931537b819b3b19..111e48ea6b70548158ba696d997a2f2fc3cb2769 100644 --- a/gcc/config/aarch64/aarch64-simd.md +++ b/gcc/config/aarch64/aarch64-simd.md @@ -7091,3 +7091,32 @@ } [(set_attr "type" "neon_dot<VDQSF:q>")] ) + +;; bfcvtn +(define_insn "aarch64_bfcvtn<q><mode>" + [(set (match_operand:V4SF_TO_BF 0 "register_operand" "=w") + (unspec:V4SF_TO_BF [(match_operand:V4SF 1 "register_operand" "w")] + UNSPEC_BFCVTN))] + "TARGET_BF16_SIMD" + "bfcvtn\\t%0.4h, %1.4s" + [(set_attr "type" "neon_fp_cvt_narrow_s_q")] +) + +(define_insn "aarch64_bfcvtn2v8bf" + [(set (match_operand:V8BF 0 "register_operand" "=w") + (unspec:V8BF [(match_operand:V8BF 1 "register_operand" "0") + (match_operand:V4SF 2 "register_operand" "w")] + UNSPEC_BFCVTN2))] + "TARGET_BF16_SIMD" + "bfcvtn2\\t%0.8h, %2.4s" + [(set_attr "type" "neon_fp_cvt_narrow_s_q")] +) + +(define_insn "aarch64_bfcvtbf" + [(set (match_operand:BF 0 "register_operand" "=w") + (unspec:BF [(match_operand:SF 1 "register_operand" "w")] + UNSPEC_BFCVT))] + "TARGET_BF16_FP" + "bfcvt\\t%h0, %s1" + [(set_attr "type" "f_cvt")] +) diff --git a/gcc/config/aarch64/arm_bf16.h b/gcc/config/aarch64/arm_bf16.h index 3759c0d1cb449a7f0125cc2a1433127564d66622..fb2150e1d60a590046e2c034422021aafc721e23 100644 
--- a/gcc/config/aarch64/arm_bf16.h +++ b/gcc/config/aarch64/arm_bf16.h @@ -28,5 +28,13 @@ #define _AARCH64_BF16_H_ typedef __bf16 bfloat16_t; +typedef float float32_t; + +__extension__ extern __inline bfloat16_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vcvth_bf16_f32 (float32_t __a) +{ + return __builtin_aarch64_bfcvtbf (__a); +} #endif diff --git a/gcc/config/aarch64/arm_neon.h b/gcc/config/aarch64/arm_neon.h index 7f05c3f9eca844b0e7b824a191223a4906c825b1..36f82743231a7160050695267e75a08e0cd73e03 100644 --- a/gcc/config/aarch64/arm_neon.h +++ b/gcc/config/aarch64/arm_neon.h @@ -34660,6 +34660,27 @@ vbfdotq_laneq_f32 (float32x4_t __r, bfloat16x8_t __a, bfloat16x8_t __b, return __builtin_aarch64_bfdot_laneqv4sf (__r, __a, __b, __index); } +__extension__ extern __inline bfloat16x4_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vcvt_bf16_f32 (float32x4_t __a) +{ + return __builtin_aarch64_bfcvtnv4bf (__a); +} + +__extension__ extern __inline bfloat16x8_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vcvtq_low_bf16_f32 (float32x4_t __a) +{ + return __builtin_aarch64_bfcvtn_qv8bf (__a); +} + +__extension__ extern __inline bfloat16x8_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vcvtq_high_bf16_f32 (bfloat16x8_t __inactive, float32x4_t __a) +{ + return __builtin_aarch64_bfcvtn2v8bf (__inactive, __a); +} + #pragma GCC pop_options /* AdvSIMD 8-bit Integer Matrix Multiply (I8MM) intrinsics. */ diff --git a/gcc/config/aarch64/iterators.md b/gcc/config/aarch64/iterators.md index fc973086cb91ae0dc54eeeb0b832d522539d7982..9f25d4805d2d78025885f573176296e71f74f447 100644 --- a/gcc/config/aarch64/iterators.md +++ b/gcc/config/aarch64/iterators.md @@ -455,6 +455,9 @@ ;; SVE predicate modes that control 16-bit, 32-bit or 64-bit elements. 
(define_mode_iterator PRED_HSD [VNx8BI VNx4BI VNx2BI]) +;; Bfloat16 modes to which V4SF can be converted +(define_mode_iterator V4SF_TO_BF [V4BF V8BF]) + ;; ------------------------------------------------------------------ ;; Unspec enumerations for Advance SIMD. These could well go into ;; aarch64.md but for their use in int_iterators here. @@ -808,6 +811,9 @@ UNSPEC_USDOT ; Used in aarch64-simd.md. UNSPEC_SUDOT ; Used in aarch64-simd.md. UNSPEC_BFDOT ; Used in aarch64-simd.md. + UNSPEC_BFCVTN ; Used in aarch64-simd.md. + UNSPEC_BFCVTN2 ; Used in aarch64-simd.md. + UNSPEC_BFCVT ; Used in aarch64-simd.md. ]) ;; ------------------------------------------------------------------ diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/bfcvt-compile.c b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/bfcvt-compile.c new file mode 100644 index 0000000000000000000000000000000000000000..ffb5305e2e5ea1aadae07e82fd8ed6f9f247c1a9 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/bfcvt-compile.c @@ -0,0 +1,48 @@ +/* { dg-do assemble { target { aarch64*-*-* } } } */ +/* { dg-require-effective-target arm_v8_2a_bf16_neon_ok } */ +/* { dg-add-options arm_v8_2a_bf16_neon } */ +/* { dg-additional-options "-save-temps" } */ +/* { dg-final { check-function-bodies "**" "" {-O[^0]} } } */ +/* { dg-skip-if "" { *-*-* } { "-fno-fat-lto-objects" } } */ + +#include <arm_neon.h> + +/* +**test_bfcvtn: +** bfcvtn\tv0.4h, v0.4s +** ret +*/ +bfloat16x4_t test_bfcvtn (float32x4_t a) +{ + return vcvt_bf16_f32 (a); +} + +/* +**test_bfcvtnq: +** bfcvtn\tv0.4h, v0.4s +** ret +*/ +bfloat16x8_t test_bfcvtnq (float32x4_t a) +{ + return vcvtq_low_bf16_f32 (a); +} + +/* +**test_bfcvtnq2: +** bfcvtn2\tv0.8h, v1.4s +** ret +*/ +bfloat16x8_t test_bfcvtnq2 (bfloat16x8_t inactive, float32x4_t a) +{ + return vcvtq_high_bf16_f32 (inactive, a); +} + +/* +**test_bfcvt: +** bfcvt\th0, s0 +** ret +*/ +bfloat16_t test_bfcvt (float32_t a) +{ + return vcvth_bf16_f32 (a); +} diff 
--git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/bfcvt-nobf16.c b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/bfcvt-nobf16.c new file mode 100644 index 0000000000000000000000000000000000000000..b3b6db12358677b2f34169214241318e5d0d764f --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/bfcvt-nobf16.c @@ -0,0 +1,10 @@ +/* { dg-do assemble { target { aarch64*-*-* } } } */ +/* { dg-additional-options "-march=armv8.2-a+nobf16" } */ + +#include <arm_neon.h> + +bfloat16x4_t test_bfcvtn (float32x4_t a) +{ + /* { dg-error "inlining failed .* 'vcvt_bf16_f32" "" { target *-*-* } 0 } */ + return vcvt_bf16_f32 (a); +} diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/bfcvt-nosimd.c b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/bfcvt-nosimd.c new file mode 100644 index 0000000000000000000000000000000000000000..8d7dffe16275de60e884c449afa0fea0b1af6081 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/bfcvt-nosimd.c @@ -0,0 +1,15 @@ +/* { dg-do assemble { target { aarch64*-*-* } } } */ +/* { dg-additional-options "-save-temps -march=armv8.2-a+bf16+nosimd" } */ +/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ + +#include <arm_neon.h> + +/* +**test_bfcvt: +** bfcvt\th0, s0 +** ret +*/ +bfloat16_t test_bfcvt (float32_t a) +{ + return vcvth_bf16_f32 (a); +} diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/bfcvtnq2-untied.c b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/bfcvtnq2-untied.c new file mode 100644 index 0000000000000000000000000000000000000000..7d2cef68438da83e386fa02753d018242a74a452 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/bfcvtnq2-untied.c @@ -0,0 +1,20 @@ +/* { dg-do assemble { target { aarch64*-*-* } } } */ +/* { dg-require-effective-target arm_v8_2a_bf16_neon_ok } */ +/* { dg-add-options arm_v8_2a_bf16_neon } */ +/* { dg-additional-options "-save-temps" } */ +/* { dg-final { check-function-bodies "**" "" {-O[^0]} 
} } */ +/* { dg-skip-if "" { *-*-* } { "-fno-fat-lto-objects" } } */ + +#include <arm_neon.h> + +/* +**test_bfcvtnq2_untied: +** mov\tv0.16b, v1.16b +** bfcvtn2\tv0.8h, v2.4s +** ret +*/ +bfloat16x8_t test_bfcvtnq2_untied (bfloat16x8_t unused, bfloat16x8_t inactive, + float32x4_t a) +{ + return vcvtq_high_bf16_f32 (inactive, a); +}