Hi,
Here is the latest version of the patch.
On 2/18/20 1:51 PM, Richard Sandiford wrote:
Tamar Christina <tamar.christ...@arm.com> writes:
Hi Richard,
0000000000000000000000000000000000000000..ffb5305e2e5ea1aadae07e82f
d8e
d6f9f247c1a9
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/bfcvt-compil
+++ e.c
@@ -0,0 +1,48 @@
+/* { dg-do assemble { target { aarch64*-*-* } } } */
The { target ... } isn't necessary here. (Missed that in the other review,
sorry.)
Why not? The advsimd-intrinsics tests are shared between both AArch32 and
AArch64.
Ah, so they are. Think it would better to move them to a new
gcc.target/arm-common or something in that case. Tests in
gcc.target/aarch64 really ought to be specific to aarch64.
Thanks,
Richard
I left the advsimd-intrinsics tests shared since creating a new
gcc.target/arm-common should probably be a separate patch.
Let me know if this patch is ok. And if it is, can someone please commit
it for me?
Thanks,
Delia
Tamar.
+/* { dg-require-effective-target arm_v8_2a_bf16_neon_ok } */
+/* { dg-add-options arm_v8_2a_bf16_neon } */
+/* { dg-additional-options "-save-temps" } */
+/* { dg-final { check-function-bodies "**" "" {-O[^0]} } } */
+/* { dg-skip-if "" { *-*-* } { "-fno-fat-lto-objects" } } */
+
+#include <arm_neon.h>
+
+/*
+**test_bfcvtn:
+** bfcvtn\tv0.4h, v0.4s
Like with the other review, I think the literal tab you had in the original
patch
looks better than \t.
[...]
diff --git
a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/bfcvt-nosimd.c
b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/bfcvt-nosimd.c
new file mode 100644
index
0000000000000000000000000000000000000000..8d7dffe16275de60e884c449af
a0
fea0b1af6081
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/bfcvt-nosimd
+++ .c
@@ -0,0 +1,15 @@
+/* { dg-do assemble { target { aarch64*-*-* } } } */
This needs:
/* { dg-require-effective-target aarch64_asm_bf16_ok } */
(Doesn't exist yet, but I hope to post a patch soon.)
Looks good otherwise, thanks.
Richard
diff --git a/gcc/config/aarch64/aarch64-simd-builtins.def b/gcc/config/aarch64/aarch64-simd-builtins.def
index a118f4f121de067c0a80f691b852247b0ab27f7a..c1e364b4d1cb7a207c1de5a409a08e18a405a107 100644
--- a/gcc/config/aarch64/aarch64-simd-builtins.def
+++ b/gcc/config/aarch64/aarch64-simd-builtins.def
@@ -692,3 +692,9 @@
VAR2 (TERNOP, bfdot, 0, v2sf, v4sf)
VAR2 (QUADOP_LANE_PAIR, bfdot_lane, 0, v2sf, v4sf)
VAR2 (QUADOP_LANE_PAIR, bfdot_laneq, 0, v2sf, v4sf)
+
+ /* Implemented by aarch64_bfcvtn{q}{2}<mode> */
+ VAR1 (UNOP, bfcvtn, 0, v4bf)
+ VAR1 (UNOP, bfcvtn_q, 0, v8bf)
+ VAR1 (BINOP, bfcvtn2, 0, v8bf)
+ VAR1 (UNOP, bfcvt, 0, bf)
diff --git a/gcc/config/aarch64/aarch64-simd.md b/gcc/config/aarch64/aarch64-simd.md
index 97f46f96968a6bc2f93bbc812931537b819b3b19..111e48ea6b70548158ba696d997a2f2fc3cb2769 100644
--- a/gcc/config/aarch64/aarch64-simd.md
+++ b/gcc/config/aarch64/aarch64-simd.md
@@ -7091,3 +7091,32 @@
}
[(set_attr "type" "neon_dot<VDQSF:q>")]
)
+
+;; bfcvtn
+(define_insn "aarch64_bfcvtn<q><mode>"
+ [(set (match_operand:V4SF_TO_BF 0 "register_operand" "=w")
+ (unspec:V4SF_TO_BF [(match_operand:V4SF 1 "register_operand" "w")]
+ UNSPEC_BFCVTN))]
+ "TARGET_BF16_SIMD"
+ "bfcvtn\\t%0.4h, %1.4s"
+ [(set_attr "type" "neon_fp_cvt_narrow_s_q")]
+)
+
+(define_insn "aarch64_bfcvtn2v8bf"
+ [(set (match_operand:V8BF 0 "register_operand" "=w")
+ (unspec:V8BF [(match_operand:V8BF 1 "register_operand" "0")
+ (match_operand:V4SF 2 "register_operand" "w")]
+ UNSPEC_BFCVTN2))]
+ "TARGET_BF16_SIMD"
+ "bfcvtn2\\t%0.8h, %2.4s"
+ [(set_attr "type" "neon_fp_cvt_narrow_s_q")]
+)
+
+(define_insn "aarch64_bfcvtbf"
+ [(set (match_operand:BF 0 "register_operand" "=w")
+ (unspec:BF [(match_operand:SF 1 "register_operand" "w")]
+ UNSPEC_BFCVT))]
+ "TARGET_BF16_FP"
+ "bfcvt\\t%h0, %s1"
+ [(set_attr "type" "f_cvt")]
+)
diff --git a/gcc/config/aarch64/arm_bf16.h b/gcc/config/aarch64/arm_bf16.h
index 3759c0d1cb449a7f0125cc2a1433127564d66622..fa7080c2953bc3254f01d842a8afef917d469080 100644
--- a/gcc/config/aarch64/arm_bf16.h
+++ b/gcc/config/aarch64/arm_bf16.h
@@ -27,6 +27,19 @@
#ifndef _AARCH64_BF16_H_
#define _AARCH64_BF16_H_
+#pragma GCC push_options
+#pragma GCC target ("+nothing+bf16")
+
typedef __bf16 bfloat16_t;
+typedef float float32_t;
+
+__extension__ extern __inline bfloat16_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vcvth_bf16_f32 (float32_t __a)
+{
+ return __builtin_aarch64_bfcvtbf (__a);
+}
+
+#pragma GCC pop_options
#endif
diff --git a/gcc/config/aarch64/arm_neon.h b/gcc/config/aarch64/arm_neon.h
index 7f05c3f9eca844b0e7b824a191223a4906c825b1..36f82743231a7160050695267e75a08e0cd73e03 100644
--- a/gcc/config/aarch64/arm_neon.h
+++ b/gcc/config/aarch64/arm_neon.h
@@ -34660,6 +34660,27 @@ vbfdotq_laneq_f32 (float32x4_t __r, bfloat16x8_t __a, bfloat16x8_t __b,
return __builtin_aarch64_bfdot_laneqv4sf (__r, __a, __b, __index);
}
+__extension__ extern __inline bfloat16x4_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vcvt_bf16_f32 (float32x4_t __a)
+{
+ return __builtin_aarch64_bfcvtnv4bf (__a);
+}
+
+__extension__ extern __inline bfloat16x8_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vcvtq_low_bf16_f32 (float32x4_t __a)
+{
+ return __builtin_aarch64_bfcvtn_qv8bf (__a);
+}
+
+__extension__ extern __inline bfloat16x8_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vcvtq_high_bf16_f32 (bfloat16x8_t __inactive, float32x4_t __a)
+{
+ return __builtin_aarch64_bfcvtn2v8bf (__inactive, __a);
+}
+
#pragma GCC pop_options
/* AdvSIMD 8-bit Integer Matrix Multiply (I8MM) intrinsics. */
diff --git a/gcc/config/aarch64/iterators.md b/gcc/config/aarch64/iterators.md
index fc973086cb91ae0dc54eeeb0b832d522539d7982..9f25d4805d2d78025885f573176296e71f74f447 100644
--- a/gcc/config/aarch64/iterators.md
+++ b/gcc/config/aarch64/iterators.md
@@ -455,6 +455,9 @@
;; SVE predicate modes that control 16-bit, 32-bit or 64-bit elements.
(define_mode_iterator PRED_HSD [VNx8BI VNx4BI VNx2BI])
+;; Bfloat16 modes to which V4SF can be converted
+(define_mode_iterator V4SF_TO_BF [V4BF V8BF])
+
;; ------------------------------------------------------------------
;; Unspec enumerations for Advance SIMD. These could well go into
;; aarch64.md but for their use in int_iterators here.
@@ -808,6 +811,9 @@
UNSPEC_USDOT ; Used in aarch64-simd.md.
UNSPEC_SUDOT ; Used in aarch64-simd.md.
UNSPEC_BFDOT ; Used in aarch64-simd.md.
+ UNSPEC_BFCVTN ; Used in aarch64-simd.md.
+ UNSPEC_BFCVTN2 ; Used in aarch64-simd.md.
+ UNSPEC_BFCVT ; Used in aarch64-simd.md.
])
;; ------------------------------------------------------------------
diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/bfcvt-compile.c b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/bfcvt-compile.c
new file mode 100644
index 0000000000000000000000000000000000000000..bbea630b1820d578bdf1619834f29b919f5c3f32
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/bfcvt-compile.c
@@ -0,0 +1,48 @@
+/* { dg-do assemble { target { aarch64*-*-* } } } */
+/* { dg-require-effective-target arm_v8_2a_bf16_neon_ok } */
+/* { dg-add-options arm_v8_2a_bf16_neon } */
+/* { dg-additional-options "-save-temps" } */
+/* { dg-final { check-function-bodies "**" "" {-O[^0]} } } */
+/* { dg-skip-if "" { *-*-* } { "-fno-fat-lto-objects" } } */
+
+#include <arm_neon.h>
+
+/*
+**test_bfcvtn:
+** bfcvtn v0.4h, v0.4s
+** ret
+*/
+bfloat16x4_t test_bfcvtn (float32x4_t a)
+{
+ return vcvt_bf16_f32 (a);
+}
+
+/*
+**test_bfcvtnq:
+** bfcvtn v0.4h, v0.4s
+** ret
+*/
+bfloat16x8_t test_bfcvtnq (float32x4_t a)
+{
+ return vcvtq_low_bf16_f32 (a);
+}
+
+/*
+**test_bfcvtnq2:
+** bfcvtn2 v0.8h, v1.4s
+** ret
+*/
+bfloat16x8_t test_bfcvtnq2 (bfloat16x8_t inactive, float32x4_t a)
+{
+ return vcvtq_high_bf16_f32 (inactive, a);
+}
+
+/*
+**test_bfcvt:
+** bfcvt h0, s0
+** ret
+*/
+bfloat16_t test_bfcvt (float32_t a)
+{
+ return vcvth_bf16_f32 (a);
+}
diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/bfcvt-nobf16.c b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/bfcvt-nobf16.c
new file mode 100644
index 0000000000000000000000000000000000000000..9904d65f9148debc1a1e147d75e5c54fb0f9da71
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/bfcvt-nobf16.c
@@ -0,0 +1,10 @@
+/* { dg-do assemble { target { aarch64*-*-* } } } */
+/* { dg-additional-options "-march=armv8.2-a+nobf16" } */
+
+#include <arm_neon.h>
+
+bfloat16_t test_bfcvt (float32_t a)
+{
+ /* { dg-error "inlining failed .* 'vcvth_bf16_f32" "" { target *-*-* } 0 } */
+ return vcvth_bf16_f32 (a);
+}
diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/bfcvt-nosimd.c b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/bfcvt-nosimd.c
new file mode 100644
index 0000000000000000000000000000000000000000..1da230b988c33832a791f0cc23553f3e6889099e
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/bfcvt-nosimd.c
@@ -0,0 +1,16 @@
+/* { dg-do assemble { target { aarch64*-*-* } } } */
+/* { dg-require-effective-target aarch64_asm_bf16_ok } */
+/* { dg-additional-options "-save-temps -march=armv8.2-a+bf16+nosimd" } */
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include <arm_neon.h>
+
+/*
+**test_bfcvt:
+** bfcvt h0, s0
+** ret
+*/
+bfloat16_t test_bfcvt (float32_t a)
+{
+ return vcvth_bf16_f32 (a);
+}
diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/bfcvtn-nobf16.c b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/bfcvtn-nobf16.c
new file mode 100644
index 0000000000000000000000000000000000000000..b3b6db12358677b2f34169214241318e5d0d764f
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/bfcvtn-nobf16.c
@@ -0,0 +1,10 @@
+/* { dg-do assemble { target { aarch64*-*-* } } } */
+/* { dg-additional-options "-march=armv8.2-a+nobf16" } */
+
+#include <arm_neon.h>
+
+bfloat16x4_t test_bfcvtn (float32x4_t a)
+{
+ /* { dg-error "inlining failed .* 'vcvt_bf16_f32" "" { target *-*-* } 0 } */
+ return vcvt_bf16_f32 (a);
+}
diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/bfcvtnq2-untied.c b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/bfcvtnq2-untied.c
new file mode 100644
index 0000000000000000000000000000000000000000..4b730e39d4ec2fb680235a61cc0a6d16a8ac4cad
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/bfcvtnq2-untied.c
@@ -0,0 +1,20 @@
+/* { dg-do assemble { target { aarch64*-*-* } } } */
+/* { dg-require-effective-target arm_v8_2a_bf16_neon_ok } */
+/* { dg-add-options arm_v8_2a_bf16_neon } */
+/* { dg-additional-options "-save-temps" } */
+/* { dg-final { check-function-bodies "**" "" {-O[^0]} } } */
+/* { dg-skip-if "" { *-*-* } { "-fno-fat-lto-objects" } } */
+
+#include <arm_neon.h>
+
+/*
+**test_bfcvtnq2_untied:
+** mov v0.16b, v1.16b
+** bfcvtn2 v0.8h, v2.4s
+** ret
+*/
+bfloat16x8_t test_bfcvtnq2_untied (bfloat16x8_t unused, bfloat16x8_t inactive,
+ float32x4_t a)
+{
+ return vcvtq_high_bf16_f32 (inactive, a);
+}