Hi all,
This patch enables intrinsics to convert BFloat16 scalar and vector operands to
Float32 modes.
The intrinsics are implemented by shifting each BFloat16 item 16 bits to left
using shl/shll/shll2 instructions.
Intrinsics are documented at
https://developer.arm.com/architectures/instruction-sets/simd-isas/neon/intrinsics
ISA is documented at https://developer.arm.com/docs/ddi0596/latest
Regtested and bootstrapped.
Is it OK for trunk please?
Thanks
Dennis
gcc/ChangeLog:
2020-10-29 Dennis Zhang <dennis.zh...@arm.com>
* config/aarch64/aarch64-simd-builtins.def(vbfcvt): New entry.
(vbfcvt_high, bfcvt): Likewise.
* config/aarch64/aarch64-simd.md(aarch64_vbfcvt<mode>): New entry.
(aarch64_vbfcvt_highv8bf, aarch64_bfcvtsf): Likewise.
* config/aarch64/arm_bf16.h (vcvtah_f32_bf16): New intrinsic.
* config/aarch64/arm_neon.h (vcvt_f32_bf16): Likewise.
(vcvtq_low_f32_bf16, vcvtq_high_f32_bf16): Likewise.
gcc/testsuite/ChangeLog
2020-10-29 Dennis Zhang <dennis.zh...@arm.com>
* gcc.target/aarch64/advsimd-intrinsics/bfcvt-compile.c
(test_vcvt_f32_bf16, test_vcvtq_low_f32_bf16): New tests.
(test_vcvtq_high_f32_bf16, test_vcvth_f32_bf16): Likewise.
diff --git a/gcc/config/aarch64/aarch64-simd-builtins.def b/gcc/config/aarch64/aarch64-simd-builtins.def
index 5bc596dbffc..b68c3ca7f4b 100644
--- a/gcc/config/aarch64/aarch64-simd-builtins.def
+++ b/gcc/config/aarch64/aarch64-simd-builtins.def
@@ -732,3 +732,8 @@
VAR1 (UNOP, bfcvtn_q, 0, ALL, v8bf)
VAR1 (BINOP, bfcvtn2, 0, ALL, v8bf)
VAR1 (UNOP, bfcvt, 0, ALL, bf)
+
+ /* Implemented by aarch64_{v}bfcvt{_high}<mode>. */
+ VAR2 (UNOP, vbfcvt, 0, ALL, v4bf, v8bf)
+ VAR1 (UNOP, vbfcvt_high, 0, ALL, v8bf)
+ VAR1 (UNOP, bfcvt, 0, ALL, sf)
diff --git a/gcc/config/aarch64/aarch64-simd.md b/gcc/config/aarch64/aarch64-simd.md
index 381a702eba0..5ae79d67981 100644
--- a/gcc/config/aarch64/aarch64-simd.md
+++ b/gcc/config/aarch64/aarch64-simd.md
@@ -7238,3 +7238,31 @@
"bfcvt\\t%h0, %s1"
[(set_attr "type" "f_cvt")]
)
+
+;; Use shl/shll/shll2 to convert BF scalar/vector modes to SF modes.
+(define_insn "aarch64_vbfcvt<mode>"
+ [(set (match_operand:V4SF 0 "register_operand" "=w")
+ (unspec:V4SF [(match_operand:VBF 1 "register_operand" "w")]
+ UNSPEC_BFCVTN))]
+ "TARGET_BF16_SIMD"
+ "shll\\t%0.4s, %1.4h, #16"
+ [(set_attr "type" "neon_shift_imm_long")]
+)
+
+(define_insn "aarch64_vbfcvt_highv8bf"
+ [(set (match_operand:V4SF 0 "register_operand" "=w")
+ (unspec:V4SF [(match_operand:V8BF 1 "register_operand" "w")]
+ UNSPEC_BFCVTN2))]
+ "TARGET_BF16_SIMD"
+ "shll2\\t%0.4s, %1.8h, #16"
+ [(set_attr "type" "neon_shift_imm_long")]
+)
+
+(define_insn "aarch64_bfcvtsf"
+ [(set (match_operand:SF 0 "register_operand" "=w")
+ (unspec:SF [(match_operand:BF 1 "register_operand" "w")]
+ UNSPEC_BFCVT))]
+ "TARGET_BF16_FP"
+ "shl\\t%d0, %d1, #16"
+ [(set_attr "type" "neon_shift_reg")]
+)
diff --git a/gcc/config/aarch64/arm_bf16.h b/gcc/config/aarch64/arm_bf16.h
index 984875dcc01..881615498d3 100644
--- a/gcc/config/aarch64/arm_bf16.h
+++ b/gcc/config/aarch64/arm_bf16.h
@@ -40,6 +40,13 @@ vcvth_bf16_f32 (float32_t __a)
return __builtin_aarch64_bfcvtbf (__a);
}
+__extension__ extern __inline float32_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vcvtah_f32_bf16 (bfloat16_t __a)
+{
+ return __builtin_aarch64_bfcvtsf (__a);
+}
+
#pragma GCC pop_options
#endif
diff --git a/gcc/config/aarch64/arm_neon.h b/gcc/config/aarch64/arm_neon.h
index 85c0d62ca12..9c0386ed7b1 100644
--- a/gcc/config/aarch64/arm_neon.h
+++ b/gcc/config/aarch64/arm_neon.h
@@ -35716,6 +35716,27 @@ vcvtq_high_bf16_f32 (bfloat16x8_t __inactive, float32x4_t __a)
return __builtin_aarch64_bfcvtn2v8bf (__inactive, __a);
}
+__extension__ extern __inline float32x4_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vcvt_f32_bf16 (bfloat16x4_t __a)
+{
+ return __builtin_aarch64_vbfcvtv4bf (__a);
+}
+
+__extension__ extern __inline float32x4_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vcvtq_low_f32_bf16 (bfloat16x8_t __a)
+{
+ return __builtin_aarch64_vbfcvtv8bf (__a);
+}
+
+__extension__ extern __inline float32x4_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vcvtq_high_f32_bf16 (bfloat16x8_t __a)
+{
+ return __builtin_aarch64_vbfcvt_highv8bf (__a);
+}
+
#pragma GCC pop_options
/* AdvSIMD 8-bit Integer Matrix Multiply (I8MM) intrinsics. */
diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/bfcvt-compile.c b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/bfcvt-compile.c
index bbea630b182..47af7c494d9 100644
--- a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/bfcvt-compile.c
+++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/bfcvt-compile.c
@@ -46,3 +46,43 @@ bfloat16_t test_bfcvt (float32_t a)
{
return vcvth_bf16_f32 (a);
}
+
+/*
+**test_vcvt_f32_bf16:
+** shll v0.4s, v0.4h, #16
+** ret
+*/
+float32x4_t test_vcvt_f32_bf16 (bfloat16x4_t a)
+{
+ return vcvt_f32_bf16 (a);
+}
+
+/*
+**test_vcvtq_low_f32_bf16:
+** shll v0.4s, v0.4h, #16
+** ret
+*/
+float32x4_t test_vcvtq_low_f32_bf16 (bfloat16x8_t a)
+{
+ return vcvtq_low_f32_bf16 (a);
+}
+
+/*
+**test_vcvtq_high_f32_bf16:
+** shll2 v0.4s, v0.8h, #16
+** ret
+*/
+float32x4_t test_vcvtq_high_f32_bf16 (bfloat16x8_t a)
+{
+ return vcvtq_high_f32_bf16 (a);
+}
+
+/*
+**test_vcvtah_f32_bf16:
+** shl d0, d0, #16
+** ret
+*/
+float32_t test_vcvtah_f32_bf16 (bfloat16_t a)
+{
+ return vcvtah_f32_bf16 (a);
+}