[PATCH][AArch64] ACLE intrinsics: convert from BFloat16 to Float32

Dennis Zhang via Gcc-patches Thu, 29 Oct 2020 05:20:03 -0700

Hi all,

This patch enables intrinsics to convert BFloat16 scalar and vector operands to 
Float32 modes.
The intrinsics are implemented by shifting each BFloat16 item 16 bits to left 
using shl/shll/shll2 instructions.


Intrinsics are documented at 
https://developer.arm.com/architectures/instruction-sets/simd-isas/neon/intrinsics
ISA is documented at https://developer.arm.com/docs/ddi0596/latest

Regtested and bootstrapped.

Is it OK for trunk please?

Thanks
Dennis

gcc/ChangeLog:

2020-10-29  Dennis Zhang  <[email protected]>

        * config/aarch64/aarch64-simd-builtins.def(vbfcvt): New entry.
        (vbfcvt_high, bfcvt): Likewise.
        * config/aarch64/aarch64-simd.md(aarch64_vbfcvt<mode>): New entry.
        (aarch64_vbfcvt_highv8bf, aarch64_bfcvtsf): Likewise.
        * config/aarch64/arm_bf16.h (vcvtah_f32_bf16): New intrinsic.
        * config/aarch64/arm_neon.h (vcvt_f32_bf16): Likewise.
        (vcvtq_low_f32_bf16, vcvtq_high_f32_bf16): Likewise.

gcc/testsuite/ChangeLog

2020-10-29  Dennis Zhang  <[email protected]>

        * gcc.target/aarch64/advsimd-intrinsics/bfcvt-compile.c
        (test_vcvt_f32_bf16, test_vcvtq_low_f32_bf16): New tests.
        (test_vcvtq_high_f32_bf16, test_vcvth_f32_bf16): Likewise.

diff --git a/gcc/config/aarch64/aarch64-simd-builtins.def b/gcc/config/aarch64/aarch64-simd-builtins.def
index 5bc596dbffc..b68c3ca7f4b 100644
--- a/gcc/config/aarch64/aarch64-simd-builtins.def
+++ b/gcc/config/aarch64/aarch64-simd-builtins.def
@@ -732,3 +732,8 @@
   VAR1 (UNOP, bfcvtn_q, 0, ALL, v8bf)
   VAR1 (BINOP, bfcvtn2, 0, ALL, v8bf)
   VAR1 (UNOP, bfcvt, 0, ALL, bf)
+
+  /* Implemented by aarch64_{v}bfcvt{_high}<mode>.  */
+  VAR2 (UNOP, vbfcvt, 0, ALL, v4bf, v8bf)
+  VAR1 (UNOP, vbfcvt_high, 0, ALL, v8bf)
+  VAR1 (UNOP, bfcvt, 0, ALL, sf)
diff --git a/gcc/config/aarch64/aarch64-simd.md b/gcc/config/aarch64/aarch64-simd.md
index 381a702eba0..5ae79d67981 100644
--- a/gcc/config/aarch64/aarch64-simd.md
+++ b/gcc/config/aarch64/aarch64-simd.md
@@ -7238,3 +7238,31 @@
   "bfcvt\\t%h0, %s1"
   [(set_attr "type" "f_cvt")]
 )
+
+;; Use shl/shll/shll2 to convert BF scalar/vector modes to SF modes.
+(define_insn "aarch64_vbfcvt<mode>"
+  [(set (match_operand:V4SF 0 "register_operand" "=w")
+	(unspec:V4SF [(match_operand:VBF 1 "register_operand" "w")]
+		      UNSPEC_BFCVTN))]
+  "TARGET_BF16_SIMD"
+  "shll\\t%0.4s, %1.4h, #16"
+  [(set_attr "type" "neon_shift_imm_long")]
+)
+
+(define_insn "aarch64_vbfcvt_highv8bf"
+  [(set (match_operand:V4SF 0 "register_operand" "=w")
+	(unspec:V4SF [(match_operand:V8BF 1 "register_operand" "w")]
+		      UNSPEC_BFCVTN2))]
+  "TARGET_BF16_SIMD"
+  "shll2\\t%0.4s, %1.8h, #16"
+  [(set_attr "type" "neon_shift_imm_long")]
+)
+
+(define_insn "aarch64_bfcvtsf"
+  [(set (match_operand:SF 0 "register_operand" "=w")
+	(unspec:SF [(match_operand:BF 1 "register_operand" "w")]
+		    UNSPEC_BFCVT))]
+  "TARGET_BF16_FP"
+  "shl\\t%d0, %d1, #16"
+  [(set_attr "type" "neon_shift_reg")]
+)
diff --git a/gcc/config/aarch64/arm_bf16.h b/gcc/config/aarch64/arm_bf16.h
index 984875dcc01..881615498d3 100644
--- a/gcc/config/aarch64/arm_bf16.h
+++ b/gcc/config/aarch64/arm_bf16.h
@@ -40,6 +40,13 @@ vcvth_bf16_f32 (float32_t __a)
   return __builtin_aarch64_bfcvtbf (__a);
 }
 
+__extension__ extern __inline float32_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vcvtah_f32_bf16 (bfloat16_t __a)
+{
+  return __builtin_aarch64_bfcvtsf (__a);
+}
+
 #pragma GCC pop_options
 
 #endif
diff --git a/gcc/config/aarch64/arm_neon.h b/gcc/config/aarch64/arm_neon.h
index 85c0d62ca12..9c0386ed7b1 100644
--- a/gcc/config/aarch64/arm_neon.h
+++ b/gcc/config/aarch64/arm_neon.h
@@ -35716,6 +35716,27 @@ vcvtq_high_bf16_f32 (bfloat16x8_t __inactive, float32x4_t __a)
   return __builtin_aarch64_bfcvtn2v8bf (__inactive, __a);
 }
 
+__extension__ extern __inline float32x4_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vcvt_f32_bf16 (bfloat16x4_t __a)
+{
+  return __builtin_aarch64_vbfcvtv4bf (__a);
+}
+
+__extension__ extern __inline float32x4_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vcvtq_low_f32_bf16 (bfloat16x8_t __a)
+{
+  return __builtin_aarch64_vbfcvtv8bf (__a);
+}
+
+__extension__ extern __inline float32x4_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vcvtq_high_f32_bf16 (bfloat16x8_t __a)
+{
+  return __builtin_aarch64_vbfcvt_highv8bf (__a);
+}
+
 #pragma GCC pop_options
 
 /* AdvSIMD 8-bit Integer Matrix Multiply (I8MM) intrinsics.  */
diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/bfcvt-compile.c b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/bfcvt-compile.c
index bbea630b182..47af7c494d9 100644
--- a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/bfcvt-compile.c
+++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/bfcvt-compile.c
@@ -46,3 +46,43 @@ bfloat16_t test_bfcvt (float32_t a)
 {
   return vcvth_bf16_f32 (a);
 }
+
+/*
+**test_vcvt_f32_bf16:
+**     shll	v0.4s, v0.4h, #16
+**     ret
+*/
+float32x4_t test_vcvt_f32_bf16 (bfloat16x4_t a)
+{
+  return vcvt_f32_bf16 (a);
+}
+
+/*
+**test_vcvtq_low_f32_bf16:
+**     shll	v0.4s, v0.4h, #16
+**     ret
+*/
+float32x4_t test_vcvtq_low_f32_bf16 (bfloat16x8_t a)
+{
+  return vcvtq_low_f32_bf16 (a);
+}
+
+/*
+**test_vcvtq_high_f32_bf16:
+**     shll2	v0.4s, v0.8h, #16
+**     ret
+*/
+float32x4_t test_vcvtq_high_f32_bf16 (bfloat16x8_t a)
+{
+  return vcvtq_high_f32_bf16 (a);
+}
+
+/*
+**test_vcvtah_f32_bf16:
+**     shl	d0, d0, #16
+**     ret
+*/
+float32_t test_vcvtah_f32_bf16 (bfloat16_t a)
+{
+  return vcvtah_f32_bf16 (a);
+}

[PATCH][AArch64] ACLE intrinsics: convert from BFloat16 to Float32

Reply via email to