Hi all,

this is the fourth patch of the series backporting a number of bfloat16
intrinsics from trunk to gcc-10.

These patches include the fixes to the tests that we have applied
on master.

Please refer to:
ACLE <https://developer.arm.com/docs/101028/latest>
ISA  <https://developer.arm.com/docs/ddi0596/latest>

The series has been bootstrapped on arm-linux-gnueabihf and regtested.

Okay for gcc-10?

Thanks

  Andrea

>From c2b787d773ff51485d0fdc594596b0873beb59c5 Mon Sep 17 00:00:00 2001
From: Andrea Corallo <andrea.cora...@arm.com>
Date: Thu, 29 Oct 2020 15:11:37 +0100
Subject: [PATCH 4/6] arm: Add vst1_bf16 + vst1q_bf16 intrinsics

gcc/ChangeLog

2020-10-29  Andrea Corallo  <andrea.cora...@arm.com>

        * config/arm/arm_neon.h (vst1_bf16, vst1q_bf16): Add intrinsics.
        * config/arm/arm_neon_builtins.def : Touch for:
        __builtin_neon_vst1v4bf, __builtin_neon_vst1v8bf.

gcc/testsuite/ChangeLog

2020-10-29  Andrea Corallo  <andrea.cora...@arm.com>

        * gcc.target/arm/simd/vst1_bf16_1.c: New test.
---
 gcc/config/arm/arm_neon.h                     | 14 +++++++++
 gcc/config/arm/arm_neon_builtins.def          |  5 ++--
 .../gcc.target/arm/simd/vst1_bf16_1.c         | 29 +++++++++++++++++++
 3 files changed, 46 insertions(+), 2 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/arm/simd/vst1_bf16_1.c

diff --git a/gcc/config/arm/arm_neon.h b/gcc/config/arm/arm_neon.h
index b77175eaa3e..24aad3370f6 100644
--- a/gcc/config/arm/arm_neon.h
+++ b/gcc/config/arm/arm_neon.h
@@ -19509,6 +19509,20 @@ vbfmlaltq_laneq_f32 (float32x4_t __r, bfloat16x8_t __a, bfloat16x8_t __b,
   return __builtin_neon_vfmat_laneqv8bf (__r, __a, __b, __index);
 }
 
+__extension__ extern __inline void
+__attribute__  ((__always_inline__, __gnu_inline__, __artificial__))
+vst1_bf16 (bfloat16_t * __a, bfloat16x4_t __b)
+{
+  __builtin_neon_vst1v4bf (__a, __b);
+}
+
+__extension__ extern __inline void
+__attribute__  ((__always_inline__, __gnu_inline__, __artificial__))
+vst1q_bf16 (bfloat16_t * __a, bfloat16x8_t __b)
+{
+  __builtin_neon_vst1v8bf (__a, __b);
+}
+
 __extension__ extern __inline void
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vst2_bf16 (bfloat16_t * __ptr, bfloat16x4x2_t __val)
diff --git a/gcc/config/arm/arm_neon_builtins.def b/gcc/config/arm/arm_neon_builtins.def
index 07eda44cc58..e3ab6281497 100644
--- a/gcc/config/arm/arm_neon_builtins.def
+++ b/gcc/config/arm/arm_neon_builtins.def
@@ -317,8 +317,9 @@ VAR12 (LOAD1LANE, vld1_lane,
        v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di, v4bf, v8bf)
 VAR10 (LOAD1, vld1_dup,
        v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di)
-VAR12 (STORE1, vst1,
-       v8qi, v4hi, v4hf, v2si, v2sf, di, v16qi, v8hi, v8hf, v4si, v4sf, v2di)
+VAR14 (STORE1, vst1,
+        v8qi, v4hi, v4hf, v2si, v2sf, di, v16qi, v8hi, v8hf, v4si, v4sf, v2di,
+        v4bf, v8bf)
 VAR14 (STORE1LANE, vst1_lane,
       v8qi, v4hi, v4hf, v2si, v2sf, di, v16qi, v8hi, v8hf, v4si, v4sf, v2di, v4bf, v8bf)
 VAR13 (LOAD1, vld2,
diff --git a/gcc/testsuite/gcc.target/arm/simd/vst1_bf16_1.c b/gcc/testsuite/gcc.target/arm/simd/vst1_bf16_1.c
new file mode 100644
index 00000000000..06fb58ecd79
--- /dev/null
+++ b/gcc/testsuite/gcc.target/arm/simd/vst1_bf16_1.c
@@ -0,0 +1,29 @@
+/* { dg-do assemble } */
+/* { dg-require-effective-target arm_v8_2a_bf16_neon_ok } */
+/* { dg-add-options arm_v8_2a_bf16_neon } */
+/* { dg-additional-options "-save-temps -O2 -mfloat-abi=hard" }  */
+/* { dg-final { check-function-bodies "**" "" } } */
+
+#include "arm_neon.h"
+
+/*
+**test_vst1_bf16:
+**     vst1.16 {d0}, \[r0\]
+**     bx      lr
+*/
+void
+test_vst1_bf16 (bfloat16_t *a, bfloat16x4_t b)
+{
+  vst1_bf16 (a, b);
+}
+
+/*
+**test_vst1q_bf16:
+**     vst1.16 {d0-d1}, \[r0\]
+**     bx      lr
+*/
+void
+test_vst1q_bf16 (bfloat16_t *a, bfloat16x8_t b)
+{
+  vst1q_bf16 (a, b);
+}
-- 
2.20.1

Reply via email to