Hi all, this is the fourth patch of the series to backport a number of bfloat16 intrinsics from trunk to gcc-10.
This patch includes the fixes to the tests that we have applied on master. Please refer to: ACLE <https://developer.arm.com/docs/101028/latest> ISA <https://developer.arm.com/docs/ddi0596/latest> The series has been bootstrapped on arm-linux-gnueabihf and regtested. Okay for gcc-10? Thanks Andrea
>From c2b787d773ff51485d0fdc594596b0873beb59c5 Mon Sep 17 00:00:00 2001 From: Andrea Corallo <andrea.cora...@arm.com> Date: Thu, 29 Oct 2020 15:11:37 +0100 Subject: [PATCH 4/6] arm: Add vst1_bf16 + vst1q_bf16 intrinsics gcc/ChangeLog 2020-10-29 Andrea Corallo <andrea.cora...@arm.com> * config/arm/arm_neon.h (vst1_bf16, vst1q_bf16): Add intrinsics. * config/arm/arm_neon_builtins.def : Touch for: __builtin_neon_vst1v4bf, __builtin_neon_vst1v8bf. gcc/testsuite/ChangeLog 2020-10-29 Andrea Corallo <andrea.cora...@arm.com> * gcc.target/arm/simd/vst1_bf16_1.c: New test. --- gcc/config/arm/arm_neon.h | 14 +++++++++ gcc/config/arm/arm_neon_builtins.def | 5 ++-- .../gcc.target/arm/simd/vst1_bf16_1.c | 29 +++++++++++++++++++ 3 files changed, 46 insertions(+), 2 deletions(-) create mode 100644 gcc/testsuite/gcc.target/arm/simd/vst1_bf16_1.c diff --git a/gcc/config/arm/arm_neon.h b/gcc/config/arm/arm_neon.h index b77175eaa3e..24aad3370f6 100644 --- a/gcc/config/arm/arm_neon.h +++ b/gcc/config/arm/arm_neon.h @@ -19509,6 +19509,20 @@ vbfmlaltq_laneq_f32 (float32x4_t __r, bfloat16x8_t __a, bfloat16x8_t __b, return __builtin_neon_vfmat_laneqv8bf (__r, __a, __b, __index); } +__extension__ extern __inline void +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vst1_bf16 (bfloat16_t * __a, bfloat16x4_t __b) +{ + __builtin_neon_vst1v4bf (__a, __b); +} + +__extension__ extern __inline void +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vst1q_bf16 (bfloat16_t * __a, bfloat16x8_t __b) +{ + __builtin_neon_vst1v8bf (__a, __b); +} + __extension__ extern __inline void __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vst2_bf16 (bfloat16_t * __ptr, bfloat16x4x2_t __val) diff --git a/gcc/config/arm/arm_neon_builtins.def b/gcc/config/arm/arm_neon_builtins.def index 07eda44cc58..e3ab6281497 100644 --- a/gcc/config/arm/arm_neon_builtins.def +++ b/gcc/config/arm/arm_neon_builtins.def @@ -317,8 +317,9 @@ VAR12 (LOAD1LANE, vld1_lane, v8qi, 
v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di, v4bf, v8bf) VAR10 (LOAD1, vld1_dup, v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) -VAR12 (STORE1, vst1, - v8qi, v4hi, v4hf, v2si, v2sf, di, v16qi, v8hi, v8hf, v4si, v4sf, v2di) +VAR14 (STORE1, vst1, + v8qi, v4hi, v4hf, v2si, v2sf, di, v16qi, v8hi, v8hf, v4si, v4sf, v2di, + v4bf, v8bf) VAR14 (STORE1LANE, vst1_lane, v8qi, v4hi, v4hf, v2si, v2sf, di, v16qi, v8hi, v8hf, v4si, v4sf, v2di, v4bf, v8bf) VAR13 (LOAD1, vld2, diff --git a/gcc/testsuite/gcc.target/arm/simd/vst1_bf16_1.c b/gcc/testsuite/gcc.target/arm/simd/vst1_bf16_1.c new file mode 100644 index 00000000000..06fb58ecd79 --- /dev/null +++ b/gcc/testsuite/gcc.target/arm/simd/vst1_bf16_1.c @@ -0,0 +1,29 @@ +/* { dg-do assemble } */ +/* { dg-require-effective-target arm_v8_2a_bf16_neon_ok } */ +/* { dg-add-options arm_v8_2a_bf16_neon } */ +/* { dg-additional-options "-save-temps -O2 -mfloat-abi=hard" } */ +/* { dg-final { check-function-bodies "**" "" } } */ + +#include "arm_neon.h" + +/* +**test_vst1_bf16: +** vst1.16 {d0}, \[r0\] +** bx lr +*/ +void +test_vst1_bf16 (bfloat16_t *a, bfloat16x4_t b) +{ + vst1_bf16 (a, b); +} + +/* +**test_vst1q_bf16: +** vst1.16 {d0-d1}, \[r0\] +** bx lr +*/ +void +test_vst1q_bf16 (bfloat16_t *a, bfloat16x8_t b) +{ + vst1q_bf16 (a, b); +} -- 2.20.1