Hi all,

I'd like to submit the following patch implementing the bfloat16_t
neon related copy intrinsics: vcopy_lane_bf16, vcopyq_lane_bf16,
vcopyq_laneq_bf16, vcopy_laneq_bf16.

Please see refer to:
ACLE <https://developer.arm.com/docs/101028/latest>
ISA  <https://developer.arm.com/docs/ddi0596/latest>

Regtested and bootstrapped.

Regards

  Andrea

>From d1335c0f49df849b87ee522e9507023113051839 Mon Sep 17 00:00:00 2001
From: Andrea Corallo <andrea.cora...@arm.com>
Date: Thu, 8 Oct 2020 12:29:00 +0200
Subject: [PATCH] aarch64: Add vcopy(q)__lane(q)_bf16 intrinsics

gcc/ChangeLog

2020-10-20  Andrea Corallo  <andrea.cora...@arm.com>

        * config/aarch64/arm_neon.h (vcopy_lane_bf16, vcopyq_lane_bf16)
        (vcopyq_laneq_bf16, vcopy_laneq_bf16): New intrinsics.

gcc/testsuite/ChangeLog

2020-10-20  Andrea Corallo  <andrea.cora...@arm.com>

        * gcc.target/aarch64/advsimd-intrinsics/bf16_vect_copy_lane_1.c:
        New test.
        * gcc.target/aarch64/advsimd-intrinsics/vcopy_lane_bf16_indices_1.c:
        Likewise.
        * gcc.target/aarch64/advsimd-intrinsics/vcopy_lane_bf16_indices_2.c:
        Likewise.
        * gcc.target/aarch64/advsimd-intrinsics/vcopy_laneq_bf16_indices_1.c:
        Likewise.
        * gcc.target/aarch64/advsimd-intrinsics/vcopy_laneq_bf16_indices_2.c:
        Likewise.
        * gcc.target/aarch64/advsimd-intrinsics/vcopyq_lane_bf16_indices_1.c:
        Likewise.
        * gcc.target/aarch64/advsimd-intrinsics/vcopyq_lane_bf16_indices_2.c:
        Likewise.
        * gcc.target/aarch64/advsimd-intrinsics/vcopyq_laneq_bf16_indices_1.c:
        Likewise.
        * gcc.target/aarch64/advsimd-intrinsics/vcopyq_laneq_bf16_indices_2.c:
        Likewise.
---
 gcc/config/aarch64/arm_neon.h                 | 36 +++++++++++++++++++
 .../bf16_vect_copy_lane_1.c                   | 32 +++++++++++++++++
 .../vcopy_lane_bf16_indices_1.c               | 18 ++++++++++
 .../vcopy_lane_bf16_indices_2.c               | 18 ++++++++++
 .../vcopy_laneq_bf16_indices_1.c              | 17 +++++++++
 .../vcopy_laneq_bf16_indices_2.c              | 17 +++++++++
 .../vcopyq_lane_bf16_indices_1.c              | 17 +++++++++
 .../vcopyq_lane_bf16_indices_2.c              | 17 +++++++++
 .../vcopyq_laneq_bf16_indices_1.c             | 17 +++++++++
 .../vcopyq_laneq_bf16_indices_2.c             | 17 +++++++++
 10 files changed, 206 insertions(+)
 create mode 100644 
gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/bf16_vect_copy_lane_1.c
 create mode 100644 
gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vcopy_lane_bf16_indices_1.c
 create mode 100644 
gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vcopy_lane_bf16_indices_2.c
 create mode 100644 
gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vcopy_laneq_bf16_indices_1.c
 create mode 100644 
gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vcopy_laneq_bf16_indices_2.c
 create mode 100644 
gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vcopyq_lane_bf16_indices_1.c
 create mode 100644 
gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vcopyq_lane_bf16_indices_2.c
 create mode 100644 
gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vcopyq_laneq_bf16_indices_1.c
 create mode 100644 
gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vcopyq_laneq_bf16_indices_2.c

diff --git a/gcc/config/aarch64/arm_neon.h b/gcc/config/aarch64/arm_neon.h
index 0088ea9896f..9c801661775 100644
--- a/gcc/config/aarch64/arm_neon.h
+++ b/gcc/config/aarch64/arm_neon.h
@@ -35155,6 +35155,42 @@ vcvtq_high_bf16_f32 (bfloat16x8_t __inactive, 
float32x4_t __a)
   return __builtin_aarch64_bfcvtn2v8bf (__inactive, __a);
 }
 
+__extension__ extern __inline bfloat16x4_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vcopy_lane_bf16 (bfloat16x4_t __a, const int __lane1,
+                bfloat16x4_t __b, const int __lane2)
+{
+  return __aarch64_vset_lane_any (__aarch64_vget_lane_any (__b, __lane2),
+                                 __a, __lane1);
+}
+
+__extension__ extern __inline bfloat16x8_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vcopyq_lane_bf16 (bfloat16x8_t __a, const int __lane1,
+                 bfloat16x4_t __b, const int __lane2)
+{
+  return __aarch64_vset_lane_any (__aarch64_vget_lane_any (__b, __lane2),
+                                 __a, __lane1);
+}
+
+__extension__ extern __inline bfloat16x4_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vcopy_laneq_bf16 (bfloat16x4_t __a, const int __lane1,
+                 bfloat16x8_t __b, const int __lane2)
+{
+  return __aarch64_vset_lane_any (__aarch64_vget_lane_any (__b, __lane2),
+                                 __a, __lane1);
+}
+
+__extension__ extern __inline bfloat16x8_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vcopyq_laneq_bf16 (bfloat16x8_t __a, const int __lane1,
+                  bfloat16x8_t __b, const int __lane2)
+{
+  return __aarch64_vset_lane_any (__aarch64_vget_lane_any (__b, __lane2),
+                                 __a, __lane1);
+}
+
 #pragma GCC pop_options
 
 /* AdvSIMD 8-bit Integer Matrix Multiply (I8MM) intrinsics.  */
diff --git 
a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/bf16_vect_copy_lane_1.c 
b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/bf16_vect_copy_lane_1.c
new file mode 100644
index 00000000000..d5aa215c21a
--- /dev/null
+++ 
b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/bf16_vect_copy_lane_1.c
@@ -0,0 +1,32 @@
+/* { dg-do assemble { target { aarch64*-*-* } } } */
+/* { dg-require-effective-target arm_v8_2a_bf16_neon_ok } */
+/* { dg-additional-options "-march=armv8.2-a+bf16 -O3 --save-temps -std=gnu90" 
} */
+
+#include "arm_neon.h"
+
+bfloat16x4_t __attribute__((noinline,noclone))
+test_vcopy_lane_bf16 (bfloat16x4_t a, bfloat16x4_t b)
+{
+  return vcopy_lane_bf16 (a, 1, b, 2);
+}
+
+bfloat16x8_t __attribute__((noinline,noclone))
+test_vcopyq_lane_bf16 (bfloat16x8_t a, bfloat16x4_t b)
+{
+  return vcopyq_lane_bf16 (a, 1, b, 2);
+}
+
+bfloat16x4_t __attribute__((noinline,noclone))
+test_vcopy_laneq_bf16 (bfloat16x4_t a, bfloat16x8_t b)
+{
+  return vcopy_laneq_bf16 (a, 1, b, 2);
+}
+
+bfloat16x8_t __attribute__((noinline,noclone))
+test_vcopyq_laneq_bf16 (bfloat16x8_t a, bfloat16x8_t b)
+{
+  return vcopyq_laneq_bf16 (a, 1, b, 2);
+}
+
+/* { dg-final { scan-assembler-times "ins\\tv0.h\\\[1\\\], v1.h\\\[2\\\]" 2 } 
} */
+/* { dg-final { scan-assembler-times "ins\\tv0.h\\\[1\\\], v1.h\\\[0\\\]" 2 } 
} */
diff --git 
a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vcopy_lane_bf16_indices_1.c
 
b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vcopy_lane_bf16_indices_1.c
new file mode 100644
index 00000000000..4b9a3b210a5
--- /dev/null
+++ 
b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vcopy_lane_bf16_indices_1.c
@@ -0,0 +1,18 @@
+#include <arm_neon.h>
+
+/* { dg-do compile } */
+/* { dg-skip-if "" { *-*-* } { "-fno-fat-lto-objects" } } */
+/* { dg-require-effective-target arm_v8_2a_bf16_neon_ok { target { arm*-*-* } 
} } */
+/* { dg-add-options arm_v8_2a_bf16_neon }  */
+
+bfloat16x4_t
+test_vcopy_lane_bf16 (bfloat16x4_t a, bfloat16x4_t b)
+{
+  bfloat16x4_t res;
+  res = vcopy_lane_bf16 (a, 0, b, 4);
+  res = vcopy_lane_bf16 (a, 0, b, -1);
+  return res;
+}
+
+/* { dg-error "lane -1 out of range 0 - 3" "" { target *-*-* } 0 } */
+/* { dg-error "lane 4 out of range 0 - 3" "" { target *-*-* } 0 } */
diff --git 
a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vcopy_lane_bf16_indices_2.c
 
b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vcopy_lane_bf16_indices_2.c
new file mode 100644
index 00000000000..659f0f210d7
--- /dev/null
+++ 
b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vcopy_lane_bf16_indices_2.c
@@ -0,0 +1,18 @@
+#include <arm_neon.h>
+
+/* { dg-do compile } */
+/* { dg-skip-if "" { *-*-* } { "-fno-fat-lto-objects" } } */
+/* { dg-require-effective-target arm_v8_2a_bf16_neon_ok { target { arm*-*-* } 
} } */
+/* { dg-add-options arm_v8_2a_bf16_neon }  */
+
+bfloat16x4_t
+test_vcopy_lane_bf16 (bfloat16x4_t a, bfloat16x4_t b)
+{
+  bfloat16x4_t res;
+  res = vcopy_lane_bf16 (a, -1, b, 2);
+  res = vcopy_lane_bf16 (a, 4, b, 2);
+  return res;
+}
+
+/* { dg-error "lane -1 out of range 0 - 3" "" { target *-*-* } 0 } */
+/* { dg-error "lane 4 out of range 0 - 3" "" { target *-*-* } 0 } */
diff --git 
a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vcopy_laneq_bf16_indices_1.c
 
b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vcopy_laneq_bf16_indices_1.c
new file mode 100644
index 00000000000..33cc289dc6d
--- /dev/null
+++ 
b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vcopy_laneq_bf16_indices_1.c
@@ -0,0 +1,17 @@
+#include <arm_neon.h>
+
+/* { dg-do compile } */
+/* { dg-skip-if "" { *-*-* } { "-fno-fat-lto-objects" } } */
+/* { dg-require-effective-target arm_v8_2a_bf16_neon_ok { target { arm*-*-* } 
} } */
+/* { dg-add-options arm_v8_2a_bf16_neon }  */
+
+bfloat16x4_t
+test_vcopy_laneq_bf16 (bfloat16x4_t a, bfloat16x8_t b)
+{
+  bfloat16x4_t res;
+  /* { dg-error "lane -1 out of range 0 - 3" "" { target *-*-* } 0 } */
+  res = vcopy_laneq_bf16 (a, -1, b, 2);
+  /* { dg-error "lane 4 out of range 0 - 3" "" { target *-*-* } 0 } */
+  res = vcopy_laneq_bf16 (a, 4, b, 2);
+  return res;
+}
diff --git 
a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vcopy_laneq_bf16_indices_2.c
 
b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vcopy_laneq_bf16_indices_2.c
new file mode 100644
index 00000000000..503cd0f2e45
--- /dev/null
+++ 
b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vcopy_laneq_bf16_indices_2.c
@@ -0,0 +1,17 @@
+#include <arm_neon.h>
+
+/* { dg-do compile } */
+/* { dg-skip-if "" { *-*-* } { "-fno-fat-lto-objects" } } */
+/* { dg-require-effective-target arm_v8_2a_bf16_neon_ok { target { arm*-*-* } 
} } */
+/* { dg-add-options arm_v8_2a_bf16_neon }  */
+
+bfloat16x4_t
+test_vcopy_laneq_bf16 (bfloat16x4_t a, bfloat16x8_t b)
+{
+  bfloat16x4_t res;
+  /* { dg-error "lane -1 out of range 0 - 7" "" { target *-*-* } 0 } */
+  res = vcopy_laneq_bf16 (a, 1, b, -1);
+  /* { dg-error "lane 8 out of range 0 - 7" "" { target *-*-* } 0 } */
+  res = vcopy_laneq_bf16 (a, 1, b, 8);
+  return res;
+}
diff --git 
a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vcopyq_lane_bf16_indices_1.c
 
b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vcopyq_lane_bf16_indices_1.c
new file mode 100644
index 00000000000..a46f54ab0ca
--- /dev/null
+++ 
b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vcopyq_lane_bf16_indices_1.c
@@ -0,0 +1,17 @@
+#include <arm_neon.h>
+
+/* { dg-do compile } */
+/* { dg-skip-if "" { *-*-* } { "-fno-fat-lto-objects" } } */
+/* { dg-require-effective-target arm_v8_2a_bf16_neon_ok { target { arm*-*-* } 
} } */
+/* { dg-add-options arm_v8_2a_bf16_neon }  */
+
+bfloat16x8_t
+test_vcopyq_lane_bf16 (bfloat16x8_t a, bfloat16x4_t b)
+{
+  bfloat16x8_t res;
+  /* { dg-error "lane -1 out of range 0 - 7" "" { target *-*-* } 0 } */
+  res = vcopyq_lane_bf16 (a, -1, b, 2);
+  /* { dg-error "lane 8 out of range 0 - 7" "" { target *-*-* } 0 } */
+  res = vcopyq_lane_bf16 (a, 8, b, 2);
+  return res;
+}
diff --git 
a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vcopyq_lane_bf16_indices_2.c
 
b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vcopyq_lane_bf16_indices_2.c
new file mode 100644
index 00000000000..100e5dd40ef
--- /dev/null
+++ 
b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vcopyq_lane_bf16_indices_2.c
@@ -0,0 +1,17 @@
+#include <arm_neon.h>
+
+/* { dg-do compile } */
+/* { dg-skip-if "" { *-*-* } { "-fno-fat-lto-objects" } } */
+/* { dg-require-effective-target arm_v8_2a_bf16_neon_ok { target { arm*-*-* } 
} } */
+/* { dg-add-options arm_v8_2a_bf16_neon }  */
+
+bfloat16x8_t
+test_vcopyq_lane_bf16 (bfloat16x8_t a, bfloat16x4_t b)
+{
+  bfloat16x8_t res;
+  /* { dg-error "lane -1 out of range 0 - 3" "" { target *-*-* } 0 } */
+  res = vcopyq_lane_bf16 (a, 2, b, -1);
+  /* { dg-error "lane 4 out of range 0 - 3" "" { target *-*-* } 0 } */
+  res = vcopyq_lane_bf16 (a, 2, b, 4);
+  return res;
+}
diff --git 
a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vcopyq_laneq_bf16_indices_1.c
 
b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vcopyq_laneq_bf16_indices_1.c
new file mode 100644
index 00000000000..914d7318f8d
--- /dev/null
+++ 
b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vcopyq_laneq_bf16_indices_1.c
@@ -0,0 +1,17 @@
+#include <arm_neon.h>
+
+/* { dg-do compile } */
+/* { dg-skip-if "" { *-*-* } { "-fno-fat-lto-objects" } } */
+/* { dg-require-effective-target arm_v8_2a_bf16_neon_ok { target { arm*-*-* } 
} } */
+/* { dg-add-options arm_v8_2a_bf16_neon }  */
+
+bfloat16x8_t
+test_vcopyq_laneq_bf16 (bfloat16x8_t a, bfloat16x8_t b)
+{
+  bfloat16x8_t res;
+  /* { dg-error "lane -1 out of range 0 - 7" "" { target *-*-* } 0 } */
+  res = vcopyq_laneq_bf16 (a, -1, b, 2);
+  /* { dg-error "lane 8 out of range 0 - 7" "" { target *-*-* } 0 } */
+  res = vcopyq_laneq_bf16 (a, 8, b, 2);
+  return res;
+}
diff --git 
a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vcopyq_laneq_bf16_indices_2.c
 
b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vcopyq_laneq_bf16_indices_2.c
new file mode 100644
index 00000000000..244e6eb514e
--- /dev/null
+++ 
b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vcopyq_laneq_bf16_indices_2.c
@@ -0,0 +1,17 @@
+#include <arm_neon.h>
+
+/* { dg-do compile } */
+/* { dg-skip-if "" { *-*-* } { "-fno-fat-lto-objects" } } */
+/* { dg-require-effective-target arm_v8_2a_bf16_neon_ok { target { arm*-*-* } 
} } */
+/* { dg-add-options arm_v8_2a_bf16_neon }  */
+
+bfloat16x8_t
+test_vcopyq_laneq_bf16 (bfloat16x8_t a, bfloat16x8_t b)
+{
+  bfloat16x8_t res;
+  /* { dg-error "lane -1 out of range 0 - 7" "" { target *-*-* } 0 } */
+  res = vcopyq_laneq_bf16 (a, 2, b, -1);
+  /* { dg-error "lane 8 out of range 0 - 7" "" { target *-*-* } 0 } */
+  res = vcopyq_laneq_bf16 (a, 2, b, 8);
+  return res;
+}
-- 
2.20.1

Reply via email to