MattDevereau created this revision.
MattDevereau added reviewers: bsmith, peterwaller-arm, DavidTruby, paulwalker-arm.
Herald added subscribers: psnobl, hiraditya, kristof.beyls, tschuett, mgorny.
Herald added a reviewer: efriedma.
MattDevereau requested review of this revision.
Herald added projects: clang, LLVM.
Herald added subscribers: llvm-commits, cfe-commits.

Adds the svset_neonq, svget_neonq and svdup_neonq AArch64 intrinsics, which
bridge between NEON 128-bit vector types and SVE scalable vector types:
svget_neonq reads the low 128 bits of an SVE vector as a NEON vector,
svset_neonq writes a NEON vector into the low 128 bits of an SVE vector, and
svdup_neonq broadcasts a NEON vector to every 128-bit chunk of an SVE vector.
The new arm_neon_sve_bridge.h header declares the intrinsics, and the
__ARM_NEON_SVE_BRIDGE feature macro is defined when both NEON and SVE are
available.

These are described in the ACLE specification:
https://github.com/ARM-software/acle/pull/72
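
For example (a minimal sketch, assuming a target with both NEON and SVE
enabled; the helper name below is hypothetical), the bridge lets ordinary
arm_neon.h code operate on the low 128 bits of an SVE vector:

  #include <arm_neon_sve_bridge.h>

  // Doubles the low 128 bits of an SVE vector, leaving the rest unchanged.
  svint32_t double_low_quadword(svint32_t s) {
    int32x4_t lo = svget_neonq_s32(s); // read the low 128 bits
    lo = vaddq_s32(lo, lo);            // plain NEON arithmetic
    return svset_neonq_s32(s, lo);     // write them back at element 0
  }

svdup_neonq_s32(lo) would instead return an SVE vector with lo broadcast to
every 128-bit chunk.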


Repository:
  rG LLVM Github Monorepo

https://reviews.llvm.org/D114713

Files:
  clang/include/clang/Basic/BuiltinsAArch64NeonSVEBridge.def
  clang/include/clang/Basic/BuiltinsAArch64NeonSVEBridge_cg.def
  clang/include/clang/Basic/BuiltinsSVE.def
  clang/lib/Basic/Targets/AArch64.cpp
  clang/lib/CodeGen/CGBuiltin.cpp
  clang/lib/Headers/CMakeLists.txt
  clang/lib/Headers/arm_neon_sve_bridge.h
  clang/test/CodeGen/aarch64_neon_sve_bridge_intrinsics/acle_neon_sve_bridge_dup_neonq.c
  clang/test/CodeGen/aarch64_neon_sve_bridge_intrinsics/acle_neon_sve_bridge_get_neonq.c
  clang/test/CodeGen/aarch64_neon_sve_bridge_intrinsics/acle_neon_sve_bridge_set_neonq.c
  llvm/lib/Target/AArch64/AArch64ISelLowering.cpp

Index: llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
===================================================================
--- llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -1322,6 +1322,7 @@
       setOperationAction(ISD::MGATHER, VT, Custom);
       setOperationAction(ISD::MSCATTER, VT, Custom);
       setOperationAction(ISD::MLOAD, VT, Custom);
+      setOperationAction(ISD::INSERT_SUBVECTOR, VT, Custom);
     }
 
     setOperationAction(ISD::SPLAT_VECTOR, MVT::nxv8bf16, Custom);
Index: clang/test/CodeGen/aarch64_neon_sve_bridge_intrinsics/acle_neon_sve_bridge_set_neonq.c
===================================================================
--- /dev/null
+++ clang/test/CodeGen/aarch64_neon_sve_bridge_intrinsics/acle_neon_sve_bridge_set_neonq.c
@@ -0,0 +1,188 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
+// REQUIRES: aarch64-registered-target
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +neon -target-feature +sve2 -target-feature +bf16 -fallow-half-arguments-and-returns -S -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +neon -target-feature +sve2 -target-feature +bf16 -fallow-half-arguments-and-returns -S -O1 -Werror -Wall -emit-llvm -o - -x c++ %s | FileCheck %s -check-prefix=CPP-CHECK
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +neon -target-feature +sve2 -target-feature +bf16 -fallow-half-arguments-and-returns -S -O1 -Werror -Wall -o /dev/null %s
+#include <arm_neon_sve_bridge.h>
+
+#define SVE_ACLE_FUNC(A1,A2,A3,A4) A1##A2##A3##A4
+
+// CHECK-LABEL: @test_svset_neonq_s8(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = call <vscale x 16 x i8> @llvm.experimental.vector.insert.nxv16i8.v16i8(<vscale x 16 x i8> [[S:%.*]], <16 x i8> [[N:%.*]], i64 0)
+// CHECK-NEXT:    ret <vscale x 16 x i8> [[TMP0]]
+//
+// CPP-CHECK-LABEL: @_Z19test_svset_neonq_s8u10__SVInt8_t11__Int8x16_t(
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = call <vscale x 16 x i8> @llvm.experimental.vector.insert.nxv16i8.v16i8(<vscale x 16 x i8> [[S:%.*]], <16 x i8> [[N:%.*]], i64 0)
+// CPP-CHECK-NEXT:    ret <vscale x 16 x i8> [[TMP0]]
+//
+svint8_t test_svset_neonq_s8(svint8_t s, int8x16_t n)
+{
+  return SVE_ACLE_FUNC(svset_neonq,_s8,,)(s, n);
+}
+
+// CHECK-LABEL: @test_svset_neonq_s16(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x i16> @llvm.experimental.vector.insert.nxv8i16.v8i16(<vscale x 8 x i16> [[S:%.*]], <8 x i16> [[N:%.*]], i64 0)
+// CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP0]]
+//
+// CPP-CHECK-LABEL: @_Z20test_svset_neonq_s16u11__SVInt16_t11__Int16x8_t(
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x i16> @llvm.experimental.vector.insert.nxv8i16.v8i16(<vscale x 8 x i16> [[S:%.*]], <8 x i16> [[N:%.*]], i64 0)
+// CPP-CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP0]]
+//
+svint16_t test_svset_neonq_s16(svint16_t s, int16x8_t n)
+{
+  return SVE_ACLE_FUNC(svset_neonq,_s16,,)(s, n);
+}
+
+// CHECK-LABEL: @test_svset_neonq_s32(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x i32> @llvm.experimental.vector.insert.nxv4i32.v4i32(<vscale x 4 x i32> [[S:%.*]], <4 x i32> [[N:%.*]], i64 0)
+// CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP0]]
+//
+// CPP-CHECK-LABEL: @_Z20test_svset_neonq_s32u11__SVInt32_t11__Int32x4_t(
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x i32> @llvm.experimental.vector.insert.nxv4i32.v4i32(<vscale x 4 x i32> [[S:%.*]], <4 x i32> [[N:%.*]], i64 0)
+// CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP0]]
+//
+svint32_t test_svset_neonq_s32(svint32_t s, int32x4_t n)
+{
+  return SVE_ACLE_FUNC(svset_neonq,_s32,,)(s, n);
+}
+
+// CHECK-LABEL: @test_svset_neonq_s64(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x i64> @llvm.experimental.vector.insert.nxv2i64.v2i64(<vscale x 2 x i64> [[S:%.*]], <2 x i64> [[N:%.*]], i64 0)
+// CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP0]]
+//
+// CPP-CHECK-LABEL: @_Z20test_svset_neonq_s64u11__SVInt64_t11__Int64x2_t(
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x i64> @llvm.experimental.vector.insert.nxv2i64.v2i64(<vscale x 2 x i64> [[S:%.*]], <2 x i64> [[N:%.*]], i64 0)
+// CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP0]]
+//
+svint64_t test_svset_neonq_s64(svint64_t s, int64x2_t n)
+{
+  return SVE_ACLE_FUNC(svset_neonq,_s64,,)(s, n);
+}
+
+// CHECK-LABEL: @test_svset_neonq_u8(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = call <vscale x 16 x i8> @llvm.experimental.vector.insert.nxv16i8.v16i8(<vscale x 16 x i8> [[S:%.*]], <16 x i8> [[N:%.*]], i64 0)
+// CHECK-NEXT:    ret <vscale x 16 x i8> [[TMP0]]
+//
+// CPP-CHECK-LABEL: @_Z19test_svset_neonq_u8u11__SVUint8_t12__Uint8x16_t(
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = call <vscale x 16 x i8> @llvm.experimental.vector.insert.nxv16i8.v16i8(<vscale x 16 x i8> [[S:%.*]], <16 x i8> [[N:%.*]], i64 0)
+// CPP-CHECK-NEXT:    ret <vscale x 16 x i8> [[TMP0]]
+//
+svuint8_t test_svset_neonq_u8(svuint8_t s, uint8x16_t n)
+{
+  return SVE_ACLE_FUNC(svset_neonq,_u8,,)(s, n);
+}
+
+// CHECK-LABEL: @test_svset_neonq_u16(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x i16> @llvm.experimental.vector.insert.nxv8i16.v8i16(<vscale x 8 x i16> [[S:%.*]], <8 x i16> [[N:%.*]], i64 0)
+// CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP0]]
+//
+// CPP-CHECK-LABEL: @_Z20test_svset_neonq_u16u12__SVUint16_t12__Uint16x8_t(
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x i16> @llvm.experimental.vector.insert.nxv8i16.v8i16(<vscale x 8 x i16> [[S:%.*]], <8 x i16> [[N:%.*]], i64 0)
+// CPP-CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP0]]
+//
+svuint16_t test_svset_neonq_u16(svuint16_t s, uint16x8_t n)
+{
+  return SVE_ACLE_FUNC(svset_neonq,_u16,,)(s, n);
+}
+
+// CHECK-LABEL: @test_svset_neonq_u32(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x i32> @llvm.experimental.vector.insert.nxv4i32.v4i32(<vscale x 4 x i32> [[S:%.*]], <4 x i32> [[N:%.*]], i64 0)
+// CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP0]]
+//
+// CPP-CHECK-LABEL: @_Z20test_svset_neonq_u32u12__SVUint32_t12__Uint32x4_t(
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x i32> @llvm.experimental.vector.insert.nxv4i32.v4i32(<vscale x 4 x i32> [[S:%.*]], <4 x i32> [[N:%.*]], i64 0)
+// CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP0]]
+//
+svuint32_t test_svset_neonq_u32(svuint32_t s, uint32x4_t n)
+{
+  return SVE_ACLE_FUNC(svset_neonq,_u32,,)(s, n);
+}
+
+// CHECK-LABEL: @test_svset_neonq_u64(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x i64> @llvm.experimental.vector.insert.nxv2i64.v2i64(<vscale x 2 x i64> [[S:%.*]], <2 x i64> [[N:%.*]], i64 0)
+// CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP0]]
+//
+// CPP-CHECK-LABEL: @_Z20test_svset_neonq_u64u12__SVUint64_t12__Uint64x2_t(
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x i64> @llvm.experimental.vector.insert.nxv2i64.v2i64(<vscale x 2 x i64> [[S:%.*]], <2 x i64> [[N:%.*]], i64 0)
+// CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP0]]
+//
+svuint64_t test_svset_neonq_u64(svuint64_t s, uint64x2_t n)
+{
+  return SVE_ACLE_FUNC(svset_neonq,_u64,,)(s, n);
+}
+
+// CHECK-LABEL: @test_svset_neonq_f16(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x half> @llvm.experimental.vector.insert.nxv8f16.v8f16(<vscale x 8 x half> [[S:%.*]], <8 x half> [[N:%.*]], i64 0)
+// CHECK-NEXT:    ret <vscale x 8 x half> [[TMP0]]
+//
+// CPP-CHECK-LABEL: @_Z20test_svset_neonq_f16u13__SVFloat16_t13__Float16x8_t(
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x half> @llvm.experimental.vector.insert.nxv8f16.v8f16(<vscale x 8 x half> [[S:%.*]], <8 x half> [[N:%.*]], i64 0)
+// CPP-CHECK-NEXT:    ret <vscale x 8 x half> [[TMP0]]
+//
+svfloat16_t test_svset_neonq_f16(svfloat16_t s, float16x8_t n)
+{
+  return SVE_ACLE_FUNC(svset_neonq,_f16,,)(s, n);
+}
+
+// CHECK-LABEL: @test_svset_neonq_f32(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x float> @llvm.experimental.vector.insert.nxv4f32.v4f32(<vscale x 4 x float> [[S:%.*]], <4 x float> [[N:%.*]], i64 0)
+// CHECK-NEXT:    ret <vscale x 4 x float> [[TMP0]]
+//
+// CPP-CHECK-LABEL: @_Z20test_svset_neonq_f32u13__SVFloat32_t13__Float32x4_t(
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x float> @llvm.experimental.vector.insert.nxv4f32.v4f32(<vscale x 4 x float> [[S:%.*]], <4 x float> [[N:%.*]], i64 0)
+// CPP-CHECK-NEXT:    ret <vscale x 4 x float> [[TMP0]]
+//
+svfloat32_t test_svset_neonq_f32(svfloat32_t s, float32x4_t n)
+{
+  return SVE_ACLE_FUNC(svset_neonq,_f32,,)(s, n);
+}
+
+// CHECK-LABEL: @test_svset_neonq_f64(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x double> @llvm.experimental.vector.insert.nxv2f64.v2f64(<vscale x 2 x double> [[S:%.*]], <2 x double> [[N:%.*]], i64 0)
+// CHECK-NEXT:    ret <vscale x 2 x double> [[TMP0]]
+//
+// CPP-CHECK-LABEL: @_Z20test_svset_neonq_f64u13__SVFloat64_t13__Float64x2_t(
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x double> @llvm.experimental.vector.insert.nxv2f64.v2f64(<vscale x 2 x double> [[S:%.*]], <2 x double> [[N:%.*]], i64 0)
+// CPP-CHECK-NEXT:    ret <vscale x 2 x double> [[TMP0]]
+//
+svfloat64_t test_svset_neonq_f64(svfloat64_t s, float64x2_t n)
+{
+  return SVE_ACLE_FUNC(svset_neonq,_f64,,)(s, n);
+}
+
+// CHECK-LABEL: @test_svset_neonq_bf16(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x bfloat> @llvm.experimental.vector.insert.nxv8bf16.v8bf16(<vscale x 8 x bfloat> [[S:%.*]], <8 x bfloat> [[N:%.*]], i64 0)
+// CHECK-NEXT:    ret <vscale x 8 x bfloat> [[TMP0]]
+//
+// CPP-CHECK-LABEL: @_Z21test_svset_neonq_bf16u14__SVBFloat16_t14__Bfloat16x8_t(
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x bfloat> @llvm.experimental.vector.insert.nxv8bf16.v8bf16(<vscale x 8 x bfloat> [[S:%.*]], <8 x bfloat> [[N:%.*]], i64 0)
+// CPP-CHECK-NEXT:    ret <vscale x 8 x bfloat> [[TMP0]]
+//
+svbfloat16_t test_svset_neonq_bf16(svbfloat16_t s, bfloat16x8_t n)
+{
+  return SVE_ACLE_FUNC(svset_neonq,_bf16,,)(s, n);
+}
Index: clang/test/CodeGen/aarch64_neon_sve_bridge_intrinsics/acle_neon_sve_bridge_get_neonq.c
===================================================================
--- /dev/null
+++ clang/test/CodeGen/aarch64_neon_sve_bridge_intrinsics/acle_neon_sve_bridge_get_neonq.c
@@ -0,0 +1,188 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
+// REQUIRES: aarch64-registered-target
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +neon -target-feature +sve2 -target-feature +bf16 -fallow-half-arguments-and-returns -S -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +neon -target-feature +sve2 -target-feature +bf16 -fallow-half-arguments-and-returns -S -O1 -Werror -Wall -emit-llvm -o - -x c++ %s | FileCheck %s -check-prefix=CPP-CHECK
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +neon -target-feature +sve2 -target-feature +bf16 -fallow-half-arguments-and-returns -S -O1 -Werror -Wall -o /dev/null %s
+#include <arm_neon_sve_bridge.h>
+
+#define SVE_ACLE_FUNC(A1,A2,A3,A4) A1##A2##A3##A4
+
+// CHECK-LABEL: @test_svget_neonq_s8(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = call <16 x i8> @llvm.experimental.vector.extract.v16i8.nxv16i8(<vscale x 16 x i8> [[N:%.*]], i64 0)
+// CHECK-NEXT:    ret <16 x i8> [[TMP0]]
+//
+// CPP-CHECK-LABEL: @_Z19test_svget_neonq_s8u10__SVInt8_t(
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = call <16 x i8> @llvm.experimental.vector.extract.v16i8.nxv16i8(<vscale x 16 x i8> [[N:%.*]], i64 0)
+// CPP-CHECK-NEXT:    ret <16 x i8> [[TMP0]]
+//
+int8x16_t test_svget_neonq_s8(svint8_t n)
+{
+  return SVE_ACLE_FUNC(svget_neonq,_s8,,)(n);
+}
+
+// CHECK-LABEL: @test_svget_neonq_s16(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = call <8 x i16> @llvm.experimental.vector.extract.v8i16.nxv8i16(<vscale x 8 x i16> [[N:%.*]], i64 0)
+// CHECK-NEXT:    ret <8 x i16> [[TMP0]]
+//
+// CPP-CHECK-LABEL: @_Z20test_svget_neonq_s16u11__SVInt16_t(
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = call <8 x i16> @llvm.experimental.vector.extract.v8i16.nxv8i16(<vscale x 8 x i16> [[N:%.*]], i64 0)
+// CPP-CHECK-NEXT:    ret <8 x i16> [[TMP0]]
+//
+int16x8_t test_svget_neonq_s16(svint16_t n)
+{
+  return SVE_ACLE_FUNC(svget_neonq,_s16,,)(n);
+}
+
+// CHECK-LABEL: @test_svget_neonq_s32(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = call <4 x i32> @llvm.experimental.vector.extract.v4i32.nxv4i32(<vscale x 4 x i32> [[N:%.*]], i64 0)
+// CHECK-NEXT:    ret <4 x i32> [[TMP0]]
+//
+// CPP-CHECK-LABEL: @_Z20test_svget_neonq_s32u11__SVInt32_t(
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = call <4 x i32> @llvm.experimental.vector.extract.v4i32.nxv4i32(<vscale x 4 x i32> [[N:%.*]], i64 0)
+// CPP-CHECK-NEXT:    ret <4 x i32> [[TMP0]]
+//
+int32x4_t test_svget_neonq_s32(svint32_t n)
+{
+  return SVE_ACLE_FUNC(svget_neonq,_s32,,)(n);
+}
+
+// CHECK-LABEL: @test_svget_neonq_s64(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = call <2 x i64> @llvm.experimental.vector.extract.v2i64.nxv2i64(<vscale x 2 x i64> [[N:%.*]], i64 0)
+// CHECK-NEXT:    ret <2 x i64> [[TMP0]]
+//
+// CPP-CHECK-LABEL: @_Z20test_svget_neonq_s64u11__SVInt64_t(
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = call <2 x i64> @llvm.experimental.vector.extract.v2i64.nxv2i64(<vscale x 2 x i64> [[N:%.*]], i64 0)
+// CPP-CHECK-NEXT:    ret <2 x i64> [[TMP0]]
+//
+int64x2_t test_svget_neonq_s64(svint64_t n)
+{
+  return SVE_ACLE_FUNC(svget_neonq,_s64,,)(n);
+}
+
+// CHECK-LABEL: @test_svget_neonq_u8(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = call <16 x i8> @llvm.experimental.vector.extract.v16i8.nxv16i8(<vscale x 16 x i8> [[N:%.*]], i64 0)
+// CHECK-NEXT:    ret <16 x i8> [[TMP0]]
+//
+// CPP-CHECK-LABEL: @_Z19test_svget_neonq_u8u11__SVUint8_t(
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = call <16 x i8> @llvm.experimental.vector.extract.v16i8.nxv16i8(<vscale x 16 x i8> [[N:%.*]], i64 0)
+// CPP-CHECK-NEXT:    ret <16 x i8> [[TMP0]]
+//
+uint8x16_t test_svget_neonq_u8(svuint8_t n)
+{
+  return SVE_ACLE_FUNC(svget_neonq,_u8,,)(n);
+}
+
+// CHECK-LABEL: @test_svget_neonq_u16(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = call <8 x i16> @llvm.experimental.vector.extract.v8i16.nxv8i16(<vscale x 8 x i16> [[N:%.*]], i64 0)
+// CHECK-NEXT:    ret <8 x i16> [[TMP0]]
+//
+// CPP-CHECK-LABEL: @_Z20test_svget_neonq_u16u12__SVUint16_t(
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = call <8 x i16> @llvm.experimental.vector.extract.v8i16.nxv8i16(<vscale x 8 x i16> [[N:%.*]], i64 0)
+// CPP-CHECK-NEXT:    ret <8 x i16> [[TMP0]]
+//
+uint16x8_t test_svget_neonq_u16(svuint16_t n)
+{
+  return SVE_ACLE_FUNC(svget_neonq,_u16,,)(n);
+}
+
+// CHECK-LABEL: @test_svget_neonq_u32(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = call <4 x i32> @llvm.experimental.vector.extract.v4i32.nxv4i32(<vscale x 4 x i32> [[N:%.*]], i64 0)
+// CHECK-NEXT:    ret <4 x i32> [[TMP0]]
+//
+// CPP-CHECK-LABEL: @_Z20test_svget_neonq_u32u12__SVUint32_t(
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = call <4 x i32> @llvm.experimental.vector.extract.v4i32.nxv4i32(<vscale x 4 x i32> [[N:%.*]], i64 0)
+// CPP-CHECK-NEXT:    ret <4 x i32> [[TMP0]]
+//
+uint32x4_t test_svget_neonq_u32(svuint32_t n)
+{
+  return SVE_ACLE_FUNC(svget_neonq,_u32,,)(n);
+}
+
+// CHECK-LABEL: @test_svget_neonq_u64(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = call <2 x i64> @llvm.experimental.vector.extract.v2i64.nxv2i64(<vscale x 2 x i64> [[N:%.*]], i64 0)
+// CHECK-NEXT:    ret <2 x i64> [[TMP0]]
+//
+// CPP-CHECK-LABEL: @_Z20test_svget_neonq_u64u12__SVUint64_t(
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = call <2 x i64> @llvm.experimental.vector.extract.v2i64.nxv2i64(<vscale x 2 x i64> [[N:%.*]], i64 0)
+// CPP-CHECK-NEXT:    ret <2 x i64> [[TMP0]]
+//
+uint64x2_t test_svget_neonq_u64(svuint64_t n)
+{
+  return SVE_ACLE_FUNC(svget_neonq,_u64,,)(n);
+}
+
+// CHECK-LABEL: @test_svget_neonq_f16(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = call <8 x half> @llvm.experimental.vector.extract.v8f16.nxv8f16(<vscale x 8 x half> [[N:%.*]], i64 0)
+// CHECK-NEXT:    ret <8 x half> [[TMP0]]
+//
+// CPP-CHECK-LABEL: @_Z20test_svget_neonq_f16u13__SVFloat16_t(
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = call <8 x half> @llvm.experimental.vector.extract.v8f16.nxv8f16(<vscale x 8 x half> [[N:%.*]], i64 0)
+// CPP-CHECK-NEXT:    ret <8 x half> [[TMP0]]
+//
+float16x8_t test_svget_neonq_f16(svfloat16_t n)
+{
+  return SVE_ACLE_FUNC(svget_neonq,_f16,,)(n);
+}
+
+// CHECK-LABEL: @test_svget_neonq_f32(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = call <4 x float> @llvm.experimental.vector.extract.v4f32.nxv4f32(<vscale x 4 x float> [[N:%.*]], i64 0)
+// CHECK-NEXT:    ret <4 x float> [[TMP0]]
+//
+// CPP-CHECK-LABEL: @_Z20test_svget_neonq_f32u13__SVFloat32_t(
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = call <4 x float> @llvm.experimental.vector.extract.v4f32.nxv4f32(<vscale x 4 x float> [[N:%.*]], i64 0)
+// CPP-CHECK-NEXT:    ret <4 x float> [[TMP0]]
+//
+float32x4_t test_svget_neonq_f32(svfloat32_t n)
+{
+  return SVE_ACLE_FUNC(svget_neonq,_f32,,)(n);
+}
+
+// CHECK-LABEL: @test_svget_neonq_f64(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = call <2 x double> @llvm.experimental.vector.extract.v2f64.nxv2f64(<vscale x 2 x double> [[N:%.*]], i64 0)
+// CHECK-NEXT:    ret <2 x double> [[TMP0]]
+//
+// CPP-CHECK-LABEL: @_Z20test_svget_neonq_f64u13__SVFloat64_t(
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = call <2 x double> @llvm.experimental.vector.extract.v2f64.nxv2f64(<vscale x 2 x double> [[N:%.*]], i64 0)
+// CPP-CHECK-NEXT:    ret <2 x double> [[TMP0]]
+//
+float64x2_t test_svget_neonq_f64(svfloat64_t n)
+{
+  return SVE_ACLE_FUNC(svget_neonq,_f64,,)(n);
+}
+
+// CHECK-LABEL: @test_svget_neonq_bf16(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = call <8 x bfloat> @llvm.experimental.vector.extract.v8bf16.nxv8bf16(<vscale x 8 x bfloat> [[N:%.*]], i64 0)
+// CHECK-NEXT:    ret <8 x bfloat> [[TMP0]]
+//
+// CPP-CHECK-LABEL: @_Z21test_svget_neonq_bf16u14__SVBFloat16_t(
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = call <8 x bfloat> @llvm.experimental.vector.extract.v8bf16.nxv8bf16(<vscale x 8 x bfloat> [[N:%.*]], i64 0)
+// CPP-CHECK-NEXT:    ret <8 x bfloat> [[TMP0]]
+//
+bfloat16x8_t test_svget_neonq_bf16(svbfloat16_t n)
+{
+  return SVE_ACLE_FUNC(svget_neonq,_bf16,,)(n);
+}
Index: clang/test/CodeGen/aarch64_neon_sve_bridge_intrinsics/acle_neon_sve_bridge_dup_neonq.c
===================================================================
--- /dev/null
+++ clang/test/CodeGen/aarch64_neon_sve_bridge_intrinsics/acle_neon_sve_bridge_dup_neonq.c
@@ -0,0 +1,212 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
+// REQUIRES: aarch64-registered-target
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +neon -target-feature +sve2 -target-feature +bf16 -fallow-half-arguments-and-returns -S -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +neon -target-feature +sve2 -target-feature +bf16 -fallow-half-arguments-and-returns -S -O1 -Werror -Wall -emit-llvm -o - -x c++ %s | FileCheck %s -check-prefix=CPP-CHECK
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +neon -target-feature +sve2 -target-feature +bf16 -fallow-half-arguments-and-returns -S -O1 -Werror -Wall -o /dev/null %s
+#include <arm_neon_sve_bridge.h>
+
+#define SVE_ACLE_FUNC(A1,A2,A3,A4) A1##A2##A3##A4
+
+// CHECK-LABEL: @test_svdup_neonq_s8(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = call <vscale x 16 x i8> @llvm.experimental.vector.insert.nxv16i8.v16i8(<vscale x 16 x i8> undef, <16 x i8> [[N:%.*]], i64 0)
+// CHECK-NEXT:    [[TMP1:%.*]] = call <vscale x 16 x i8> @llvm.aarch64.sve.dupq.lane.nxv16i8(<vscale x 16 x i8> [[TMP0]], i64 0)
+// CHECK-NEXT:    ret <vscale x 16 x i8> [[TMP1]]
+//
+// CPP-CHECK-LABEL: @_Z19test_svdup_neonq_s811__Int8x16_t(
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = call <vscale x 16 x i8> @llvm.experimental.vector.insert.nxv16i8.v16i8(<vscale x 16 x i8> undef, <16 x i8> [[N:%.*]], i64 0)
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = call <vscale x 16 x i8> @llvm.aarch64.sve.dupq.lane.nxv16i8(<vscale x 16 x i8> [[TMP0]], i64 0)
+// CPP-CHECK-NEXT:    ret <vscale x 16 x i8> [[TMP1]]
+//
+svint8_t test_svdup_neonq_s8(int8x16_t n)
+{
+  return SVE_ACLE_FUNC(svdup_neonq,_s8,,)(n);
+}
+
+// CHECK-LABEL: @test_svdup_neonq_s16(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x i16> @llvm.experimental.vector.insert.nxv8i16.v8i16(<vscale x 8 x i16> undef, <8 x i16> [[N:%.*]], i64 0)
+// CHECK-NEXT:    [[TMP1:%.*]] = call <vscale x 8 x i16> @llvm.aarch64.sve.dupq.lane.nxv8i16(<vscale x 8 x i16> [[TMP0]], i64 0)
+// CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP1]]
+//
+// CPP-CHECK-LABEL: @_Z20test_svdup_neonq_s1611__Int16x8_t(
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x i16> @llvm.experimental.vector.insert.nxv8i16.v8i16(<vscale x 8 x i16> undef, <8 x i16> [[N:%.*]], i64 0)
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = call <vscale x 8 x i16> @llvm.aarch64.sve.dupq.lane.nxv8i16(<vscale x 8 x i16> [[TMP0]], i64 0)
+// CPP-CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP1]]
+//
+svint16_t test_svdup_neonq_s16(int16x8_t n)
+{
+  return SVE_ACLE_FUNC(svdup_neonq,_s16,,)(n);
+}
+
+// CHECK-LABEL: @test_svdup_neonq_s32(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x i32> @llvm.experimental.vector.insert.nxv4i32.v4i32(<vscale x 4 x i32> undef, <4 x i32> [[N:%.*]], i64 0)
+// CHECK-NEXT:    [[TMP1:%.*]] = call <vscale x 4 x i32> @llvm.aarch64.sve.dupq.lane.nxv4i32(<vscale x 4 x i32> [[TMP0]], i64 0)
+// CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP1]]
+//
+// CPP-CHECK-LABEL: @_Z20test_svdup_neonq_s3211__Int32x4_t(
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x i32> @llvm.experimental.vector.insert.nxv4i32.v4i32(<vscale x 4 x i32> undef, <4 x i32> [[N:%.*]], i64 0)
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = call <vscale x 4 x i32> @llvm.aarch64.sve.dupq.lane.nxv4i32(<vscale x 4 x i32> [[TMP0]], i64 0)
+// CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP1]]
+//
+svint32_t test_svdup_neonq_s32(int32x4_t n)
+{
+  return SVE_ACLE_FUNC(svdup_neonq,_s32,,)(n);
+}
+
+// CHECK-LABEL: @test_svdup_neonq_s64(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x i64> @llvm.experimental.vector.insert.nxv2i64.v2i64(<vscale x 2 x i64> undef, <2 x i64> [[N:%.*]], i64 0)
+// CHECK-NEXT:    [[TMP1:%.*]] = call <vscale x 2 x i64> @llvm.aarch64.sve.dupq.lane.nxv2i64(<vscale x 2 x i64> [[TMP0]], i64 0)
+// CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP1]]
+//
+// CPP-CHECK-LABEL: @_Z20test_svdup_neonq_s6411__Int64x2_t(
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x i64> @llvm.experimental.vector.insert.nxv2i64.v2i64(<vscale x 2 x i64> undef, <2 x i64> [[N:%.*]], i64 0)
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = call <vscale x 2 x i64> @llvm.aarch64.sve.dupq.lane.nxv2i64(<vscale x 2 x i64> [[TMP0]], i64 0)
+// CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP1]]
+//
+svint64_t test_svdup_neonq_s64(int64x2_t n)
+{
+  return SVE_ACLE_FUNC(svdup_neonq,_s64,,)(n);
+}
+
+// CHECK-LABEL: @test_svdup_neonq_u8(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = call <vscale x 16 x i8> @llvm.experimental.vector.insert.nxv16i8.v16i8(<vscale x 16 x i8> undef, <16 x i8> [[N:%.*]], i64 0)
+// CHECK-NEXT:    [[TMP1:%.*]] = call <vscale x 16 x i8> @llvm.aarch64.sve.dupq.lane.nxv16i8(<vscale x 16 x i8> [[TMP0]], i64 0)
+// CHECK-NEXT:    ret <vscale x 16 x i8> [[TMP1]]
+//
+// CPP-CHECK-LABEL: @_Z19test_svdup_neonq_u812__Uint8x16_t(
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = call <vscale x 16 x i8> @llvm.experimental.vector.insert.nxv16i8.v16i8(<vscale x 16 x i8> undef, <16 x i8> [[N:%.*]], i64 0)
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = call <vscale x 16 x i8> @llvm.aarch64.sve.dupq.lane.nxv16i8(<vscale x 16 x i8> [[TMP0]], i64 0)
+// CPP-CHECK-NEXT:    ret <vscale x 16 x i8> [[TMP1]]
+//
+svuint8_t test_svdup_neonq_u8(uint8x16_t n)
+{
+  return SVE_ACLE_FUNC(svdup_neonq,_u8,,)(n);
+}
+
+// CHECK-LABEL: @test_svdup_neonq_u16(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x i16> @llvm.experimental.vector.insert.nxv8i16.v8i16(<vscale x 8 x i16> undef, <8 x i16> [[N:%.*]], i64 0)
+// CHECK-NEXT:    [[TMP1:%.*]] = call <vscale x 8 x i16> @llvm.aarch64.sve.dupq.lane.nxv8i16(<vscale x 8 x i16> [[TMP0]], i64 0)
+// CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP1]]
+//
+// CPP-CHECK-LABEL: @_Z20test_svdup_neonq_u1612__Uint16x8_t(
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x i16> @llvm.experimental.vector.insert.nxv8i16.v8i16(<vscale x 8 x i16> undef, <8 x i16> [[N:%.*]], i64 0)
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = call <vscale x 8 x i16> @llvm.aarch64.sve.dupq.lane.nxv8i16(<vscale x 8 x i16> [[TMP0]], i64 0)
+// CPP-CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP1]]
+//
+svuint16_t test_svdup_neonq_u16(uint16x8_t n)
+{
+  return SVE_ACLE_FUNC(svdup_neonq,_u16,,)(n);
+}
+
+// CHECK-LABEL: @test_svdup_neonq_u32(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x i32> @llvm.experimental.vector.insert.nxv4i32.v4i32(<vscale x 4 x i32> undef, <4 x i32> [[N:%.*]], i64 0)
+// CHECK-NEXT:    [[TMP1:%.*]] = call <vscale x 4 x i32> @llvm.aarch64.sve.dupq.lane.nxv4i32(<vscale x 4 x i32> [[TMP0]], i64 0)
+// CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP1]]
+//
+// CPP-CHECK-LABEL: @_Z20test_svdup_neonq_u3212__Uint32x4_t(
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x i32> @llvm.experimental.vector.insert.nxv4i32.v4i32(<vscale x 4 x i32> undef, <4 x i32> [[N:%.*]], i64 0)
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = call <vscale x 4 x i32> @llvm.aarch64.sve.dupq.lane.nxv4i32(<vscale x 4 x i32> [[TMP0]], i64 0)
+// CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP1]]
+//
+svuint32_t test_svdup_neonq_u32(uint32x4_t n)
+{
+  return SVE_ACLE_FUNC(svdup_neonq,_u32,,)(n);
+}
+
+// CHECK-LABEL: @test_svdup_neonq_u64(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x i64> @llvm.experimental.vector.insert.nxv2i64.v2i64(<vscale x 2 x i64> undef, <2 x i64> [[N:%.*]], i64 0)
+// CHECK-NEXT:    [[TMP1:%.*]] = call <vscale x 2 x i64> @llvm.aarch64.sve.dupq.lane.nxv2i64(<vscale x 2 x i64> [[TMP0]], i64 0)
+// CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP1]]
+//
+// CPP-CHECK-LABEL: @_Z20test_svdup_neonq_u6412__Uint64x2_t(
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x i64> @llvm.experimental.vector.insert.nxv2i64.v2i64(<vscale x 2 x i64> undef, <2 x i64> [[N:%.*]], i64 0)
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = call <vscale x 2 x i64> @llvm.aarch64.sve.dupq.lane.nxv2i64(<vscale x 2 x i64> [[TMP0]], i64 0)
+// CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP1]]
+//
+svuint64_t test_svdup_neonq_u64(uint64x2_t n)
+{
+  return SVE_ACLE_FUNC(svdup_neonq,_u64,,)(n);
+}
+
+// CHECK-LABEL: @test_svdup_neonq_f16(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x half> @llvm.experimental.vector.insert.nxv8f16.v8f16(<vscale x 8 x half> undef, <8 x half> [[N:%.*]], i64 0)
+// CHECK-NEXT:    [[TMP1:%.*]] = call <vscale x 8 x half> @llvm.aarch64.sve.dupq.lane.nxv8f16(<vscale x 8 x half> [[TMP0]], i64 0)
+// CHECK-NEXT:    ret <vscale x 8 x half> [[TMP1]]
+//
+// CPP-CHECK-LABEL: @_Z20test_svdup_neonq_f1613__Float16x8_t(
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x half> @llvm.experimental.vector.insert.nxv8f16.v8f16(<vscale x 8 x half> undef, <8 x half> [[N:%.*]], i64 0)
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = call <vscale x 8 x half> @llvm.aarch64.sve.dupq.lane.nxv8f16(<vscale x 8 x half> [[TMP0]], i64 0)
+// CPP-CHECK-NEXT:    ret <vscale x 8 x half> [[TMP1]]
+//
+svfloat16_t test_svdup_neonq_f16(float16x8_t n)
+{
+  return SVE_ACLE_FUNC(svdup_neonq,_f16,,)(n);
+}
+
+// CHECK-LABEL: @test_svdup_neonq_f32(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x float> @llvm.experimental.vector.insert.nxv4f32.v4f32(<vscale x 4 x float> undef, <4 x float> [[N:%.*]], i64 0)
+// CHECK-NEXT:    [[TMP1:%.*]] = call <vscale x 4 x float> @llvm.aarch64.sve.dupq.lane.nxv4f32(<vscale x 4 x float> [[TMP0]], i64 0)
+// CHECK-NEXT:    ret <vscale x 4 x float> [[TMP1]]
+//
+// CPP-CHECK-LABEL: @_Z20test_svdup_neonq_f3213__Float32x4_t(
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x float> @llvm.experimental.vector.insert.nxv4f32.v4f32(<vscale x 4 x float> undef, <4 x float> [[N:%.*]], i64 0)
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = call <vscale x 4 x float> @llvm.aarch64.sve.dupq.lane.nxv4f32(<vscale x 4 x float> [[TMP0]], i64 0)
+// CPP-CHECK-NEXT:    ret <vscale x 4 x float> [[TMP1]]
+//
+svfloat32_t test_svdup_neonq_f32(float32x4_t n)
+{
+  return SVE_ACLE_FUNC(svdup_neonq,_f32,,)(n);
+}
+
+// CHECK-LABEL: @test_svdup_neonq_f64(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x double> @llvm.experimental.vector.insert.nxv2f64.v2f64(<vscale x 2 x double> undef, <2 x double> [[N:%.*]], i64 0)
+// CHECK-NEXT:    [[TMP1:%.*]] = call <vscale x 2 x double> @llvm.aarch64.sve.dupq.lane.nxv2f64(<vscale x 2 x double> [[TMP0]], i64 0)
+// CHECK-NEXT:    ret <vscale x 2 x double> [[TMP1]]
+//
+// CPP-CHECK-LABEL: @_Z20test_svdup_neonq_f6413__Float64x2_t(
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x double> @llvm.experimental.vector.insert.nxv2f64.v2f64(<vscale x 2 x double> undef, <2 x double> [[N:%.*]], i64 0)
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = call <vscale x 2 x double> @llvm.aarch64.sve.dupq.lane.nxv2f64(<vscale x 2 x double> [[TMP0]], i64 0)
+// CPP-CHECK-NEXT:    ret <vscale x 2 x double> [[TMP1]]
+//
+svfloat64_t test_svdup_neonq_f64(float64x2_t n)
+{
+  return SVE_ACLE_FUNC(svdup_neonq,_f64,,)(n);
+}
+
+// CHECK-LABEL: @test_svdup_neonq_bf16(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x bfloat> @llvm.experimental.vector.insert.nxv8bf16.v8bf16(<vscale x 8 x bfloat> undef, <8 x bfloat> [[N:%.*]], i64 0)
+// CHECK-NEXT:    [[TMP1:%.*]] = call <vscale x 8 x bfloat> @llvm.aarch64.sve.dupq.lane.nxv8bf16(<vscale x 8 x bfloat> [[TMP0]], i64 0)
+// CHECK-NEXT:    ret <vscale x 8 x bfloat> [[TMP1]]
+//
+// CPP-CHECK-LABEL: @_Z21test_svdup_neonq_bf1614__Bfloat16x8_t(
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x bfloat> @llvm.experimental.vector.insert.nxv8bf16.v8bf16(<vscale x 8 x bfloat> undef, <8 x bfloat> [[N:%.*]], i64 0)
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = call <vscale x 8 x bfloat> @llvm.aarch64.sve.dupq.lane.nxv8bf16(<vscale x 8 x bfloat> [[TMP0]], i64 0)
+// CPP-CHECK-NEXT:    ret <vscale x 8 x bfloat> [[TMP1]]
+//
+svbfloat16_t test_svdup_neonq_bf16(bfloat16x8_t n)
+{
+  return SVE_ACLE_FUNC(svdup_neonq,_bf16,,)(n);
+}
Index: clang/lib/Headers/arm_neon_sve_bridge.h
===================================================================
--- /dev/null
+++ clang/lib/Headers/arm_neon_sve_bridge.h
@@ -0,0 +1,103 @@
+/*===---- arm_neon_sve_bridge.h - ARM NEON SVE Bridge intrinsics -----------===
+ *
+ *
+ * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+ * See https://llvm.org/LICENSE.txt for license information.
+ * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+ *
+ *===-----------------------------------------------------------------------===
+ */
+
+#ifndef __ARM_NEON_SVE_BRIDGE_H
+#define __ARM_NEON_SVE_BRIDGE_H
+
+#include <arm_neon.h>
+#include <arm_sve.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/* Function attributes */
+#define __ai static __inline__ __attribute__((__always_inline__, __nodebug__))
+
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svset_neonq_s8)))
+svint8_t svset_neonq_s8(svint8_t, int8x16_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svset_neonq_s16)))
+svint16_t svset_neonq_s16(svint16_t, int16x8_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svset_neonq_s32)))
+svint32_t svset_neonq_s32(svint32_t, int32x4_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svset_neonq_s64)))
+svint64_t svset_neonq_s64(svint64_t, int64x2_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svset_neonq_u8)))
+svuint8_t svset_neonq_u8(svuint8_t, uint8x16_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svset_neonq_u16)))
+svuint16_t svset_neonq_u16(svuint16_t, uint16x8_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svset_neonq_u32)))
+svuint32_t svset_neonq_u32(svuint32_t, uint32x4_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svset_neonq_u64)))
+svuint64_t svset_neonq_u64(svuint64_t, uint64x2_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svset_neonq_f16)))
+svfloat16_t svset_neonq_f16(svfloat16_t, float16x8_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svset_neonq_f32)))
+svfloat32_t svset_neonq_f32(svfloat32_t, float32x4_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svset_neonq_f64)))
+svfloat64_t svset_neonq_f64(svfloat64_t, float64x2_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svget_neonq_s8)))
+int8x16_t svget_neonq_s8(svint8_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svget_neonq_s16)))
+int16x8_t svget_neonq_s16(svint16_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svget_neonq_s32)))
+int32x4_t svget_neonq_s32(svint32_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svget_neonq_s64)))
+int64x2_t svget_neonq_s64(svint64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svget_neonq_u8)))
+uint8x16_t svget_neonq_u8(svuint8_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svget_neonq_u16)))
+uint16x8_t svget_neonq_u16(svuint16_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svget_neonq_u32)))
+uint32x4_t svget_neonq_u32(svuint32_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svget_neonq_u64)))
+uint64x2_t svget_neonq_u64(svuint64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svget_neonq_f16)))
+float16x8_t svget_neonq_f16(svfloat16_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svget_neonq_f32)))
+float32x4_t svget_neonq_f32(svfloat32_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svget_neonq_f64)))
+float64x2_t svget_neonq_f64(svfloat64_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdup_neonq_s8)))
+svint8_t svdup_neonq_s8(int8x16_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdup_neonq_s16)))
+svint16_t svdup_neonq_s16(int16x8_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdup_neonq_s32)))
+svint32_t svdup_neonq_s32(int32x4_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdup_neonq_s64)))
+svint64_t svdup_neonq_s64(int64x2_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdup_neonq_u8)))
+svuint8_t svdup_neonq_u8(uint8x16_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdup_neonq_u16)))
+svuint16_t svdup_neonq_u16(uint16x8_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdup_neonq_u32)))
+svuint32_t svdup_neonq_u32(uint32x4_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdup_neonq_u64)))
+svuint64_t svdup_neonq_u64(uint64x2_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdup_neonq_f16)))
+svfloat16_t svdup_neonq_f16(float16x8_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdup_neonq_f32)))
+svfloat32_t svdup_neonq_f32(float32x4_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdup_neonq_f64)))
+svfloat64_t svdup_neonq_f64(float64x2_t);
+#if defined(__ARM_FEATURE_SVE2) && defined(__ARM_FEATURE_SVE_BF16)
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svset_neonq_bf16)))
+svbfloat16_t svset_neonq_bf16(svbfloat16_t, bfloat16x8_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svget_neonq_bf16)))
+bfloat16x8_t svget_neonq_bf16(svbfloat16_t);
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdup_neonq_bf16)))
+svbfloat16_t svdup_neonq_bf16(bfloat16x8_t);
+#endif // defined(__ARM_FEATURE_SVE2) && defined(__ARM_FEATURE_SVE_BF16)
+
+#ifdef __cplusplus
+} // extern "C"
+#endif
+
+#endif // __ARM_NEON_SVE_BRIDGE_H
Index: clang/lib/Headers/CMakeLists.txt
===================================================================
--- clang/lib/Headers/CMakeLists.txt
+++ clang/lib/Headers/CMakeLists.txt
@@ -219,6 +219,8 @@
   clang_generate_header(-gen-arm-mve-header arm_mve.td arm_mve.h)
   # Generate arm_cde.h
   clang_generate_header(-gen-arm-cde-header arm_cde.td arm_cde.h)
+  # Copy arm_neon_sve_bridge.h
+  copy_header_to_output_dir(${CMAKE_CURRENT_SOURCE_DIR} arm_neon_sve_bridge.h)
 endif()
 if(RISCV IN_LIST LLVM_TARGETS_TO_BUILD)
   # Generate riscv_vector.h
Index: clang/lib/CodeGen/CGBuiltin.cpp
===================================================================
--- clang/lib/CodeGen/CGBuiltin.cpp
+++ clang/lib/CodeGen/CGBuiltin.cpp
@@ -6384,6 +6384,7 @@
 static const ARMVectorIntrinsicInfo AArch64SVEIntrinsicMap[] = {
 #define GET_SVE_LLVM_INTRINSIC_MAP
 #include "clang/Basic/arm_sve_builtin_cg.inc"
+#include "clang/Basic/BuiltinsAArch64NeonSVEBridge_cg.def"
 #undef GET_SVE_LLVM_INTRINSIC_MAP
 };
 
@@ -9307,6 +9308,58 @@
     Function *F = CGM.getIntrinsic(Intrinsic::aarch64_sve_tbl2, VTy);
     return Builder.CreateCall(F, {V0, V1, Ops[1]});
   }
+
+  case SVE::BI__builtin_sve_svset_neonq_s8:
+  case SVE::BI__builtin_sve_svset_neonq_s16:
+  case SVE::BI__builtin_sve_svset_neonq_s32:
+  case SVE::BI__builtin_sve_svset_neonq_s64:
+  case SVE::BI__builtin_sve_svset_neonq_u8:
+  case SVE::BI__builtin_sve_svset_neonq_u16:
+  case SVE::BI__builtin_sve_svset_neonq_u32:
+  case SVE::BI__builtin_sve_svset_neonq_u64:
+  case SVE::BI__builtin_sve_svset_neonq_f16:
+  case SVE::BI__builtin_sve_svset_neonq_f32:
+  case SVE::BI__builtin_sve_svset_neonq_f64:
+  case SVE::BI__builtin_sve_svset_neonq_bf16: {
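+    // svset_neonq: insert the NEON vector at element 0 of the SVE operand.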
+    return Builder.CreateInsertVector(Ty, Ops[0], Ops[1], Builder.getInt64(0));
+  }
+
+  case SVE::BI__builtin_sve_svget_neonq_s8:
+  case SVE::BI__builtin_sve_svget_neonq_s16:
+  case SVE::BI__builtin_sve_svget_neonq_s32:
+  case SVE::BI__builtin_sve_svget_neonq_s64:
+  case SVE::BI__builtin_sve_svget_neonq_u8:
+  case SVE::BI__builtin_sve_svget_neonq_u16:
+  case SVE::BI__builtin_sve_svget_neonq_u32:
+  case SVE::BI__builtin_sve_svget_neonq_u64:
+  case SVE::BI__builtin_sve_svget_neonq_f16:
+  case SVE::BI__builtin_sve_svget_neonq_f32:
+  case SVE::BI__builtin_sve_svget_neonq_f64:
+  case SVE::BI__builtin_sve_svget_neonq_bf16: {
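+    // svget_neonq: extract the low 128 bits of the SVE operand.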
+    return Builder.CreateExtractVector(Ty, Ops[0], Builder.getInt64(0));
+  }
+
+  case SVE::BI__builtin_sve_svdup_neonq_s8:
+  case SVE::BI__builtin_sve_svdup_neonq_s16:
+  case SVE::BI__builtin_sve_svdup_neonq_s32:
+  case SVE::BI__builtin_sve_svdup_neonq_s64:
+  case SVE::BI__builtin_sve_svdup_neonq_u8:
+  case SVE::BI__builtin_sve_svdup_neonq_u16:
+  case SVE::BI__builtin_sve_svdup_neonq_u32:
+  case SVE::BI__builtin_sve_svdup_neonq_u64:
+  case SVE::BI__builtin_sve_svdup_neonq_f16:
+  case SVE::BI__builtin_sve_svdup_neonq_f32:
+  case SVE::BI__builtin_sve_svdup_neonq_f64:
+  case SVE::BI__builtin_sve_svdup_neonq_bf16: {
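+    // svdup_neonq: insert the NEON vector at element 0 of an undef SVE
+    // vector, then broadcast that 128-bit quadword with sve.dupq.lane.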
+    Value *Insert = Builder.CreateInsertVector(Ty, UndefValue::get(Ty),
+                                               Ops[0], Builder.getInt64(0));
+    return Builder.CreateIntrinsic(Intrinsic::aarch64_sve_dupq_lane, {Ty},
+                                   {Insert, Builder.getInt64(0)});
+  }
   }
 
   /// Should not happen
Index: clang/lib/Basic/Targets/AArch64.cpp
===================================================================
--- clang/lib/Basic/Targets/AArch64.cpp
+++ clang/lib/Basic/Targets/AArch64.cpp
@@ -307,6 +307,9 @@
   if (FPU & SveMode)
     Builder.defineMacro("__ARM_FEATURE_SVE", "1");
 
+  if ((FPU & NeonMode) && (FPU & SveMode))
+    Builder.defineMacro("__ARM_NEON_SVE_BRIDGE", "1");
+
   if (HasSVE2)
     Builder.defineMacro("__ARM_FEATURE_SVE2", "1");
 
Index: clang/include/clang/Basic/BuiltinsSVE.def
===================================================================
--- clang/include/clang/Basic/BuiltinsSVE.def
+++ clang/include/clang/Basic/BuiltinsSVE.def
@@ -15,6 +15,7 @@
 
 #define GET_SVE_BUILTINS
 #include "clang/Basic/arm_sve_builtins.inc"
+#include "clang/Basic/BuiltinsAArch64NeonSVEBridge.def"
 #undef GET_SVE_BUILTINS
 
 #undef BUILTIN
Index: clang/include/clang/Basic/BuiltinsAArch64NeonSVEBridge_cg.def
===================================================================
--- /dev/null
+++ clang/include/clang/Basic/BuiltinsAArch64NeonSVEBridge_cg.def
@@ -0,0 +1,38 @@
+#ifdef GET_SVE_LLVM_INTRINSIC_MAP
+SVEMAP2(svget_neonq_s8, 1),
+SVEMAP2(svget_neonq_s16, 2),
+SVEMAP2(svget_neonq_s32, 3),
+SVEMAP2(svget_neonq_s64, 4),
+SVEMAP2(svget_neonq_u8, 1),
+SVEMAP2(svget_neonq_u16, 2),
+SVEMAP2(svget_neonq_u32, 3),
+SVEMAP2(svget_neonq_u64, 4),
+SVEMAP2(svget_neonq_f16, 1),
+SVEMAP2(svget_neonq_f32, 2),
+SVEMAP2(svget_neonq_f64, 3),
+SVEMAP2(svget_neonq_bf16, 1),
+SVEMAP2(svset_neonq_s8, 1),
+SVEMAP2(svset_neonq_s16, 2),
+SVEMAP2(svset_neonq_s32, 3),
+SVEMAP2(svset_neonq_s64, 4),
+SVEMAP2(svset_neonq_u8, 1),
+SVEMAP2(svset_neonq_u16, 2),
+SVEMAP2(svset_neonq_u32, 3),
+SVEMAP2(svset_neonq_u64, 4),
+SVEMAP2(svset_neonq_f16, 1),
+SVEMAP2(svset_neonq_f32, 2),
+SVEMAP2(svset_neonq_f64, 3),
+SVEMAP2(svset_neonq_bf16, 1),
+SVEMAP2(svdup_neonq_s8, 1),
+SVEMAP2(svdup_neonq_s16, 2),
+SVEMAP2(svdup_neonq_s32, 3),
+SVEMAP2(svdup_neonq_s64, 4),
+SVEMAP2(svdup_neonq_u8, 1),
+SVEMAP2(svdup_neonq_u16, 2),
+SVEMAP2(svdup_neonq_u32, 3),
+SVEMAP2(svdup_neonq_u64, 4),
+SVEMAP2(svdup_neonq_f16, 1),
+SVEMAP2(svdup_neonq_f32, 2),
+SVEMAP2(svdup_neonq_f64, 3),
+SVEMAP2(svdup_neonq_bf16, 1),
+#endif
Index: clang/include/clang/Basic/BuiltinsAArch64NeonSVEBridge.def
===================================================================
--- /dev/null
+++ clang/include/clang/Basic/BuiltinsAArch64NeonSVEBridge.def
@@ -0,0 +1,38 @@
+#ifdef GET_SVE_BUILTINS
+BUILTIN(__builtin_sve_svget_neonq_s8, "q4iq16bq4i", "n")
+BUILTIN(__builtin_sve_svget_neonq_s16, "q4iq16bq4i", "n")
+BUILTIN(__builtin_sve_svget_neonq_s32, "q4iq16bq4i", "n")
+BUILTIN(__builtin_sve_svget_neonq_s64, "q4iq16bq4i", "n")
+BUILTIN(__builtin_sve_svget_neonq_u8, "q4Uiq4Uiq4Uiq4Ui", "n")
+BUILTIN(__builtin_sve_svget_neonq_u16, "q4Uiq4Uiq4Uiq4Ui", "n")
+BUILTIN(__builtin_sve_svget_neonq_u32, "q4Uiq4Uiq4Uiq4Ui", "n")
+BUILTIN(__builtin_sve_svget_neonq_u64, "q4Uiq4Uiq4Uiq4Ui", "n")
+BUILTIN(__builtin_sve_svget_neonq_f16, "q8hq16bq8hh", "n")
+BUILTIN(__builtin_sve_svget_neonq_f32, "q8hq16bq8hh", "n")
+BUILTIN(__builtin_sve_svget_neonq_f64, "q8hq16bq8hh", "n")
+BUILTIN(__builtin_sve_svget_neonq_bf16, "q8yq8y", "n")
+BUILTIN(__builtin_sve_svset_neonq_s8, "q4iq16bq4i", "n")
+BUILTIN(__builtin_sve_svset_neonq_s16, "q4iq16bq4i", "n")
+BUILTIN(__builtin_sve_svset_neonq_s32, "q4iq16bq4i", "n")
+BUILTIN(__builtin_sve_svset_neonq_s64, "q4iq16bq4i", "n")
+BUILTIN(__builtin_sve_svset_neonq_u8, "q4Uiq4Uiq4Uiq4Ui", "n")
+BUILTIN(__builtin_sve_svset_neonq_u16, "q4Uiq4Uiq4Uiq4Ui", "n")
+BUILTIN(__builtin_sve_svset_neonq_u32, "q4Uiq4Uiq4Uiq4Ui", "n")
+BUILTIN(__builtin_sve_svset_neonq_u64, "q4Uiq4Uiq4Uiq4Ui", "n")
+BUILTIN(__builtin_sve_svset_neonq_f16, "q8hq16bq8hh", "n")
+BUILTIN(__builtin_sve_svset_neonq_f32, "q8hq16bq8hh", "n")
+BUILTIN(__builtin_sve_svset_neonq_f64, "q8hq16bq8hh", "n")
+BUILTIN(__builtin_sve_svset_neonq_bf16, "q8yq8y", "n")
+BUILTIN(__builtin_sve_svdup_neonq_s8, "q4iq16bq4i", "n")
+BUILTIN(__builtin_sve_svdup_neonq_s16, "q4iq16bq4i", "n")
+BUILTIN(__builtin_sve_svdup_neonq_s32, "q4iq16bq4i", "n")
+BUILTIN(__builtin_sve_svdup_neonq_s64, "q4iq16bq4i", "n")
+BUILTIN(__builtin_sve_svdup_neonq_u8, "q4Uiq4Uiq4Uiq4Ui", "n")
+BUILTIN(__builtin_sve_svdup_neonq_u16, "q4Uiq4Uiq4Uiq4Ui", "n")
+BUILTIN(__builtin_sve_svdup_neonq_u32, "q4Uiq4Uiq4Uiq4Ui", "n")
+BUILTIN(__builtin_sve_svdup_neonq_u64, "q4Uiq4Uiq4Uiq4Ui", "n")
+BUILTIN(__builtin_sve_svdup_neonq_f16, "q8hq16bq8hh", "n")
+BUILTIN(__builtin_sve_svdup_neonq_f32, "q8hq16bq8hh", "n")
+BUILTIN(__builtin_sve_svdup_neonq_f64, "q8hq16bq8hh", "n")
+BUILTIN(__builtin_sve_svdup_neonq_bf16, "q8yq8y", "n")
+#endif