[PATCH] D14982: ARM v8.1a adds Advanced SIMD instructions for Rounding Double Multiply Add/Subtract.

Alexandros Lamprineas via cfe-commits Wed, 25 Nov 2015 04:29:01 -0800

labrinea created this revision.
labrinea added reviewers: jmolloy, rengolin, cfe-commits.
Herald added subscribers: rengolin, aemerson.


The following instructions are added to AArch32 instruction set:
  - VQRDMLAH: Vector Saturating Rounding Doubling Multiply Accumulate Returning
High Half
  - VQRDMLSH: Vector Saturating Rounding Doubling Multiply Subtract Returning 
High Half
    
The following instructions are added to AArch64 instruction set:
  - SQRDMLAH: Signed Saturating Rounding Doubling Multiply Accumulate Returning 
High Half
  - SQRDMLSH: Signed Saturating Rounding Doubling Multiply Subtract Returning 
High Half
    
This patch adds intrinsic and ACLE macro support for these instructions, as 
well as corresponding tests.

http://reviews.llvm.org/D14982

Files:
  include/clang/Basic/arm_neon.td
  lib/Basic/Targets.cpp
  test/CodeGen/aarch64-neon-2velem.c
  test/CodeGen/aarch64-neon-intrinsics.c
  test/CodeGen/aarch64-neon-scalar-x-indexed-elem.c
  test/CodeGen/arm_neon_intrinsics.c
  test/Preprocessor/aarch64-target-features.c
  test/Preprocessor/arm-target-features.c

Index: test/Preprocessor/arm-target-features.c
===================================================================
--- test/Preprocessor/arm-target-features.c
+++ test/Preprocessor/arm-target-features.c
@@ -407,4 +407,5 @@
 // CHECK-V81A: __ARM_ARCH 8
 // CHECK-V81A: __ARM_ARCH_8_1A__ 1
 // CHECK-V81A: #define __ARM_ARCH_PROFILE 'A'
+// CHECK-V81A: __ARM_FEATURE_QRDMX 1
 // CHECK-V81A: #define __ARM_FP 0xE
Index: test/Preprocessor/aarch64-target-features.c
===================================================================
--- test/Preprocessor/aarch64-target-features.c
+++ test/Preprocessor/aarch64-target-features.c
@@ -71,6 +71,9 @@
 // CHECK-NEON: __ARM_NEON 1
 // CHECK-NEON: __ARM_NEON_FP 0xE
 
+// RUN: %clang -target aarch64-none-eabi -march=armv8.1-a -x c -E -dM %s -o - | FileCheck --check-prefix=CHECK-V81A %s
+// CHECK-V81A: __ARM_FEATURE_QRDMX 1
+
 // RUN: %clang -target aarch64 -march=arm64 -### -c %s 2>&1 | FileCheck -check-prefix=CHECK-ARCH-NOT-ACCEPT %s
 // RUN: %clang -target aarch64 -march=aarch64 -### -c %s 2>&1 | FileCheck -check-prefix=CHECK-ARCH-NOT-ACCEPT %s
 // CHECK-ARCH-NOT-ACCEPT: error: the clang compiler does not support
Index: test/CodeGen/arm_neon_intrinsics.c
===================================================================
--- test/CodeGen/arm_neon_intrinsics.c
+++ test/CodeGen/arm_neon_intrinsics.c
@@ -6425,6 +6425,64 @@
 }
 
 
+// CHECK-LABEL: test_vqrdmlah_s16
+// CHECK:      vqrdmulh.s16 [[REG:d[0-9]+]], d{{[0-9]+}}, d{{[0-9]+}}
+// CHECK-NEXT: vqadd.s16 d{{[0-9]+}}, d{{[0-9]+}}, [[REG]]
+int16x4_t test_vqrdmlah_s16(int16x4_t a, int16x4_t b, int16x4_t c) {
+  return vqrdmlah_s16(a, b, c);
+}
+
+// CHECK-LABEL: test_vqrdmlah_s32
+// CHECK:      vqrdmulh.s32 [[REG:d[0-9]+]], d{{[0-9]+}}, d{{[0-9]+}}
+// CHECK-NEXT: vqadd.s32 d{{[0-9]+}}, d{{[0-9]+}}, [[REG]]
+int32x2_t test_vqrdmlah_s32(int32x2_t a, int32x2_t b, int32x2_t c) {
+  return vqrdmlah_s32(a, b, c);
+}
+
+// CHECK-LABEL: test_vqrdmlahq_s16
+// CHECK:      vqrdmulh.s16 [[REG:q[0-9]+]], q{{[0-9]+}}, q{{[0-9]+}}
+// CHECK-NEXT: vqadd.s16 q{{[0-9]+}}, q{{[0-9]+}}, [[REG]]
+int16x8_t test_vqrdmlahq_s16(int16x8_t a, int16x8_t b, int16x8_t c) {
+  return vqrdmlahq_s16(a, b, c);
+}
+
+// CHECK-LABEL: test_vqrdmlahq_s32
+// CHECK:      vqrdmulh.s32 [[REG:q[0-9]+]], q{{[0-9]+}}, q{{[0-9]+}}
+// CHECK-NEXT: vqadd.s32 q{{[0-9]+}}, q{{[0-9]+}}, [[REG]]
+int32x4_t test_vqrdmlahq_s32(int32x4_t a, int32x4_t b, int32x4_t c) {
+  return vqrdmlahq_s32(a, b, c);
+}
+
+
+// CHECK-LABEL: test_vqrdmlsh_s16
+// CHECK:      vqrdmulh.s16 [[REG:d[0-9]+]], d{{[0-9]+}}, d{{[0-9]+}}
+// CHECK-NEXT: vqsub.s16 d{{[0-9]+}}, d{{[0-9]+}}, [[REG]]
+int16x4_t test_vqrdmlsh_s16(int16x4_t a, int16x4_t b, int16x4_t c) {
+  return vqrdmlsh_s16(a, b, c);
+}
+
+// CHECK-LABEL: test_vqrdmlsh_s32
+// CHECK:      vqrdmulh.s32 [[REG:d[0-9]+]], d{{[0-9]+}}, d{{[0-9]+}}
+// CHECK-NEXT: vqsub.s32 d{{[0-9]+}}, d{{[0-9]+}}, [[REG]]
+int32x2_t test_vqrdmlsh_s32(int32x2_t a, int32x2_t b, int32x2_t c) {
+  return vqrdmlsh_s32(a, b, c);
+}
+
+// CHECK-LABEL: test_vqrdmlshq_s16
+// CHECK:      vqrdmulh.s16 [[REG:q[0-9]+]], q{{[0-9]+}}, q{{[0-9]+}}
+// CHECK-NEXT: vqsub.s16 q{{[0-9]+}}, q{{[0-9]+}}, [[REG]]
+int16x8_t test_vqrdmlshq_s16(int16x8_t a, int16x8_t b, int16x8_t c) {
+  return vqrdmlshq_s16(a, b, c);
+}
+
+// CHECK-LABEL: test_vqrdmlshq_s32
+// CHECK:      vqrdmulh.s32 [[REG:q[0-9]+]], q{{[0-9]+}}, q{{[0-9]+}}
+// CHECK-NEXT: vqsub.s32 q{{[0-9]+}}, q{{[0-9]+}}, [[REG]]
+int32x4_t test_vqrdmlshq_s32(int32x4_t a, int32x4_t b, int32x4_t c) {
+  return vqrdmlshq_s32(a, b, c);
+}
+
+
 // CHECK-LABEL: test_vqrdmulh_lane_s16
 // CHECK: vqrdmulh.s16 d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}[{{[0-9]}}]
 int16x4_t test_vqrdmulh_lane_s16(int16x4_t a, int16x4_t b) {
@@ -6450,6 +6508,64 @@
 }
 
 
+// CHECK-LABEL: test_vqrdmlah_lane_s16
+// CHECK:      vqrdmulh.s16 [[REG:d[0-9]+]], d{{[0-9]+}}, d{{[0-9]+}}[{{[0-9]}}]
+// CHECK-NEXT: vqadd.s16 d{{[0-9]+}}, d{{[0-9]+}}, [[REG]]
+int16x4_t test_vqrdmlah_lane_s16(int16x4_t a, int16x4_t b, int16x4_t c) {
+  return vqrdmlah_lane_s16(a, b, c, 3);
+}
+
+// CHECK-LABEL: test_vqrdmlah_lane_s32
+// CHECK:      vqrdmulh.s32 [[REG:d[0-9]+]], d{{[0-9]+}}, d{{[0-9]+}}[{{[0-9]}}]
+// CHECK-NEXT: vqadd.s32 d{{[0-9]+}}, d{{[0-9]+}}, [[REG]]
+int32x2_t test_vqrdmlah_lane_s32(int32x2_t a, int32x2_t b, int32x2_t c) {
+  return vqrdmlah_lane_s32(a, b, c, 1);
+}
+
+// CHECK-LABEL: test_vqrdmlahq_lane_s16
+// CHECK:      vqrdmulh.s16 [[REG:q[0-9]+]], q{{[0-9]+}}, d{{[0-9]+}}[{{[0-9]}}]
+// CHECK-NEXT: vqadd.s16 q{{[0-9]+}}, q{{[0-9]+}}, [[REG]]
+int16x8_t test_vqrdmlahq_lane_s16(int16x8_t a, int16x8_t b, int16x4_t c) {
+  return vqrdmlahq_lane_s16(a, b, c, 3);
+}
+
+// CHECK-LABEL: test_vqrdmlahq_lane_s32
+// CHECK:      vqrdmulh.s32 [[REG:q[0-9]+]], q{{[0-9]+}}, d{{[0-9]+}}[{{[0-9]}}]
+// CHECK-NEXT: vqadd.s32 q{{[0-9]+}}, q{{[0-9]+}}, [[REG]]
+int32x4_t test_vqrdmlahq_lane_s32(int32x4_t a, int32x4_t b, int32x2_t c) {
+  return vqrdmlahq_lane_s32(a, b, c, 1);
+}
+
+
+// CHECK-LABEL: test_vqrdmlsh_lane_s16
+// CHECK:      vqrdmulh.s16 [[REG:d[0-9]+]], d{{[0-9]+}}, d{{[0-9]+}}[{{[0-9]}}]
+// CHECK-NEXT: vqsub.s16 d{{[0-9]+}}, d{{[0-9]+}}, [[REG]]
+int16x4_t test_vqrdmlsh_lane_s16(int16x4_t a, int16x4_t b, int16x4_t c) {
+  return vqrdmlsh_lane_s16(a, b, c, 3);
+}
+
+// CHECK-LABEL: test_vqrdmlsh_lane_s32
+// CHECK:      vqrdmulh.s32 [[REG:d[0-9]+]], d{{[0-9]+}}, d{{[0-9]+}}[{{[0-9]}}]
+// CHECK-NEXT: vqsub.s32 d{{[0-9]+}}, d{{[0-9]+}}, [[REG]]
+int32x2_t test_vqrdmlsh_lane_s32(int32x2_t a, int32x2_t b, int32x2_t c) {
+  return vqrdmlsh_lane_s32(a, b, c, 1);
+}
+
+// CHECK-LABEL: test_vqrdmlshq_lane_s16
+// CHECK:      vqrdmulh.s16 [[REG:q[0-9]+]], q{{[0-9]+}}, d{{[0-9]+}}[{{[0-9]}}]
+// CHECK-NEXT: vqsub.s16 q{{[0-9]+}}, q{{[0-9]+}}, [[REG]]
+int16x8_t test_vqrdmlshq_lane_s16(int16x8_t a, int16x8_t b, int16x4_t c) {
+  return vqrdmlshq_lane_s16(a, b, c, 3);
+}
+
+// CHECK-LABEL: test_vqrdmlshq_lane_s32
+// CHECK:      vqrdmulh.s32 [[REG:q[0-9]+]], q{{[0-9]+}}, d{{[0-9]+}}[{{[0-9]}}]
+// CHECK-NEXT: vqsub.s32 q{{[0-9]+}}, q{{[0-9]+}}, [[REG]]
+int32x4_t test_vqrdmlshq_lane_s32(int32x4_t a, int32x4_t b, int32x2_t c) {
+  return vqrdmlshq_lane_s32(a, b, c, 1);
+}
+
+
 // CHECK-LABEL: test_vqrdmulh_n_s16
 // CHECK: vqrdmulh.s16 d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}
 int16x4_t test_vqrdmulh_n_s16(int16x4_t a, int16_t b) {
Index: test/CodeGen/aarch64-neon-scalar-x-indexed-elem.c
===================================================================
--- test/CodeGen/aarch64-neon-scalar-x-indexed-elem.c
+++ test/CodeGen/aarch64-neon-scalar-x-indexed-elem.c
@@ -191,6 +191,33 @@
 // CHECK: sqrdmulh {{s[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}}.s[1]
 }
 
+// CHECK-LABEL: test_vqrdmlahh_lane_s16
+int16_t test_vqrdmlahh_lane_s16(int16_t a, int16_t b, int16x4_t c) {
+  return vqrdmlahh_lane_s16(a, b, c, 3);
+// CHECK: sqrdmulh [[REG:h[0-9]+|v[0-9]+.4h]], {{h[0-9]+|v[0-9]+.4h}}, {{v[0-9]+}}.h[3]
+// CHECK: sqadd {{h[0-9]+|v[0-9]+.4h}}, {{h[0-9]+|v[0-9]+.4h}}, [[REG]]
+}
+
+// CHECK-LABEL: test_vqrdmlahs_lane_s32
+int32_t test_vqrdmlahs_lane_s32(int32_t a, int32_t b, int32x2_t c) {
+  return vqrdmlahs_lane_s32(a, b, c, 1);
+// CHECK: sqrdmulh [[REG:s[0-9]+]], {{s[0-9]+}}, {{v[0-9]+}}.s[1]
+// CHECK: sqadd {{s[0-9]+}}, {{s[0-9]+}}, [[REG]]
+}
+
+// CHECK-LABEL: test_vqrdmlshh_lane_s16
+int16_t test_vqrdmlshh_lane_s16(int16_t a, int16_t b, int16x4_t c) {
+  return vqrdmlshh_lane_s16(a, b, c, 3);
+// CHECK: sqrdmulh [[REG:h[0-9]+|v[0-9]+.4h]], {{h[0-9]+|v[0-9]+.4h}}, {{v[0-9]+}}.h[3]
+// CHECK: sqsub {{h[0-9]+|v[0-9]+.4h}}, {{h[0-9]+|v[0-9]+.4h}}, [[REG]]
+}
+
+// CHECK-LABEL: test_vqrdmlshs_lane_s32
+int32_t test_vqrdmlshs_lane_s32(int32_t a, int32_t b, int32x2_t c) {
+  return vqrdmlshs_lane_s32(a, b, c, 1);
+// CHECK: sqrdmulh [[REG:s[0-9]+]], {{s[0-9]+}}, {{v[0-9]+}}.s[1]
+// CHECK: sqsub {{s[0-9]+}}, {{s[0-9]+}}, [[REG]]
+}
 
 // CHECK-LABEL: test_vqrdmulhh_laneq_s16
 int16_t test_vqrdmulhh_laneq_s16(int16_t a, int16x8_t b) {
@@ -205,6 +232,34 @@
 // CHECK: sqrdmulh {{s[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}}.s[3]
 }
 
+// CHECK-LABEL: test_vqrdmlahh_laneq_s16
+int16_t test_vqrdmlahh_laneq_s16(int16_t a, int16_t b, int16x8_t c) {
+  return vqrdmlahh_laneq_s16(a, b, c, 7);
+// CHECK: sqrdmulh [[REG:h[0-9]+|v[0-9]+.4h]], {{h[0-9]+|v[0-9]+.4h}}, {{v[0-9]+}}.h[7]
+// CHECK: sqadd {{h[0-9]+|v[0-9]+.4h}}, {{h[0-9]+|v[0-9]+.4h}}, [[REG]]
+}
+
+// CHECK-LABEL: test_vqrdmlahs_laneq_s32
+int32_t test_vqrdmlahs_laneq_s32(int32_t a, int32_t b, int32x4_t c) {
+  return vqrdmlahs_laneq_s32(a, b, c, 3);
+// CHECK: sqrdmulh [[REG:s[0-9]+]], {{s[0-9]+}}, {{v[0-9]+}}.s[3]
+// CHECK: sqadd {{s[0-9]+}}, {{s[0-9]+}}, [[REG]]
+}
+
+// CHECK-LABEL: test_vqrdmlshh_laneq_s16
+int16_t test_vqrdmlshh_laneq_s16(int16_t a, int16_t b, int16x8_t c) {
+  return vqrdmlshh_laneq_s16(a, b, c, 7);
+// CHECK: sqrdmulh [[REG:h[0-9]+|v[0-9]+.4h]], {{h[0-9]+|v[0-9]+.4h}}, {{v[0-9]+}}.h[7]
+// CHECK: sqsub {{h[0-9]+|v[0-9]+.4h}}, {{h[0-9]+|v[0-9]+.4h}}, [[REG]]
+}
+
+// CHECK-LABEL: test_vqrdmlshs_laneq_s32
+int32_t test_vqrdmlshs_laneq_s32(int32_t a, int32_t b, int32x4_t c) {
+  return vqrdmlshs_laneq_s32(a, b, c, 3);
+// CHECK: sqrdmulh [[REG:s[0-9]+]], {{s[0-9]+}}, {{v[0-9]+}}.s[3]
+// CHECK: sqsub {{s[0-9]+}}, {{s[0-9]+}}, [[REG]]
+}
+
 // CHECK-LABEL: test_vqdmlalh_lane_s16
 int32_t test_vqdmlalh_lane_s16(int32_t a, int16_t b, int16x4_t c) {
   return vqdmlalh_lane_s16(a, b, c, 3);
Index: test/CodeGen/aarch64-neon-intrinsics.c
===================================================================
--- test/CodeGen/aarch64-neon-intrinsics.c
+++ test/CodeGen/aarch64-neon-intrinsics.c
@@ -3133,6 +3133,62 @@
 // CHECK: sqrdmulh {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
 }
 
+int16x4_t test_vqrdmlah_s16(int16x4_t a, int16x4_t b, int16x4_t c) {
+// CHECK-LABEL: test_vqrdmlah_s16
+  return vqrdmlah_s16(a, b, c);
+// CHECK:      sqrdmulh [[REG:v[0-9]+.4h]], {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
+// CHECK-NEXT: sqadd {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, [[REG]]
+}
+
+int32x2_t test_vqrdmlah_s32(int32x2_t a, int32x2_t b, int32x2_t c) {
+// CHECK-LABEL: test_vqrdmlah_s32
+  return vqrdmlah_s32(a, b, c);
+// CHECK:      sqrdmulh [[REG:v[0-9]+.2s]], {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
+// CHECK-NEXT: sqadd {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, [[REG]]
+}
+
+int16x8_t test_vqrdmlahq_s16(int16x8_t a, int16x8_t b, int16x8_t c) {
+// CHECK-LABEL: test_vqrdmlahq_s16
+  return vqrdmlahq_s16(a, b, c);
+// CHECK:      sqrdmulh [[REG:v[0-9]+.8h]], {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
+// CHECK-NEXT: sqadd {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, [[REG]]
+}
+
+int32x4_t test_vqrdmlahq_s32(int32x4_t a, int32x4_t b, int32x4_t c) {
+// CHECK-LABEL: test_vqrdmlahq_s32
+  return vqrdmlahq_s32(a, b, c);
+// CHECK:      sqrdmulh [[REG:v[0-9]+.4s]], {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
+// CHECK-NEXT: sqadd {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, [[REG]]
+}
+
+int16x4_t test_vqrdmlsh_s16(int16x4_t a, int16x4_t b, int16x4_t c) {
+// CHECK-LABEL: test_vqrdmlsh_s16
+  return vqrdmlsh_s16(a, b, c);
+// CHECK:      sqrdmulh [[REG:v[0-9]+.4h]], {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
+// CHECK-NEXT: sqsub {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, [[REG]]
+}
+
+int32x2_t test_vqrdmlsh_s32(int32x2_t a, int32x2_t b, int32x2_t c) {
+// CHECK-LABEL: test_vqrdmlsh_s32
+  return vqrdmlsh_s32(a, b, c);
+// CHECK:      sqrdmulh [[REG:v[0-9]+.2s]], {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
+// CHECK-NEXT: sqsub {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, [[REG]]
+}
+
+int16x8_t test_vqrdmlshq_s16(int16x8_t a, int16x8_t b, int16x8_t c) {
+// CHECK-LABEL: test_vqrdmlshq_s16
+  return vqrdmlshq_s16(a, b, c);
+// CHECK:      sqrdmulh [[REG:v[0-9]+.8h]], {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
+// CHECK-NEXT: sqsub {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, [[REG]]
+}
+
+int32x4_t test_vqrdmlshq_s32(int32x4_t a, int32x4_t b, int32x4_t c) {
+// CHECK-LABEL: test_vqrdmlshq_s32
+  return vqrdmlshq_s32(a, b, c);
+// CHECK:      sqrdmulh [[REG:v[0-9]+.4s]], {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
+// CHECK-NEXT: sqsub {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, [[REG]]
+}
+
 float32x2_t test_vmulx_f32(float32x2_t a, float32x2_t b) {
 // CHECK-LABEL: test_vmulx_f32
   return vmulx_f32(a, b);
@@ -5704,6 +5760,34 @@
 // CHECK: sqrdmulh {{s[0-9]+}}, {{s[0-9]+}}, {{s[0-9]+}}
 }
 
+int16_t test_vqrdmlahh_s16(int16_t a, int16_t b, int16_t c) {
+// CHECK-LABEL: test_vqrdmlahh_s16
+  return vqrdmlahh_s16(a, b, c);
+// CHECK: sqrdmulh [[REG:h[0-9]+|v[0-9]+.4h]], {{h[0-9]+|v[0-9]+.4h}}, {{h[0-9]+|v[0-9]+.4h}}
+// CHECK: sqadd {{h[0-9]+|v[0-9]+.4h}}, {{h[0-9]+|v[0-9]+.4h}}, [[REG]]
+}
+
+int32_t test_vqrdmlahs_s32(int32_t a, int32_t b, int32_t c) {
+// CHECK-LABEL: test_vqrdmlahs_s32
+  return vqrdmlahs_s32(a, b, c);
+// CHECK: sqrdmulh [[REG:s[0-9]+]], {{s[0-9]+}}, {{s[0-9]+}}
+// CHECK: sqadd {{s[0-9]+}}, {{s[0-9]+}}, [[REG]]
+}
+
+int16_t test_vqrdmlshh_s16(int16_t a, int16_t b, int16_t c) {
+// CHECK-LABEL: test_vqrdmlshh_s16
+  return vqrdmlshh_s16(a, b, c);
+// CHECK: sqrdmulh [[REG:h[0-9]+|v[0-9]+.4h]], {{h[0-9]+|v[0-9]+.4h}}, {{h[0-9]+|v[0-9]+.4h}}
+// CHECK: sqsub {{h[0-9]+|v[0-9]+.4h}}, {{h[0-9]+|v[0-9]+.4h}}, [[REG]]
+}
+
+int32_t test_vqrdmlshs_s32(int32_t a, int32_t b, int32_t c) {
+// CHECK-LABEL: test_vqrdmlshs_s32
+  return vqrdmlshs_s32(a, b, c);
+// CHECK: sqrdmulh [[REG:s[0-9]+]], {{s[0-9]+}}, {{s[0-9]+}}
+// CHECK: sqsub {{s[0-9]+}}, {{s[0-9]+}}, [[REG]]
+}
+
 float32_t test_vmulxs_f32(float32_t a, float32_t b) {
 // CHECK-LABEL: test_vmulxs_f32
   return vmulxs_f32(a, b);
Index: test/CodeGen/aarch64-neon-2velem.c
===================================================================
--- test/CodeGen/aarch64-neon-2velem.c
+++ test/CodeGen/aarch64-neon-2velem.c
@@ -725,6 +725,62 @@
   // CHECK: sqrdmulh {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[1]
 }
 
+int16x4_t test_vqrdmlah_lane_s16(int16x4_t a, int16x4_t b, int16x4_t v) {
+  // CHECK-LABEL: test_vqrdmlah_lane_s16
+  return vqrdmlah_lane_s16(a, b, v, 3);
+  // CHECK:      sqrdmulh [[REG:v[0-9]+.4h]], {{v[0-9]+}}.4h, {{v[0-9]+}}.h[3]
+  // CHECK-NEXT: sqadd {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, [[REG]]
+}
+
+int16x8_t test_vqrdmlahq_lane_s16(int16x8_t a, int16x8_t b, int16x4_t v) {
+  // CHECK-LABEL: test_vqrdmlahq_lane_s16
+  return vqrdmlahq_lane_s16(a, b, v, 3);
+  // CHECK:      sqrdmulh [[REG:v[0-9]+.8h]], {{v[0-9]+}}.8h, {{v[0-9]+}}.h[3]
+  // CHECK-NEXT: sqadd {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, [[REG]]
+}
+
+int32x2_t test_vqrdmlah_lane_s32(int32x2_t a, int32x2_t b, int32x2_t v) {
+  // CHECK-LABEL: test_vqrdmlah_lane_s32
+  return vqrdmlah_lane_s32(a, b, v, 1);
+  // CHECK:      sqrdmulh [[REG:v[0-9]+.2s]], {{v[0-9]+}}.2s, {{v[0-9]+}}.s[1]
+  // CHECK-NEXT: sqadd {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, [[REG]]
+}
+
+int32x4_t test_vqrdmlahq_lane_s32(int32x4_t a, int32x4_t b, int32x2_t v) {
+  // CHECK-LABEL: test_vqrdmlahq_lane_s32
+  return vqrdmlahq_lane_s32(a, b, v, 1);
+  // CHECK:      sqrdmulh [[REG:v[0-9]+.4s]], {{v[0-9]+}}.4s, {{v[0-9]+}}.s[1]
+  // CHECK-NEXT: sqadd {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, [[REG]]
+}
+
+int16x4_t test_vqrdmlsh_lane_s16(int16x4_t a, int16x4_t b, int16x4_t v) {
+  // CHECK-LABEL: test_vqrdmlsh_lane_s16
+  return vqrdmlsh_lane_s16(a, b, v, 3);
+  // CHECK:      sqrdmulh [[REG:v[0-9]+.4h]], {{v[0-9]+}}.4h, {{v[0-9]+}}.h[3]
+  // CHECK-NEXT: sqsub {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, [[REG]]
+}
+
+int16x8_t test_vqrdmlshq_lane_s16(int16x8_t a, int16x8_t b, int16x4_t v) {
+  // CHECK-LABEL: test_vqrdmlshq_lane_s16
+  return vqrdmlshq_lane_s16(a, b, v, 3);
+  // CHECK:      sqrdmulh [[REG:v[0-9]+.8h]], {{v[0-9]+}}.8h, {{v[0-9]+}}.h[3]
+  // CHECK-NEXT: sqsub {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, [[REG]]
+}
+
+int32x2_t test_vqrdmlsh_lane_s32(int32x2_t a, int32x2_t b, int32x2_t v) {
+  // CHECK-LABEL: test_vqrdmlsh_lane_s32
+  return vqrdmlsh_lane_s32(a, b, v, 1);
+  // CHECK:      sqrdmulh [[REG:v[0-9]+.2s]], {{v[0-9]+}}.2s, {{v[0-9]+}}.s[1]
+  // CHECK-NEXT: sqsub {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, [[REG]]
+}
+
+int32x4_t test_vqrdmlshq_lane_s32(int32x4_t a, int32x4_t b, int32x2_t v) {
+  // CHECK-LABEL: test_vqrdmlshq_lane_s32
+  return vqrdmlshq_lane_s32(a, b, v, 1);
+  // CHECK:      sqrdmulh [[REG:v[0-9]+.4s]], {{v[0-9]+}}.4s, {{v[0-9]+}}.s[1]
+  // CHECK-NEXT: sqsub {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, [[REG]]
+}
+
 float32x2_t test_vmul_lane_f32(float32x2_t a, float32x2_t v) {
   // CHECK-LABEL: test_vmul_lane_f32
   return vmul_lane_f32(a, v, 1);
@@ -1496,6 +1552,62 @@
   // CHECK: sqrdmulh {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0]
 }
 
+int16x4_t test_vqrdmlah_lane_s16_0(int16x4_t a, int16x4_t b, int16x4_t v) {
+  // CHECK-LABEL: test_vqrdmlah_lane_s16_0
+  return vqrdmlah_lane_s16(a, b, v, 0);
+  // CHECK:      sqrdmulh [[REG:v[0-9]+.4h]], {{v[0-9]+}}.4h, {{v[0-9]+}}.h[0]
+  // CHECK-NEXT: sqadd {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, [[REG]]
+}
+
+int16x8_t test_vqrdmlahq_lane_s16_0(int16x8_t a, int16x8_t b, int16x4_t v) {
+  // CHECK-LABEL: test_vqrdmlahq_lane_s16_0
+  return vqrdmlahq_lane_s16(a, b, v, 0);
+  // CHECK:      sqrdmulh [[REG:v[0-9]+.8h]], {{v[0-9]+}}.8h, {{v[0-9]+}}.h[0]
+  // CHECK-NEXT: sqadd {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, [[REG]]
+}
+
+int32x2_t test_vqrdmlah_lane_s32_0(int32x2_t a, int32x2_t b, int32x2_t v) {
+  // CHECK-LABEL: test_vqrdmlah_lane_s32_0
+  return vqrdmlah_lane_s32(a, b, v, 0);
+  // CHECK:      sqrdmulh [[REG:v[0-9]+.2s]], {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0]
+  // CHECK-NEXT: sqadd {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, [[REG]]
+}
+
+int32x4_t test_vqrdmlahq_lane_s32_0(int32x4_t a, int32x4_t b, int32x2_t v) {
+  // CHECK-LABEL: test_vqrdmlahq_lane_s32_0
+  return vqrdmlahq_lane_s32(a, b, v, 0);
+  // CHECK:      sqrdmulh [[REG:v[0-9]+.4s]], {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0]
+  // CHECK-NEXT: sqadd {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, [[REG]]
+}
+
+int16x4_t test_vqrdmlsh_lane_s16_0(int16x4_t a, int16x4_t b, int16x4_t v) {
+  // CHECK-LABEL: test_vqrdmlsh_lane_s16_0
+  return vqrdmlsh_lane_s16(a, b, v, 0);
+  // CHECK:      sqrdmulh [[REG:v[0-9]+.4h]], {{v[0-9]+}}.4h, {{v[0-9]+}}.h[0]
+  // CHECK-NEXT: sqsub {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, [[REG]]
+}
+
+int16x8_t test_vqrdmlshq_lane_s16_0(int16x8_t a, int16x8_t b, int16x4_t v) {
+  // CHECK-LABEL: test_vqrdmlshq_lane_s16_0
+  return vqrdmlshq_lane_s16(a, b, v, 0);
+  // CHECK:      sqrdmulh [[REG:v[0-9]+.8h]], {{v[0-9]+}}.8h, {{v[0-9]+}}.h[0]
+  // CHECK-NEXT: sqsub {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, [[REG]]
+}
+
+int32x2_t test_vqrdmlsh_lane_s32_0(int32x2_t a, int32x2_t b, int32x2_t v) {
+  // CHECK-LABEL: test_vqrdmlsh_lane_s32_0
+  return vqrdmlsh_lane_s32(a, b, v, 0);
+  // CHECK:      sqrdmulh [[REG:v[0-9]+.2s]], {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0]
+  // CHECK-NEXT: sqsub {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, [[REG]]
+}
+
+int32x4_t test_vqrdmlshq_lane_s32_0(int32x4_t a, int32x4_t b, int32x2_t v) {
+  // CHECK-LABEL: test_vqrdmlshq_lane_s32_0
+  return vqrdmlshq_lane_s32(a, b, v, 0);
+  // CHECK:      sqrdmulh [[REG:v[0-9]+.4s]], {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0]
+  // CHECK-NEXT: sqsub {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, [[REG]]
+}
+
 float32x2_t test_vmul_lane_f32_0(float32x2_t a, float32x2_t v) {
   // CHECK-LABEL: test_vmul_lane_f32_0
   return vmul_lane_f32(a, v, 0);
@@ -2258,6 +2370,62 @@
   // CHECK: sqrdmulh {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0]
 }
 
+int16x4_t test_vqrdmlah_laneq_s16_0(int16x4_t a, int16x4_t b, int16x8_t v) {
+  // CHECK-LABEL: test_vqrdmlah_laneq_s16_0
+  return vqrdmlah_laneq_s16(a, b, v, 0);
+  // CHECK:      sqrdmulh [[REG:v[0-9]+.4h]], {{v[0-9]+}}.4h, {{v[0-9]+}}.h[0]
+  // CHECK-NEXT: sqadd {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, [[REG]]
+}
+
+int16x8_t test_vqrdmlahq_laneq_s16_0(int16x8_t a, int16x8_t b, int16x8_t v) {
+  // CHECK-LABEL: test_vqrdmlahq_laneq_s16_0
+  return vqrdmlahq_laneq_s16(a, b, v, 0);
+  // CHECK:      sqrdmulh [[REG:v[0-9]+.8h]], {{v[0-9]+}}.8h, {{v[0-9]+}}.h[0]
+  // CHECK-NEXT: sqadd {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, [[REG]]
+}
+
+int32x2_t test_vqrdmlah_laneq_s32_0(int32x2_t a, int32x2_t b, int32x4_t v) {
+  // CHECK-LABEL: test_vqrdmlah_laneq_s32_0
+  return vqrdmlah_laneq_s32(a, b, v, 0);
+  // CHECK:      sqrdmulh [[REG:v[0-9]+.2s]], {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0]
+  // CHECK-NEXT: sqadd {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, [[REG]]
+}
+
+int32x4_t test_vqrdmlahq_laneq_s32_0(int32x4_t a, int32x4_t b, int32x4_t v) {
+  // CHECK-LABEL: test_vqrdmlahq_laneq_s32_0
+  return vqrdmlahq_laneq_s32(a, b, v, 0);
+  // CHECK:      sqrdmulh [[REG:v[0-9]+.4s]], {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0]
+  // CHECK-NEXT: sqadd {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, [[REG]]
+}
+
+int16x4_t test_vqrdmlsh_laneq_s16_0(int16x4_t a, int16x4_t b, int16x8_t v) {
+  // CHECK-LABEL: test_vqrdmlsh_laneq_s16_0
+  return vqrdmlsh_laneq_s16(a, b, v, 0);
+  // CHECK:      sqrdmulh [[REG:v[0-9]+.4h]], {{v[0-9]+}}.4h, {{v[0-9]+}}.h[0]
+  // CHECK-NEXT: sqsub {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, [[REG]]
+}
+
+int16x8_t test_vqrdmlshq_laneq_s16_0(int16x8_t a, int16x8_t b, int16x8_t v) {
+  // CHECK-LABEL: test_vqrdmlshq_laneq_s16_0
+  return vqrdmlshq_laneq_s16(a, b, v, 0);
+  // CHECK:      sqrdmulh [[REG:v[0-9]+.8h]], {{v[0-9]+}}.8h, {{v[0-9]+}}.h[0]
+  // CHECK-NEXT: sqsub {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, [[REG]]
+}
+
+int32x2_t test_vqrdmlsh_laneq_s32_0(int32x2_t a, int32x2_t b, int32x4_t v) {
+  // CHECK-LABEL: test_vqrdmlsh_laneq_s32_0
+  return vqrdmlsh_laneq_s32(a, b, v, 0);
+  // CHECK:      sqrdmulh [[REG:v[0-9]+.2s]], {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0]
+  // CHECK-NEXT: sqsub {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, [[REG]]
+}
+
+int32x4_t test_vqrdmlshq_laneq_s32_0(int32x4_t a, int32x4_t b, int32x4_t v) {
+  // CHECK-LABEL: test_vqrdmlshq_laneq_s32_0
+  return vqrdmlshq_laneq_s32(a, b, v, 0);
+  // CHECK:      sqrdmulh [[REG:v[0-9]+.4s]], {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0]
+  // CHECK-NEXT: sqsub {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, [[REG]]
+}
+
 uint16x4_t test_vmla_lane_u16(uint16x4_t a, uint16x4_t b, uint16x4_t v) {
   // CHECK-LABEL: test_vmla_lane_u16
   return vmla_lane_u16(a, b, v, 3);
@@ -2450,3 +2618,59 @@
   // CHECK: sqrdmulh {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[3]
 }
 
+int16x4_t test_vqrdmlah_laneq_s16(int16x4_t a, int16x4_t b, int16x8_t v) {
+  // CHECK-LABEL: test_vqrdmlah_laneq_s16
+  return vqrdmlah_laneq_s16(a, b, v, 7);
+  // CHECK:      sqrdmulh [[REG:v[0-9]+.4h]], {{v[0-9]+}}.4h, {{v[0-9]+}}.h[7]
+  // CHECK-NEXT: sqadd {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, [[REG]]
+}
+
+int16x8_t test_vqrdmlahq_laneq_s16(int16x8_t a, int16x8_t b, int16x8_t v) {
+  // CHECK-LABEL: test_vqrdmlahq_laneq_s16
+  return vqrdmlahq_laneq_s16(a, b, v, 7);
+  // CHECK:      sqrdmulh [[REG:v[0-9]+.8h]], {{v[0-9]+}}.8h, {{v[0-9]+}}.h[7]
+  // CHECK-NEXT: sqadd {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, [[REG]]
+}
+
+int32x2_t test_vqrdmlah_laneq_s32(int32x2_t a, int32x2_t b, int32x4_t v) {
+  // CHECK-LABEL: test_vqrdmlah_laneq_s32
+  return vqrdmlah_laneq_s32(a, b, v, 3);
+  // CHECK:      sqrdmulh [[REG:v[0-9]+.2s]], {{v[0-9]+}}.2s, {{v[0-9]+}}.s[3]
+  // CHECK-NEXT: sqadd {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, [[REG]]
+}
+
+int32x4_t test_vqrdmlahq_laneq_s32(int32x4_t a, int32x4_t b, int32x4_t v) {
+  // CHECK-LABEL: test_vqrdmlahq_laneq_s32
+  return vqrdmlahq_laneq_s32(a, b, v, 3);
+  // CHECK:      sqrdmulh [[REG:v[0-9]+.4s]], {{v[0-9]+}}.4s, {{v[0-9]+}}.s[3]
+  // CHECK-NEXT: sqadd {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, [[REG]]
+}
+
+int16x4_t test_vqrdmlsh_laneq_s16(int16x4_t a, int16x4_t b, int16x8_t v) {
+  // CHECK-LABEL: test_vqrdmlsh_laneq_s16
+  return vqrdmlsh_laneq_s16(a, b, v, 7);
+  // CHECK:      sqrdmulh [[REG:v[0-9]+.4h]], {{v[0-9]+}}.4h, {{v[0-9]+}}.h[7]
+  // CHECK-NEXT: sqsub {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, [[REG]]
+}
+
+int16x8_t test_vqrdmlshq_laneq_s16(int16x8_t a, int16x8_t b, int16x8_t v) {
+  // CHECK-LABEL: test_vqrdmlshq_laneq_s16
+  return vqrdmlshq_laneq_s16(a, b, v, 7);
+  // CHECK:      sqrdmulh [[REG:v[0-9]+.8h]], {{v[0-9]+}}.8h, {{v[0-9]+}}.h[7]
+  // CHECK-NEXT: sqsub {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, [[REG]]
+}
+
+int32x2_t test_vqrdmlsh_laneq_s32(int32x2_t a, int32x2_t b, int32x4_t v) {
+  // CHECK-LABEL: test_vqrdmlsh_laneq_s32
+  return vqrdmlsh_laneq_s32(a, b, v, 3);
+  // CHECK:      sqrdmulh [[REG:v[0-9]+.2s]], {{v[0-9]+}}.2s, {{v[0-9]+}}.s[3]
+  // CHECK-NEXT: sqsub {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, [[REG]]
+}
+
+int32x4_t test_vqrdmlshq_laneq_s32(int32x4_t a, int32x4_t b, int32x4_t v) {
+  // CHECK-LABEL: test_vqrdmlshq_laneq_s32
+  return vqrdmlshq_laneq_s32(a, b, v, 3);
+  // CHECK:      sqrdmulh [[REG:v[0-9]+.4s]], {{v[0-9]+}}.4s, {{v[0-9]+}}.s[3]
+  // CHECK-NEXT: sqsub {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, [[REG]]
+}
+
Index: lib/Basic/Targets.cpp
===================================================================
--- lib/Basic/Targets.cpp
+++ lib/Basic/Targets.cpp
@@ -4869,6 +4869,9 @@
 
     if (Opts.UnsafeFPMath)
       Builder.defineMacro("__ARM_FP_FAST", "1");
+
+    if (ArchKind == llvm::ARM::AK_ARMV8_1A)
+      Builder.defineMacro("__ARM_FEATURE_QRDMX", "1");
   }
 
   ArrayRef<Builtin::Info> getTargetBuiltins() const override {
@@ -5250,6 +5253,7 @@
   unsigned CRC;
   unsigned Crypto;
   unsigned Unaligned;
+  unsigned V8_1A;
 
   static const Builtin::Info BuiltinInfo[];
 
@@ -5372,6 +5376,9 @@
     if (Unaligned)
       Builder.defineMacro("__ARM_FEATURE_UNALIGNED", "1");
 
+    if (V8_1A)
+      Builder.defineMacro("__ARM_FEATURE_QRDMX", "1");
+
     // All of the __sync_(bool|val)_compare_and_swap_(1|2|4|8) builtins work.
     Builder.defineMacro("__GCC_HAVE_SYNC_COMPARE_AND_SWAP_1");
     Builder.defineMacro("__GCC_HAVE_SYNC_COMPARE_AND_SWAP_2");
@@ -5397,6 +5404,7 @@
     CRC = 0;
     Crypto = 0;
     Unaligned = 1;
+    V8_1A = 0;
 
     for (const auto &Feature : Features) {
       if (Feature == "+neon")
@@ -5407,6 +5415,8 @@
         Crypto = 1;
       if (Feature == "+strict-align")
         Unaligned = 0;
+      if (Feature == "+v8.1a")
+        V8_1A = 1;
     }
 
     setDataLayoutString();
Index: include/clang/Basic/arm_neon.td
===================================================================
--- include/clang/Basic/arm_neon.td
+++ include/clang/Basic/arm_neon.td
@@ -373,6 +373,10 @@
                                               (splat $p2, $p3))>;
 def OP_QDMULH_LN : Op<(call "vqdmulh", $p0, (splat $p1, $p2))>;
 def OP_QRDMULH_LN : Op<(call "vqrdmulh", $p0, (splat $p1, $p2))>;
+def OP_QRDMLAH : Op<(call "vqadd", $p0, (call "vqrdmulh", $p1, $p2))>;
+def OP_QRDMLSH : Op<(call "vqsub", $p0, (call "vqrdmulh", $p1, $p2))>;
+def OP_QRDMLAH_LN : Op<(call "vqadd", $p0, (call "vqrdmulh", $p1, (splat $p2, $p3)))>;
+def OP_QRDMLSH_LN : Op<(call "vqsub", $p0, (call "vqrdmulh", $p1, (splat $p2, $p3)))>;
 def OP_FMS_LN   : Op<(call "vfma_lane", $p0, $p1, (op "-", $p2), $p3)>;
 def OP_FMS_LNQ  : Op<(call "vfma_laneq", $p0, $p1, (op "-", $p2), $p3)>;
 def OP_TRN1     : Op<(shuffle $p0, $p1, (interleave (decimate mask0, 2),
@@ -473,6 +477,11 @@
 def OP_SCALAR_QDMULH_LN : ScalarMulOp<"vqdmulh">;
 def OP_SCALAR_QRDMULH_LN : ScalarMulOp<"vqrdmulh">;
 
+def OP_SCALAR_QRDMLAH_LN : Op<(call "vqadd", $p0, (call "vqrdmulh", $p1,
+                              (call "vget_lane", $p2, $p3)))>;
+def OP_SCALAR_QRDMLSH_LN : Op<(call "vqsub", $p0, (call "vqrdmulh", $p1,
+                              (call "vget_lane", $p2, $p3)))>;
+
 def OP_SCALAR_HALF_GET_LN : Op<(bitcast "float16_t",
                                    (call "vget_lane",
                                          (bitcast "int16x4_t", $p0), $p1))>;
@@ -514,6 +523,8 @@
 def VMLSL    : SOpInst<"vmlsl", "wwdd", "csiUcUsUi", OP_MLSL>;
 def VQDMULH  : SInst<"vqdmulh", "ddd", "siQsQi">;
 def VQRDMULH : SInst<"vqrdmulh", "ddd", "siQsQi">;
+def VQRDMLAH : SOpInst<"vqrdmlah", "dddd", "siQsQi", OP_QRDMLAH>;
+def VQRDMLSH : SOpInst<"vqrdmlsh", "dddd", "siQsQi", OP_QRDMLSH>;
 def VQDMLAL  : SInst<"vqdmlal", "wwdd", "si">;
 def VQDMLSL  : SInst<"vqdmlsl", "wwdd", "si">;
 def VMULL    : SInst<"vmull", "wdd", "csiUcUsUiPc">;
@@ -741,6 +752,8 @@
 def VQDMULH_LANE  : SOpInst<"vqdmulh_lane", "ddgi", "siQsQi", OP_QDMULH_LN>;
 def VQRDMULH_N    : SInst<"vqrdmulh_n", "dda", "siQsQi">;
 def VQRDMULH_LANE : SOpInst<"vqrdmulh_lane", "ddgi", "siQsQi", OP_QRDMULH_LN>;
+def VQRDMLAH_LANE : SOpInst<"vqrdmlah_lane", "dddgi", "siQsQi", OP_QRDMLAH_LN>;
+def VQRDMLSH_LANE : SOpInst<"vqrdmlsh_lane", "dddgi", "siQsQi", OP_QRDMLSH_LN>;
 def VMLA_N        : IOpInst<"vmla_n", "ddda", "siUsUifQsQiQUsQUiQf", OP_MLA_N>;
 def VMLAL_N       : SOpInst<"vmlal_n", "wwda", "siUsUi", OP_MLAL_N>;
 def VQDMLAL_N     : SInst<"vqdmlal_n", "wwda", "si">;
@@ -1159,6 +1172,8 @@
 
 def VQDMULH_LANEQ  : SOpInst<"vqdmulh_laneq", "ddji", "siQsQi", OP_QDMULH_LN>;
 def VQRDMULH_LANEQ : SOpInst<"vqrdmulh_laneq", "ddji", "siQsQi", OP_QRDMULH_LN>;
+def VQRDMLAH_LANEQ : SOpInst<"vqrdmlah_laneq", "dddji", "siQsQi", OP_QRDMLAH_LN>;
+def VQRDMLSH_LANEQ : SOpInst<"vqrdmlsh_laneq", "dddji", "siQsQi", OP_QRDMLSH_LN>;
 
 // Note: d type implemented by SCALAR_VMULX_LANE
 def VMULX_LANE : IOpInst<"vmulx_lane", "ddgi", "fQfQd", OP_MULX_LN>;
@@ -1406,6 +1421,14 @@
 def SCALAR_SQRDMULH : SInst<"vqrdmulh", "sss", "SsSi">;
 
 ////////////////////////////////////////////////////////////////////////////////
+// Signed Saturating Rounding Doubling Multiply Accumulate Returning High Half
+def SCALAR_SQRDMLAH : SOpInst<"vqrdmlah", "ssss", "SsSi", OP_QRDMLAH>;
+
+////////////////////////////////////////////////////////////////////////////////
+// Signed Saturating Rounding Doubling Multiply Subtract Returning High Half
+def SCALAR_SQRDMLSH : SOpInst<"vqrdmlsh", "ssss", "SsSi", OP_QRDMLSH>;
+
+////////////////////////////////////////////////////////////////////////////////
 // Scalar Floating-point Multiply Extended
 def SCALAR_FMULX : IInst<"vmulx", "sss", "SfSd">;
 
@@ -1606,6 +1629,14 @@
 def SCALAR_SQRDMULH_LANE : SOpInst<"vqrdmulh_lane", "ssdi", "SsSi", OP_SCALAR_QRDMULH_LN>;
 def SCALAR_SQRDMULH_LANEQ : SOpInst<"vqrdmulh_laneq", "ssji", "SsSi", OP_SCALAR_QRDMULH_LN>;
 
+// Signed Saturating Rounding Doubling Multiply Accumulate Returning High Half
+def SCALAR_SQRDMLAH_LANE : SOpInst<"vqrdmlah_lane", "sssdi", "SsSi", OP_SCALAR_QRDMLAH_LN>;
+def SCALAR_SQRDMLAH_LANEQ : SOpInst<"vqrdmlah_laneq", "sssji", "SsSi", OP_SCALAR_QRDMLAH_LN>;
+
+// Signed Saturating Rounding Doubling Multiply Subtract Returning High Half
+def SCALAR_SQRDMLSH_LANE : SOpInst<"vqrdmlsh_lane", "sssdi", "SsSi", OP_SCALAR_QRDMLSH_LN>;
+def SCALAR_SQRDMLSH_LANEQ : SOpInst<"vqrdmlsh_laneq", "sssji", "SsSi", OP_SCALAR_QRDMLSH_LN>;
+
 def SCALAR_VDUP_LANE : IInst<"vdup_lane", "sdi", "ScSsSiSlSfSdSUcSUsSUiSUlSPcSPs">;
 def SCALAR_VDUP_LANEQ : IInst<"vdup_laneq", "sji", "ScSsSiSlSfSdSUcSUsSUiSUlSPcSPs">;
 }

_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

[PATCH] D14982: ARM v8.1a adds Advanced SIMD instructions for Rounding Double Multiply Add/Subtract.

Reply via email to