Author: Alban Bridonneau
Date: 2022-05-09T10:17:57Z
New Revision: fef81131d92ef71f43640667b6fc88b241aebe50
URL: https://github.com/llvm/llvm-project/commit/fef81131d92ef71f43640667b6fc88b241aebe50
DIFF: https://github.com/llvm/llvm-project/commit/fef81131d92ef71f43640667b6fc88b241aebe50.diff

LOG: [SVE] Optimize new cases for lowerConvertToSVBool

Converts to SVBool are already treated as a nop when they convert an
operand produced by a ptrue or a cmp, because those instructions zero
the extra predicate lanes by construction. This patch adds two similar
cases:
- The wide cmps, which were not directly recognized by the check for
  the other forms of cmp.
- Splats of 1, which are generated as ptrue and as such also zero the
  extra predicate lanes.

Reviewed By: paulwalker-arm, peterwaller-arm

Differential Revision: https://reviews.llvm.org/D124908

Added: 
    llvm/test/CodeGen/AArch64/sve-intrinsics-reinterpret-no-streaming.ll

Modified: 
    llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
    llvm/test/CodeGen/AArch64/sve-intrinsics-reinterpret.ll
    llvm/test/CodeGen/AArch64/sve-ptest-removal-cmpeq.ll
    llvm/test/CodeGen/AArch64/sve-ptest-removal-cmpge.ll
    llvm/test/CodeGen/AArch64/sve-ptest-removal-cmpgt.ll
    llvm/test/CodeGen/AArch64/sve-ptest-removal-cmphi.ll
    llvm/test/CodeGen/AArch64/sve-ptest-removal-cmphs.ll
    llvm/test/CodeGen/AArch64/sve-ptest-removal-cmple.ll
    llvm/test/CodeGen/AArch64/sve-ptest-removal-cmplo.ll
    llvm/test/CodeGen/AArch64/sve-ptest-removal-cmpls.ll
    llvm/test/CodeGen/AArch64/sve-ptest-removal-cmplt.ll
    llvm/test/CodeGen/AArch64/sve-ptest-removal-cmpne.ll
    llvm/test/CodeGen/AArch64/sve-vector-splat.ll

Removed: 
    


################################################################################
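For illustration, here is a minimal IR sketch of the splat case,
distilled from the new reinterpret_bool_from_splat test added below
(the function name @example is made up for this note). The all-ones
splat lowers to a ptrue, which already zeroes the predicate lanes
beyond the <vscale x 2 x i1> input, so the convert.to.svbool no longer
goes through the generic path that masks the result with an extra
ptrue/and; with llc -mtriple=aarch64-linux-gnu -mattr=+sve the whole
function should compile to a single ptrue p0.d followed by ret:

  ; All-ones splat of a <vscale x 2 x i1> predicate; this is lowered to a ptrue.
  define <vscale x 16 x i1> @example() {
    %ins = insertelement <vscale x 2 x i1> undef, i1 1, i32 0
    %splat = shufflevector <vscale x 2 x i1> %ins, <vscale x 2 x i1> undef, <vscale x 2 x i32> zeroinitializer
    ; The extra lanes are already zero, so this convert is now folded to a nop.
    %out = call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv2i1(<vscale x 2 x i1> %splat)
    ret <vscale x 16 x i1> %out
  }

  declare <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv2i1(<vscale x 2 x i1>)

The wide-cmp case folds the same way: a convert.to.svbool wrapped around,
e.g., @llvm.aarch64.sve.cmpeq.wide.nxv8i16 is dropped, as exercised by the
new cmpXX_wide_nxv8i16/nxv4i32 tests in the diff that follows.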
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index 9d7a8e67374d..dc99ed0b4066 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -4181,10 +4181,26 @@ static SDValue lowerConvertToSVBool(SDValue Op, SelectionDAG &DAG) {
   case AArch64ISD::SETCC_MERGE_ZERO:
     return Reinterpret;
   case ISD::INTRINSIC_WO_CHAIN:
-    if (InOp.getConstantOperandVal(0) == Intrinsic::aarch64_sve_ptrue)
+    switch (InOp.getConstantOperandVal(0)) {
+    case Intrinsic::aarch64_sve_ptrue:
+    case Intrinsic::aarch64_sve_cmpeq_wide:
+    case Intrinsic::aarch64_sve_cmpne_wide:
+    case Intrinsic::aarch64_sve_cmpge_wide:
+    case Intrinsic::aarch64_sve_cmpgt_wide:
+    case Intrinsic::aarch64_sve_cmplt_wide:
+    case Intrinsic::aarch64_sve_cmple_wide:
+    case Intrinsic::aarch64_sve_cmphs_wide:
+    case Intrinsic::aarch64_sve_cmphi_wide:
+    case Intrinsic::aarch64_sve_cmplo_wide:
+    case Intrinsic::aarch64_sve_cmpls_wide:
       return Reinterpret;
+    }
   }
 
+  // Splat vectors of 1 will generate ptrue instructions
+  if (ISD::isConstantSplatVectorAllOnes(InOp.getNode()))
+    return Reinterpret;
+
   // Otherwise, zero the newly introduced lanes.
   SDValue Mask = getPTrue(DAG, DL, InVT, AArch64SVEPredPattern::all);
   SDValue MaskReinterpret =
diff --git a/llvm/test/CodeGen/AArch64/sve-intrinsics-reinterpret-no-streaming.ll b/llvm/test/CodeGen/AArch64/sve-intrinsics-reinterpret-no-streaming.ll
new file mode 100644
index 000000000000..bc5cdb48fef6
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/sve-intrinsics-reinterpret-no-streaming.ll
@@ -0,0 +1,19 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve < %s | FileCheck %s
+
+; This test should belong in sve-intrinsics-reinterpret.ll, but uses types
+; that are invalid with sve-streaming
+
+define <vscale x 16 x i1> @reinterpret_bool_from_splat() {
+; CHECK-LABEL: reinterpret_bool_from_splat:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ptrue p0.d
+; CHECK-NEXT:    ret
+  %ins = insertelement <vscale x 2 x i1> undef, i1 1, i32 0
+  %splat = shufflevector <vscale x 2 x i1> %ins, <vscale x 2 x i1> undef, <vscale x 2 x i32> zeroinitializer
+  %out = call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv2i1(<vscale x 2 x i1> %splat)
+  ret <vscale x 16 x i1> %out
+}
+
+declare <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv2i1(<vscale x 2 x i1>)
+
diff --git a/llvm/test/CodeGen/AArch64/sve-intrinsics-reinterpret.ll b/llvm/test/CodeGen/AArch64/sve-intrinsics-reinterpret.ll
index 37b6c80c19a0..3e9a21da0eb7 100644
--- a/llvm/test/CodeGen/AArch64/sve-intrinsics-reinterpret.ll
+++ b/llvm/test/CodeGen/AArch64/sve-intrinsics-reinterpret.ll
@@ -102,7 +102,22 @@ define <vscale x 16 x i1> @reinterpret_cmpgt(<vscale x 8 x i1> %p, <vscale x 8 x
   ret <vscale x 16 x i1> %2
 }
 
+; The first reinterpret should prevent the second one from being simplified as a nop
+define <vscale x 16 x i1> @chained_reinterpret() {
+; CHECK-LABEL: chained_reinterpret:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ptrue p0.b
+; CHECK-NEXT:    ptrue p1.d
+; CHECK-NEXT:    and p0.b, p0/z, p0.b, p1.b
+; CHECK-NEXT:    ret
+  %in = tail call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31)
+  %cast2 = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %in)
+  %out = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv2i1(<vscale x 2 x i1> %cast2)
+  ret <vscale x 16 x i1> %out
+}
+
 declare <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 immarg)
+declare <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 immarg)
 declare <vscale x 8 x i1> @llvm.aarch64.sve.cmpgt.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 8 x i16>)
 declare <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv16i1(<vscale x 16 x i1>)
diff --git a/llvm/test/CodeGen/AArch64/sve-ptest-removal-cmpeq.ll b/llvm/test/CodeGen/AArch64/sve-ptest-removal-cmpeq.ll
index 157a73b4f06e..75b518265d40 100644
--- a/llvm/test/CodeGen/AArch64/sve-ptest-removal-cmpeq.ll
+++ b/llvm/test/CodeGen/AArch64/sve-ptest-removal-cmpeq.ll
@@ -46,9 +46,43 @@ define i32 @cmpeq_wide_nxv16i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <v
   ret i32 %conv
 }
 
+define i32 @cmpeq_wide_nxv8i16(<vscale x 16 x i1> %pg, <vscale x 8 x i16> %a, <vscale x 2 x i64> %b) {
+; CHECK-LABEL: cmpeq_wide_nxv8i16:
+; CHECK: cmpeq p0.h, p0/z, z0.h, z1.d
+; CHECK-NEXT: cset w0, ne
+; CHECK-NEXT: ret
+  %1 = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> %pg)
+  %2 = tail call <vscale x 8 x i1> @llvm.aarch64.sve.cmpeq.wide.nxv8i16(<vscale x 8 x i1> %1, <vscale x 8 x i16> %a, <vscale x 2 x i64> %b)
+  %3 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv8i1(<vscale x 8 x i1> %2)
+  %4 = tail call i1 @llvm.aarch64.sve.ptest.any(<vscale x 16 x i1> %pg, <vscale x 16 x i1> %3)
+  %conv = zext i1 %4 to i32
+  ret i32 %conv
+}
+
+define i32 @cmpeq_wide_nxv4i32(<vscale x 16 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 2 x i64> %b) {
+; CHECK-LABEL: cmpeq_wide_nxv4i32:
+; CHECK: cmpeq p0.s, p0/z, z0.s, z1.d
+; CHECK-NEXT: cset w0, ne
+; CHECK-NEXT: ret
+  %1 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg)
+  %2 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.cmpeq.wide.nxv4i32(<vscale x 4 x i1> %1, <vscale x 4 x i32> %a, <vscale x 2 x i64> %b)
+  %3 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv4i1(<vscale x 4 x i1> %2)
+  %4 = tail call i1 @llvm.aarch64.sve.ptest.any(<vscale x 16 x i1> %pg, <vscale x 16 x i1> %3)
+  %conv = zext i1 %4 to i32
+  ret i32 %conv
+}
+
 declare <vscale x 16 x i1> @llvm.aarch64.sve.cmpeq.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 16 x i8>)
 declare <vscale x 16 x i1> @llvm.aarch64.sve.cmpeq.wide.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 2 x i64>)
+declare <vscale x 8 x i1> @llvm.aarch64.sve.cmpeq.wide.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 2 x i64>)
+declare <vscale x 4 x i1> @llvm.aarch64.sve.cmpeq.wide.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 2 x i64>)
 
 declare i1 @llvm.aarch64.sve.ptest.any(<vscale x 16 x i1>, <vscale x 16 x i1>)
 declare <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32)
+
+declare <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv8i1(<vscale x 8 x i1>)
+declare <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv4i1(<vscale x 4 x i1>)
+
+declare <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1>)
+declare <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1>)
diff --git a/llvm/test/CodeGen/AArch64/sve-ptest-removal-cmpge.ll b/llvm/test/CodeGen/AArch64/sve-ptest-removal-cmpge.ll
index 6363c3deeba1..25ab93ee4bf6 100644
--- a/llvm/test/CodeGen/AArch64/sve-ptest-removal-cmpge.ll
+++ b/llvm/test/CodeGen/AArch64/sve-ptest-removal-cmpge.ll
@@ -46,9 +46,43 @@ define i32 @cmpge_wide_nxv16i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <v
   ret i32 %conv
 }
 
+define i32 @cmpge_wide_nxv8i16(<vscale x 16 x i1> %pg, <vscale x 8 x i16> %a, <vscale x 2 x i64> %b) {
+; CHECK-LABEL: cmpge_wide_nxv8i16:
+; CHECK: cmpge p0.h, p0/z, z0.h, z1.d
+; CHECK-NEXT: cset w0, ne
+; CHECK-NEXT: ret
+  %1 = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> %pg)
+  %2 = tail call <vscale x 8 x i1> @llvm.aarch64.sve.cmpge.wide.nxv8i16(<vscale x 8 x i1> %1, <vscale x 8 x i16> %a, <vscale x 2 x i64> %b)
+  %3 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv8i1(<vscale x 8 x i1> %2)
+  %4 = tail call i1 @llvm.aarch64.sve.ptest.any(<vscale x 16 x i1> %pg, <vscale x 16 x i1> %3)
+  %conv = zext i1 %4 to i32
+  ret i32 %conv
+}
+
+define i32 @cmpge_wide_nxv4i32(<vscale x 16 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 2 x i64> %b) {
+; CHECK-LABEL: cmpge_wide_nxv4i32:
+; CHECK: cmpge p0.s, p0/z, z0.s, z1.d
+; CHECK-NEXT: cset w0, ne
+; CHECK-NEXT: ret
+  %1 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg)
+  %2 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.cmpge.wide.nxv4i32(<vscale x 4 x i1> %1, <vscale x 4 x i32> %a, <vscale x 2 x i64> %b)
+  %3 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv4i1(<vscale x 4 x i1> %2)
+  %4 = tail call i1 @llvm.aarch64.sve.ptest.any(<vscale x 16 x i1> %pg, <vscale x 16 x i1> %3)
+  %conv = zext i1 %4 to i32
+  ret i32 %conv
+}
+
 declare <vscale x 16 x i1> @llvm.aarch64.sve.cmpge.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 16 x i8>)
 declare <vscale x 16 x i1> @llvm.aarch64.sve.cmpge.wide.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 2 x i64>)
+declare <vscale x 8 x i1> @llvm.aarch64.sve.cmpge.wide.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 2 x i64>)
+declare <vscale x 4 x i1> @llvm.aarch64.sve.cmpge.wide.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 2 x i64>)
 
 declare i1 @llvm.aarch64.sve.ptest.any(<vscale x 16 x i1>, <vscale x 16 x i1>)
 declare <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32)
+
+declare <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv8i1(<vscale x 8 x i1>)
+declare <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv4i1(<vscale x 4 x i1>)
+
+declare <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1>)
+declare <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1>)
diff --git a/llvm/test/CodeGen/AArch64/sve-ptest-removal-cmpgt.ll b/llvm/test/CodeGen/AArch64/sve-ptest-removal-cmpgt.ll
index 4d3c7e04f696..8a565c031205 100644
--- a/llvm/test/CodeGen/AArch64/sve-ptest-removal-cmpgt.ll
+++ b/llvm/test/CodeGen/AArch64/sve-ptest-removal-cmpgt.ll
@@ -46,9 +46,43 @@ define i32 @cmpgt_wide_nxv16i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <v
   ret i32 %conv
 }
 
+define i32 @cmpgt_wide_nxv8i16(<vscale x 16 x i1> %pg, <vscale x 8 x i16> %a, <vscale x 2 x i64> %b) {
+; CHECK-LABEL: cmpgt_wide_nxv8i16:
+; CHECK: cmpgt p0.h, p0/z, z0.h, z1.d
+; CHECK-NEXT: cset w0, ne
+; CHECK-NEXT: ret
+  %1 = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> %pg)
+  %2 = tail call <vscale x 8 x i1> @llvm.aarch64.sve.cmpgt.wide.nxv8i16(<vscale x 8 x i1> %1, <vscale x 8 x i16> %a, <vscale x 2 x i64> %b)
+  %3 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv8i1(<vscale x 8 x i1> %2)
+  %4 = tail call i1 @llvm.aarch64.sve.ptest.any(<vscale x 16 x i1> %pg, <vscale x 16 x i1> %3)
+  %conv = zext i1 %4 to i32
+  ret i32 %conv
+}
+
+define i32 @cmpgt_wide_nxv4i32(<vscale x 16 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 2 x i64> %b) {
+; CHECK-LABEL: cmpgt_wide_nxv4i32:
+; CHECK: cmpgt p0.s, p0/z, z0.s, z1.d
+; CHECK-NEXT: cset w0, ne
+; CHECK-NEXT: ret
+  %1 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg)
+  %2 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.cmpgt.wide.nxv4i32(<vscale x 4 x i1> %1, <vscale x 4 x i32> %a, <vscale x 2 x i64> %b)
+  %3 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv4i1(<vscale x 4 x i1> %2)
+  %4 = tail call i1 @llvm.aarch64.sve.ptest.any(<vscale x 16 x i1> %pg, <vscale x 16 x i1> %3)
+  %conv = zext i1 %4 to i32
+  ret i32 %conv
+}
+
 declare <vscale x 16 x i1> @llvm.aarch64.sve.cmpgt.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 16 x i8>)
 declare <vscale x 16 x i1> @llvm.aarch64.sve.cmpgt.wide.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 2 x i64>)
+declare <vscale x 8 x i1> @llvm.aarch64.sve.cmpgt.wide.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 2 x i64>)
+declare <vscale x 4 x i1> @llvm.aarch64.sve.cmpgt.wide.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 2 x i64>)
 
 declare i1 @llvm.aarch64.sve.ptest.any(<vscale x 16 x i1>, <vscale x 16 x i1>)
 declare <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32)
+
+declare <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv8i1(<vscale x 8 x i1>)
+declare <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv4i1(<vscale x 4 x i1>)
+
+declare <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1>)
+declare <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1>)
diff --git a/llvm/test/CodeGen/AArch64/sve-ptest-removal-cmphi.ll b/llvm/test/CodeGen/AArch64/sve-ptest-removal-cmphi.ll
index 5bba0b48cb30..b749e2421a55 100644
--- a/llvm/test/CodeGen/AArch64/sve-ptest-removal-cmphi.ll
+++ b/llvm/test/CodeGen/AArch64/sve-ptest-removal-cmphi.ll
@@ -46,9 +46,43 @@ define i32 @cmphi_wide_nxv16i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <v
   ret i32 %conv
 }
 
+define i32 @cmphi_wide_nxv8i16(<vscale x 16 x i1> %pg, <vscale x 8 x i16> %a, <vscale x 2 x i64> %b) {
+; CHECK-LABEL: cmphi_wide_nxv8i16:
+; CHECK: cmphi p0.h, p0/z, z0.h, z1.d
+; CHECK-NEXT: cset w0, ne
+; CHECK-NEXT: ret
+  %1 = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> %pg)
+  %2 = tail call <vscale x 8 x i1> @llvm.aarch64.sve.cmphi.wide.nxv8i16(<vscale x 8 x i1> %1, <vscale x 8 x i16> %a, <vscale x 2 x i64> %b)
+  %3 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv8i1(<vscale x 8 x i1> %2)
+  %4 = tail call i1 @llvm.aarch64.sve.ptest.any(<vscale x 16 x i1> %pg, <vscale x 16 x i1> %3)
+  %conv = zext i1 %4 to i32
+  ret i32 %conv
+}
+
+define i32 @cmphi_wide_nxv4i32(<vscale x 16 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 2 x i64> %b) {
+; CHECK-LABEL: cmphi_wide_nxv4i32:
+; CHECK: cmphi p0.s, p0/z, z0.s, z1.d
+; CHECK-NEXT: cset w0, ne
+; CHECK-NEXT: ret
+  %1 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg)
+  %2 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.cmphi.wide.nxv4i32(<vscale x 4 x i1> %1, <vscale x 4 x i32> %a, <vscale x 2 x i64> %b)
+  %3 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv4i1(<vscale x 4 x i1> %2)
+  %4 = tail call i1 @llvm.aarch64.sve.ptest.any(<vscale x 16 x i1> %pg, <vscale x 16 x i1> %3)
+  %conv = zext i1 %4 to i32
+  ret i32 %conv
+}
+
 declare <vscale x 16 x i1> @llvm.aarch64.sve.cmphi.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 16 x i8>)
 declare <vscale x 16 x i1> @llvm.aarch64.sve.cmphi.wide.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 2 x i64>)
+declare <vscale x 8 x i1> @llvm.aarch64.sve.cmphi.wide.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 2 x i64>)
+declare <vscale x 4 x i1> @llvm.aarch64.sve.cmphi.wide.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 2 x i64>)
 
 declare i1 @llvm.aarch64.sve.ptest.any(<vscale x 16 x i1>, <vscale x 16 x i1>)
 declare <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32)
+
+declare <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv8i1(<vscale x 8 x i1>)
+declare <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv4i1(<vscale x 4 x i1>)
+
+declare <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1>)
+declare <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1>)
diff --git a/llvm/test/CodeGen/AArch64/sve-ptest-removal-cmphs.ll b/llvm/test/CodeGen/AArch64/sve-ptest-removal-cmphs.ll
index ff5a1ec09abf..f6d9e70fffe4 100644
--- a/llvm/test/CodeGen/AArch64/sve-ptest-removal-cmphs.ll
+++ b/llvm/test/CodeGen/AArch64/sve-ptest-removal-cmphs.ll
@@ -46,9 +46,43 @@ define i32 @cmphs_wide_nxv16i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <v
   ret i32 %conv
 }
 
+define i32 @cmphs_wide_nxv8i16(<vscale x 16 x i1> %pg, <vscale x 8 x i16> %a, <vscale x 2 x i64> %b) {
+; CHECK-LABEL: cmphs_wide_nxv8i16:
+; CHECK: cmphs p0.h, p0/z, z0.h, z1.d
+; CHECK-NEXT: cset w0, ne
+; CHECK-NEXT: ret
+  %1 = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> %pg)
+  %2 = tail call <vscale x 8 x i1> @llvm.aarch64.sve.cmphs.wide.nxv8i16(<vscale x 8 x i1> %1, <vscale x 8 x i16> %a, <vscale x 2 x i64> %b)
+  %3 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv8i1(<vscale x 8 x i1> %2)
+  %4 = tail call i1 @llvm.aarch64.sve.ptest.any(<vscale x 16 x i1> %pg, <vscale x 16 x i1> %3)
+  %conv = zext i1 %4 to i32
+  ret i32 %conv
+}
+
+define i32 @cmphs_wide_nxv4i32(<vscale x 16 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 2 x i64> %b) {
+; CHECK-LABEL: cmphs_wide_nxv4i32:
+; CHECK: cmphs p0.s, p0/z, z0.s, z1.d
+; CHECK-NEXT: cset w0, ne
+; CHECK-NEXT: ret
+  %1 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg)
+  %2 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.cmphs.wide.nxv4i32(<vscale x 4 x i1> %1, <vscale x 4 x i32> %a, <vscale x 2 x i64> %b)
+  %3 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv4i1(<vscale x 4 x i1> %2)
+  %4 = tail call i1 @llvm.aarch64.sve.ptest.any(<vscale x 16 x i1> %pg, <vscale x 16 x i1> %3)
+  %conv = zext i1 %4 to i32
+  ret i32 %conv
+}
+
 declare <vscale x 16 x i1> @llvm.aarch64.sve.cmphs.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 16 x i8>)
 declare <vscale x 16 x i1> @llvm.aarch64.sve.cmphs.wide.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 2 x i64>)
+declare <vscale x 8 x i1> @llvm.aarch64.sve.cmphs.wide.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 2 x i64>)
+declare <vscale x 4 x i1> @llvm.aarch64.sve.cmphs.wide.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 2 x i64>)
 
 declare i1 @llvm.aarch64.sve.ptest.any(<vscale x 16 x i1>, <vscale x 16 x i1>)
 declare <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32)
+
+declare <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv8i1(<vscale x 8 x i1>)
+declare <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv4i1(<vscale x 4 x i1>)
+
+declare <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1>)
+declare <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1>)
diff --git a/llvm/test/CodeGen/AArch64/sve-ptest-removal-cmple.ll b/llvm/test/CodeGen/AArch64/sve-ptest-removal-cmple.ll
index 3513acef7bbc..e3616af95ee9 100644
--- a/llvm/test/CodeGen/AArch64/sve-ptest-removal-cmple.ll
+++ b/llvm/test/CodeGen/AArch64/sve-ptest-removal-cmple.ll
@@ -31,9 +31,43 @@ define i32 @cmple_wide_nxv16i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <v
   ret i32 %conv
 }
 
+define i32 @cmple_wide_nxv8i16(<vscale x 16 x i1> %pg, <vscale x 8 x i16> %a, <vscale x 2 x i64> %b) {
+; CHECK-LABEL: cmple_wide_nxv8i16:
+; CHECK: cmple p0.h, p0/z, z0.h, z1.d
+; CHECK-NEXT: cset w0, ne
+; CHECK-NEXT: ret
+  %1 = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> %pg)
+  %2 = tail call <vscale x 8 x i1> @llvm.aarch64.sve.cmple.wide.nxv8i16(<vscale x 8 x i1> %1, <vscale x 8 x i16> %a, <vscale x 2 x i64> %b)
+  %3 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv8i1(<vscale x 8 x i1> %2)
+  %4 = tail call i1 @llvm.aarch64.sve.ptest.any(<vscale x 16 x i1> %pg, <vscale x 16 x i1> %3)
+  %conv = zext i1 %4 to i32
+  ret i32 %conv
+}
+
+define i32 @cmple_wide_nxv4i32(<vscale x 16 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 2 x i64> %b) {
+; CHECK-LABEL: cmple_wide_nxv4i32:
+; CHECK: cmple p0.s, p0/z, z0.s, z1.d
+; CHECK-NEXT: cset w0, ne
+; CHECK-NEXT: ret
+  %1 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg)
+  %2 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.cmple.wide.nxv4i32(<vscale x 4 x i1> %1, <vscale x 4 x i32> %a, <vscale x 2 x i64> %b)
+  %3 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv4i1(<vscale x 4 x i1> %2)
+  %4 = tail call i1 @llvm.aarch64.sve.ptest.any(<vscale x 16 x i1> %pg, <vscale x 16 x i1> %3)
+  %conv = zext i1 %4 to i32
+  ret i32 %conv
+}
+
 declare <vscale x 16 x i1> @llvm.aarch64.sve.cmpge.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 16 x i8>)
 declare <vscale x 16 x i1> @llvm.aarch64.sve.cmple.wide.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 2 x i64>)
+declare <vscale x 8 x i1> @llvm.aarch64.sve.cmple.wide.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 2 x i64>)
+declare <vscale x 4 x i1> @llvm.aarch64.sve.cmple.wide.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 2 x i64>)
 
 declare i1 @llvm.aarch64.sve.ptest.any(<vscale x 16 x i1>, <vscale x 16 x i1>)
 declare <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32)
+
+declare <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv8i1(<vscale x 8 x i1>)
+declare <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv4i1(<vscale x 4 x i1>)
+
+declare <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1>)
+declare <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1>)
diff --git a/llvm/test/CodeGen/AArch64/sve-ptest-removal-cmplo.ll b/llvm/test/CodeGen/AArch64/sve-ptest-removal-cmplo.ll
index eae748d56e05..5701b8049150 100644
--- a/llvm/test/CodeGen/AArch64/sve-ptest-removal-cmplo.ll
+++ b/llvm/test/CodeGen/AArch64/sve-ptest-removal-cmplo.ll
@@ -31,9 +31,43 @@ define i32 @cmplo_wide_nxv16i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <v
   ret i32 %conv
 }
 
+define i32 @cmplo_wide_nxv8i16(<vscale x 16 x i1> %pg, <vscale x 8 x i16> %a, <vscale x 2 x i64> %b) {
+; CHECK-LABEL: cmplo_wide_nxv8i16:
+; CHECK: cmplo p0.h, p0/z, z0.h, z1.d
+; CHECK-NEXT: cset w0, ne
+; CHECK-NEXT: ret
+  %1 = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> %pg)
+  %2 = tail call <vscale x 8 x i1> @llvm.aarch64.sve.cmplo.wide.nxv8i16(<vscale x 8 x i1> %1, <vscale x 8 x i16> %a, <vscale x 2 x i64> %b)
+  %3 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv8i1(<vscale x 8 x i1> %2)
+  %4 = tail call i1 @llvm.aarch64.sve.ptest.any(<vscale x 16 x i1> %pg, <vscale x 16 x i1> %3)
+  %conv = zext i1 %4 to i32
+  ret i32 %conv
+}
+
+define i32 @cmplo_wide_nxv4i32(<vscale x 16 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 2 x i64> %b) {
+; CHECK-LABEL: cmplo_wide_nxv4i32:
+; CHECK: cmplo p0.s, p0/z, z0.s, z1.d
+; CHECK-NEXT: cset w0, ne
+; CHECK-NEXT: ret
+  %1 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg)
+  %2 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.cmplo.wide.nxv4i32(<vscale x 4 x i1> %1, <vscale x 4 x i32> %a, <vscale x 2 x i64> %b)
+  %3 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv4i1(<vscale x 4 x i1> %2)
+  %4 = tail call i1 @llvm.aarch64.sve.ptest.any(<vscale x 16 x i1> %pg, <vscale x 16 x i1> %3)
+  %conv = zext i1 %4 to i32
+  ret i32 %conv
+}
+
 declare <vscale x 16 x i1> @llvm.aarch64.sve.cmphi.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 16 x i8>)
 declare <vscale x 16 x i1> @llvm.aarch64.sve.cmplo.wide.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 2 x i64>)
+declare <vscale x 8 x i1> @llvm.aarch64.sve.cmplo.wide.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 2 x i64>)
+declare <vscale x 4 x i1> @llvm.aarch64.sve.cmplo.wide.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 2 x i64>)
 
 declare i1 @llvm.aarch64.sve.ptest.any(<vscale x 16 x i1>, <vscale x 16 x i1>)
 declare <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32)
+
+declare <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv8i1(<vscale x 8 x i1>)
+declare <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv4i1(<vscale x 4 x i1>)
+
+declare <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1>)
+declare <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1>)
diff --git a/llvm/test/CodeGen/AArch64/sve-ptest-removal-cmpls.ll b/llvm/test/CodeGen/AArch64/sve-ptest-removal-cmpls.ll
index d53ece953cbf..5f6d01f7d1f7 100644
--- a/llvm/test/CodeGen/AArch64/sve-ptest-removal-cmpls.ll
+++ b/llvm/test/CodeGen/AArch64/sve-ptest-removal-cmpls.ll
@@ -31,9 +31,43 @@ define i32 @cmpls_wide_nxv16i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <v
   ret i32 %conv
 }
 
+define i32 @cmpls_wide_nxv8i16(<vscale x 16 x i1> %pg, <vscale x 8 x i16> %a, <vscale x 2 x i64> %b) {
+; CHECK-LABEL: cmpls_wide_nxv8i16:
+; CHECK: cmpls p0.h, p0/z, z0.h, z1.d
+; CHECK-NEXT: cset w0, ne
+; CHECK-NEXT: ret
+  %1 = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> %pg)
+  %2 = tail call <vscale x 8 x i1> @llvm.aarch64.sve.cmpls.wide.nxv8i16(<vscale x 8 x i1> %1, <vscale x 8 x i16> %a, <vscale x 2 x i64> %b)
+  %3 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv8i1(<vscale x 8 x i1> %2)
+  %4 = tail call i1 @llvm.aarch64.sve.ptest.any(<vscale x 16 x i1> %pg, <vscale x 16 x i1> %3)
+  %conv = zext i1 %4 to i32
+  ret i32 %conv
+}
+
+define i32 @cmpls_wide_nxv4i32(<vscale x 16 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 2 x i64> %b) {
+; CHECK-LABEL: cmpls_wide_nxv4i32:
+; CHECK: cmpls p0.s, p0/z, z0.s, z1.d
+; CHECK-NEXT: cset w0, ne
+; CHECK-NEXT: ret
+  %1 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg)
+  %2 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.cmpls.wide.nxv4i32(<vscale x 4 x i1> %1, <vscale x 4 x i32> %a, <vscale x 2 x i64> %b)
+  %3 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv4i1(<vscale x 4 x i1> %2)
+  %4 = tail call i1 @llvm.aarch64.sve.ptest.any(<vscale x 16 x i1> %pg, <vscale x 16 x i1> %3)
+  %conv = zext i1 %4 to i32
+  ret i32 %conv
+}
+
 declare <vscale x 16 x i1> @llvm.aarch64.sve.cmphs.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 16 x i8>)
 declare <vscale x 16 x i1> @llvm.aarch64.sve.cmpls.wide.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 2 x i64>)
+declare <vscale x 8 x i1> @llvm.aarch64.sve.cmpls.wide.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 2 x i64>)
+declare <vscale x 4 x i1> @llvm.aarch64.sve.cmpls.wide.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 2 x i64>)
 
 declare i1 @llvm.aarch64.sve.ptest.any(<vscale x 16 x i1>, <vscale x 16 x i1>)
 declare <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32)
+
+declare <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv8i1(<vscale x 8 x i1>)
+declare <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv4i1(<vscale x 4 x i1>)
+
+declare <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1>)
+declare <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1>)
diff --git a/llvm/test/CodeGen/AArch64/sve-ptest-removal-cmplt.ll b/llvm/test/CodeGen/AArch64/sve-ptest-removal-cmplt.ll
index fca33d72bce4..cee219725366 100644
--- a/llvm/test/CodeGen/AArch64/sve-ptest-removal-cmplt.ll
+++ b/llvm/test/CodeGen/AArch64/sve-ptest-removal-cmplt.ll
@@ -31,9 +31,43 @@ define i32 @cmplt_wide_nxv16i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <v
   ret i32 %conv
 }
 
+define i32 @cmplt_wide_nxv8i16(<vscale x 16 x i1> %pg, <vscale x 8 x i16> %a, <vscale x 2 x i64> %b) {
+; CHECK-LABEL: cmplt_wide_nxv8i16:
+; CHECK: cmplt p0.h, p0/z, z0.h, z1.d
+; CHECK-NEXT: cset w0, ne
+; CHECK-NEXT: ret
+  %1 = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> %pg)
+  %2 = tail call <vscale x 8 x i1> @llvm.aarch64.sve.cmplt.wide.nxv8i16(<vscale x 8 x i1> %1, <vscale x 8 x i16> %a, <vscale x 2 x i64> %b)
+  %3 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv8i1(<vscale x 8 x i1> %2)
+  %4 = tail call i1 @llvm.aarch64.sve.ptest.any(<vscale x 16 x i1> %pg, <vscale x 16 x i1> %3)
+  %conv = zext i1 %4 to i32
+  ret i32 %conv
+}
+
+define i32 @cmplt_wide_nxv4i32(<vscale x 16 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 2 x i64> %b) {
+; CHECK-LABEL: cmplt_wide_nxv4i32:
+; CHECK: cmplt p0.s, p0/z, z0.s, z1.d
+; CHECK-NEXT: cset w0, ne
+; CHECK-NEXT: ret
+  %1 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg)
+  %2 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.cmplt.wide.nxv4i32(<vscale x 4 x i1> %1, <vscale x 4 x i32> %a, <vscale x 2 x i64> %b)
+  %3 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv4i1(<vscale x 4 x i1> %2)
+  %4 = tail call i1 @llvm.aarch64.sve.ptest.any(<vscale x 16 x i1> %pg, <vscale x 16 x i1> %3)
+  %conv = zext i1 %4 to i32
+  ret i32 %conv
+}
+
 declare <vscale x 16 x i1> @llvm.aarch64.sve.cmpgt.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 16 x i8>)
 declare <vscale x 16 x i1> @llvm.aarch64.sve.cmplt.wide.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 2 x i64>)
+declare <vscale x 8 x i1> @llvm.aarch64.sve.cmplt.wide.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 2 x i64>)
+declare <vscale x 4 x i1> @llvm.aarch64.sve.cmplt.wide.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 2 x i64>)
 
 declare i1 @llvm.aarch64.sve.ptest.any(<vscale x 16 x i1>, <vscale x 16 x i1>)
 declare <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32)
+
+declare <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv8i1(<vscale x 8 x i1>)
+declare <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv4i1(<vscale x 4 x i1>)
+
+declare <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1>)
+declare <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1>)
diff --git a/llvm/test/CodeGen/AArch64/sve-ptest-removal-cmpne.ll b/llvm/test/CodeGen/AArch64/sve-ptest-removal-cmpne.ll
index ead20da2827a..0609d066fef5 100644
--- a/llvm/test/CodeGen/AArch64/sve-ptest-removal-cmpne.ll
+++ b/llvm/test/CodeGen/AArch64/sve-ptest-removal-cmpne.ll
@@ -46,9 +46,43 @@ define i32 @cmpne_wide_nxv16i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <v
   ret i32 %conv
 }
 
+define i32 @cmpne_wide_nxv8i16(<vscale x 16 x i1> %pg, <vscale x 8 x i16> %a, <vscale x 2 x i64> %b) {
+; CHECK-LABEL: cmpne_wide_nxv8i16:
+; CHECK: cmpne p0.h, p0/z, z0.h, z1.d
+; CHECK-NEXT: cset w0, ne
+; CHECK-NEXT: ret
+  %1 = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> %pg)
+  %2 = tail call <vscale x 8 x i1> @llvm.aarch64.sve.cmpne.wide.nxv8i16(<vscale x 8 x i1> %1, <vscale x 8 x i16> %a, <vscale x 2 x i64> %b)
+  %3 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv8i1(<vscale x 8 x i1> %2)
+  %4 = tail call i1 @llvm.aarch64.sve.ptest.any(<vscale x 16 x i1> %pg, <vscale x 16 x i1> %3)
+  %conv = zext i1 %4 to i32
+  ret i32 %conv
+}
+
+define i32 @cmpne_wide_nxv4i32(<vscale x 16 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 2 x i64> %b) {
+; CHECK-LABEL: cmpne_wide_nxv4i32:
+; CHECK: cmpne p0.s, p0/z, z0.s, z1.d
+; CHECK-NEXT: cset w0, ne
+; CHECK-NEXT: ret
+  %1 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg)
+  %2 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.cmpne.wide.nxv4i32(<vscale x 4 x i1> %1, <vscale x 4 x i32> %a, <vscale x 2 x i64> %b)
+  %3 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv4i1(<vscale x 4 x i1> %2)
+  %4 = tail call i1 @llvm.aarch64.sve.ptest.any(<vscale x 16 x i1> %pg, <vscale x 16 x i1> %3)
+  %conv = zext i1 %4 to i32
+  ret i32 %conv
+}
+
 declare <vscale x 16 x i1> @llvm.aarch64.sve.cmpne.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 16 x i8>)
 declare <vscale x 16 x i1> @llvm.aarch64.sve.cmpne.wide.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 2 x i64>)
+declare <vscale x 8 x i1> @llvm.aarch64.sve.cmpne.wide.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 2 x i64>)
+declare <vscale x 4 x i1> @llvm.aarch64.sve.cmpne.wide.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 2 x i64>)
 
 declare i1 @llvm.aarch64.sve.ptest.any(<vscale x 16 x i1>, <vscale x 16 x i1>)
 declare <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32)
+
+declare <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv8i1(<vscale x 8 x i1>)
+declare <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv4i1(<vscale x 4 x i1>)
+
+declare <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1>)
+declare <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1>)
diff --git a/llvm/test/CodeGen/AArch64/sve-vector-splat.ll b/llvm/test/CodeGen/AArch64/sve-vector-splat.ll
index 5416f0c976e8..834841e08447 100644
--- a/llvm/test/CodeGen/AArch64/sve-vector-splat.ll
+++ b/llvm/test/CodeGen/AArch64/sve-vector-splat.ll
@@ -587,5 +587,18 @@ define <vscale x 2 x double> @splat_nxv2f64_imm_out_of_range() {
   ret <vscale x 2 x double> %2
 }
 
+; Splat for predicates
+; This guards optimizations that rely on splats of 1 being generated as a ptrue
+
+define <vscale x 2 x i1> @sve_splat_i1_allactive() {
+; CHECK-LABEL: sve_splat_i1_allactive:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ptrue p0.d
+; CHECK-NEXT:    ret
+  %ins = insertelement <vscale x 2 x i1> undef, i1 1, i32 0
+  %splat = shufflevector <vscale x 2 x i1> %ins, <vscale x 2 x i1> undef, <vscale x 2 x i32> zeroinitializer
+  ret <vscale x 2 x i1> %splat
+}
+
 ; +bf16 is required for the bfloat version.
 attributes #0 = { "target-features"="+sve,+bf16" }

_______________________________________________
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits