https://github.com/goldsteinn updated https://github.com/llvm/llvm-project/pull/88183
>From 31b373984bcbb51db9f1d1c939492515fb721c8d Mon Sep 17 00:00:00 2001 From: Noah Goldstein <goldstein....@gmail.com> Date: Sat, 4 May 2024 18:12:34 -0500 Subject: [PATCH 1/5] [Inliner] Add tests for propagating more parameter attributes; NFC --- .../Inline/access-attributes-prop.ll | 116 +++++++++++++++++- 1 file changed, 114 insertions(+), 2 deletions(-) diff --git a/llvm/test/Transforms/Inline/access-attributes-prop.ll b/llvm/test/Transforms/Inline/access-attributes-prop.ll index ffd31fbe8ae107..125d3f963e1338 100644 --- a/llvm/test/Transforms/Inline/access-attributes-prop.ll +++ b/llvm/test/Transforms/Inline/access-attributes-prop.ll @@ -46,7 +46,6 @@ define dso_local void @foo3_writable(ptr %p) { ret void } - define dso_local void @foo1_bar_aligned64_deref512(ptr %p) { ; CHECK-LABEL: define {{[^@]+}}@foo1_bar_aligned64_deref512 ; CHECK-SAME: (ptr [[P:%.*]]) { @@ -322,6 +321,16 @@ define void @prop_param_nonnull_and_align(ptr %p) { ret void } +define void @prop_param_nofree_and_align(ptr %p) { +; CHECK-LABEL: define {{[^@]+}}@prop_param_nofree_and_align +; CHECK-SAME: (ptr [[P:%.*]]) { +; CHECK-NEXT: call void @bar1(ptr [[P]]) +; CHECK-NEXT: ret void +; + call void @foo1(ptr nofree align 32 %p) + ret void +} + define void @prop_param_deref_align_no_update(ptr %p) { ; CHECK-LABEL: define {{[^@]+}}@prop_param_deref_align_no_update ; CHECK-SAME: (ptr [[P:%.*]]) { @@ -528,7 +537,6 @@ define void @prop_no_conflict_writable(ptr %p) { ret void } - define void @prop_no_conflict_writable2(ptr %p) { ; CHECK-LABEL: define {{[^@]+}}@prop_no_conflict_writable2 ; CHECK-SAME: (ptr [[P:%.*]]) { @@ -539,3 +547,107 @@ define void @prop_no_conflict_writable2(ptr %p) { ret void } +declare void @bar4(i32) + +define dso_local void @foo4_range_0_10(i32 %v) { +; CHECK-LABEL: define {{[^@]+}}@foo4_range_0_10 +; CHECK-SAME: (i32 [[V:%.*]]) { +; CHECK-NEXT: call void @bar4(i32 range(i32 0, 10) [[V]]) +; CHECK-NEXT: ret void +; + call void @bar4(i32 range(i32 0, 10) %v) + ret void +} + 
+define dso_local void @foo4_2_range_0_10(i32 range(i32 0, 10) %v) { +; CHECK-LABEL: define {{[^@]+}}@foo4_2_range_0_10 +; CHECK-SAME: (i32 range(i32 0, 10) [[V:%.*]]) { +; CHECK-NEXT: call void @bar4(i32 [[V]]) +; CHECK-NEXT: ret void +; + call void @bar4(i32 %v) + ret void +} + + +define dso_local void @foo4(i32 %v) { +; CHECK-LABEL: define {{[^@]+}}@foo4 +; CHECK-SAME: (i32 [[V:%.*]]) { +; CHECK-NEXT: call void @bar4(i32 [[V]]) +; CHECK-NEXT: ret void +; + call void @bar4(i32 %v) + ret void +} + + + +define void @prop_range_empty_intersect(i32 %v) { +; CHECK-LABEL: define {{[^@]+}}@prop_range_empty_intersect +; CHECK-SAME: (i32 [[V:%.*]]) { +; CHECK-NEXT: call void @bar4(i32 range(i32 0, 10) [[V]]) +; CHECK-NEXT: ret void +; + call void @foo4_range_0_10(i32 range(i32 11, 50) %v) + ret void +} + +define void @prop_range_empty(i32 %v) { +; CHECK-LABEL: define {{[^@]+}}@prop_range_empty +; CHECK-SAME: (i32 [[V:%.*]]) { +; CHECK-NEXT: call void @bar4(i32 [[V]]) +; CHECK-NEXT: ret void +; + call void @foo4(i32 range(i32 1, 0) %v) + ret void +} + +define void @prop_range_empty_with_intersect(i32 %v) { +; CHECK-LABEL: define {{[^@]+}}@prop_range_empty_with_intersect +; CHECK-SAME: (i32 [[V:%.*]]) { +; CHECK-NEXT: call void @bar4(i32 range(i32 0, 10) [[V]]) +; CHECK-NEXT: ret void +; + call void @foo4_range_0_10(i32 range(i32 1, 0) %v) + ret void +} + +define void @prop_range_intersect1(i32 %v) { +; CHECK-LABEL: define {{[^@]+}}@prop_range_intersect1 +; CHECK-SAME: (i32 [[V:%.*]]) { +; CHECK-NEXT: call void @bar4(i32 range(i32 0, 10) [[V]]) +; CHECK-NEXT: ret void +; + call void @foo4_range_0_10(i32 range(i32 0, 9) %v) + ret void +} + +define void @prop_range_intersect2(i32 %v) { +; CHECK-LABEL: define {{[^@]+}}@prop_range_intersect2 +; CHECK-SAME: (i32 [[V:%.*]]) { +; CHECK-NEXT: call void @bar4(i32 range(i32 0, 10) [[V]]) +; CHECK-NEXT: ret void +; + call void @foo4_range_0_10(i32 range(i32 1, 9) %v) + ret void +} + +define void @prop_range_intersect3(i32 %v) { +; 
CHECK-LABEL: define {{[^@]+}}@prop_range_intersect3 +; CHECK-SAME: (i32 [[V:%.*]]) { +; CHECK-NEXT: call void @bar4(i32 [[V]]) +; CHECK-NEXT: ret void +; + call void @foo4_2_range_0_10(i32 range(i32 0, 11) %v) + ret void +} + +define void @prop_range_direct(i32 %v) { +; CHECK-LABEL: define {{[^@]+}}@prop_range_direct +; CHECK-SAME: (i32 [[V:%.*]]) { +; CHECK-NEXT: call void @bar4(i32 [[V]]) +; CHECK-NEXT: ret void +; + call void @foo4(i32 range(i32 1, 11) %v) + ret void +} >From 238dd3b7d8d9da5f09161df58106b635fd9cbe97 Mon Sep 17 00:00:00 2001 From: Noah Goldstein <goldstein....@gmail.com> Date: Sat, 4 May 2024 13:57:45 -0500 Subject: [PATCH 2/5] [Inliner] Propagate more attributes to params when inlining Add support for propagating: - `dereferenceable` - `dereferenceable_or_null` - `align` - `nonnull` - `nofree` These are only propagated if the parameter to the to-be-inlined callsite matches the exact parameter used in the to-be-inlined function. --- .../test/CodeGen/attr-counted-by-pr88931.cpp | 2 +- clang/test/OpenMP/bug57757.cpp | 2 +- llvm/lib/Transforms/Utils/InlineFunction.cpp | 74 +++++++++++++++---- .../Inline/access-attributes-prop.ll | 16 ++-- .../Inline/assumptions-from-callsite-attrs.ll | 2 +- llvm/test/Transforms/Inline/byval.ll | 4 +- 6 files changed, 74 insertions(+), 26 deletions(-) diff --git a/clang/test/CodeGen/attr-counted-by-pr88931.cpp b/clang/test/CodeGen/attr-counted-by-pr88931.cpp index 2a8cc1d07e50d9..6d0c46bbbe8f9c 100644 --- a/clang/test/CodeGen/attr-counted-by-pr88931.cpp +++ b/clang/test/CodeGen/attr-counted-by-pr88931.cpp @@ -13,7 +13,7 @@ void init(void * __attribute__((pass_dynamic_object_size(0)))); // CHECK-LABEL: define dso_local void @_ZN3foo3barC1Ev( // CHECK-SAME: ptr noundef nonnull align 4 dereferenceable(1) [[THIS:%.*]]) unnamed_addr #[[ATTR0:[0-9]+]] align 2 { // CHECK-NEXT: entry: -// CHECK-NEXT: tail call void @_Z4initPvU25pass_dynamic_object_size0(ptr noundef nonnull [[THIS]], i64 noundef -1) #[[ATTR2:[0-9]+]] +// 
CHECK-NEXT: tail call void @_Z4initPvU25pass_dynamic_object_size0(ptr noundef nonnull align 4 dereferenceable(1) [[THIS]], i64 noundef -1) #[[ATTR2:[0-9]+]] // CHECK-NEXT: ret void // foo::bar::bar() { diff --git a/clang/test/OpenMP/bug57757.cpp b/clang/test/OpenMP/bug57757.cpp index e1f646e2b141a0..c4e309d7f566b5 100644 --- a/clang/test/OpenMP/bug57757.cpp +++ b/clang/test/OpenMP/bug57757.cpp @@ -39,7 +39,7 @@ void foo() { // CHECK-NEXT: ] // CHECK: .untied.jmp..i: // CHECK-NEXT: store i32 1, ptr [[TMP2]], align 4, !tbaa [[TBAA16]], !alias.scope [[META13]], !noalias [[META17]] -// CHECK-NEXT: [[TMP4:%.*]] = tail call i32 @__kmpc_omp_task(ptr nonnull @[[GLOB1]], i32 [[TMP0]], ptr [[TMP1]]), !noalias [[META13]] +// CHECK-NEXT: [[TMP4:%.*]] = tail call i32 @__kmpc_omp_task(ptr nonnull @[[GLOB1]], i32 [[TMP0]], ptr nonnull [[TMP1]]), !noalias [[META13]] // CHECK-NEXT: br label [[DOTOMP_OUTLINED__EXIT]] // CHECK: .untied.next..i: // CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i64 40 diff --git a/llvm/lib/Transforms/Utils/InlineFunction.cpp b/llvm/lib/Transforms/Utils/InlineFunction.cpp index 1aae561d8817b5..45bccd0a041509 100644 --- a/llvm/lib/Transforms/Utils/InlineFunction.cpp +++ b/llvm/lib/Transforms/Utils/InlineFunction.cpp @@ -1352,20 +1352,41 @@ static void AddParamAndFnBasicAttributes(const CallBase &CB, auto &Context = CalledFunction->getContext(); // Collect valid attributes for all params. - SmallVector<AttrBuilder> ValidParamAttrs; + SmallVector<AttrBuilder> ValidObjParamAttrs, ValidExactParamAttrs; bool HasAttrToPropagate = false; for (unsigned I = 0, E = CB.arg_size(); I < E; ++I) { - ValidParamAttrs.emplace_back(AttrBuilder{CB.getContext()}); + ValidObjParamAttrs.emplace_back(AttrBuilder{CB.getContext()}); + ValidExactParamAttrs.emplace_back(AttrBuilder{CB.getContext()}); // Access attributes can be propagated to any param with the same underlying // object as the argument. 
if (CB.paramHasAttr(I, Attribute::ReadNone)) - ValidParamAttrs.back().addAttribute(Attribute::ReadNone); + ValidObjParamAttrs.back().addAttribute(Attribute::ReadNone); if (CB.paramHasAttr(I, Attribute::ReadOnly)) - ValidParamAttrs.back().addAttribute(Attribute::ReadOnly); + ValidObjParamAttrs.back().addAttribute(Attribute::ReadOnly); if (CB.paramHasAttr(I, Attribute::WriteOnly)) - ValidParamAttrs.back().addAttribute(Attribute::WriteOnly); - HasAttrToPropagate |= ValidParamAttrs.back().hasAttributes(); + ValidObjParamAttrs.back().addAttribute(Attribute::WriteOnly); + + // Attributes we can only propagate if the exact parameter is forwarded. + + // We can propagate both poison generating and UB generating attributes + // without any extra checks. The only attribute that is tricky to propagate + // is `noundef` (skipped for now) as that can create new UB where previous + // behavior was just using a poison value. + if (auto DerefBytes = CB.getParamDereferenceableBytes(I)) + ValidExactParamAttrs.back().addDereferenceableAttr(DerefBytes); + if (auto DerefOrNullBytes = CB.getParamDereferenceableOrNullBytes(I)) + ValidExactParamAttrs.back().addDereferenceableOrNullAttr( + DerefOrNullBytes); + if (CB.paramHasAttr(I, Attribute::NoFree)) + ValidExactParamAttrs.back().addAttribute(Attribute::NoFree); + if (CB.paramHasAttr(I, Attribute::NonNull)) + ValidExactParamAttrs.back().addAttribute(Attribute::NonNull); + if (auto Align = CB.getParamAlign(I)) + ValidExactParamAttrs.back().addAlignmentAttr(Align); + + HasAttrToPropagate |= ValidObjParamAttrs.back().hasAttributes(); + HasAttrToPropagate |= ValidExactParamAttrs.back().hasAttributes(); } // Won't be able to propagate anything. @@ -1383,15 +1404,42 @@ static void AddParamAndFnBasicAttributes(const CallBase &CB, AttributeList AL = NewInnerCB->getAttributes(); for (unsigned I = 0, E = InnerCB->arg_size(); I < E; ++I) { // Check if the underlying value for the parameter is an argument. 
- const Value *UnderlyingV = - getUnderlyingObject(InnerCB->getArgOperand(I)); - const Argument *Arg = dyn_cast<Argument>(UnderlyingV); - if (!Arg) - continue; + const Argument *Arg = dyn_cast<Argument>(InnerCB->getArgOperand(I)); + unsigned ArgNo; + if (Arg) { + ArgNo = Arg->getArgNo(); + // For dereferenceable, dereferenceable_or_null, align, etc... + // we don't want to propagate if the existing param has the same + // attribute with "better" constraints. So, only remove from the + // existing AL if the region of the existing param is smaller than + // what we can propagate. AttributeList's merge API honours the + // already existing attribute value so we choose the "better" + // attribute by removing if the existing one is worse. + if (AL.getParamDereferenceableBytes(I) < + ValidExactParamAttrs[ArgNo].getDereferenceableBytes()) + AL = + AL.removeParamAttribute(Context, I, Attribute::Dereferenceable); + if (AL.getParamDereferenceableOrNullBytes(I) < + ValidExactParamAttrs[ArgNo].getDereferenceableOrNullBytes()) + AL = + AL.removeParamAttribute(Context, I, Attribute::Dereferenceable); + if (AL.getParamAlignment(I).valueOrOne() < + ValidExactParamAttrs[ArgNo].getAlignment().valueOrOne()) + AL = AL.removeParamAttribute(Context, I, Attribute::Alignment); + + AL = AL.addParamAttributes(Context, I, ValidExactParamAttrs[ArgNo]); + + } else { + const Value *UnderlyingV = + getUnderlyingObject(InnerCB->getArgOperand(I)); + Arg = dyn_cast<Argument>(UnderlyingV); + if (!Arg) + continue; + ArgNo = Arg->getArgNo(); + } - unsigned ArgNo = Arg->getArgNo(); // If so, propagate its access attributes. - AL = AL.addParamAttributes(Context, I, ValidParamAttrs[ArgNo]); + AL = AL.addParamAttributes(Context, I, ValidObjParamAttrs[ArgNo]); // We can have conflicting attributes from the inner callsite and // to-be-inlined callsite. In that case, choose the most // restrictive. 
diff --git a/llvm/test/Transforms/Inline/access-attributes-prop.ll b/llvm/test/Transforms/Inline/access-attributes-prop.ll index 125d3f963e1338..f3c656be00f59b 100644 --- a/llvm/test/Transforms/Inline/access-attributes-prop.ll +++ b/llvm/test/Transforms/Inline/access-attributes-prop.ll @@ -294,7 +294,7 @@ define void @prop_param_callbase_def_1x_partial_3(ptr %p, ptr %p2) { define void @prop_deref(ptr %p) { ; CHECK-LABEL: define {{[^@]+}}@prop_deref ; CHECK-SAME: (ptr [[P:%.*]]) { -; CHECK-NEXT: call void @bar1(ptr [[P]]) +; CHECK-NEXT: call void @bar1(ptr dereferenceable(16) [[P]]) ; CHECK-NEXT: ret void ; call void @foo1(ptr dereferenceable(16) %p) @@ -304,7 +304,7 @@ define void @prop_deref(ptr %p) { define void @prop_deref_or_null(ptr %p) { ; CHECK-LABEL: define {{[^@]+}}@prop_deref_or_null ; CHECK-SAME: (ptr [[P:%.*]]) { -; CHECK-NEXT: call void @bar1(ptr [[P]]) +; CHECK-NEXT: call void @bar1(ptr dereferenceable_or_null(256) [[P]]) ; CHECK-NEXT: ret void ; call void @foo1(ptr dereferenceable_or_null(256) %p) @@ -314,7 +314,7 @@ define void @prop_deref_or_null(ptr %p) { define void @prop_param_nonnull_and_align(ptr %p) { ; CHECK-LABEL: define {{[^@]+}}@prop_param_nonnull_and_align ; CHECK-SAME: (ptr [[P:%.*]]) { -; CHECK-NEXT: call void @bar1(ptr [[P]]) +; CHECK-NEXT: call void @bar1(ptr nonnull align 32 [[P]]) ; CHECK-NEXT: ret void ; call void @foo1(ptr nonnull align 32 %p) @@ -324,7 +324,7 @@ define void @prop_param_nonnull_and_align(ptr %p) { define void @prop_param_nofree_and_align(ptr %p) { ; CHECK-LABEL: define {{[^@]+}}@prop_param_nofree_and_align ; CHECK-SAME: (ptr [[P:%.*]]) { -; CHECK-NEXT: call void @bar1(ptr [[P]]) +; CHECK-NEXT: call void @bar1(ptr nofree align 32 [[P]]) ; CHECK-NEXT: ret void ; call void @foo1(ptr nofree align 32 %p) @@ -334,7 +334,7 @@ define void @prop_param_nofree_and_align(ptr %p) { define void @prop_param_deref_align_no_update(ptr %p) { ; CHECK-LABEL: define {{[^@]+}}@prop_param_deref_align_no_update ; CHECK-SAME: (ptr 
[[P:%.*]]) { -; CHECK-NEXT: call void @bar1(ptr align 64 dereferenceable(512) [[P]]) +; CHECK-NEXT: call void @bar1(ptr align 4 dereferenceable(64) [[P]]) ; CHECK-NEXT: ret void ; call void @foo1_bar_aligned64_deref512(ptr align 4 dereferenceable(64) %p) @@ -344,7 +344,7 @@ define void @prop_param_deref_align_no_update(ptr %p) { define void @prop_param_deref_align_update(ptr %p) { ; CHECK-LABEL: define {{[^@]+}}@prop_param_deref_align_update ; CHECK-SAME: (ptr [[P:%.*]]) { -; CHECK-NEXT: call void @bar1(ptr align 64 dereferenceable(512) [[P]]) +; CHECK-NEXT: call void @bar1(ptr align 128 dereferenceable(1024) [[P]]) ; CHECK-NEXT: ret void ; call void @foo1_bar_aligned64_deref512(ptr align 128 dereferenceable(1024) %p) @@ -354,7 +354,7 @@ define void @prop_param_deref_align_update(ptr %p) { define void @prop_param_deref_or_null_update(ptr %p) { ; CHECK-LABEL: define {{[^@]+}}@prop_param_deref_or_null_update ; CHECK-SAME: (ptr [[P:%.*]]) { -; CHECK-NEXT: call void @bar1(ptr align 512 dereferenceable_or_null(512) [[P]]) +; CHECK-NEXT: call void @bar1(ptr align 512 dereferenceable_or_null(1024) [[P]]) ; CHECK-NEXT: ret void ; call void @foo1_bar_aligned512_deref_or_null512(ptr dereferenceable_or_null(1024) %p) @@ -364,7 +364,7 @@ define void @prop_param_deref_or_null_update(ptr %p) { define void @prop_param_deref_or_null_no_update(ptr %p) { ; CHECK-LABEL: define {{[^@]+}}@prop_param_deref_or_null_no_update ; CHECK-SAME: (ptr [[P:%.*]]) { -; CHECK-NEXT: call void @bar1(ptr align 512 dereferenceable_or_null(512) [[P]]) +; CHECK-NEXT: call void @bar1(ptr align 512 dereferenceable_or_null(32) [[P]]) ; CHECK-NEXT: ret void ; call void @foo1_bar_aligned512_deref_or_null512(ptr dereferenceable_or_null(32) %p) diff --git a/llvm/test/Transforms/Inline/assumptions-from-callsite-attrs.ll b/llvm/test/Transforms/Inline/assumptions-from-callsite-attrs.ll index 1a219a22019c43..c0943f4aefb8f9 100644 --- a/llvm/test/Transforms/Inline/assumptions-from-callsite-attrs.ll +++ 
b/llvm/test/Transforms/Inline/assumptions-from-callsite-attrs.ll @@ -8,7 +8,7 @@ declare void @h(ptr %p, ptr %q, ptr %z) define void @f(ptr %p, ptr %q, ptr %z) { ; CHECK-LABEL: define void @f ; CHECK-SAME: (ptr [[P:%.*]], ptr [[Q:%.*]], ptr [[Z:%.*]]) { -; CHECK-NEXT: call void @h(ptr [[P]], ptr [[Q]], ptr [[Z]]) +; CHECK-NEXT: call void @h(ptr nonnull [[P]], ptr [[Q]], ptr nonnull [[Z]]) ; CHECK-NEXT: ret void ; call void @g(ptr nonnull %p, ptr %q, ptr nonnull %z) diff --git a/llvm/test/Transforms/Inline/byval.ll b/llvm/test/Transforms/Inline/byval.ll index dd5be40b90a8f2..1a70da8472cb1e 100644 --- a/llvm/test/Transforms/Inline/byval.ll +++ b/llvm/test/Transforms/Inline/byval.ll @@ -106,7 +106,7 @@ define void @test3() nounwind { ; CHECK-NEXT: [[S:%.*]] = alloca [[STRUCT_SS]], align 1 ; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 12, ptr [[S1]]) ; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 1 [[S1]], ptr align 1 [[S]], i64 12, i1 false) -; CHECK-NEXT: call void @g3(ptr [[S1]]) #[[ATTR0]] +; CHECK-NEXT: call void @g3(ptr align 64 [[S1]]) #[[ATTR0]] ; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 12, ptr [[S1]]) ; CHECK-NEXT: ret void ; @@ -131,7 +131,7 @@ define i32 @test4() nounwind { ; CHECK-SAME: ) #[[ATTR0]] { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[S:%.*]] = alloca [[STRUCT_SS:%.*]], align 64 -; CHECK-NEXT: call void @g3(ptr [[S]]) #[[ATTR0]] +; CHECK-NEXT: call void @g3(ptr align 64 [[S]]) #[[ATTR0]] ; CHECK-NEXT: ret i32 4 ; entry: >From 420da827d1750d6ce10e469d9c36d12b434107df Mon Sep 17 00:00:00 2001 From: Noah Goldstein <goldstein....@gmail.com> Date: Sat, 4 May 2024 13:57:54 -0500 Subject: [PATCH 3/5] [Inliner] Propagate `range` attributes to params when inlining --- llvm/include/llvm/IR/Attributes.h | 12 ++++++++++ llvm/include/llvm/IR/InstrTypes.h | 4 ++++ llvm/lib/IR/Attributes.cpp | 22 +++++++++++++++++++ llvm/lib/IR/Instructions.cpp | 7 ++++++ llvm/lib/Transforms/Utils/InlineFunction.cpp | 14 ++++++++++++ 
.../Inline/access-attributes-prop.ll | 14 ++++++------ 6 files changed, 66 insertions(+), 7 deletions(-) diff --git a/llvm/include/llvm/IR/Attributes.h b/llvm/include/llvm/IR/Attributes.h index dd11955714895e..337254906db885 100644 --- a/llvm/include/llvm/IR/Attributes.h +++ b/llvm/include/llvm/IR/Attributes.h @@ -752,6 +752,11 @@ class AttributeList { [[nodiscard]] AttributeList addRangeRetAttr(LLVMContext &C, const ConstantRange &CR) const; + /// Add the range attribute to the attribute set at the given arg index. + /// Returns a new list because attribute lists are immutable. + [[nodiscard]] AttributeList addRangeParamAttr(LLVMContext &C, unsigned Index, + const ConstantRange &CR) const; + /// Add the allocsize attribute to the attribute set at the given arg index. /// Returns a new list because attribute lists are immutable. [[nodiscard]] AttributeList @@ -906,6 +911,9 @@ class AttributeList { /// arg. uint64_t getParamDereferenceableOrNullBytes(unsigned ArgNo) const; + /// Get range (or std::nullopt if unknown) of an arg. + std::optional<ConstantRange> getParamRange(unsigned ArgNo) const; + /// Get the disallowed floating-point classes of the return value. FPClassTest getRetNoFPClass() const; @@ -1082,6 +1090,10 @@ class AttrBuilder { /// invalid if the Kind is not present in the builder. Attribute getAttribute(StringRef Kind) const; + /// Retrieve the range if the attribute exists (std::nullopt is returned + /// otherwise). + std::optional<ConstantRange> getRange() const; + /// Return raw (possibly packed/encoded) value of integer attribute or /// std::nullopt if not set. std::optional<uint64_t> getRawIntAttr(Attribute::AttrKind Kind) const; diff --git a/llvm/include/llvm/IR/InstrTypes.h b/llvm/include/llvm/IR/InstrTypes.h index b9af3a6ca42c06..87335f0b28c6b4 100644 --- a/llvm/include/llvm/IR/InstrTypes.h +++ b/llvm/include/llvm/IR/InstrTypes.h @@ -2198,6 +2198,10 @@ class CallBase : public Instruction { /// parameter. 
FPClassTest getParamNoFPClass(unsigned i) const; + /// If arg ArgNo has a range attribute, return the value range of the + /// argument. Otherwise, std::nullopt is returned. + std::optional<ConstantRange> getParamRange(unsigned ArgNo) const; + /// If this return value has a range attribute, return the value range of the /// argument. Otherwise, std::nullopt is returned. std::optional<ConstantRange> getRange() const; diff --git a/llvm/lib/IR/Attributes.cpp b/llvm/lib/IR/Attributes.cpp index c8d6bdd423878b..0cbfe923032c86 100644 --- a/llvm/lib/IR/Attributes.cpp +++ b/llvm/lib/IR/Attributes.cpp @@ -1530,6 +1530,13 @@ AttributeList::addDereferenceableOrNullParamAttr(LLVMContext &C, unsigned Index, return addParamAttributes(C, Index, B); } +AttributeList AttributeList::addRangeParamAttr(LLVMContext &C, unsigned Index, + const ConstantRange &CR) const { + AttrBuilder B(C); + B.addRangeAttr(CR); + return addParamAttributes(C, Index, B); +} + AttributeList AttributeList::addRangeRetAttr(LLVMContext &C, const ConstantRange &CR) const { AttrBuilder B(C); @@ -1658,6 +1665,14 @@ AttributeList::getParamDereferenceableOrNullBytes(unsigned Index) const { return getParamAttrs(Index).getDereferenceableOrNullBytes(); } +std::optional<ConstantRange> +AttributeList::getParamRange(unsigned Index) const { + auto RangeAttr = getParamAttrs(Index).getAttribute(Attribute::Range); + if (RangeAttr.isValid()) + return RangeAttr.getRange(); + return std::nullopt; +} + FPClassTest AttributeList::getRetNoFPClass() const { return getRetAttrs().getNoFPClass(); } @@ -1991,6 +2006,13 @@ Attribute AttrBuilder::getAttribute(StringRef A) const { return {}; } +std::optional<ConstantRange> AttrBuilder::getRange() const { + const Attribute RangeAttr = getAttribute(Attribute::Range); + if (RangeAttr.isValid()) + return RangeAttr.getRange(); + return std::nullopt; +} + bool AttrBuilder::contains(Attribute::AttrKind A) const { return getAttribute(A).isValid(); } diff --git a/llvm/lib/IR/Instructions.cpp 
b/llvm/lib/IR/Instructions.cpp index 7ad1ad4cddb703..ee832d2093a132 100644 --- a/llvm/lib/IR/Instructions.cpp +++ b/llvm/lib/IR/Instructions.cpp @@ -396,6 +396,13 @@ FPClassTest CallBase::getParamNoFPClass(unsigned i) const { return Mask; } +std::optional<ConstantRange> CallBase::getParamRange(unsigned ArgNo) const { + const Attribute RangeAttr = getParamAttr(ArgNo, llvm::Attribute::Range); + if (RangeAttr.isValid()) + return RangeAttr.getRange(); + return std::nullopt; +} + std::optional<ConstantRange> CallBase::getRange() const { const Attribute RangeAttr = getRetAttr(llvm::Attribute::Range); if (RangeAttr.isValid()) diff --git a/llvm/lib/Transforms/Utils/InlineFunction.cpp b/llvm/lib/Transforms/Utils/InlineFunction.cpp index 45bccd0a041509..41f899fe120f63 100644 --- a/llvm/lib/Transforms/Utils/InlineFunction.cpp +++ b/llvm/lib/Transforms/Utils/InlineFunction.cpp @@ -1384,6 +1384,8 @@ static void AddParamAndFnBasicAttributes(const CallBase &CB, ValidExactParamAttrs.back().addAttribute(Attribute::NonNull); if (auto Align = CB.getParamAlign(I)) ValidExactParamAttrs.back().addAlignmentAttr(Align); + if (auto Range = CB.getParamRange(I)) + ValidExactParamAttrs.back().addRangeAttr(*Range); HasAttrToPropagate |= ValidObjParamAttrs.back().hasAttributes(); HasAttrToPropagate |= ValidExactParamAttrs.back().hasAttributes(); @@ -1427,8 +1429,20 @@ static void AddParamAndFnBasicAttributes(const CallBase &CB, ValidExactParamAttrs[ArgNo].getAlignment().valueOrOne()) AL = AL.removeParamAttribute(Context, I, Attribute::Alignment); + auto ExistingRange = AL.getParamRange(I); AL = AL.addParamAttributes(Context, I, ValidExactParamAttrs[ArgNo]); + // For range we use the exact intersection. 
+ if (ExistingRange.has_value()) { + if (auto NewRange = ValidExactParamAttrs[ArgNo].getRange()) { + auto CombinedRange = ExistingRange->exactIntersectWith(*NewRange); + if (!CombinedRange.has_value()) + CombinedRange = + ConstantRange::getEmpty(NewRange->getBitWidth()); + AL = AL.removeParamAttribute(Context, I, Attribute::Range); + AL = AL.addRangeParamAttr(Context, I, *CombinedRange); + } + } } else { const Value *UnderlyingV = getUnderlyingObject(InnerCB->getArgOperand(I)); diff --git a/llvm/test/Transforms/Inline/access-attributes-prop.ll b/llvm/test/Transforms/Inline/access-attributes-prop.ll index f3c656be00f59b..e25023da6ed5ff 100644 --- a/llvm/test/Transforms/Inline/access-attributes-prop.ll +++ b/llvm/test/Transforms/Inline/access-attributes-prop.ll @@ -585,7 +585,7 @@ define dso_local void @foo4(i32 %v) { define void @prop_range_empty_intersect(i32 %v) { ; CHECK-LABEL: define {{[^@]+}}@prop_range_empty_intersect ; CHECK-SAME: (i32 [[V:%.*]]) { -; CHECK-NEXT: call void @bar4(i32 range(i32 0, 10) [[V]]) +; CHECK-NEXT: call void @bar4(i32 range(i32 0, 0) [[V]]) ; CHECK-NEXT: ret void ; call void @foo4_range_0_10(i32 range(i32 11, 50) %v) @@ -595,7 +595,7 @@ define void @prop_range_empty_intersect(i32 %v) { define void @prop_range_empty(i32 %v) { ; CHECK-LABEL: define {{[^@]+}}@prop_range_empty ; CHECK-SAME: (i32 [[V:%.*]]) { -; CHECK-NEXT: call void @bar4(i32 [[V]]) +; CHECK-NEXT: call void @bar4(i32 range(i32 1, 0) [[V]]) ; CHECK-NEXT: ret void ; call void @foo4(i32 range(i32 1, 0) %v) @@ -605,7 +605,7 @@ define void @prop_range_empty(i32 %v) { define void @prop_range_empty_with_intersect(i32 %v) { ; CHECK-LABEL: define {{[^@]+}}@prop_range_empty_with_intersect ; CHECK-SAME: (i32 [[V:%.*]]) { -; CHECK-NEXT: call void @bar4(i32 range(i32 0, 10) [[V]]) +; CHECK-NEXT: call void @bar4(i32 range(i32 1, 10) [[V]]) ; CHECK-NEXT: ret void ; call void @foo4_range_0_10(i32 range(i32 1, 0) %v) @@ -615,7 +615,7 @@ define void @prop_range_empty_with_intersect(i32 %v) { 
define void @prop_range_intersect1(i32 %v) { ; CHECK-LABEL: define {{[^@]+}}@prop_range_intersect1 ; CHECK-SAME: (i32 [[V:%.*]]) { -; CHECK-NEXT: call void @bar4(i32 range(i32 0, 10) [[V]]) +; CHECK-NEXT: call void @bar4(i32 range(i32 0, 9) [[V]]) ; CHECK-NEXT: ret void ; call void @foo4_range_0_10(i32 range(i32 0, 9) %v) @@ -625,7 +625,7 @@ define void @prop_range_intersect1(i32 %v) { define void @prop_range_intersect2(i32 %v) { ; CHECK-LABEL: define {{[^@]+}}@prop_range_intersect2 ; CHECK-SAME: (i32 [[V:%.*]]) { -; CHECK-NEXT: call void @bar4(i32 range(i32 0, 10) [[V]]) +; CHECK-NEXT: call void @bar4(i32 range(i32 1, 9) [[V]]) ; CHECK-NEXT: ret void ; call void @foo4_range_0_10(i32 range(i32 1, 9) %v) @@ -635,7 +635,7 @@ define void @prop_range_intersect2(i32 %v) { define void @prop_range_intersect3(i32 %v) { ; CHECK-LABEL: define {{[^@]+}}@prop_range_intersect3 ; CHECK-SAME: (i32 [[V:%.*]]) { -; CHECK-NEXT: call void @bar4(i32 [[V]]) +; CHECK-NEXT: call void @bar4(i32 range(i32 0, 11) [[V]]) ; CHECK-NEXT: ret void ; call void @foo4_2_range_0_10(i32 range(i32 0, 11) %v) @@ -645,7 +645,7 @@ define void @prop_range_intersect3(i32 %v) { define void @prop_range_direct(i32 %v) { ; CHECK-LABEL: define {{[^@]+}}@prop_range_direct ; CHECK-SAME: (i32 [[V:%.*]]) { -; CHECK-NEXT: call void @bar4(i32 [[V]]) +; CHECK-NEXT: call void @bar4(i32 range(i32 1, 11) [[V]]) ; CHECK-NEXT: ret void ; call void @foo4(i32 range(i32 1, 11) %v) >From 62a190bfac3c59feb9a97bb606e245fce23f3d08 Mon Sep 17 00:00:00 2001 From: Noah Goldstein <goldstein....@gmail.com> Date: Tue, 9 Apr 2024 14:36:08 -0500 Subject: [PATCH 4/5] [InstCombine] Add tests for folding `(icmp eq/ne (or (select cond, 0/NZ, 0/NZ), X), 0)`; NFC --- .../icmp-or-of-select-with-zero.ll | 247 ++++++++++++++++++ 1 file changed, 247 insertions(+) create mode 100644 llvm/test/Transforms/InstCombine/icmp-or-of-select-with-zero.ll diff --git a/llvm/test/Transforms/InstCombine/icmp-or-of-select-with-zero.ll 
b/llvm/test/Transforms/InstCombine/icmp-or-of-select-with-zero.ll new file mode 100644 index 00000000000000..45537c2f2fbfee --- /dev/null +++ b/llvm/test/Transforms/InstCombine/icmp-or-of-select-with-zero.ll @@ -0,0 +1,247 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt < %s -passes=instcombine -S | FileCheck %s + +declare void @use.i8(i8) +declare void @use.i1(i1) +define i1 @src_tv_eq(i1 %c0, i8 %x, i8 %yy) { +; CHECK-LABEL: @src_tv_eq( +; CHECK-NEXT: [[Y:%.*]] = add nuw i8 [[YY:%.*]], 1 +; CHECK-NEXT: [[SEL:%.*]] = select i1 [[C0:%.*]], i8 0, i8 [[Y]] +; CHECK-NEXT: [[SELX:%.*]] = or i8 [[SEL]], [[X:%.*]] +; CHECK-NEXT: [[R:%.*]] = icmp eq i8 [[SELX]], 0 +; CHECK-NEXT: ret i1 [[R]] +; + %y = add nuw i8 %yy, 1 + %sel = select i1 %c0, i8 0, i8 %y + %selx = or i8 %sel, %x + %r = icmp eq i8 %selx, 0 + ret i1 %r +} + +define i1 @src_tv_eq_multiuse_or_fail(i1 %c0, i8 %x, i8 %yy) { +; CHECK-LABEL: @src_tv_eq_multiuse_or_fail( +; CHECK-NEXT: [[Y:%.*]] = add nuw i8 [[YY:%.*]], 1 +; CHECK-NEXT: [[SEL:%.*]] = select i1 [[C0:%.*]], i8 0, i8 [[Y]] +; CHECK-NEXT: [[SELX:%.*]] = or i8 [[SEL]], [[X:%.*]] +; CHECK-NEXT: [[R:%.*]] = icmp eq i8 [[SELX]], 0 +; CHECK-NEXT: call void @use.i8(i8 [[SELX]]) +; CHECK-NEXT: ret i1 [[R]] +; + %y = add nuw i8 %yy, 1 + %sel = select i1 %c0, i8 0, i8 %y + %selx = or i8 %sel, %x + %r = icmp eq i8 %selx, 0 + call void @use.i8(i8 %selx) + ret i1 %r +} + +define i1 @src_tv_eq_fail_tv_nonzero(i1 %c0, i8 %x, i8 %yy) { +; CHECK-LABEL: @src_tv_eq_fail_tv_nonzero( +; CHECK-NEXT: [[Y:%.*]] = add nsw i8 [[YY:%.*]], 1 +; CHECK-NEXT: [[SEL:%.*]] = select i1 [[C0:%.*]], i8 1, i8 [[Y]] +; CHECK-NEXT: [[SELX:%.*]] = or i8 [[SEL]], [[X:%.*]] +; CHECK-NEXT: [[R:%.*]] = icmp eq i8 [[SELX]], 0 +; CHECK-NEXT: ret i1 [[R]] +; + %y = add nsw i8 %yy, 1 + %sel = select i1 %c0, i8 1, i8 %y + %selx = or i8 %sel, %x + %r = icmp eq i8 %selx, 0 + ret i1 %r +} + +define i1 @src_fv_ne(i1 %c0, i8 %x, i8 %yy) { +; CHECK-LABEL: 
@src_fv_ne( +; CHECK-NEXT: [[Y:%.*]] = add nuw i8 [[YY:%.*]], 1 +; CHECK-NEXT: [[SEL:%.*]] = select i1 [[C0:%.*]], i8 [[Y]], i8 0 +; CHECK-NEXT: [[SELX:%.*]] = or i8 [[SEL]], [[X:%.*]] +; CHECK-NEXT: [[R:%.*]] = icmp ne i8 [[SELX]], 0 +; CHECK-NEXT: ret i1 [[R]] +; + %y = add nuw i8 %yy, 1 + %sel = select i1 %c0, i8 %y, i8 0 + %selx = or i8 %sel, %x + %r = icmp ne i8 %selx, 0 + ret i1 %r +} + +define i1 @src_fv_ne_fail_maybe_zero(i1 %c0, i8 %x, i8 %yy) { +; CHECK-LABEL: @src_fv_ne_fail_maybe_zero( +; CHECK-NEXT: [[Y:%.*]] = add nsw i8 [[YY:%.*]], 1 +; CHECK-NEXT: [[SEL:%.*]] = select i1 [[C0:%.*]], i8 [[Y]], i8 0 +; CHECK-NEXT: [[SELX:%.*]] = or i8 [[SEL]], [[X:%.*]] +; CHECK-NEXT: [[R:%.*]] = icmp ne i8 [[SELX]], 0 +; CHECK-NEXT: ret i1 [[R]] +; + %y = add nsw i8 %yy, 1 + %sel = select i1 %c0, i8 %y, i8 0 + %selx = or i8 %sel, %x + %r = icmp ne i8 %selx, 0 + ret i1 %r +} + +define i1 @src_tv_ne(i1 %c0, i8 %x, i8 %yy) { +; CHECK-LABEL: @src_tv_ne( +; CHECK-NEXT: [[Y:%.*]] = add nuw i8 [[YY:%.*]], 1 +; CHECK-NEXT: [[SEL:%.*]] = select i1 [[C0:%.*]], i8 0, i8 [[Y]] +; CHECK-NEXT: [[SELX:%.*]] = or i8 [[SEL]], [[X:%.*]] +; CHECK-NEXT: [[R:%.*]] = icmp ne i8 [[SELX]], 0 +; CHECK-NEXT: ret i1 [[R]] +; + %y = add nuw i8 %yy, 1 + %sel = select i1 %c0, i8 0, i8 %y + %selx = or i8 %sel, %x + %r = icmp ne i8 %selx, 0 + ret i1 %r +} + +define i1 @src_tv_ne_fail_cmp_nonzero(i1 %c0, i8 %x, i8 %yy) { +; CHECK-LABEL: @src_tv_ne_fail_cmp_nonzero( +; CHECK-NEXT: [[Y:%.*]] = add nuw i8 [[YY:%.*]], 1 +; CHECK-NEXT: [[SEL:%.*]] = select i1 [[C0:%.*]], i8 0, i8 [[Y]] +; CHECK-NEXT: [[SELX:%.*]] = or i8 [[SEL]], [[X:%.*]] +; CHECK-NEXT: [[R:%.*]] = icmp ne i8 [[SELX]], 1 +; CHECK-NEXT: ret i1 [[R]] +; + %y = add nuw i8 %yy, 1 + %sel = select i1 %c0, i8 0, i8 %y + %selx = or i8 %sel, %x + %r = icmp ne i8 %selx, 1 + ret i1 %r +} + +define i1 @src_fv_eq(i1 %c0, i8 %x, i8 %yy) { +; CHECK-LABEL: @src_fv_eq( +; CHECK-NEXT: [[Y:%.*]] = add nuw i8 [[YY:%.*]], 1 +; CHECK-NEXT: [[SEL:%.*]] = 
select i1 [[C0:%.*]], i8 [[Y]], i8 0 +; CHECK-NEXT: [[SELX:%.*]] = or i8 [[SEL]], [[X:%.*]] +; CHECK-NEXT: [[R:%.*]] = icmp eq i8 [[SELX]], 0 +; CHECK-NEXT: ret i1 [[R]] +; + %y = add nuw i8 %yy, 1 + %sel = select i1 %c0, i8 %y, i8 0 + %selx = or i8 %sel, %x + %r = icmp eq i8 %selx, 0 + ret i1 %r +} + +define i1 @src_fv_eq_fail_cant_invert(i1 %c0, i8 %x, i8 %yy) { +; CHECK-LABEL: @src_fv_eq_fail_cant_invert( +; CHECK-NEXT: [[Y:%.*]] = add nuw i8 [[YY:%.*]], 1 +; CHECK-NEXT: [[SEL:%.*]] = select i1 [[C0:%.*]], i8 [[Y]], i8 0 +; CHECK-NEXT: [[SELX:%.*]] = or i8 [[SEL]], [[X:%.*]] +; CHECK-NEXT: [[R:%.*]] = icmp eq i8 [[SELX]], 0 +; CHECK-NEXT: call void @use.i8(i8 [[SEL]]) +; CHECK-NEXT: ret i1 [[R]] +; + %y = add nuw i8 %yy, 1 + %sel = select i1 %c0, i8 %y, i8 0 + %selx = or i8 %sel, %x + %r = icmp eq i8 %selx, 0 + call void @use.i8(i8 %sel) + ret i1 %r +} + +define i1 @src_fv_eq_fail_cant_invert2(i1 %c1, i8 %a, i8 %b, i8 %x, i8 %yy) { +; CHECK-LABEL: @src_fv_eq_fail_cant_invert2( +; CHECK-NEXT: [[C0:%.*]] = icmp ugt i8 [[A:%.*]], [[B:%.*]] +; CHECK-NEXT: [[Y:%.*]] = add nuw i8 [[YY:%.*]], 1 +; CHECK-NEXT: [[SEL:%.*]] = select i1 [[C0]], i8 [[Y]], i8 0 +; CHECK-NEXT: [[CC:%.*]] = or i1 [[C0]], [[C1:%.*]] +; CHECK-NEXT: [[SEL_OTHER:%.*]] = select i1 [[CC]], i8 [[Y]], i8 [[B]] +; CHECK-NEXT: [[SELX:%.*]] = or i8 [[SEL]], [[X:%.*]] +; CHECK-NEXT: [[R:%.*]] = icmp eq i8 [[SELX]], 0 +; CHECK-NEXT: call void @use.i8(i8 [[SEL]]) +; CHECK-NEXT: call void @use.i8(i8 [[SEL_OTHER]]) +; CHECK-NEXT: ret i1 [[R]] +; + %c0 = icmp ugt i8 %a, %b + %y = add nuw i8 %yy, 1 + %sel = select i1 %c0, i8 %y, i8 0 + %cc = or i1 %c0, %c1 + %sel_other = select i1 %cc, i8 %y, i8 %b + + %selx = or i8 %sel, %x + %r = icmp eq i8 %selx, 0 + call void @use.i8(i8 %sel) + call void @use.i8(i8 %sel_other) + ret i1 %r +} + +define i1 @src_fv_eq_invert2(i1 %c1, i8 %a, i8 %b, i8 %x, i8 %yy) { +; CHECK-LABEL: @src_fv_eq_invert2( +; CHECK-NEXT: [[C0:%.*]] = icmp ugt i8 [[A:%.*]], [[B:%.*]] +; CHECK-NEXT: 
[[Y:%.*]] = add nuw i8 [[YY:%.*]], 1 +; CHECK-NEXT: [[SEL:%.*]] = select i1 [[C0]], i8 [[Y]], i8 0 +; CHECK-NEXT: [[CC:%.*]] = or i1 [[C0]], [[C1:%.*]] +; CHECK-NEXT: [[SEL_OTHER:%.*]] = select i1 [[CC]], i8 [[Y]], i8 [[B]] +; CHECK-NEXT: [[SELX:%.*]] = or i8 [[SEL]], [[X:%.*]] +; CHECK-NEXT: [[R:%.*]] = icmp eq i8 [[SELX]], 0 +; CHECK-NEXT: call void @use.i8(i8 [[SEL_OTHER]]) +; CHECK-NEXT: ret i1 [[R]] +; + %c0 = icmp ugt i8 %a, %b + %y = add nuw i8 %yy, 1 + %sel = select i1 %c0, i8 %y, i8 0 + %cc = or i1 %c0, %c1 + %sel_other = select i1 %cc, i8 %y, i8 %b + + %selx = or i8 %sel, %x + %r = icmp eq i8 %selx, 0 + call void @use.i8(i8 %sel_other) + ret i1 %r +} + +define i1 @src_fv_eq_invert3(i8 %a, i8 %b, i8 %x, i8 %yy) { +; CHECK-LABEL: @src_fv_eq_invert3( +; CHECK-NEXT: [[C0:%.*]] = icmp ugt i8 [[A:%.*]], [[B:%.*]] +; CHECK-NEXT: [[Y:%.*]] = add nuw i8 [[YY:%.*]], 1 +; CHECK-NEXT: [[SEL:%.*]] = select i1 [[C0]], i8 [[Y]], i8 0 +; CHECK-NEXT: [[SEL_OTHER:%.*]] = select i1 [[C0]], i8 [[Y]], i8 [[B]] +; CHECK-NEXT: [[SELX:%.*]] = or i8 [[SEL]], [[X:%.*]] +; CHECK-NEXT: [[R:%.*]] = icmp eq i8 [[SELX]], 0 +; CHECK-NEXT: call void @use.i8(i8 [[SEL_OTHER]]) +; CHECK-NEXT: call void @use.i8(i8 [[SEL]]) +; CHECK-NEXT: ret i1 [[R]] +; + %c0 = icmp ugt i8 %a, %b + %y = add nuw i8 %yy, 1 + %sel = select i1 %c0, i8 %y, i8 0 + %sel_other = select i1 %c0, i8 %y, i8 %b + + %selx = or i8 %sel, %x + %r = icmp eq i8 %selx, 0 + call void @use.i8(i8 %sel_other) + call void @use.i8(i8 %sel) + ret i1 %r +} + +define i1 @src_tv_ne_invert(i1 %c1, i8 %a, i8 %b, i8 %x, i8 %yy) { +; CHECK-LABEL: @src_tv_ne_invert( +; CHECK-NEXT: [[NOT_C0:%.*]] = icmp ugt i8 [[A:%.*]], [[B:%.*]] +; CHECK-NEXT: call void @use.i1(i1 [[NOT_C0]]) +; CHECK-NEXT: [[C0:%.*]] = xor i1 [[NOT_C0]], true +; CHECK-NEXT: [[Y:%.*]] = add nuw i8 [[YY:%.*]], 1 +; CHECK-NEXT: [[SEL:%.*]] = select i1 [[NOT_C0]], i8 [[Y]], i8 0 +; CHECK-NEXT: [[CC:%.*]] = or i1 [[C0]], [[C1:%.*]] +; CHECK-NEXT: [[SEL_OTHER:%.*]] = select i1 
[[CC]], i8 [[Y]], i8 [[B]] +; CHECK-NEXT: [[SELX:%.*]] = or i8 [[SEL]], [[X:%.*]] +; CHECK-NEXT: [[R:%.*]] = icmp ne i8 [[SELX]], 0 +; CHECK-NEXT: call void @use.i8(i8 [[SEL]]) +; CHECK-NEXT: call void @use.i8(i8 [[SEL_OTHER]]) +; CHECK-NEXT: ret i1 [[R]] +; + %not_c0 = icmp ugt i8 %a, %b + call void @use.i1(i1 %not_c0) + %c0 = xor i1 %not_c0, true + %y = add nuw i8 %yy, 1 + %sel = select i1 %c0, i8 0, i8 %y + %cc = or i1 %c0, %c1 + %sel_other = select i1 %cc, i8 %y, i8 %b + + %selx = or i8 %sel, %x + %r = icmp ne i8 %selx, 0 + call void @use.i8(i8 %sel) + call void @use.i8(i8 %sel_other) + ret i1 %r +} >From 6c1c79071149cb0feeb266f7673b0f79cbff5048 Mon Sep 17 00:00:00 2001 From: Noah Goldstein <goldstein....@gmail.com> Date: Tue, 9 Apr 2024 14:36:16 -0500 Subject: [PATCH 5/5] [InstCombine] Fold `(icmp eq/ne (or (select cond, 0/NZ, 0/NZ), X), 0)` Four cases: `(icmp eq (or (select cond, 0, NonZero), Other))` -> `(and cond, (icmp eq Other, 0))` `(icmp ne (or (select cond, NonZero, 0), Other))` -> `(or cond, (icmp ne Other, 0))` `(icmp ne (or (select cond, 0, NonZero), Other))` -> `(or (not cond), (icmp ne Other, 0))` `(icmp eq (or (select cond, NonZero, 0), Other))` -> `(and (not cond), (icmp eq Other, 0))` These cases came up in tests on: #88088 Proofs: https://alive2.llvm.org/ce/z/ojGo_J --- .../InstCombine/InstCombineCompares.cpp | 50 +++++++++++++++++++ .../icmp-or-of-select-with-zero.ll | 48 ++++++++---------- 2 files changed, 71 insertions(+), 27 deletions(-) diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp index c60a290ce72e06..b1bf7cdd51f090 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp @@ -3483,6 +3483,56 @@ Instruction *InstCombinerImpl::foldICmpBinOpEqualityWithConstant( Value *And = Builder.CreateAnd(BOp0, NotBOC); return new ICmpInst(Pred, And, NotBOC); } + // (icmp eq (or (select cond, 0, 
NonZero), Other)) + // -> (and cond, (icmp eq Other, 0)) + // (icmp ne (or (select cond, NonZero, 0), Other)) + // -> (or cond, (icmp ne Other, 0)) + // (icmp ne (or (select cond, 0, NonZero), Other)) + // -> (or (not cond), (icmp ne Other, 0)) + // (icmp eq (or (select cond, NonZero, 0), Other)) + // -> (and (not cond), (icmp eq Other, 0)) + Value *Cond, *TV, *FV, *Other; + if (C.isZero() && BO->hasOneUse() && + match(BO, m_c_Or(m_Select(m_Value(Cond), m_Value(TV), m_Value(FV)), + m_Value(Other)))) { + const SimplifyQuery Q = SQ.getWithInstruction(&Cmp); + // Easy case is if eq/ne matches whether 0 is trueval/falseval. + if (Pred == ICmpInst::ICMP_EQ + ? (match(TV, m_SpecificInt(C)) && isKnownNonZero(FV, Q)) + : (match(FV, m_SpecificInt(C)) && isKnownNonZero(TV, Q))) { + Value *Cmp = Builder.CreateICmp( + Pred, Other, Constant::getNullValue(Other->getType())); + return BinaryOperator::Create( + Pred == ICmpInst::ICMP_EQ ? Instruction::And : Instruction::Or, Cmp, + Cond); + } + // Harder case is if eq/ne matches whether 0 is falseval/trueval. In this + // case we need to invert the select condition so we need to be careful to + // avoid creating extra instructions. + if (Pred == ICmpInst::ICMP_EQ + ? (match(FV, m_SpecificInt(C)) && isKnownNonZero(TV, Q)) + : (match(TV, m_SpecificInt(C)) && isKnownNonZero(FV, Q))) { + Value *NotCond = nullptr; + // If the select is one use, we are essentially replacing select with + // `(not Cond)`. + if (match(BO, m_c_Or(m_OneUse(m_Select(m_Specific(Cond), m_Specific(TV), + m_Specific(FV))), + m_Value()))) + NotCond = Builder.CreateNot(Cond); + // Otherwise, see if we can get NotCond for free. + else + NotCond = + getFreelyInverted(Cond, /*WillInvertAllUses=*/false, &Builder); + + if (NotCond) { + Value *Cmp = Builder.CreateICmp( + Pred, Other, Constant::getNullValue(Other->getType())); + return BinaryOperator::Create( + Pred == ICmpInst::ICMP_EQ ? 
Instruction::And : Instruction::Or, + Cmp, NotCond); + } + } + } break; } case Instruction::UDiv: diff --git a/llvm/test/Transforms/InstCombine/icmp-or-of-select-with-zero.ll b/llvm/test/Transforms/InstCombine/icmp-or-of-select-with-zero.ll index 45537c2f2fbfee..0742066d693560 100644 --- a/llvm/test/Transforms/InstCombine/icmp-or-of-select-with-zero.ll +++ b/llvm/test/Transforms/InstCombine/icmp-or-of-select-with-zero.ll @@ -5,11 +5,9 @@ declare void @use.i8(i8) declare void @use.i1(i1) define i1 @src_tv_eq(i1 %c0, i8 %x, i8 %yy) { ; CHECK-LABEL: @src_tv_eq( -; CHECK-NEXT: [[Y:%.*]] = add nuw i8 [[YY:%.*]], 1 -; CHECK-NEXT: [[SEL:%.*]] = select i1 [[C0:%.*]], i8 0, i8 [[Y]] -; CHECK-NEXT: [[SELX:%.*]] = or i8 [[SEL]], [[X:%.*]] -; CHECK-NEXT: [[R:%.*]] = icmp eq i8 [[SELX]], 0 -; CHECK-NEXT: ret i1 [[R]] +; CHECK-NEXT: [[R:%.*]] = icmp eq i8 [[SELX:%.*]], 0 +; CHECK-NEXT: [[R1:%.*]] = and i1 [[R]], [[C0:%.*]] +; CHECK-NEXT: ret i1 [[R1]] ; %y = add nuw i8 %yy, 1 %sel = select i1 %c0, i8 0, i8 %y @@ -52,11 +50,9 @@ define i1 @src_tv_eq_fail_tv_nonzero(i1 %c0, i8 %x, i8 %yy) { define i1 @src_fv_ne(i1 %c0, i8 %x, i8 %yy) { ; CHECK-LABEL: @src_fv_ne( -; CHECK-NEXT: [[Y:%.*]] = add nuw i8 [[YY:%.*]], 1 -; CHECK-NEXT: [[SEL:%.*]] = select i1 [[C0:%.*]], i8 [[Y]], i8 0 -; CHECK-NEXT: [[SELX:%.*]] = or i8 [[SEL]], [[X:%.*]] -; CHECK-NEXT: [[R:%.*]] = icmp ne i8 [[SELX]], 0 -; CHECK-NEXT: ret i1 [[R]] +; CHECK-NEXT: [[R:%.*]] = icmp ne i8 [[SELX:%.*]], 0 +; CHECK-NEXT: [[R1:%.*]] = or i1 [[R]], [[C0:%.*]] +; CHECK-NEXT: ret i1 [[R1]] ; %y = add nuw i8 %yy, 1 %sel = select i1 %c0, i8 %y, i8 0 @@ -82,11 +78,10 @@ define i1 @src_fv_ne_fail_maybe_zero(i1 %c0, i8 %x, i8 %yy) { define i1 @src_tv_ne(i1 %c0, i8 %x, i8 %yy) { ; CHECK-LABEL: @src_tv_ne( -; CHECK-NEXT: [[Y:%.*]] = add nuw i8 [[YY:%.*]], 1 -; CHECK-NEXT: [[SEL:%.*]] = select i1 [[C0:%.*]], i8 0, i8 [[Y]] -; CHECK-NEXT: [[SELX:%.*]] = or i8 [[SEL]], [[X:%.*]] -; CHECK-NEXT: [[R:%.*]] = icmp ne i8 [[SELX]], 0 -; 
CHECK-NEXT: ret i1 [[R]] +; CHECK-NEXT: [[TMP1:%.*]] = xor i1 [[C0:%.*]], true +; CHECK-NEXT: [[R:%.*]] = icmp ne i8 [[SELX:%.*]], 0 +; CHECK-NEXT: [[R1:%.*]] = or i1 [[R]], [[TMP1]] +; CHECK-NEXT: ret i1 [[R1]] ; %y = add nuw i8 %yy, 1 %sel = select i1 %c0, i8 0, i8 %y @@ -112,11 +107,10 @@ define i1 @src_tv_ne_fail_cmp_nonzero(i1 %c0, i8 %x, i8 %yy) { define i1 @src_fv_eq(i1 %c0, i8 %x, i8 %yy) { ; CHECK-LABEL: @src_fv_eq( -; CHECK-NEXT: [[Y:%.*]] = add nuw i8 [[YY:%.*]], 1 -; CHECK-NEXT: [[SEL:%.*]] = select i1 [[C0:%.*]], i8 [[Y]], i8 0 -; CHECK-NEXT: [[SELX:%.*]] = or i8 [[SEL]], [[X:%.*]] -; CHECK-NEXT: [[R:%.*]] = icmp eq i8 [[SELX]], 0 -; CHECK-NEXT: ret i1 [[R]] +; CHECK-NEXT: [[TMP1:%.*]] = xor i1 [[C0:%.*]], true +; CHECK-NEXT: [[R:%.*]] = icmp eq i8 [[SELX:%.*]], 0 +; CHECK-NEXT: [[R1:%.*]] = and i1 [[R]], [[TMP1]] +; CHECK-NEXT: ret i1 [[R1]] ; %y = add nuw i8 %yy, 1 %sel = select i1 %c0, i8 %y, i8 0 @@ -172,13 +166,13 @@ define i1 @src_fv_eq_invert2(i1 %c1, i8 %a, i8 %b, i8 %x, i8 %yy) { ; CHECK-LABEL: @src_fv_eq_invert2( ; CHECK-NEXT: [[C0:%.*]] = icmp ugt i8 [[A:%.*]], [[B:%.*]] ; CHECK-NEXT: [[Y:%.*]] = add nuw i8 [[YY:%.*]], 1 -; CHECK-NEXT: [[SEL:%.*]] = select i1 [[C0]], i8 [[Y]], i8 0 ; CHECK-NEXT: [[CC:%.*]] = or i1 [[C0]], [[C1:%.*]] ; CHECK-NEXT: [[SEL_OTHER:%.*]] = select i1 [[CC]], i8 [[Y]], i8 [[B]] -; CHECK-NEXT: [[SELX:%.*]] = or i8 [[SEL]], [[X:%.*]] -; CHECK-NEXT: [[R:%.*]] = icmp eq i8 [[SELX]], 0 +; CHECK-NEXT: [[TMP1:%.*]] = xor i1 [[C0]], true +; CHECK-NEXT: [[R:%.*]] = icmp eq i8 [[SELX:%.*]], 0 +; CHECK-NEXT: [[R1:%.*]] = and i1 [[R]], [[TMP1]] ; CHECK-NEXT: call void @use.i8(i8 [[SEL_OTHER]]) -; CHECK-NEXT: ret i1 [[R]] +; CHECK-NEXT: ret i1 [[R1]] ; %c0 = icmp ugt i8 %a, %b %y = add nuw i8 %yy, 1 @@ -225,11 +219,11 @@ define i1 @src_tv_ne_invert(i1 %c1, i8 %a, i8 %b, i8 %x, i8 %yy) { ; CHECK-NEXT: [[SEL:%.*]] = select i1 [[NOT_C0]], i8 [[Y]], i8 0 ; CHECK-NEXT: [[CC:%.*]] = or i1 [[C0]], [[C1:%.*]] ; CHECK-NEXT: 
[[SEL_OTHER:%.*]] = select i1 [[CC]], i8 [[Y]], i8 [[B]] -; CHECK-NEXT: [[SELX:%.*]] = or i8 [[SEL]], [[X:%.*]] -; CHECK-NEXT: [[R:%.*]] = icmp ne i8 [[SELX]], 0 +; CHECK-NEXT: [[R:%.*]] = icmp ne i8 [[SELX:%.*]], 0 +; CHECK-NEXT: [[R1:%.*]] = or i1 [[R]], [[NOT_C0]] ; CHECK-NEXT: call void @use.i8(i8 [[SEL]]) ; CHECK-NEXT: call void @use.i8(i8 [[SEL_OTHER]]) -; CHECK-NEXT: ret i1 [[R]] +; CHECK-NEXT: ret i1 [[R1]] ; %not_c0 = icmp ugt i8 %a, %b call void @use.i1(i1 %not_c0) _______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits