Author: Stephen Senran Zhang Date: 2024-12-31T18:40:17-08:00 New Revision: 2feffecb8853b1cdd38a0653df63d70412e65c12
URL: https://github.com/llvm/llvm-project/commit/2feffecb8853b1cdd38a0653df63d70412e65c12 DIFF: https://github.com/llvm/llvm-project/commit/2feffecb8853b1cdd38a0653df63d70412e65c12.diff LOG: [ConstantRange] Estimate tighter lower (upper) bounds for masked binary and (or) (#120352) Fixes #118108. Co-author: Yingwei Zheng (@dtcxzyw) Added: llvm/test/Transforms/SCCP/range-and-or-bit-masked.ll Modified: clang/test/CodeGen/AArch64/fpm-helpers.c llvm/lib/IR/ConstantRange.cpp llvm/unittests/IR/ConstantRangeTest.cpp Removed: ################################################################################ diff --git a/clang/test/CodeGen/AArch64/fpm-helpers.c b/clang/test/CodeGen/AArch64/fpm-helpers.c index 4bced01d5c71fa..6264b5caeb4f50 100644 --- a/clang/test/CodeGen/AArch64/fpm-helpers.c +++ b/clang/test/CodeGen/AArch64/fpm-helpers.c @@ -35,7 +35,7 @@ extern "C" { // fpm_t test_init() { return __arm_fpm_init(); } -// CHECK-LABEL: define dso_local noundef i64 @test_src1_1( +// CHECK-LABEL: define dso_local noundef range(i64 0, -6) i64 @test_src1_1( // CHECK-SAME: ) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] // CHECK-NEXT: ret i64 -8 @@ -44,7 +44,7 @@ fpm_t test_src1_1() { return __arm_set_fpm_src1_format(INIT_ONES, __ARM_FPM_E5M2); } -// CHECK-LABEL: define dso_local noundef i64 @test_src1_2( +// CHECK-LABEL: define dso_local noundef range(i64 0, -6) i64 @test_src1_2( // CHECK-SAME: ) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] // CHECK-NEXT: ret i64 1 @@ -53,7 +53,7 @@ fpm_t test_src1_2() { return __arm_set_fpm_src1_format(INIT_ZERO, __ARM_FPM_E4M3); } -// CHECK-LABEL: define dso_local noundef i64 @test_src2_1( +// CHECK-LABEL: define dso_local noundef range(i64 0, -48) i64 @test_src2_1( // CHECK-SAME: ) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] // CHECK-NEXT: ret i64 -57 @@ -62,7 +62,7 @@ fpm_t test_src2_1() { return __arm_set_fpm_src2_format(INIT_ONES, __ARM_FPM_E5M2); } -// CHECK-LABEL: define dso_local noundef i64 @test_src2_2( +// CHECK-LABEL: define dso_local noundef range(i64 0, -48) i64 @test_src2_2( // CHECK-SAME: ) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] // CHECK-NEXT: ret i64 8 @@ -71,7 +71,7 @@ fpm_t test_src2_2() { return __arm_set_fpm_src2_format(INIT_ZERO, __ARM_FPM_E4M3); } -// CHECK-LABEL: define dso_local noundef i64 @test_dst1_1( +// CHECK-LABEL: define dso_local noundef range(i64 0, -384) i64 @test_dst1_1( // CHECK-SAME: ) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] // CHECK-NEXT: ret i64 -449 @@ -80,7 +80,7 @@ fpm_t test_dst1_1() { return __arm_set_fpm_dst_format(INIT_ONES, __ARM_FPM_E5M2); } -// CHECK-LABEL: define dso_local noundef i64 @test_dst2_2( +// CHECK-LABEL: define dso_local noundef range(i64 0, -384) i64 @test_dst2_2( // CHECK-SAME: ) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] // CHECK-NEXT: ret i64 64 @@ -139,21 +139,21 @@ fpm_t test_lscale() { return __arm_set_fpm_lscale(INIT_ZERO, 127); } // fpm_t test_lscale2() { return __arm_set_fpm_lscale2(INIT_ZERO, 63); } -// CHECK-LABEL: define dso_local noundef range(i64 0, 4294967296) i64 @test_nscale_1( +// CHECK-LABEL: define dso_local noundef range(i64 0, 4278190081) i64 @test_nscale_1( // CHECK-SAME: ) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] // CHECK-NEXT: ret i64 2147483648 // fpm_t test_nscale_1() { return __arm_set_fpm_nscale(INIT_ZERO, -128); } -// CHECK-LABEL: define dso_local noundef range(i64 0, 4294967296) i64 @test_nscale_2( +// CHECK-LABEL: define dso_local noundef range(i64 0, 4278190081) i64 @test_nscale_2( // CHECK-SAME: ) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] // CHECK-NEXT: ret i64 2130706432 // fpm_t test_nscale_2() { return __arm_set_fpm_nscale(INIT_ZERO, 127); } -// CHECK-LABEL: define dso_local noundef range(i64 0, 4294967296) i64 @test_nscale_3( +// CHECK-LABEL: define dso_local noundef range(i64 0, 4278190081) i64 @test_nscale_3( // CHECK-SAME: ) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] // CHECK-NEXT: ret i64 4278190080 diff --git a/llvm/lib/IR/ConstantRange.cpp b/llvm/lib/IR/ConstantRange.cpp index d81a292916fdea..35664353989929 100644 --- a/llvm/lib/IR/ConstantRange.cpp +++ b/llvm/lib/IR/ConstantRange.cpp @@ -1520,15 +1520,72 @@ ConstantRange ConstantRange::binaryNot() const { return ConstantRange(APInt::getAllOnes(getBitWidth())).sub(*this); } +/// Estimate the 'bit-masked AND' operation's lower bound. +/// +/// E.g., given two ranges as follows (single quotes are separators and +/// have no meaning here), +/// +/// LHS = [10'00101'1, ; LLo +/// 10'10000'0] ; LHi +/// RHS = [10'11111'0, ; RLo +/// 10'11111'1] ; RHi +/// +/// we know that the higher 2 bits of the result is always 10; and we also +/// notice that RHS[1:6] are always 1, so the result[1:6] cannot be less than +/// LHS[1:6] (i.e., 00101). Thus, the lower bound is 10'00101'0. +/// +/// The algorithm is as follows, +/// 1. we first calculate a mask to find the higher common bits by +/// Mask = ~((LLo ^ LHi) | (RLo ^ RHi) | (LLo ^ RLo)); +/// Mask = clear all non-leading-ones bits in Mask; +/// in the example, the Mask is set to 11'00000'0; +/// 2. calculate a new mask by setting all common leading bits to 1 in RHS, and +/// keeping the longest leading ones (i.e., 11'11111'0 in the example); +/// 3. return (LLo & new mask) as the lower bound; +/// 4. repeat the step 2 and 3 with LHS and RHS swapped, and update the lower +/// bound with the larger one. +static APInt estimateBitMaskedAndLowerBound(const ConstantRange &LHS, + const ConstantRange &RHS) { + auto BitWidth = LHS.getBitWidth(); + // If either is full set or unsigned wrapped, then the range must contain '0' + // which leads the lower bound to 0. + if ((LHS.isFullSet() || RHS.isFullSet()) || + (LHS.isWrappedSet() || RHS.isWrappedSet())) + return APInt::getZero(BitWidth); + + auto LLo = LHS.getLower(); + auto LHi = LHS.getUpper() - 1; + auto RLo = RHS.getLower(); + auto RHi = RHS.getUpper() - 1; + + // Calculate the mask for the higher common bits. + auto Mask = ~((LLo ^ LHi) | (RLo ^ RHi) | (LLo ^ RLo)); + unsigned LeadingOnes = Mask.countLeadingOnes(); + Mask.clearLowBits(BitWidth - LeadingOnes); + + auto estimateBound = [BitWidth, &Mask](APInt ALo, const APInt &BLo, + const APInt &BHi) { + unsigned LeadingOnes = ((BLo & BHi) | Mask).countLeadingOnes(); + unsigned StartBit = BitWidth - LeadingOnes; + ALo.clearLowBits(StartBit); + return ALo; + }; + + auto LowerBoundByLHS = estimateBound(LLo, RLo, RHi); + auto LowerBoundByRHS = estimateBound(RLo, LLo, LHi); + + return APIntOps::umax(LowerBoundByLHS, LowerBoundByRHS); +} + ConstantRange ConstantRange::binaryAnd(const ConstantRange &Other) const { if (isEmptySet() || Other.isEmptySet()) return getEmpty(); ConstantRange KnownBitsRange = fromKnownBits(toKnownBits() & Other.toKnownBits(), false); - ConstantRange UMinUMaxRange = - getNonEmpty(APInt::getZero(getBitWidth()), - APIntOps::umin(Other.getUnsignedMax(), getUnsignedMax()) + 1); + auto LowerBound = estimateBitMaskedAndLowerBound(*this, Other); + ConstantRange UMinUMaxRange = getNonEmpty( + LowerBound, APIntOps::umin(Other.getUnsignedMax(), getUnsignedMax()) + 1); return KnownBitsRange.intersectWith(UMinUMaxRange); } @@ -1538,10 +1595,17 @@ ConstantRange ConstantRange::binaryOr(const ConstantRange &Other) const { ConstantRange KnownBitsRange = fromKnownBits(toKnownBits() | Other.toKnownBits(), false); + + // ~a & ~b >= x + // <=> ~(~a & ~b) <= ~x + // <=> a | b <= ~x + // <=> a | b < ~x + 1 = -x + // thus, UpperBound(a | b) == -LowerBound(~a & ~b) + auto UpperBound = + -estimateBitMaskedAndLowerBound(binaryNot(), Other.binaryNot()); // Upper wrapped range. - ConstantRange UMaxUMinRange = - getNonEmpty(APIntOps::umax(getUnsignedMin(), Other.getUnsignedMin()), - APInt::getZero(getBitWidth())); + ConstantRange UMaxUMinRange = getNonEmpty( + APIntOps::umax(getUnsignedMin(), Other.getUnsignedMin()), UpperBound); return KnownBitsRange.intersectWith(UMaxUMinRange); } diff --git a/llvm/test/Transforms/SCCP/range-and-or-bit-masked.ll b/llvm/test/Transforms/SCCP/range-and-or-bit-masked.ll new file mode 100644 index 00000000000000..e81c5d739c6d29 --- /dev/null +++ b/llvm/test/Transforms/SCCP/range-and-or-bit-masked.ll @@ -0,0 +1,88 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt -S -passes=ipsccp %s | FileCheck %s + +declare void @use(i1) + +define i1 @test1(i64 %x) { +; CHECK-LABEL: @test1( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[COND:%.*]] = icmp ugt i64 [[X:%.*]], 65535 +; CHECK-NEXT: call void @llvm.assume(i1 [[COND]]) +; CHECK-NEXT: [[MASK:%.*]] = and i64 [[X]], -65521 +; CHECK-NEXT: ret i1 false +; +entry: + %cond = icmp ugt i64 %x, 65535 + call void @llvm.assume(i1 %cond) + %mask = and i64 %x, -65521 + %cmp = icmp eq i64 %mask, 0 + ret i1 %cmp +} + +define void @test.and(i64 %x, i64 %y) { +; CHECK-LABEL: @test.and( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[C0:%.*]] = icmp uge i64 [[X:%.*]], 138 +; CHECK-NEXT: [[C1:%.*]] = icmp ule i64 [[X]], 161 +; CHECK-NEXT: call void @llvm.assume(i1 [[C0]]) +; CHECK-NEXT: call void @llvm.assume(i1 [[C1]]) +; CHECK-NEXT: [[C2:%.*]] = icmp uge i64 [[Y:%.*]], 186 +; CHECK-NEXT: [[C3:%.*]] = icmp ule i64 [[Y]], 188 +; CHECK-NEXT: call void @llvm.assume(i1 [[C2]]) +; CHECK-NEXT: call void @llvm.assume(i1 [[C3]]) +; CHECK-NEXT: [[AND:%.*]] = and i64 [[X]], [[Y]] +; CHECK-NEXT: call void @use(i1 false) +; CHECK-NEXT: [[R1:%.*]] = icmp ult i64 [[AND]], 137 +; CHECK-NEXT: call void @use(i1 [[R1]]) +; CHECK-NEXT: ret void +; +entry: + %c0 = icmp uge i64 %x, 138 ; 0b10001010 + %c1 = icmp ule i64 %x, 161 ; 0b10100000 + call void @llvm.assume(i1 %c0) + call void @llvm.assume(i1 %c1) + %c2 = icmp uge i64 %y, 186 ; 0b10111010 + %c3 = icmp ule i64 %y, 188 ; 0b10111110 + call void @llvm.assume(i1 %c2) + call void @llvm.assume(i1 %c3) + %and = and i64 %x, %y + %r0 = icmp ult i64 %and, 136 ; 0b10001000 + call void @use(i1 %r0) ; false + %r1 = icmp ult i64 %and, 137 + call void @use(i1 %r1) ; unknown + ret void +} + +define void @test.or(i64 %x, i64 %y) { +; CHECK-LABEL: @test.or( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[C0:%.*]] = icmp ule i64 [[X:%.*]], 117 +; CHECK-NEXT: [[C1:%.*]] = icmp uge i64 [[X]], 95 +; CHECK-NEXT: call void @llvm.assume(i1 [[C0]]) +; CHECK-NEXT: call void @llvm.assume(i1 [[C1]]) +; CHECK-NEXT: [[C2:%.*]] = icmp ule i64 [[Y:%.*]], 69 +; CHECK-NEXT: [[C3:%.*]] = icmp uge i64 [[Y]], 67 +; CHECK-NEXT: call void @llvm.assume(i1 [[C2]]) +; CHECK-NEXT: call void @llvm.assume(i1 [[C3]]) +; CHECK-NEXT: [[OR:%.*]] = or i64 [[X]], [[Y]] +; CHECK-NEXT: call void @use(i1 false) +; CHECK-NEXT: [[R1:%.*]] = icmp ugt i64 [[OR]], 118 +; CHECK-NEXT: call void @use(i1 [[R1]]) +; CHECK-NEXT: ret void +; +entry: + %c0 = icmp ule i64 %x, 117 ; 0b01110101 + %c1 = icmp uge i64 %x, 95 ; 0b01011111 + call void @llvm.assume(i1 %c0) + call void @llvm.assume(i1 %c1) + %c2 = icmp ule i64 %y, 69 ; 0b01000101 + %c3 = icmp uge i64 %y, 67 ; 0b01000011 + call void @llvm.assume(i1 %c2) + call void @llvm.assume(i1 %c3) + %or = or i64 %x, %y + %r0 = icmp ugt i64 %or, 119 ; 0b01110111 + call void @use(i1 %r0) ; false + %r1 = icmp ugt i64 %or, 118 + call void @use(i1 %r1) ; unknown + ret void +} diff --git a/llvm/unittests/IR/ConstantRangeTest.cpp b/llvm/unittests/IR/ConstantRangeTest.cpp index e1d9b3e387b200..c390ffea1c3523 100644 --- a/llvm/unittests/IR/ConstantRangeTest.cpp +++ b/llvm/unittests/IR/ConstantRangeTest.cpp @@ -2720,6 +2720,37 @@ TEST_F(ConstantRangeTest, binaryAnd) { EXPECT_EQ(R16_32.binaryAnd(R0_99), R0_32); EXPECT_EQ(R0_99.binaryAnd(R16_32), R0_32); + // 'And' with leading bits are masked (with common leading bits stripped) + ConstantRange RMaskedL(APInt(8, 0b10'00101'1), APInt(8, 0b10'10000'0 + 1)); + ConstantRange RMaskedR(APInt(8, 0b10'11111'0), APInt(8, 0b10'11111'1 + 1)); + EXPECT_EQ(RMaskedL.binaryAnd(RMaskedR).getLower(), APInt(8, 0b10'00101'0)); + EXPECT_EQ(RMaskedR.binaryAnd(RMaskedL).getLower(), APInt(8, 0b10'00101'0)); + + ConstantRange RMaskedL1(APInt(8, 0b00'011'010), APInt(8, 0b00'100'100 + 1)); + ConstantRange RMaskedR1(APInt(8, 0b00'111'010), APInt(8, 0b00'111'110 + 1)); + EXPECT_EQ(RMaskedL1.binaryAnd(RMaskedR1).getLower(), APInt(8, 0b00'011'000)); + EXPECT_EQ(RMaskedR1.binaryAnd(RMaskedL1).getLower(), APInt(8, 0b00'011'000)); + + ConstantRange RMaskedL2(APInt(8, 0b0000'0111u), APInt(8, 0b0000'1101u + 1u)); + ConstantRange RMaskedR2(APInt(8, 0xff), APInt(8, 0)); + EXPECT_EQ(RMaskedL2.binaryAnd(RMaskedR2), RMaskedL2); + EXPECT_EQ(RMaskedR2.binaryAnd(RMaskedL2), RMaskedL2); + + ConstantRange RMaskedL3(APInt(4, 0b0011u), APInt(4, 0)); + ConstantRange RMaskedR3(APInt(4, 0b1011u), APInt(4, 0)); + APInt Zero_4(4, 0); + EXPECT_EQ(RMaskedL3.binaryAnd(RMaskedR3).getLower().uge(Zero_4), true); + EXPECT_EQ(RMaskedR3.binaryAnd(RMaskedL3).getLower().uge(Zero_4), true); + + // wrapped set + APInt NegSeven(4, 9); // Also -7 + ConstantRange RMaskedL4(NegSeven, APInt(4, 1)); + ConstantRange RMaskedR4(NegSeven, APInt(4, 0)); + EXPECT_EQ(RMaskedL4.binaryAnd(RMaskedR4).contains(Zero_4), true); + EXPECT_EQ(RMaskedR4.binaryAnd(RMaskedL4).contains(Zero_4), true); + EXPECT_EQ(RMaskedL4.binaryAnd(RMaskedR4).contains(NegSeven), true); + EXPECT_EQ(RMaskedR4.binaryAnd(RMaskedL4).contains(NegSeven), true); + TestBinaryOpExhaustive( [](const ConstantRange &CR1, const ConstantRange &CR2) { return CR1.binaryAnd(CR2); _______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits