Author: Nikita Popov Date: 2024-07-09T12:25:53+02:00 New Revision: 6b76c1e64ca7689ac9d9de8a4529c7af51e8b472
URL: https://github.com/llvm/llvm-project/commit/6b76c1e64ca7689ac9d9de8a4529c7af51e8b472 DIFF: https://github.com/llvm/llvm-project/commit/6b76c1e64ca7689ac9d9de8a4529c7af51e8b472.diff LOG: [SCCP] Add support for vectors (#98026) Add preliminary support for vectors of integers by using the `ValueLatticeElement::asConstantRange()` helper instead of a custom implementation, and relaxing various integer type checks. This enables just the part that works automatically, e.g. icmps with a constant vector operand aren't supported yet. The change in ssa.copy handling is because asConstantRange() returns an unknown LV for empty range, while SCCP's getConstantRange() returned a full range. I've made the change to preserve the existing behavior. Added: Modified: clang/test/CodeGen/isfpclass.c llvm/lib/Transforms/Utils/SCCPSolver.cpp llvm/test/Transforms/SCCP/add-nuw-nsw-flags.ll llvm/test/Transforms/SCCP/overdefined-ext.ll llvm/test/Transforms/SCCP/trunc-nuw-nsw-flags.ll Removed: ################################################################################ diff --git a/clang/test/CodeGen/isfpclass.c b/clang/test/CodeGen/isfpclass.c index fd35182a5dbbe..a0e04eaad5929 100644 --- a/clang/test/CodeGen/isfpclass.c +++ b/clang/test/CodeGen/isfpclass.c @@ -136,7 +136,7 @@ typedef double __attribute__((ext_vector_type(4))) double4; typedef int __attribute__((ext_vector_type(4))) int4; typedef long __attribute__((ext_vector_type(4))) long4; -// CHECK-LABEL: define dso_local noundef <4 x i32> @check_isfpclass_nan_v4f32 +// CHECK-LABEL: define dso_local range(i32 0, 2) <4 x i32> @check_isfpclass_nan_v4f32 // CHECK-SAME: (<4 x float> noundef [[X:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = fcmp uno <4 x float> [[X]], zeroinitializer @@ -147,7 +147,7 @@ int4 check_isfpclass_nan_v4f32(float4 x) { return __builtin_isfpclass(x, 3 /*NaN*/); } -// CHECK-LABEL: define dso_local noundef <4 x i32> @check_isfpclass_nan_strict_v4f32 +// CHECK-LABEL: 
define dso_local range(i32 0, 2) <4 x i32> @check_isfpclass_nan_strict_v4f32 // CHECK-SAME: (<4 x float> noundef [[X:%.*]]) local_unnamed_addr #[[ATTR2]] { // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i1> @llvm.is.fpclass.v4f32(<4 x float> [[X]], i32 3) #[[ATTR5]] diff --git a/llvm/lib/Transforms/Utils/SCCPSolver.cpp b/llvm/lib/Transforms/Utils/SCCPSolver.cpp index db0d40b317d17..7bfff4dfa67ad 100644 --- a/llvm/lib/Transforms/Utils/SCCPSolver.cpp +++ b/llvm/lib/Transforms/Utils/SCCPSolver.cpp @@ -42,14 +42,6 @@ static ValueLatticeElement::MergeOptions getMaxWidenStepsOpts() { MaxNumRangeExtensions); } -static ConstantRange getConstantRange(const ValueLatticeElement &LV, Type *Ty, - bool UndefAllowed) { - assert(Ty->isIntOrIntVectorTy() && "Should be int or int vector"); - if (LV.isConstantRange(UndefAllowed)) - return LV.getConstantRange(); - return ConstantRange::getFull(Ty->getScalarSizeInBits()); -} - namespace llvm { bool SCCPSolver::isConstant(const ValueLatticeElement &LV) { @@ -109,14 +101,14 @@ static bool refineInstruction(SCCPSolver &Solver, Instruction &Inst) { bool Changed = false; auto GetRange = [&Solver, &InsertedValues](Value *Op) { - if (auto *Const = dyn_cast<ConstantInt>(Op)) - return ConstantRange(Const->getValue()); - if (isa<Constant>(Op) || InsertedValues.contains(Op)) { + if (auto *Const = dyn_cast<Constant>(Op)) + return Const->toConstantRange(); + if (InsertedValues.contains(Op)) { unsigned Bitwidth = Op->getType()->getScalarSizeInBits(); return ConstantRange::getFull(Bitwidth); } - return getConstantRange(Solver.getLatticeValueFor(Op), Op->getType(), - /*UndefAllowed=*/false); + return Solver.getLatticeValueFor(Op).asConstantRange( + Op->getType(), /*UndefAllowed=*/false); }; if (isa<OverflowingBinaryOperator>(Inst)) { @@ -819,7 +811,7 @@ class SCCPInstVisitor : public InstVisitor<SCCPInstVisitor> { } void trackValueOfArgument(Argument *A) { - if (A->getType()->isIntegerTy()) { + if 
(A->getType()->isIntOrIntVectorTy()) { if (std::optional<ConstantRange> Range = A->getRange()) { markConstantRange(ValueState[A], A, *Range); return; @@ -1296,11 +1288,12 @@ void SCCPInstVisitor::visitCastInst(CastInst &I) { } // Ignore bitcasts, as they may change the number of vector elements. - if (I.getDestTy()->isIntegerTy() && I.getSrcTy()->isIntOrIntVectorTy() && + if (I.getDestTy()->isIntOrIntVectorTy() && + I.getSrcTy()->isIntOrIntVectorTy() && I.getOpcode() != Instruction::BitCast) { auto &LV = getValueState(&I); ConstantRange OpRange = - getConstantRange(OpSt, I.getSrcTy(), /*UndefAllowed=*/false); + OpSt.asConstantRange(I.getSrcTy(), /*UndefAllowed=*/false); Type *DestTy = I.getDestTy(); ConstantRange Res = @@ -1322,8 +1315,8 @@ void SCCPInstVisitor::handleExtractOfWithOverflow(ExtractValueInst &EVI, return; // Wait to resolve. Type *Ty = LHS->getType(); - ConstantRange LR = getConstantRange(L, Ty, /*UndefAllowed=*/false); - ConstantRange RR = getConstantRange(R, Ty, /*UndefAllowed=*/false); + ConstantRange LR = L.asConstantRange(Ty, /*UndefAllowed=*/false); + ConstantRange RR = R.asConstantRange(Ty, /*UndefAllowed=*/false); if (Idx == 0) { ConstantRange Res = LR.binaryOp(WO->getBinaryOp(), RR); mergeInValue(&EVI, ValueLatticeElement::getRange(Res)); @@ -1523,14 +1516,14 @@ void SCCPInstVisitor::visitBinaryOperator(Instruction &I) { } // Only use ranges for binary operators on integers. - if (!I.getType()->isIntegerTy()) + if (!I.getType()->isIntOrIntVectorTy()) return markOverdefined(&I); // Try to simplify to a constant range. 
ConstantRange A = - getConstantRange(V1State, I.getType(), /*UndefAllowed=*/false); + V1State.asConstantRange(I.getType(), /*UndefAllowed=*/false); ConstantRange B = - getConstantRange(V2State, I.getType(), /*UndefAllowed=*/false); + V2State.asConstantRange(I.getType(), /*UndefAllowed=*/false); auto *BO = cast<BinaryOperator>(&I); ConstantRange R = ConstantRange::getEmpty(I.getType()->getScalarSizeInBits()); @@ -1626,7 +1619,7 @@ void SCCPInstVisitor::visitStoreInst(StoreInst &SI) { } static ValueLatticeElement getValueFromMetadata(const Instruction *I) { - if (I->getType()->isIntegerTy()) { + if (I->getType()->isIntOrIntVectorTy()) { if (MDNode *Ranges = I->getMetadata(LLVMContext::MD_range)) return ValueLatticeElement::getRange( getConstantRangeFromMetadata(*Ranges)); @@ -1813,8 +1806,11 @@ void SCCPInstVisitor::handleCallResult(CallBase &CB) { // Combine range info for the original value with the new range from the // condition. - auto CopyOfCR = getConstantRange(CopyOfVal, CopyOf->getType(), - /*UndefAllowed=*/true); + auto CopyOfCR = CopyOfVal.asConstantRange(CopyOf->getType(), + /*UndefAllowed=*/true); + // Treat an unresolved input like a full range. 
+ if (CopyOfCR.isEmptySet()) + CopyOfCR = ConstantRange::getFull(CopyOfCR.getBitWidth()); auto NewCR = ImposedCR.intersectWith(CopyOfCR); // If the existing information is != x, do not use the information from // a chained predicate, as the != x information is more likely to be @@ -1860,7 +1856,7 @@ void SCCPInstVisitor::handleCallResult(CallBase &CB) { if (State.isUnknownOrUndef()) return; OpRanges.push_back( - getConstantRange(State, Op->getType(), /*UndefAllowed=*/false)); + State.asConstantRange(Op->getType(), /*UndefAllowed=*/false)); } ConstantRange Result = diff --git a/llvm/test/Transforms/SCCP/add-nuw-nsw-flags.ll b/llvm/test/Transforms/SCCP/add-nuw-nsw-flags.ll index 05d9acd191962..1f2fbb6f53cdd 100644 --- a/llvm/test/Transforms/SCCP/add-nuw-nsw-flags.ll +++ b/llvm/test/Transforms/SCCP/add-nuw-nsw-flags.ll @@ -43,7 +43,7 @@ define <4 x i8> @range_from_lshr_vec(<4 x i8> %a) { ; CHECK-LABEL: @range_from_lshr_vec( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[A_SHR:%.*]] = lshr <4 x i8> [[A:%.*]], <i8 1, i8 2, i8 3, i8 4> -; CHECK-NEXT: [[ADD_1:%.*]] = add <4 x i8> [[A_SHR]], <i8 1, i8 2, i8 3, i8 4> +; CHECK-NEXT: [[ADD_1:%.*]] = add nuw <4 x i8> [[A_SHR]], <i8 1, i8 2, i8 3, i8 4> ; CHECK-NEXT: ret <4 x i8> [[ADD_1]] ; entry: @@ -56,7 +56,7 @@ define <4 x i8> @range_from_lshr_vec_2(<4 x i8> %a) { ; CHECK-LABEL: @range_from_lshr_vec_2( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[A_SHR:%.*]] = lshr <4 x i8> [[A:%.*]], <i8 1, i8 1, i8 1, i8 1> -; CHECK-NEXT: [[ADD_1:%.*]] = add <4 x i8> [[A_SHR]], <i8 2, i8 2, i8 2, i8 2> +; CHECK-NEXT: [[ADD_1:%.*]] = add nuw <4 x i8> [[A_SHR]], <i8 2, i8 2, i8 2, i8 2> ; CHECK-NEXT: ret <4 x i8> [[ADD_1]] ; entry: @@ -169,7 +169,7 @@ else: define <6 x i8> @vector_constant_replacement_in_add(<6 x i8> %a) { ; CHECK-LABEL: @vector_constant_replacement_in_add( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[ADD:%.*]] = add <6 x i8> [[A:%.*]], zeroinitializer +; CHECK-NEXT: [[ADD:%.*]] = add nuw nsw <6 x i8> [[A:%.*]], zeroinitializer ; CHECK-NEXT: ret 
<6 x i8> [[ADD]] ; entry: diff --git a/llvm/test/Transforms/SCCP/overdefined-ext.ll b/llvm/test/Transforms/SCCP/overdefined-ext.ll index 217daa750cc1d..05819c32d522d 100644 --- a/llvm/test/Transforms/SCCP/overdefined-ext.ll +++ b/llvm/test/Transforms/SCCP/overdefined-ext.ll @@ -21,7 +21,7 @@ define i1 @zext_icmp(i1 %t0) { ret i1 %t2 } -; negative test. SCCP operates poorly with vector ranges +; TODO: SCCP operates poorly with vector ranges define <2 x i1> @zext_vector(<2 x i1> %t0) { ; CHECK-LABEL: @zext_vector( @@ -34,14 +34,11 @@ define <2 x i1> @zext_vector(<2 x i1> %t0) { ret <2 x i1> %t2 } -; negative test. SCCP operates poorly with vector ranges - define <2 x i1> @zext_vector2(<2 x i1> %t0) { ; CHECK-LABEL: @zext_vector2( ; CHECK-NEXT: [[T1:%.*]] = zext <2 x i1> [[T0:%.*]] to <2 x i32> -; CHECK-NEXT: [[T2:%.*]] = add <2 x i32> [[T1]], <i32 2, i32 2> -; CHECK-NEXT: [[T3:%.*]] = icmp eq <2 x i32> [[T1]], [[T2]] -; CHECK-NEXT: ret <2 x i1> [[T3]] +; CHECK-NEXT: [[T2:%.*]] = add nuw nsw <2 x i32> [[T1]], <i32 2, i32 2> +; CHECK-NEXT: ret <2 x i1> zeroinitializer ; %t1 = zext <2 x i1> %t0 to <2 x i32> %t2 = add <2 x i32> %t1, <i32 2, i32 2> @@ -72,7 +69,7 @@ define i1 @sext_icmp(i1 %t0) { ret i1 %t2 } -; negative test. SCCP operates poorly with vector ranges +; TODO: SCCP operates poorly with vector ranges define <2 x i1> @sext_vector(<2 x i1> %t0) { ; CHECK-LABEL: @sext_vector( @@ -85,14 +82,11 @@ define <2 x i1> @sext_vector(<2 x i1> %t0) { ret <2 x i1> %t2 } -; negative test. 
SCCP operates poorly with vector ranges - define <2 x i1> @sext_vector2(<2 x i1> %t0) { ; CHECK-LABEL: @sext_vector2( ; CHECK-NEXT: [[T1:%.*]] = sext <2 x i1> [[T0:%.*]] to <2 x i32> -; CHECK-NEXT: [[T2:%.*]] = add <2 x i32> [[T1]], <i32 2, i32 2> -; CHECK-NEXT: [[T3:%.*]] = icmp eq <2 x i32> [[T1]], [[T2]] -; CHECK-NEXT: ret <2 x i1> [[T3]] +; CHECK-NEXT: [[T2:%.*]] = add nsw <2 x i32> [[T1]], <i32 2, i32 2> +; CHECK-NEXT: ret <2 x i1> zeroinitializer ; %t1 = sext <2 x i1> %t0 to <2 x i32> %t2 = add <2 x i32> %t1, <i32 2, i32 2> diff --git a/llvm/test/Transforms/SCCP/trunc-nuw-nsw-flags.ll b/llvm/test/Transforms/SCCP/trunc-nuw-nsw-flags.ll index d3bac0d68a979..92d84f71bd9d4 100644 --- a/llvm/test/Transforms/SCCP/trunc-nuw-nsw-flags.ll +++ b/llvm/test/Transforms/SCCP/trunc-nuw-nsw-flags.ll @@ -48,7 +48,7 @@ define <4 x i16> @range_from_and_nuw_vec(<4 x i32> %a) { ; CHECK-SAME: <4 x i32> [[A:%.*]]) { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[AND1:%.*]] = and <4 x i32> [[A]], <i32 65535, i32 65535, i32 65535, i32 65535> -; CHECK-NEXT: [[TRUNC1:%.*]] = trunc <4 x i32> [[AND1]] to <4 x i16> +; CHECK-NEXT: [[TRUNC1:%.*]] = trunc nuw <4 x i32> [[AND1]] to <4 x i16> ; CHECK-NEXT: ret <4 x i16> [[TRUNC1]] ; entry: _______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits