Author: David Green Date: 2021-01-08T16:10:01Z New Revision: e185b1dd7b34c352167823295281f1bf1df09976
URL: https://github.com/llvm/llvm-project/commit/e185b1dd7b34c352167823295281f1bf1df09976 DIFF: https://github.com/llvm/llvm-project/commit/e185b1dd7b34c352167823295281f1bf1df09976.diff LOG: [ConstProp] Constant propagation for get.active.lane.mask intrinsics Similar to the Arm VCTP intrinsics, if the operands of an active.lane.mask are both known, the constant lane mask can be calculated. This can come up after unrolling the loops. Differential Revision: https://reviews.llvm.org/D94103 Added: llvm/test/Transforms/InstSimplify/ConstProp/active-lane-mask.ll Modified: llvm/lib/Analysis/ConstantFolding.cpp Removed: ################################################################################ diff --git a/llvm/lib/Analysis/ConstantFolding.cpp b/llvm/lib/Analysis/ConstantFolding.cpp index 7b0d4bd5172b..22b9acbc03b8 100644 --- a/llvm/lib/Analysis/ConstantFolding.cpp +++ b/llvm/lib/Analysis/ConstantFolding.cpp @@ -1456,6 +1456,7 @@ bool llvm::canConstantFoldCallTo(const CallBase *Call, const Function *F) { case Intrinsic::launder_invariant_group: case Intrinsic::strip_invariant_group: case Intrinsic::masked_load: + case Intrinsic::get_active_lane_mask: case Intrinsic::abs: case Intrinsic::smax: case Intrinsic::smin: @@ -2927,6 +2928,25 @@ static Constant *ConstantFoldVectorCall(StringRef Name, } break; } + case Intrinsic::get_active_lane_mask: { + auto *Op0 = dyn_cast<ConstantInt>(Operands[0]); + auto *Op1 = dyn_cast<ConstantInt>(Operands[1]); + if (Op0 && Op1) { + unsigned Lanes = FVTy->getNumElements(); + uint64_t Base = Op0->getZExtValue(); + uint64_t Limit = Op1->getZExtValue(); + + SmallVector<Constant *, 16> NCs; + for (unsigned i = 0; i < Lanes; i++) { + if (Base + i < Limit) + NCs.push_back(ConstantInt::getTrue(Ty)); + else + NCs.push_back(ConstantInt::getFalse(Ty)); + } + return ConstantVector::get(NCs); + } + break; + } default: break; } diff --git a/llvm/test/Transforms/InstSimplify/ConstProp/active-lane-mask.ll 
b/llvm/test/Transforms/InstSimplify/ConstProp/active-lane-mask.ll new file mode 100644 index 000000000000..a6006bca169c --- /dev/null +++ b/llvm/test/Transforms/InstSimplify/ConstProp/active-lane-mask.ll @@ -0,0 +1,300 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt -instsimplify -S -o - %s | FileCheck %s + +target datalayout = "e-m:e-p:32:32-Fi8-i64:64-v128:64:128-a:0:32-n32-S64" + +define <16 x i1> @v16i1_0() { +; CHECK-LABEL: @v16i1_0( +; CHECK-NEXT: entry: +; CHECK-NEXT: ret <16 x i1> zeroinitializer +; +entry: + %int = call <16 x i1> @llvm.get.active.lane.mask.v16i1.i32(i32 0, i32 0) + ret <16 x i1> %int +} + +define <16 x i1> @v16i1_1() { +; CHECK-LABEL: @v16i1_1( +; CHECK-NEXT: entry: +; CHECK-NEXT: ret <16 x i1> <i1 true, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false> +; +entry: + %int = call <16 x i1> @llvm.get.active.lane.mask.v16i1.i32(i32 0, i32 1) + ret <16 x i1> %int +} + +define <16 x i1> @v16i1_8() { +; CHECK-LABEL: @v16i1_8( +; CHECK-NEXT: entry: +; CHECK-NEXT: ret <16 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false> +; +entry: + %int = call <16 x i1> @llvm.get.active.lane.mask.v16i1.i32(i32 0, i32 8) + ret <16 x i1> %int +} + +define <16 x i1> @v16i1_15() { +; CHECK-LABEL: @v16i1_15( +; CHECK-NEXT: entry: +; CHECK-NEXT: ret <16 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 false> +; +entry: + %int = call <16 x i1> @llvm.get.active.lane.mask.v16i1.i32(i32 0, i32 15) + ret <16 x i1> %int +} + +define <16 x i1> @v16i1_16() { +; CHECK-LABEL: @v16i1_16( +; CHECK-NEXT: entry: +; CHECK-NEXT: ret <16 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 
true, i1 true, i1 true, i1 true, i1 true> +; +entry: + %int = call <16 x i1> @llvm.get.active.lane.mask.v16i1.i32(i32 0, i32 16) + ret <16 x i1> %int +} + +define <16 x i1> @v16i1_100() { +; CHECK-LABEL: @v16i1_100( +; CHECK-NEXT: entry: +; CHECK-NEXT: ret <16 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true> +; +entry: + %int = call <16 x i1> @llvm.get.active.lane.mask.v16i1.i32(i32 0, i32 100) + ret <16 x i1> %int +} + +define <16 x i1> @v16i1_m1() { +; CHECK-LABEL: @v16i1_m1( +; CHECK-NEXT: entry: +; CHECK-NEXT: ret <16 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true> +; +entry: + %int = call <16 x i1> @llvm.get.active.lane.mask.v16i1.i32(i32 0, i32 -1) + ret <16 x i1> %int +} + +define <16 x i1> @v16i1_10_11() { +; CHECK-LABEL: @v16i1_10_11( +; CHECK-NEXT: entry: +; CHECK-NEXT: ret <16 x i1> <i1 true, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false> +; +entry: + %int = call <16 x i1> @llvm.get.active.lane.mask.v16i1.i32(i32 10, i32 11) + ret <16 x i1> %int +} + +define <16 x i1> @v16i1_12_11() { +; CHECK-LABEL: @v16i1_12_11( +; CHECK-NEXT: entry: +; CHECK-NEXT: ret <16 x i1> zeroinitializer +; +entry: + %int = call <16 x i1> @llvm.get.active.lane.mask.v16i1.i32(i32 12, i32 11) + ret <16 x i1> %int +} + + + +define <8 x i1> @v8i1_0() { +; CHECK-LABEL: @v8i1_0( +; CHECK-NEXT: entry: +; CHECK-NEXT: ret <8 x i1> zeroinitializer +; +entry: + %int = call <8 x i1> @llvm.get.active.lane.mask.v8i1.i32(i32 0, i32 0) + ret <8 x i1> %int +} + +define <8 x i1> @v8i1_1() { +; CHECK-LABEL: @v8i1_1( +; CHECK-NEXT: entry: +; CHECK-NEXT: ret <8 x i1> <i1 true, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false> +; +entry: + %int = call <8 x i1> 
@llvm.get.active.lane.mask.v8i1.i32(i32 0, i32 1) + ret <8 x i1> %int +} + +define <8 x i1> @v8i1_4() { +; CHECK-LABEL: @v8i1_4( +; CHECK-NEXT: entry: +; CHECK-NEXT: ret <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 false, i1 false, i1 false, i1 false> +; +entry: + %int = call <8 x i1> @llvm.get.active.lane.mask.v8i1.i32(i32 0, i32 4) + ret <8 x i1> %int +} + +define <8 x i1> @v8i1_7() { +; CHECK-LABEL: @v8i1_7( +; CHECK-NEXT: entry: +; CHECK-NEXT: ret <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 false> +; +entry: + %int = call <8 x i1> @llvm.get.active.lane.mask.v8i1.i32(i32 0, i32 7) + ret <8 x i1> %int +} + +define <8 x i1> @v8i1_8() { +; CHECK-LABEL: @v8i1_8( +; CHECK-NEXT: entry: +; CHECK-NEXT: ret <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true> +; +entry: + %int = call <8 x i1> @llvm.get.active.lane.mask.v8i1.i32(i32 0, i32 8) + ret <8 x i1> %int +} + +define <8 x i1> @v8i1_100() { +; CHECK-LABEL: @v8i1_100( +; CHECK-NEXT: entry: +; CHECK-NEXT: ret <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true> +; +entry: + %int = call <8 x i1> @llvm.get.active.lane.mask.v8i1.i32(i32 0, i32 100) + ret <8 x i1> %int +} + +define <8 x i1> @v8i1_m1() { +; CHECK-LABEL: @v8i1_m1( +; CHECK-NEXT: entry: +; CHECK-NEXT: ret <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true> +; +entry: + %int = call <8 x i1> @llvm.get.active.lane.mask.v8i1.i32(i32 0, i32 -1) + ret <8 x i1> %int +} + +define <8 x i1> @v8i1_10_11() { +; CHECK-LABEL: @v8i1_10_11( +; CHECK-NEXT: entry: +; CHECK-NEXT: ret <8 x i1> <i1 true, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false> +; +entry: + %int = call <8 x i1> @llvm.get.active.lane.mask.v8i1.i32(i32 10, i32 11) + ret <8 x i1> %int +} + +define <8 x i1> @v8i1_12_11() { +; CHECK-LABEL: @v8i1_12_11( +; CHECK-NEXT: entry: +; CHECK-NEXT: ret <8 x i1> zeroinitializer +; +entry: + %int = call <8 x i1> 
@llvm.get.active.lane.mask.v8i1.i32(i32 12, i32 11) + ret <8 x i1> %int +} + + + +define <4 x i1> @v4i1_0() { +; CHECK-LABEL: @v4i1_0( +; CHECK-NEXT: entry: +; CHECK-NEXT: ret <4 x i1> zeroinitializer +; +entry: + %int = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i32(i32 0, i32 0) + ret <4 x i1> %int +} + +define <4 x i1> @v4i1_1() { +; CHECK-LABEL: @v4i1_1( +; CHECK-NEXT: entry: +; CHECK-NEXT: ret <4 x i1> <i1 true, i1 false, i1 false, i1 false> +; +entry: + %int = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i32(i32 0, i32 1) + ret <4 x i1> %int +} + +define <4 x i1> @v4i1_3() { +; CHECK-LABEL: @v4i1_3( +; CHECK-NEXT: entry: +; CHECK-NEXT: ret <4 x i1> <i1 true, i1 true, i1 true, i1 false> +; +entry: + %int = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i32(i32 0, i32 3) + ret <4 x i1> %int +} + +define <4 x i1> @v4i1_4() { +; CHECK-LABEL: @v4i1_4( +; CHECK-NEXT: entry: +; CHECK-NEXT: ret <4 x i1> <i1 true, i1 true, i1 true, i1 true> +; +entry: + %int = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i32(i32 0, i32 4) + ret <4 x i1> %int +} + +define <4 x i1> @v4i1_100() { +; CHECK-LABEL: @v4i1_100( +; CHECK-NEXT: entry: +; CHECK-NEXT: ret <4 x i1> <i1 true, i1 true, i1 true, i1 true> +; +entry: + %int = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i32(i32 0, i32 100) + ret <4 x i1> %int +} + +define <4 x i1> @v4i1_m1() { +; CHECK-LABEL: @v4i1_m1( +; CHECK-NEXT: entry: +; CHECK-NEXT: ret <4 x i1> <i1 true, i1 true, i1 true, i1 true> +; +entry: + %int = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i32(i32 0, i32 -1) + ret <4 x i1> %int +} + +define <4 x i1> @v4i1_10_11() { +; CHECK-LABEL: @v4i1_10_11( +; CHECK-NEXT: entry: +; CHECK-NEXT: ret <4 x i1> <i1 true, i1 false, i1 false, i1 false> +; +entry: + %int = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i32(i32 10, i32 11) + ret <4 x i1> %int +} + +define <4 x i1> @v4i1_12_11() { +; CHECK-LABEL: @v4i1_12_11( +; CHECK-NEXT: entry: +; CHECK-NEXT: ret <4 x i1> zeroinitializer +; +entry: + %int = call <4 x i1> 
@llvm.get.active.lane.mask.v4i1.i32(i32 12, i32 11) + ret <4 x i1> %int +} + + + +define <4 x i1> @v4i1_nc1(i32 %x) { +; CHECK-LABEL: @v4i1_nc1( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[INT:%.*]] = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i32(i32 [[X:%.*]], i32 11) +; CHECK-NEXT: ret <4 x i1> [[INT]] +; +entry: + %int = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i32(i32 %x, i32 11) + ret <4 x i1> %int +} + +define <4 x i1> @v4i1_nc2(i32 %x) { +; CHECK-LABEL: @v4i1_nc2( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[INT:%.*]] = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i32(i32 11, i32 [[X:%.*]]) +; CHECK-NEXT: ret <4 x i1> [[INT]] +; +entry: + %int = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i32(i32 11, i32 %x) + ret <4 x i1> %int +} + + + + + +declare <4 x i1> @llvm.get.active.lane.mask.v4i1.i32(i32, i32) +declare <8 x i1> @llvm.get.active.lane.mask.v8i1.i32(i32, i32) +declare <16 x i1> @llvm.get.active.lane.mask.v16i1.i32(i32, i32) _______________________________________________ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits