[clang] 590884a - [Clang][CodeGen] Stop emitting alignment assumes for `align_{up,down}`

Author: Noah Goldstein
Date: 2023-11-07T00:31:04-06:00
New Revision: 590884a860ccc5fce50bf0a7eba91ec1bfe71fb6

URL: https://github.com/llvm/llvm-project/commit/590884a860ccc5fce50bf0a7eba91ec1bfe71fb6
DIFF: https://github.com/llvm/llvm-project/commit/590884a860ccc5fce50bf0a7eba91ec1bfe71fb6.diff

LOG: [Clang][CodeGen] Stop emitting alignment assumes for `align_{up,down}`

    Now that `align_{up,down}` use `llvm.ptrmask` (as of #71238), the assume
    doesn't preserve any information that is not still easily re-computable.

    Closes #71295

Added:


Modified:
    clang/lib/CodeGen/CGBuiltin.cpp
    clang/test/CodeGen/builtin-align-array.c
    clang/test/CodeGen/builtin-align.c

Removed:


diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp
index 31edf575caae9fe..5ab81cc605819c3 100644
--- a/clang/lib/CodeGen/CGBuiltin.cpp
+++ b/clang/lib/CodeGen/CGBuiltin.cpp
@@ -19696,10 +19696,6 @@ RValue CodeGenFunction::EmitBuiltinAlignTo(const CallExpr *E, bool AlignUp) {
     Result = Builder.CreateIntrinsic(
         Intrinsic::ptrmask, {Args.SrcType, Args.IntType},
         {SrcForMask, InvertedMask}, nullptr, "aligned_result");
-
-    // Emit an alignment assumption to ensure that the new alignment is
-    // propagated to loads/stores, etc.
-    emitAlignmentAssumption(Result, E, E->getExprLoc(), Args.Alignment);
   } else {
     Result = Builder.CreateAnd(SrcForMask, InvertedMask, "aligned_result");
   }

diff --git a/clang/test/CodeGen/builtin-align-array.c b/clang/test/CodeGen/builtin-align-array.c
index 18a77b9a710db40..cbe6641f672eb43 100644
--- a/clang/test/CodeGen/builtin-align-array.c
+++ b/clang/test/CodeGen/builtin-align-array.c
@@ -9,12 +9,10 @@ extern int func(char *c);
 // CHECK-NEXT:    [[BUF:%.*]] = alloca [1024 x i8], align 16
 // CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds [1024 x i8], ptr [[BUF]], i64 0, i64 44
 // CHECK-NEXT:    [[ALIGNED_RESULT:%.*]] = call ptr @llvm.ptrmask.p0.i64(ptr [[ARRAYIDX]], i64 -16)
-// CHECK-NEXT:    call void @llvm.assume(i1 true) [ "align"(ptr [[ALIGNED_RESULT]], i64 16) ]
 // CHECK-NEXT:    [[CALL:%.*]] = call i32 @func(ptr noundef [[ALIGNED_RESULT]])
 // CHECK-NEXT:    [[ARRAYIDX1:%.*]] = getelementptr inbounds [1024 x i8], ptr [[BUF]], i64 0, i64 22
 // CHECK-NEXT:    [[OVER_BOUNDARY:%.*]] = getelementptr inbounds i8, ptr [[ARRAYIDX1]], i64 31
 // CHECK-NEXT:    [[ALIGNED_RESULT2:%.*]] = call ptr @llvm.ptrmask.p0.i64(ptr [[OVER_BOUNDARY]], i64 -32)
-// CHECK-NEXT:    call void @llvm.assume(i1 true) [ "align"(ptr [[ALIGNED_RESULT2]], i64 32) ]
 // CHECK-NEXT:    [[CALL3:%.*]] = call i32 @func(ptr noundef [[ALIGNED_RESULT2]])
 // CHECK-NEXT:    [[ARRAYIDX4:%.*]] = getelementptr inbounds [1024 x i8], ptr [[BUF]], i64 0, i64 16
 // CHECK-NEXT:    [[SRC_ADDR:%.*]] = ptrtoint ptr [[ARRAYIDX4]] to i64
@@ -35,12 +33,10 @@ int test_array(void) {
 // CHECK-NEXT:    [[BUF:%.*]] = alloca [1024 x i8], align 32
 // CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds [1024 x i8], ptr [[BUF]], i64 0, i64 64
 // CHECK-NEXT:    [[ALIGNED_RESULT:%.*]] = call ptr @llvm.ptrmask.p0.i64(ptr [[ARRAYIDX]], i64 -16)
-// CHECK-NEXT:    call void @llvm.assume(i1 true) [ "align"(ptr [[ALIGNED_RESULT]], i64 16) ]
 // CHECK-NEXT:    [[CALL:%.*]] = call i32 @func(ptr noundef [[ALIGNED_RESULT]])
 // CHECK-NEXT:    [[ARRAYIDX1:%.*]] = getelementptr inbounds [1024 x i8], ptr [[BUF]], i64 0, i64 32
 // CHECK-NEXT:    [[OVER_BOUNDARY:%.*]] = getelementptr inbounds i8, ptr [[ARRAYIDX1]], i64 31
 // CHECK-NEXT:    [[ALIGNED_RESULT2:%.*]] = call ptr @llvm.ptrmask.p0.i64(ptr [[OVER_BOUNDARY]], i64 -32)
-// CHECK-NEXT:    call void @llvm.assume(i1 true) [ "align"(ptr [[ALIGNED_RESULT2]], i64 32) ]
 // CHECK-NEXT:    [[CALL3:%.*]] = call i32 @func(ptr noundef [[ALIGNED_RESULT2]])
 // CHECK-NEXT:    ret i32 1
 //

diff --git a/clang/test/CodeGen/builtin-align.c b/clang/test/CodeGen/builtin-align.c
index b58d47078799eae..932b93972a85e66 100644
--- a/clang/test/CodeGen/builtin-align.c
+++ b/clang/test/CodeGen/builtin-align.c
@@ -119,7 +119,6 @@ _Bool is_aligned(TYPE ptr, unsigned align) {
 // CHECK-VOID_PTR-NEXT:    [[OVER_BOUNDARY:%.*]] = getelementptr inbounds i8, ptr [[PTR:%.*]], i64 [[MASK]]
 // CHECK-VOID_PTR-NEXT:    [[INVERTED_MASK:%.*]] = xor i64 [[MASK]], -1
 // CHECK-VOID_PTR-NEXT:    [[ALIGNED_RESULT:%.*]] = call ptr @llvm.ptrmask.p0.i64(ptr [[OVER_BOUNDARY]], i64 [[INVERTED_MASK]])
-// CHECK-VOID_PTR-NEXT:    call void @llvm.assume(i1 true) [ "align"(ptr [[ALIGNED_RESULT]], i64 [[ALIGNMENT]]) ]
 // CHECK-VOID_PTR-NEXT:    ret ptr [[ALIGNED_RESULT]]
 //
 // CHECK-FLOAT_PTR-LABEL: define {{[^@]+}}@align_up
@@ -130,7 +129,6 @@ _Bool is_aligned(TYPE ptr, unsigned align) {
 // CHECK-FLOAT_PTR-NEXT:    [[OVER_BOUNDARY:%.*]] = getelementptr inbounds i8, ptr [[PTR:%.*]], i64 [[MASK]]
 // CHECK-FLOAT_PTR-NEXT:    [[INVERTED_MASK:%.*]] = xor i64 [
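For context: after this commit the pointer forms of these builtins lower to a bare `llvm.ptrmask` call with no trailing `@llvm.assume`, since the alignment is recomputable from the constant mask (see 51abbf9 below). A minimal sketch of the source-level usage follows — the buffer, offsets, and alignments are arbitrary illustration, and the exact IR depends on the optimization level:

```cpp
// Sketch of the builtins affected by this commit. The __builtin_align_*
// functions are Clang extensions; inspect the IR with e.g.
//   clang++ -O1 -S -emit-llvm align_demo.cpp
#include <cstdio>

alignas(16) char buf[1024];

int main() {
  // align_down: lowers to llvm.ptrmask(%p, -16), clearing the low 4 bits.
  char *down = __builtin_align_down(&buf[44], 16);
  // align_up: a GEP by align - 1 (here 31), then llvm.ptrmask(%p, -32).
  char *up = __builtin_align_up(&buf[22], 32);
  // is_aligned: a pure integer test, no pointer arithmetic involved.
  std::printf("%p %p %d\n", (void *)down, (void *)up,
              __builtin_is_aligned(up, 32));
  return 0;
}
```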
[clang] 51abbf9 - [InstCombine] Deduce `align` and `nonnull` return attributes for `llvm.ptrmask`

Author: Noah Goldstein
Date: 2023-11-01T23:50:35-05:00
New Revision: 51abbf98d19cb1b89c6938811f2805bafe4b336e

URL: https://github.com/llvm/llvm-project/commit/51abbf98d19cb1b89c6938811f2805bafe4b336e
DIFF: https://github.com/llvm/llvm-project/commit/51abbf98d19cb1b89c6938811f2805bafe4b336e.diff

LOG: [InstCombine] Deduce `align` and `nonnull` return attributes for `llvm.ptrmask`

    We can deduce the former based on the mask / incoming pointer alignment.
    We can set the latter if we know the result is non-zero (this is
    essentially just caching our analysis result).

    Differential Revision: https://reviews.llvm.org/D156636

Added:


Modified:
    clang/test/CodeGen/arm64_32-vaarg.c
    llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
    llvm/test/Transforms/InstCombine/align-addr.ll
    llvm/test/Transforms/InstCombine/consecutive-ptrmask.ll
    llvm/test/Transforms/InstCombine/ptrmask.ll

Removed:


diff --git a/clang/test/CodeGen/arm64_32-vaarg.c b/clang/test/CodeGen/arm64_32-vaarg.c
index 9fbcf88ecfdcc33..3f1f4443436da15 100644
--- a/clang/test/CodeGen/arm64_32-vaarg.c
+++ b/clang/test/CodeGen/arm64_32-vaarg.c
@@ -29,7 +29,7 @@ long long test_longlong(OneLongLong input, va_list *mylist) {
   // CHECK-LABEL: define{{.*}} i64 @test_longlong(i64 %input
   // CHECK: [[STARTPTR:%.*]] = load ptr, ptr %mylist
   // CHECK: [[ALIGN_TMP:%.+]] = getelementptr inbounds i8, ptr [[STARTPTR]], i32 7
-  // CHECK: [[ALIGNED_ADDR:%.+]] = tail call ptr @llvm.ptrmask.p0.i32(ptr nonnull [[ALIGN_TMP]], i32 -8)
+  // CHECK: [[ALIGNED_ADDR:%.+]] = tail call align 8 ptr @llvm.ptrmask.p0.i32(ptr nonnull [[ALIGN_TMP]], i32 -8)
   // CHECK: [[NEXT:%.*]] = getelementptr inbounds i8, ptr [[ALIGNED_ADDR]], i32 8
   // CHECK: store ptr [[NEXT]], ptr %mylist

diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
index 5c08ab190eba476..10a8bff700b7366 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
@@ -1978,6 +1978,27 @@ Instruction *InstCombinerImpl::visitCallInst(CallInst &CI) {
           *II, Builder.CreateIntrinsic(InnerPtr->getType(), Intrinsic::ptrmask,
                                        {InnerPtr, NewMask}));
     }
+    bool Changed = false;
+    // See if we can deduce non-null.
+    if (!CI.hasRetAttr(Attribute::NonNull) &&
+        (Known.isNonZero() ||
+         isKnownNonZero(II, DL, /*Depth*/ 0, &AC, II, &DT))) {
+      CI.addRetAttr(Attribute::NonNull);
+      Changed = true;
+    }
+
+    unsigned NewAlignmentLog =
+        std::min(Value::MaxAlignmentExponent,
+                 std::min(BitWidth - 1, Known.countMinTrailingZeros()));
+    // Known bits will capture if we had alignment information associated
+    // with the pointer argument.
+    if (NewAlignmentLog > Log2(CI.getRetAlign().valueOrOne())) {
+      CI.addRetAttr(Attribute::getWithAlignment(
+          CI.getContext(), Align(uint64_t(1) << NewAlignmentLog)));
+      Changed = true;
+    }
+    if (Changed)
+      return &CI;
     break;
   }
   case Intrinsic::uadd_with_overflow:

diff --git a/llvm/test/Transforms/InstCombine/align-addr.ll b/llvm/test/Transforms/InstCombine/align-addr.ll
index 1e49cddf7ffe79d..facb5df08a82f43 100644
--- a/llvm/test/Transforms/InstCombine/align-addr.ll
+++ b/llvm/test/Transforms/InstCombine/align-addr.ll
@@ -135,7 +135,7 @@ define <16 x i8> @ptrmask_align_unknown_ptr_align1(ptr align 1 %ptr, i64 %mask)

 define <16 x i8> @ptrmask_align_unknown_ptr_align8(ptr align 8 %ptr, i64 %mask) {
 ; CHECK-LABEL: @ptrmask_align_unknown_ptr_align8(
-; CHECK-NEXT:    [[ALIGNED:%.*]] = call ptr @llvm.ptrmask.p0.i64(ptr [[PTR:%.*]], i64 [[MASK:%.*]])
+; CHECK-NEXT:    [[ALIGNED:%.*]] = call align 8 ptr @llvm.ptrmask.p0.i64(ptr [[PTR:%.*]], i64 [[MASK:%.*]])
 ; CHECK-NEXT:    [[LOAD:%.*]] = load <16 x i8>, ptr [[ALIGNED]], align 1
 ; CHECK-NEXT:    ret <16 x i8> [[LOAD]]
 ;
@@ -147,7 +147,7 @@ define <16 x i8> @ptrmask_align_unknown_ptr_align8(ptr align 8 %ptr, i64 %mask)
 ; Increase load align from 1 to 2
 define <16 x i8> @ptrmask_align2_ptr_align1(ptr align 1 %ptr) {
 ; CHECK-LABEL: @ptrmask_align2_ptr_align1(
-; CHECK-NEXT:    [[ALIGNED:%.*]] = call ptr @llvm.ptrmask.p0.i64(ptr [[PTR:%.*]], i64 -2)
+; CHECK-NEXT:    [[ALIGNED:%.*]] = call align 2 ptr @llvm.ptrmask.p0.i64(ptr [[PTR:%.*]], i64 -2)
 ; CHECK-NEXT:    [[LOAD:%.*]] = load <16 x i8>, ptr [[ALIGNED]], align 1
 ; CHECK-NEXT:    ret <16 x i8> [[LOAD]]
 ;
@@ -159,7 +159,7 @@ define <16 x i8> @ptrmask_align2_ptr_align1(ptr align 1 %ptr) {
 ; Increase load align from 1 to 4
 define <16 x i8> @ptrmask_align4_ptr_align1(ptr align 1 %ptr) {
 ; CHECK-LABEL: @ptrmask_align4_ptr_align1(
-; CHECK-NEXT:    [[ALIGNED:%.*]] = call ptr @llvm.ptrmask.p0.i64(ptr [[PTR:%.*]], i64 -4)
+; CHECK-NEXT:    [[ALIGNED:%.*]] = call align
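The alignment half of this deduction is plain known-bits arithmetic: the known-zero bits of `ptrmask(p, mask)` are the union of the bits cleared by the mask and the low bits implied by `p`'s alignment. Below is a standalone sketch of that computation — a simplified model for illustration, not the InstCombine code itself; `deducedRetAlign` is an invented name, and the constant mirrors `Value::MaxAlignmentExponent`:

```cpp
// Simplified model (C++20) of the `align` return-attribute deduction above.
#include <algorithm>
#include <bit>
#include <cassert>
#include <cstdint>

constexpr unsigned MaxAlignmentExponent = 32; // mirrors Value::MaxAlignmentExponent

uint64_t deducedRetAlign(uint64_t ptrAlign, uint64_t mask) {
  assert(std::has_single_bit(ptrAlign) && "alignments are powers of two");
  // Bits known zero in the result: bits cleared by the mask (~mask) plus the
  // low bits guaranteed zero by the incoming pointer's alignment.
  uint64_t knownZero = ~mask | (ptrAlign - 1);
  // Contiguous low known-zero bits give the alignment exponent, capped.
  unsigned tz = static_cast<unsigned>(std::countr_one(knownZero));
  unsigned alignLog = std::min({MaxAlignmentExponent, 63u, tz});
  return uint64_t{1} << alignLog;
}

int main() {
  // Matches the tests above: an align-1 pointer masked with -2 gains align 2;
  // with -16 it gains align 16; with an unknown mask (modeled here as
  // all-ones, clearing nothing) an align-8 pointer keeps align 8.
  assert(deducedRetAlign(1, ~uint64_t{1}) == 2);
  assert(deducedRetAlign(1, ~uint64_t{15}) == 16);
  assert(deducedRetAlign(8, ~uint64_t{0}) == 8);
  return 0;
}
```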
[clang] 71be514 - [Clang][CodeGen] Emit `llvm.ptrmask` for `align_up` and `align_down`

Author: Noah Goldstein
Date: 2023-11-04T14:20:54-05:00
New Revision: 71be514fa0af996745186816735d69fa8a26f3c9

URL: https://github.com/llvm/llvm-project/commit/71be514fa0af996745186816735d69fa8a26f3c9
DIFF: https://github.com/llvm/llvm-project/commit/71be514fa0af996745186816735d69fa8a26f3c9.diff

LOG: [Clang][CodeGen] Emit `llvm.ptrmask` for `align_up` and `align_down`

    Since PRs #69343 and #67166 we probably have enough support for
    `llvm.ptrmask` to make it preferable to the GEP strategy.

    Closes #71238

Added:


Modified:
    clang/lib/CodeGen/CGBuiltin.cpp
    clang/test/CodeGen/builtin-align-array.c
    clang/test/CodeGen/builtin-align.c

Removed:


diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp
index 972aa1c708e5f65..978f6ffd145741d 100644
--- a/clang/lib/CodeGen/CGBuiltin.cpp
+++ b/clang/lib/CodeGen/CGBuiltin.cpp
@@ -19668,44 +19668,40 @@ RValue CodeGenFunction::EmitBuiltinIsAligned(const CallExpr *E) {
 /// Generate (x & ~(y-1)) to align down or ((x+(y-1)) & ~(y-1)) to align up.
 /// Note: For pointer types we can avoid ptrtoint/inttoptr pairs by using the
 /// llvm.ptrmask intrinsic (with a GEP before in the align_up case).
-/// TODO: actually use ptrmask once most optimization passes know about it.
 RValue CodeGenFunction::EmitBuiltinAlignTo(const CallExpr *E, bool AlignUp) {
   BuiltinAlignArgs Args(E, *this);
-  llvm::Value *SrcAddr = Args.Src;
-  if (Args.Src->getType()->isPointerTy())
-    SrcAddr = Builder.CreatePtrToInt(Args.Src, Args.IntType, "intptr");
-  llvm::Value *SrcForMask = SrcAddr;
+  llvm::Value *SrcForMask = Args.Src;
   if (AlignUp) {
     // When aligning up we have to first add the mask to ensure we go over the
     // next alignment value and then align down to the next valid multiple.
     // By adding the mask, we ensure that align_up on an already aligned
     // value will not change the value.
-    SrcForMask = Builder.CreateAdd(SrcForMask, Args.Mask, "over_boundary");
+    if (Args.Src->getType()->isPointerTy()) {
+      if (getLangOpts().isSignedOverflowDefined())
+        SrcForMask =
+            Builder.CreateGEP(Int8Ty, SrcForMask, Args.Mask, "over_boundary");
+      else
+        SrcForMask = EmitCheckedInBoundsGEP(Int8Ty, SrcForMask, Args.Mask,
+                                            /*SignedIndices=*/true,
+                                            /*isSubtraction=*/false,
+                                            E->getExprLoc(), "over_boundary");
+    } else {
+      SrcForMask = Builder.CreateAdd(SrcForMask, Args.Mask, "over_boundary");
+    }
   }
   // Invert the mask to only clear the lower bits.
   llvm::Value *InvertedMask = Builder.CreateNot(Args.Mask, "inverted_mask");
-  llvm::Value *Result =
-      Builder.CreateAnd(SrcForMask, InvertedMask, "aligned_result");
+  llvm::Value *Result = nullptr;
   if (Args.Src->getType()->isPointerTy()) {
-    /// TODO: Use ptrmask instead of ptrtoint+gep once it is optimized well.
-    // Result = Builder.CreateIntrinsic(
-    //     Intrinsic::ptrmask, {Args.SrcType, SrcForMask->getType(), Args.IntType},
-    //     {SrcForMask, NegatedMask}, nullptr, "aligned_result");
-    Result->setName("aligned_intptr");
-    llvm::Value *Difference = Builder.CreateSub(Result, SrcAddr, "diff");
-    // The result must point to the same underlying allocation. This means we
-    // can use an inbounds GEP to enable better optimization.
-    if (getLangOpts().isSignedOverflowDefined())
-      Result =
-          Builder.CreateGEP(Int8Ty, Args.Src, Difference, "aligned_result");
-    else
-      Result = EmitCheckedInBoundsGEP(Int8Ty, Args.Src, Difference,
-                                      /*SignedIndices=*/true,
-                                      /*isSubtraction=*/!AlignUp,
-                                      E->getExprLoc(), "aligned_result");
+    Result = Builder.CreateIntrinsic(
+        Intrinsic::ptrmask, {Args.SrcType, Args.IntType},
+        {SrcForMask, InvertedMask}, nullptr, "aligned_result");
+
     // Emit an alignment assumption to ensure that the new alignment is
     // propagated to loads/stores, etc.
     emitAlignmentAssumption(Result, E, E->getExprLoc(), Args.Alignment);
+  } else {
+    Result = Builder.CreateAnd(SrcForMask, InvertedMask, "aligned_result");
   }
   assert(Result->getType() == Args.SrcType);
   return RValue::get(Result);

diff --git a/clang/test/CodeGen/builtin-align-array.c b/clang/test/CodeGen/builtin-align-array.c
index 5d1377b98d2814f..18a77b9a710db40 100644
--- a/clang/test/CodeGen/builtin-align-array.c
+++ b/clang/test/CodeGen/builtin-align-array.c
@@ -8,22 +8,16 @@ extern int func(char *c);
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[BUF:%.*]] = alloca [1024 x i8], align 16
 // CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds [1024 x i8], ptr [[BUF]], i64 0, i64 44
-// CHECK-NEXT:    [[INTPTR:%.*]] = ptrtoint ptr [[ARRAYIDX]] to
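The doc comment above gives the underlying formulas: `(x & ~(y-1))` aligns down and `((x+(y-1)) & ~(y-1))` aligns up. Below is a small self-contained check that the new GEP-plus-ptrmask sequence computes the same addresses as the old ptrtoint-based sequence — a sketch that models pointers as plain integers, deliberately ignoring the provenance question that makes `llvm.ptrmask` preferable in IR:

```cpp
// Minimal model of the two align_up lowerings; addresses are bare uint64_t.
#include <cassert>
#include <cstdint>

uint64_t align_up_old(uint64_t addr, uint64_t align) {
  uint64_t mask = align - 1;
  // Old strategy: ptrtoint, add, and... then rebuild the pointer with
  // gep(p, aligned - addr). Only the integer result is modeled here.
  return (addr + mask) & ~mask;
}

uint64_t align_up_new(uint64_t addr, uint64_t align) {
  uint64_t mask = align - 1;
  uint64_t over_boundary = addr + mask; // gep inbounds i8, %p, mask
  return over_boundary & ~mask;         // llvm.ptrmask(%p, ~mask)
}

int main() {
  // The two strategies agree on every address and power-of-two alignment.
  for (uint64_t a : {16u, 32u, 4096u})
    for (uint64_t p = 1; p < 3 * a; ++p)
      assert(align_up_old(p, a) == align_up_new(p, a));
  return 0;
}
```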
[clang] d80d5b9 - [InstCombine] Canonicalize `(sitofp x)` -> `(uitofp x)` if `x >= 0`

Author: Noah Goldstein
Date: 2024-03-13T18:26:21-05:00
New Revision: d80d5b923c6f611590a12543bdb33e0c16044d44

URL: https://github.com/llvm/llvm-project/commit/d80d5b923c6f611590a12543bdb33e0c16044d44
DIFF: https://github.com/llvm/llvm-project/commit/d80d5b923c6f611590a12543bdb33e0c16044d44.diff

LOG: [InstCombine] Canonicalize `(sitofp x)` -> `(uitofp x)` if `x >= 0`

    Just a standard canonicalization.

    Proofs: https://alive2.llvm.org/ce/z/9W4VFm

    Closes #82404

Added:


Modified:
    clang/test/Headers/__clang_hip_math.hip
    llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp
    llvm/test/Transforms/InstCombine/add-sitofp.ll
    llvm/test/Transforms/InstCombine/binop-itofp.ll
    llvm/test/Transforms/InstCombine/clamp-to-minmax.ll
    llvm/test/Transforms/InstCombine/fpcast.ll
    llvm/test/Transforms/InstCombine/minmax-fold.ll
    llvm/test/Transforms/InstCombine/minmax-fp.ll
    llvm/test/Transforms/InstCombine/pr27236.ll
    llvm/test/Transforms/LoopVectorize/X86/float-induction-x86.ll
    llvm/test/Transforms/LoopVectorize/float-induction.ll

Removed:


diff --git a/clang/test/Headers/__clang_hip_math.hip b/clang/test/Headers/__clang_hip_math.hip
index 37099de74fb8ec..701f93853ab93c 100644
--- a/clang/test/Headers/__clang_hip_math.hip
+++ b/clang/test/Headers/__clang_hip_math.hip
@@ -1685,7 +1685,7 @@ extern "C" __device__ double test_j1(double x) {
 // DEFAULT-NEXT:    [[__X1_0_I3:%.*]] = phi float [ [[SUB_I:%.*]], [[FOR_BODY_I]] ], [ [[CALL_I21_I]], [[IF_END4_I]] ]
 // DEFAULT-NEXT:    [[__X0_0_I2:%.*]] = phi float [ [[__X1_0_I3]], [[FOR_BODY_I]] ], [ [[CALL_I_I]], [[IF_END4_I]] ]
 // DEFAULT-NEXT:    [[MUL_I:%.*]] = shl nuw nsw i32 [[__I_0_I4]], 1
-// DEFAULT-NEXT:    [[CONV_I:%.*]] = sitofp i32 [[MUL_I]] to float
+// DEFAULT-NEXT:    [[CONV_I:%.*]] = uitofp i32 [[MUL_I]] to float
 // DEFAULT-NEXT:    [[DIV_I:%.*]] = fdiv contract float [[CONV_I]], [[Y]]
 // DEFAULT-NEXT:    [[MUL8_I:%.*]] = fmul contract float [[__X1_0_I3]], [[DIV_I]]
 // DEFAULT-NEXT:    [[SUB_I]] = fsub contract float [[MUL8_I]], [[__X0_0_I2]]
@@ -1718,7 +1718,7 @@ extern "C" __device__ double test_j1(double x) {
 // FINITEONLY-NEXT:    [[__X1_0_I3:%.*]] = phi float [ [[SUB_I:%.*]], [[FOR_BODY_I]] ], [ [[CALL_I21_I]], [[IF_END4_I]] ]
 // FINITEONLY-NEXT:    [[__X0_0_I2:%.*]] = phi float [ [[__X1_0_I3]], [[FOR_BODY_I]] ], [ [[CALL_I_I]], [[IF_END4_I]] ]
 // FINITEONLY-NEXT:    [[MUL_I:%.*]] = shl nuw nsw i32 [[__I_0_I4]], 1
-// FINITEONLY-NEXT:    [[CONV_I:%.*]] = sitofp i32 [[MUL_I]] to float
+// FINITEONLY-NEXT:    [[CONV_I:%.*]] = uitofp i32 [[MUL_I]] to float
 // FINITEONLY-NEXT:    [[DIV_I:%.*]] = fdiv nnan ninf contract float [[CONV_I]], [[Y]]
 // FINITEONLY-NEXT:    [[MUL8_I:%.*]] = fmul nnan ninf contract float [[__X1_0_I3]], [[DIV_I]]
 // FINITEONLY-NEXT:    [[SUB_I]] = fsub nnan ninf contract float [[MUL8_I]], [[__X0_0_I2]]
@@ -1751,7 +1751,7 @@ extern "C" __device__ double test_j1(double x) {
 // APPROX-NEXT:    [[__X1_0_I3:%.*]] = phi float [ [[SUB_I:%.*]], [[FOR_BODY_I]] ], [ [[CALL_I21_I]], [[IF_END4_I]] ]
 // APPROX-NEXT:    [[__X0_0_I2:%.*]] = phi float [ [[__X1_0_I3]], [[FOR_BODY_I]] ], [ [[CALL_I_I]], [[IF_END4_I]] ]
 // APPROX-NEXT:    [[MUL_I:%.*]] = shl nuw nsw i32 [[__I_0_I4]], 1
-// APPROX-NEXT:    [[CONV_I:%.*]] = sitofp i32 [[MUL_I]] to float
+// APPROX-NEXT:    [[CONV_I:%.*]] = uitofp i32 [[MUL_I]] to float
 // APPROX-NEXT:    [[DIV_I:%.*]] = fdiv contract float [[CONV_I]], [[Y]]
 // APPROX-NEXT:    [[MUL8_I:%.*]] = fmul contract float [[__X1_0_I3]], [[DIV_I]]
 // APPROX-NEXT:    [[SUB_I]] = fsub contract float [[MUL8_I]], [[__X0_0_I2]]
@@ -1788,7 +1788,7 @@ extern "C" __device__ float test_jnf(int x, float y) {
 // DEFAULT-NEXT:    [[__X1_0_I3:%.*]] = phi double [ [[SUB_I:%.*]], [[FOR_BODY_I]] ], [ [[CALL_I21_I]], [[IF_END4_I]] ]
 // DEFAULT-NEXT:    [[__X0_0_I2:%.*]] = phi double [ [[__X1_0_I3]], [[FOR_BODY_I]] ], [ [[CALL_I_I]], [[IF_END4_I]] ]
 // DEFAULT-NEXT:    [[MUL_I:%.*]] = shl nuw nsw i32 [[__I_0_I4]], 1
-// DEFAULT-NEXT:    [[CONV_I:%.*]] = sitofp i32 [[MUL_I]] to double
+// DEFAULT-NEXT:    [[CONV_I:%.*]] = uitofp i32 [[MUL_I]] to double
 // DEFAULT-NEXT:    [[DIV_I:%.*]] = fdiv contract double [[CONV_I]], [[Y]]
 // DEFAULT-NEXT:    [[MUL8_I:%.*]] = fmul contract double [[__X1_0_I3]], [[DIV_I]]
 // DEFAULT-NEXT:    [[SUB_I]] = fsub contract double [[MUL8_I]], [[__X0_0_I2]]
@@ -1821,7 +1821,7 @@ extern "C" __device__ float test_jnf(int x, float y) {
 // FINITEONLY-NEXT:    [[__X1_0_I3:%.*]] = phi double [ [[SUB_I:%.*]], [[FOR_BODY_I]] ], [ [[CALL_I21_I]], [[IF_END4_I]] ]
 // FINITEONLY-NEXT:    [[__X0_0_I2:%.*]] = phi double [ [[__X1_0_I3]], [[FOR_BODY_I]] ], [ [[CALL_I_I]], [[IF_END4_I]] ]
 // FINITEONLY-NEXT:    [[MUL_I:%.*]] = shl nuw nsw i32 [[__I_0_I4]], 1
-// FINITEONLY-NEXT:    [[CONV_I:%.*]] = sitofp i32 [[MUL_I]] to double
+// FINITEONLY-NEXT:    [[CONV_I:%.*]] = uito
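The `x >= 0` guard is what makes this an exact canonicalization: signed and unsigned int-to-float conversion agree whenever the sign bit is clear (in the tests above, `[[MUL_I]]` is `shl nuw nsw`, hence provably non-negative). A quick numeric check in plain C++, with signed/unsigned `static_cast` standing in for `sitofp`/`uitofp`:

```cpp
// Demonstrates why the transform is valid exactly when x >= 0.
#include <cassert>
#include <cstdint>

int main() {
  // sitofp x == uitofp x for every non-negative input: both convert the same
  // mathematical value under the same rounding mode.
  for (int32_t x : {0, 1, 42, 0x7fffffff})
    assert(static_cast<float>(x) ==
           static_cast<float>(static_cast<uint32_t>(x)));

  // For negative inputs the two diverge: sitofp(-1) is -1.0f, while
  // uitofp reinterprets the bits as 2^32 - 1.
  int32_t neg = -1;
  assert(static_cast<float>(neg) !=
         static_cast<float>(static_cast<uint32_t>(neg)));
  return 0;
}
```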
[clang] 6960ace - Revert "[InstCombine] Canonicalize `(sitofp x)` -> `(uitofp x)` if `x >= 0`"

Author: Noah Goldstein
Date: 2024-03-20T00:50:45-05:00
New Revision: 6960ace534c4021301dd5a9933ca06ba96edea23

URL: https://github.com/llvm/llvm-project/commit/6960ace534c4021301dd5a9933ca06ba96edea23
DIFF: https://github.com/llvm/llvm-project/commit/6960ace534c4021301dd5a9933ca06ba96edea23.diff

LOG: Revert "[InstCombine] Canonicalize `(sitofp x)` -> `(uitofp x)` if `x >= 0`"

    This reverts commit d80d5b923c6f611590a12543bdb33e0c16044d44.

    It wasn't a particularly important transform to begin with, and it caused
    some codegen regressions on targets that prefer `sitofp`, so it is being
    dropped. We might revisit it along with adding an `nneg` flag to `uitofp`
    so the transform is easily reversible for the backend.

Added:


Modified:
    clang/test/Headers/__clang_hip_math.hip
    llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp
    llvm/test/Transforms/InstCombine/add-sitofp.ll
    llvm/test/Transforms/InstCombine/binop-itofp.ll
    llvm/test/Transforms/InstCombine/clamp-to-minmax.ll
    llvm/test/Transforms/InstCombine/fpcast.ll
    llvm/test/Transforms/InstCombine/minmax-fold.ll
    llvm/test/Transforms/InstCombine/minmax-fp.ll
    llvm/test/Transforms/InstCombine/pr27236.ll
    llvm/test/Transforms/LoopVectorize/X86/float-induction-x86.ll
    llvm/test/Transforms/LoopVectorize/float-induction.ll

Removed:


diff --git a/clang/test/Headers/__clang_hip_math.hip b/clang/test/Headers/__clang_hip_math.hip
index 701f93853ab93c..37099de74fb8ec 100644
--- a/clang/test/Headers/__clang_hip_math.hip
+++ b/clang/test/Headers/__clang_hip_math.hip
@@ -1685,7 +1685,7 @@ extern "C" __device__ double test_j1(double x) {
 // DEFAULT-NEXT:    [[__X1_0_I3:%.*]] = phi float [ [[SUB_I:%.*]], [[FOR_BODY_I]] ], [ [[CALL_I21_I]], [[IF_END4_I]] ]
 // DEFAULT-NEXT:    [[__X0_0_I2:%.*]] = phi float [ [[__X1_0_I3]], [[FOR_BODY_I]] ], [ [[CALL_I_I]], [[IF_END4_I]] ]
 // DEFAULT-NEXT:    [[MUL_I:%.*]] = shl nuw nsw i32 [[__I_0_I4]], 1
-// DEFAULT-NEXT:    [[CONV_I:%.*]] = uitofp i32 [[MUL_I]] to float
+// DEFAULT-NEXT:    [[CONV_I:%.*]] = sitofp i32 [[MUL_I]] to float
 // DEFAULT-NEXT:    [[DIV_I:%.*]] = fdiv contract float [[CONV_I]], [[Y]]
 // DEFAULT-NEXT:    [[MUL8_I:%.*]] = fmul contract float [[__X1_0_I3]], [[DIV_I]]
 // DEFAULT-NEXT:    [[SUB_I]] = fsub contract float [[MUL8_I]], [[__X0_0_I2]]
@@ -1718,7 +1718,7 @@ extern "C" __device__ double test_j1(double x) {
 // FINITEONLY-NEXT:    [[__X1_0_I3:%.*]] = phi float [ [[SUB_I:%.*]], [[FOR_BODY_I]] ], [ [[CALL_I21_I]], [[IF_END4_I]] ]
 // FINITEONLY-NEXT:    [[__X0_0_I2:%.*]] = phi float [ [[__X1_0_I3]], [[FOR_BODY_I]] ], [ [[CALL_I_I]], [[IF_END4_I]] ]
 // FINITEONLY-NEXT:    [[MUL_I:%.*]] = shl nuw nsw i32 [[__I_0_I4]], 1
-// FINITEONLY-NEXT:    [[CONV_I:%.*]] = uitofp i32 [[MUL_I]] to float
+// FINITEONLY-NEXT:    [[CONV_I:%.*]] = sitofp i32 [[MUL_I]] to float
 // FINITEONLY-NEXT:    [[DIV_I:%.*]] = fdiv nnan ninf contract float [[CONV_I]], [[Y]]
 // FINITEONLY-NEXT:    [[MUL8_I:%.*]] = fmul nnan ninf contract float [[__X1_0_I3]], [[DIV_I]]
 // FINITEONLY-NEXT:    [[SUB_I]] = fsub nnan ninf contract float [[MUL8_I]], [[__X0_0_I2]]
@@ -1751,7 +1751,7 @@ extern "C" __device__ double test_j1(double x) {
 // APPROX-NEXT:    [[__X1_0_I3:%.*]] = phi float [ [[SUB_I:%.*]], [[FOR_BODY_I]] ], [ [[CALL_I21_I]], [[IF_END4_I]] ]
 // APPROX-NEXT:    [[__X0_0_I2:%.*]] = phi float [ [[__X1_0_I3]], [[FOR_BODY_I]] ], [ [[CALL_I_I]], [[IF_END4_I]] ]
 // APPROX-NEXT:    [[MUL_I:%.*]] = shl nuw nsw i32 [[__I_0_I4]], 1
-// APPROX-NEXT:    [[CONV_I:%.*]] = uitofp i32 [[MUL_I]] to float
+// APPROX-NEXT:    [[CONV_I:%.*]] = sitofp i32 [[MUL_I]] to float
 // APPROX-NEXT:    [[DIV_I:%.*]] = fdiv contract float [[CONV_I]], [[Y]]
 // APPROX-NEXT:    [[MUL8_I:%.*]] = fmul contract float [[__X1_0_I3]], [[DIV_I]]
 // APPROX-NEXT:    [[SUB_I]] = fsub contract float [[MUL8_I]], [[__X0_0_I2]]
@@ -1788,7 +1788,7 @@ extern "C" __device__ float test_jnf(int x, float y) {
 // DEFAULT-NEXT:    [[__X1_0_I3:%.*]] = phi double [ [[SUB_I:%.*]], [[FOR_BODY_I]] ], [ [[CALL_I21_I]], [[IF_END4_I]] ]
 // DEFAULT-NEXT:    [[__X0_0_I2:%.*]] = phi double [ [[__X1_0_I3]], [[FOR_BODY_I]] ], [ [[CALL_I_I]], [[IF_END4_I]] ]
 // DEFAULT-NEXT:    [[MUL_I:%.*]] = shl nuw nsw i32 [[__I_0_I4]], 1
-// DEFAULT-NEXT:    [[CONV_I:%.*]] = uitofp i32 [[MUL_I]] to double
+// DEFAULT-NEXT:    [[CONV_I:%.*]] = sitofp i32 [[MUL_I]] to double
 // DEFAULT-NEXT:    [[DIV_I:%.*]] = fdiv contract double [[CONV_I]], [[Y]]
 // DEFAULT-NEXT:    [[MUL8_I:%.*]] = fmul contract double [[__X1_0_I3]], [[DIV_I]]
 // DEFAULT-NEXT:    [[SUB_I]] = fsub contract double [[MUL8_I]], [[__X0_0_I2]]
@@ -1821,7 +1821,7 @@ extern "C" __device__ float test_jnf(int x, float y) {
 // FINITEONLY-NEXT:    [[__X1_0_I3:%.*]] = phi double [ [[SUB_I:%.*]], [[FOR_BODY_I]] ], [ [[CALL_I21_I]], [[IF_END4_I]] ]
 // FINITEONLY-NEXT:    [[__X0_0_I2:%.*]] = phi double [ [[__X1_0_I3]], [[FOR_BODY_I]] ], [
[clang] b6bd41d - [InstCombine] Add canonicalization of `sitofp` -> `uitofp nneg`

Author: Noah Goldstein
Date: 2024-04-16T15:26:25-05:00
New Revision: b6bd41db31c798f3fc82368381fad6d42795f512

URL: https://github.com/llvm/llvm-project/commit/b6bd41db31c798f3fc82368381fad6d42795f512
DIFF: https://github.com/llvm/llvm-project/commit/b6bd41db31c798f3fc82368381fad6d42795f512.diff

LOG: [InstCombine] Add canonicalization of `sitofp` -> `uitofp nneg`

    This is essentially the same as #82404, but carries the `nneg` flag,
    which allows the backend to reliably undo the transform.

    Closes #88299

Added:


Modified:
    clang/test/Headers/__clang_hip_math.hip
    llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp
    llvm/test/Transforms/InstCombine/add-sitofp.ll
    llvm/test/Transforms/InstCombine/binop-itofp.ll
    llvm/test/Transforms/InstCombine/clamp-to-minmax.ll
    llvm/test/Transforms/InstCombine/fpcast.ll
    llvm/test/Transforms/InstCombine/minmax-fold.ll
    llvm/test/Transforms/InstCombine/minmax-fp.ll
    llvm/test/Transforms/InstCombine/pr27236.ll
    llvm/test/Transforms/InstCombine/sitofp.ll
    llvm/test/Transforms/LoopVectorize/X86/float-induction-x86.ll
    llvm/test/Transforms/LoopVectorize/float-induction.ll

Removed:


diff --git a/clang/test/Headers/__clang_hip_math.hip b/clang/test/Headers/__clang_hip_math.hip
index 2e5f521a5feaed..1271868a53b866 100644
--- a/clang/test/Headers/__clang_hip_math.hip
+++ b/clang/test/Headers/__clang_hip_math.hip
@@ -1685,7 +1685,7 @@ extern "C" __device__ double test_j1(double x) {
 // DEFAULT-NEXT:    [[__X1_0_I3:%.*]] = phi float [ [[SUB_I:%.*]], [[FOR_BODY_I]] ], [ [[CALL_I21_I]], [[IF_END4_I]] ]
 // DEFAULT-NEXT:    [[__X0_0_I2:%.*]] = phi float [ [[__X1_0_I3]], [[FOR_BODY_I]] ], [ [[CALL_I_I]], [[IF_END4_I]] ]
 // DEFAULT-NEXT:    [[MUL_I:%.*]] = shl nuw nsw i32 [[__I_0_I4]], 1
-// DEFAULT-NEXT:    [[CONV_I:%.*]] = sitofp i32 [[MUL_I]] to float
+// DEFAULT-NEXT:    [[CONV_I:%.*]] = uitofp nneg i32 [[MUL_I]] to float
 // DEFAULT-NEXT:    [[DIV_I:%.*]] = fdiv contract float [[CONV_I]], [[Y]]
 // DEFAULT-NEXT:    [[MUL8_I:%.*]] = fmul contract float [[__X1_0_I3]], [[DIV_I]]
 // DEFAULT-NEXT:    [[SUB_I]] = fsub contract float [[MUL8_I]], [[__X0_0_I2]]
@@ -1718,7 +1718,7 @@ extern "C" __device__ double test_j1(double x) {
 // FINITEONLY-NEXT:    [[__X1_0_I3:%.*]] = phi float [ [[SUB_I:%.*]], [[FOR_BODY_I]] ], [ [[CALL_I21_I]], [[IF_END4_I]] ]
 // FINITEONLY-NEXT:    [[__X0_0_I2:%.*]] = phi float [ [[__X1_0_I3]], [[FOR_BODY_I]] ], [ [[CALL_I_I]], [[IF_END4_I]] ]
 // FINITEONLY-NEXT:    [[MUL_I:%.*]] = shl nuw nsw i32 [[__I_0_I4]], 1
-// FINITEONLY-NEXT:    [[CONV_I:%.*]] = sitofp i32 [[MUL_I]] to float
+// FINITEONLY-NEXT:    [[CONV_I:%.*]] = uitofp nneg i32 [[MUL_I]] to float
 // FINITEONLY-NEXT:    [[DIV_I:%.*]] = fdiv nnan ninf contract float [[CONV_I]], [[Y]]
 // FINITEONLY-NEXT:    [[MUL8_I:%.*]] = fmul nnan ninf contract float [[__X1_0_I3]], [[DIV_I]]
 // FINITEONLY-NEXT:    [[SUB_I]] = fsub nnan ninf contract float [[MUL8_I]], [[__X0_0_I2]]
@@ -1751,7 +1751,7 @@ extern "C" __device__ double test_j1(double x) {
 // APPROX-NEXT:    [[__X1_0_I3:%.*]] = phi float [ [[SUB_I:%.*]], [[FOR_BODY_I]] ], [ [[CALL_I21_I]], [[IF_END4_I]] ]
 // APPROX-NEXT:    [[__X0_0_I2:%.*]] = phi float [ [[__X1_0_I3]], [[FOR_BODY_I]] ], [ [[CALL_I_I]], [[IF_END4_I]] ]
 // APPROX-NEXT:    [[MUL_I:%.*]] = shl nuw nsw i32 [[__I_0_I4]], 1
-// APPROX-NEXT:    [[CONV_I:%.*]] = sitofp i32 [[MUL_I]] to float
+// APPROX-NEXT:    [[CONV_I:%.*]] = uitofp nneg i32 [[MUL_I]] to float
 // APPROX-NEXT:    [[DIV_I:%.*]] = fdiv contract float [[CONV_I]], [[Y]]
 // APPROX-NEXT:    [[MUL8_I:%.*]] = fmul contract float [[__X1_0_I3]], [[DIV_I]]
 // APPROX-NEXT:    [[SUB_I]] = fsub contract float [[MUL8_I]], [[__X0_0_I2]]
@@ -1788,7 +1788,7 @@ extern "C" __device__ float test_jnf(int x, float y) {
 // DEFAULT-NEXT:    [[__X1_0_I3:%.*]] = phi double [ [[SUB_I:%.*]], [[FOR_BODY_I]] ], [ [[CALL_I21_I]], [[IF_END4_I]] ]
 // DEFAULT-NEXT:    [[__X0_0_I2:%.*]] = phi double [ [[__X1_0_I3]], [[FOR_BODY_I]] ], [ [[CALL_I_I]], [[IF_END4_I]] ]
 // DEFAULT-NEXT:    [[MUL_I:%.*]] = shl nuw nsw i32 [[__I_0_I4]], 1
-// DEFAULT-NEXT:    [[CONV_I:%.*]] = sitofp i32 [[MUL_I]] to double
+// DEFAULT-NEXT:    [[CONV_I:%.*]] = uitofp nneg i32 [[MUL_I]] to double
 // DEFAULT-NEXT:    [[DIV_I:%.*]] = fdiv contract double [[CONV_I]], [[Y]]
 // DEFAULT-NEXT:    [[MUL8_I:%.*]] = fmul contract double [[__X1_0_I3]], [[DIV_I]]
 // DEFAULT-NEXT:    [[SUB_I]] = fsub contract double [[MUL8_I]], [[__X0_0_I2]]
@@ -1821,7 +1821,7 @@ extern "C" __device__ float test_jnf(int x, float y) {
 // FINITEONLY-NEXT:    [[__X1_0_I3:%.*]] = phi double [ [[SUB_I:%.*]], [[FOR_BODY_I]] ], [ [[CALL_I21_I]], [[IF_END4_I]] ]
 // FINITEONLY-NEXT:    [[__X0_0_I2:%.*]] = phi double [ [[__X1_0_I3]], [[FOR_BODY_I]] ], [ [[CALL_I_I]], [[IF_END4_I]] ]
 // FINITEONLY-NEXT:    [[MUL_I:%.*]] = shl nuw nsw i32 [[__I_0_I4]], 1
-// FINITEONL