[clang] 590884a - [Clang][CodeGen] Stop emitting alignment assumes for `align_{up,down}`

2023-11-06 Thread Noah Goldstein via cfe-commits

Author: Noah Goldstein
Date: 2023-11-07T00:31:04-06:00
New Revision: 590884a860ccc5fce50bf0a7eba91ec1bfe71fb6

URL: 
https://github.com/llvm/llvm-project/commit/590884a860ccc5fce50bf0a7eba91ec1bfe71fb6
DIFF: 
https://github.com/llvm/llvm-project/commit/590884a860ccc5fce50bf0a7eba91ec1bfe71fb6.diff

LOG: [Clang][CodeGen] Stop emitting alignment assumes for `align_{up,down}`

Now that `align_{up,down}` use `llvm.ptrmask` (as of #71238), the
assume no longer preserves any information that cannot easily be
recomputed.

Closes #71295
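
As a quick illustration (not part of the commit message): these are the
real Clang builtins affected, and the IR in the comments is a sketch of
the post-patch lowering, paraphrasing the FileCheck lines updated below.

  // C++ sketch; compile with e.g. `clang++ -O1 -S -emit-llvm` to inspect.
  char *down64(char *p) {
    // Lowering is now just:
    //   %aligned_result = call ptr @llvm.ptrmask.p0.i64(ptr %p, i64 -64)
    // with no trailing:
    //   call void @llvm.assume(i1 true) [ "align"(ptr %aligned_result, i64 64) ]
    return __builtin_align_down(p, 64);
  }
  char *up64(char *p) {
    // Step over the boundary, then mask down:
    //   %over_boundary = getelementptr inbounds i8, ptr %p, i64 63
    //   %aligned_result = call ptr @llvm.ptrmask.p0.i64(ptr %over_boundary, i64 -64)
    return __builtin_align_up(p, 64);
  }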

Added: 


Modified: 
clang/lib/CodeGen/CGBuiltin.cpp
clang/test/CodeGen/builtin-align-array.c
clang/test/CodeGen/builtin-align.c

Removed: 




diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp
index 31edf575caae9fe..5ab81cc605819c3 100644
--- a/clang/lib/CodeGen/CGBuiltin.cpp
+++ b/clang/lib/CodeGen/CGBuiltin.cpp
@@ -19696,10 +19696,6 @@ RValue CodeGenFunction::EmitBuiltinAlignTo(const CallExpr *E, bool AlignUp) {
     Result = Builder.CreateIntrinsic(
         Intrinsic::ptrmask, {Args.SrcType, Args.IntType},
         {SrcForMask, InvertedMask}, nullptr, "aligned_result");
-
-    // Emit an alignment assumption to ensure that the new alignment is
-    // propagated to loads/stores, etc.
-    emitAlignmentAssumption(Result, E, E->getExprLoc(), Args.Alignment);
   } else {
     Result = Builder.CreateAnd(SrcForMask, InvertedMask, "aligned_result");
   }

diff --git a/clang/test/CodeGen/builtin-align-array.c b/clang/test/CodeGen/builtin-align-array.c
index 18a77b9a710db40..cbe6641f672eb43 100644
--- a/clang/test/CodeGen/builtin-align-array.c
+++ b/clang/test/CodeGen/builtin-align-array.c
@@ -9,12 +9,10 @@ extern int func(char *c);
 // CHECK-NEXT:    [[BUF:%.*]] = alloca [1024 x i8], align 16
 // CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds [1024 x i8], ptr [[BUF]], i64 0, i64 44
 // CHECK-NEXT:    [[ALIGNED_RESULT:%.*]] = call ptr @llvm.ptrmask.p0.i64(ptr [[ARRAYIDX]], i64 -16)
-// CHECK-NEXT:    call void @llvm.assume(i1 true) [ "align"(ptr [[ALIGNED_RESULT]], i64 16) ]
 // CHECK-NEXT:    [[CALL:%.*]] = call i32 @func(ptr noundef [[ALIGNED_RESULT]])
 // CHECK-NEXT:    [[ARRAYIDX1:%.*]] = getelementptr inbounds [1024 x i8], ptr [[BUF]], i64 0, i64 22
 // CHECK-NEXT:    [[OVER_BOUNDARY:%.*]] = getelementptr inbounds i8, ptr [[ARRAYIDX1]], i64 31
 // CHECK-NEXT:    [[ALIGNED_RESULT2:%.*]] = call ptr @llvm.ptrmask.p0.i64(ptr [[OVER_BOUNDARY]], i64 -32)
-// CHECK-NEXT:    call void @llvm.assume(i1 true) [ "align"(ptr [[ALIGNED_RESULT2]], i64 32) ]
 // CHECK-NEXT:    [[CALL3:%.*]] = call i32 @func(ptr noundef [[ALIGNED_RESULT2]])
 // CHECK-NEXT:    [[ARRAYIDX4:%.*]] = getelementptr inbounds [1024 x i8], ptr [[BUF]], i64 0, i64 16
 // CHECK-NEXT:    [[SRC_ADDR:%.*]] = ptrtoint ptr [[ARRAYIDX4]] to i64
@@ -35,12 +33,10 @@ int test_array(void) {
 // CHECK-NEXT:    [[BUF:%.*]] = alloca [1024 x i8], align 32
 // CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds [1024 x i8], ptr [[BUF]], i64 0, i64 64
 // CHECK-NEXT:    [[ALIGNED_RESULT:%.*]] = call ptr @llvm.ptrmask.p0.i64(ptr [[ARRAYIDX]], i64 -16)
-// CHECK-NEXT:    call void @llvm.assume(i1 true) [ "align"(ptr [[ALIGNED_RESULT]], i64 16) ]
 // CHECK-NEXT:    [[CALL:%.*]] = call i32 @func(ptr noundef [[ALIGNED_RESULT]])
 // CHECK-NEXT:    [[ARRAYIDX1:%.*]] = getelementptr inbounds [1024 x i8], ptr [[BUF]], i64 0, i64 32
 // CHECK-NEXT:    [[OVER_BOUNDARY:%.*]] = getelementptr inbounds i8, ptr [[ARRAYIDX1]], i64 31
 // CHECK-NEXT:    [[ALIGNED_RESULT2:%.*]] = call ptr @llvm.ptrmask.p0.i64(ptr [[OVER_BOUNDARY]], i64 -32)
-// CHECK-NEXT:    call void @llvm.assume(i1 true) [ "align"(ptr [[ALIGNED_RESULT2]], i64 32) ]
 // CHECK-NEXT:    [[CALL3:%.*]] = call i32 @func(ptr noundef [[ALIGNED_RESULT2]])
 // CHECK-NEXT:    ret i32 1
 //

diff --git a/clang/test/CodeGen/builtin-align.c b/clang/test/CodeGen/builtin-align.c
index b58d47078799eae..932b93972a85e66 100644
--- a/clang/test/CodeGen/builtin-align.c
+++ b/clang/test/CodeGen/builtin-align.c
@@ -119,7 +119,6 @@ _Bool is_aligned(TYPE ptr, unsigned align) {
 // CHECK-VOID_PTR-NEXT:    [[OVER_BOUNDARY:%.*]] = getelementptr inbounds i8, ptr [[PTR:%.*]], i64 [[MASK]]
 // CHECK-VOID_PTR-NEXT:    [[INVERTED_MASK:%.*]] = xor i64 [[MASK]], -1
 // CHECK-VOID_PTR-NEXT:    [[ALIGNED_RESULT:%.*]] = call ptr @llvm.ptrmask.p0.i64(ptr [[OVER_BOUNDARY]], i64 [[INVERTED_MASK]])
-// CHECK-VOID_PTR-NEXT:    call void @llvm.assume(i1 true) [ "align"(ptr [[ALIGNED_RESULT]], i64 [[ALIGNMENT]]) ]
 // CHECK-VOID_PTR-NEXT:    ret ptr [[ALIGNED_RESULT]]
 //
 // CHECK-FLOAT_PTR-LABEL: define {{[^@]+}}@align_up
@@ -130,7 +129,6 @@ _Bool is_aligned(TYPE ptr, unsigned align) {
 // CHECK-FLOAT_PTR-NEXT:    [[OVER_BOUNDARY:%.*]] = getelementptr inbounds i8, ptr [[PTR:%.*]], i64 [[MASK]]
 // CHECK-FLOAT_PTR-NEXT:    [[INVERTED_MASK:%.*]] = xor i64 [

[clang] 51abbf9 - [InstCombine] Deduce `align` and `nonnull` return attributes for `llvm.ptrmask`

2023-11-01 Thread Noah Goldstein via cfe-commits

Author: Noah Goldstein
Date: 2023-11-01T23:50:35-05:00
New Revision: 51abbf98d19cb1b89c6938811f2805bafe4b336e

URL: 
https://github.com/llvm/llvm-project/commit/51abbf98d19cb1b89c6938811f2805bafe4b336e
DIFF: 
https://github.com/llvm/llvm-project/commit/51abbf98d19cb1b89c6938811f2805bafe4b336e.diff

LOG: [InstCombine] Deduce `align` and `nonnull` return attributes for `llvm.ptrmask`

We can deduce the former from the mask and the incoming pointer's
alignment. We can set the latter if we know the result is non-zero
(this essentially just caches our analysis result).

Differential Revision: https://reviews.llvm.org/D156636
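
As a rough model of the `align` deduction (a standalone sketch with
hypothetical names, not the InstCombine API): the result of `ptr & mask`
has at least as many trailing zero bits as either operand, so the
provable alignment is the larger of the two powers of two.

  #include <algorithm>
  #include <cstdint>

  // Model only: PtrAlign is the pointer's known alignment (a power of two),
  // Mask the non-zero constant mask. An all-ones mask has zero trailing
  // zeros and so contributes nothing, modelling the unknown-mask case.
  uint64_t deducedPtrmaskAlign(uint64_t PtrAlign, uint64_t Mask) {
    unsigned TzPtr = __builtin_ctzll(PtrAlign); // zeros known from the pointer
    unsigned TzMask = __builtin_ctzll(Mask);    // zeros known from the mask
    return uint64_t{1} << std::max(TzPtr, TzMask);
  }

  // Matches the updated tests below: align-1 ptr & -2 -> align 2,
  // align-1 ptr & -4 -> align 4, align-8 ptr & unknown mask -> align 8.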

Added: 


Modified: 
clang/test/CodeGen/arm64_32-vaarg.c
llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
llvm/test/Transforms/InstCombine/align-addr.ll
llvm/test/Transforms/InstCombine/consecutive-ptrmask.ll
llvm/test/Transforms/InstCombine/ptrmask.ll

Removed: 




diff --git a/clang/test/CodeGen/arm64_32-vaarg.c b/clang/test/CodeGen/arm64_32-vaarg.c
index 9fbcf88ecfdcc33..3f1f4443436da15 100644
--- a/clang/test/CodeGen/arm64_32-vaarg.c
+++ b/clang/test/CodeGen/arm64_32-vaarg.c
@@ -29,7 +29,7 @@ long long test_longlong(OneLongLong input, va_list *mylist) {
   // CHECK-LABEL: define{{.*}} i64 @test_longlong(i64 %input
   // CHECK: [[STARTPTR:%.*]] = load ptr, ptr %mylist
   // CHECK: [[ALIGN_TMP:%.+]] = getelementptr inbounds i8, ptr [[STARTPTR]], i32 7
-  // CHECK: [[ALIGNED_ADDR:%.+]] = tail call ptr @llvm.ptrmask.p0.i32(ptr nonnull [[ALIGN_TMP]], i32 -8)
+  // CHECK: [[ALIGNED_ADDR:%.+]] = tail call align 8 ptr @llvm.ptrmask.p0.i32(ptr nonnull [[ALIGN_TMP]], i32 -8)
   // CHECK: [[NEXT:%.*]] = getelementptr inbounds i8, ptr [[ALIGNED_ADDR]], i32 8
   // CHECK: store ptr [[NEXT]], ptr %mylist
 

diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
index 5c08ab190eba476..10a8bff700b7366 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
@@ -1978,6 +1978,27 @@ Instruction *InstCombinerImpl::visitCallInst(CallInst &CI) {
           *II, Builder.CreateIntrinsic(InnerPtr->getType(), Intrinsic::ptrmask,
                                        {InnerPtr, NewMask}));
     }
+    bool Changed = false;
+    // See if we can deduce non-null.
+    if (!CI.hasRetAttr(Attribute::NonNull) &&
+        (Known.isNonZero() ||
+         isKnownNonZero(II, DL, /*Depth*/ 0, &AC, II, &DT))) {
+      CI.addRetAttr(Attribute::NonNull);
+      Changed = true;
+    }
+
+    unsigned NewAlignmentLog =
+        std::min(Value::MaxAlignmentExponent,
+                 std::min(BitWidth - 1, Known.countMinTrailingZeros()));
+    // Known bits will capture if we had alignment information associated with
+    // the pointer argument.
+    if (NewAlignmentLog > Log2(CI.getRetAlign().valueOrOne())) {
+      CI.addRetAttr(Attribute::getWithAlignment(
+          CI.getContext(), Align(uint64_t(1) << NewAlignmentLog)));
+      Changed = true;
+    }
+    if (Changed)
+      return &CI;
     break;
   }
   case Intrinsic::uadd_with_overflow:

diff --git a/llvm/test/Transforms/InstCombine/align-addr.ll b/llvm/test/Transforms/InstCombine/align-addr.ll
index 1e49cddf7ffe79d..facb5df08a82f43 100644
--- a/llvm/test/Transforms/InstCombine/align-addr.ll
+++ b/llvm/test/Transforms/InstCombine/align-addr.ll
@@ -135,7 +135,7 @@ define <16 x i8> @ptrmask_align_unknown_ptr_align1(ptr align 1 %ptr, i64 %mask)
 
 define <16 x i8> @ptrmask_align_unknown_ptr_align8(ptr align 8 %ptr, i64 %mask) {
 ; CHECK-LABEL: @ptrmask_align_unknown_ptr_align8(
-; CHECK-NEXT:    [[ALIGNED:%.*]] = call ptr @llvm.ptrmask.p0.i64(ptr [[PTR:%.*]], i64 [[MASK:%.*]])
+; CHECK-NEXT:    [[ALIGNED:%.*]] = call align 8 ptr @llvm.ptrmask.p0.i64(ptr [[PTR:%.*]], i64 [[MASK:%.*]])
 ; CHECK-NEXT:    [[LOAD:%.*]] = load <16 x i8>, ptr [[ALIGNED]], align 1
 ; CHECK-NEXT:    ret <16 x i8> [[LOAD]]
 ;
@@ -147,7 +147,7 @@ define <16 x i8> @ptrmask_align_unknown_ptr_align8(ptr align 8 %ptr, i64 %mask)
 ; Increase load align from 1 to 2
 define <16 x i8> @ptrmask_align2_ptr_align1(ptr align 1 %ptr) {
 ; CHECK-LABEL: @ptrmask_align2_ptr_align1(
-; CHECK-NEXT:    [[ALIGNED:%.*]] = call ptr @llvm.ptrmask.p0.i64(ptr [[PTR:%.*]], i64 -2)
+; CHECK-NEXT:    [[ALIGNED:%.*]] = call align 2 ptr @llvm.ptrmask.p0.i64(ptr [[PTR:%.*]], i64 -2)
 ; CHECK-NEXT:    [[LOAD:%.*]] = load <16 x i8>, ptr [[ALIGNED]], align 1
 ; CHECK-NEXT:    ret <16 x i8> [[LOAD]]
 ;
@@ -159,7 +159,7 @@ define <16 x i8> @ptrmask_align2_ptr_align1(ptr align 1 %ptr) {
 ; Increase load align from 1 to 4
 define <16 x i8> @ptrmask_align4_ptr_align1(ptr align 1 %ptr) {
 ; CHECK-LABEL: @ptrmask_align4_ptr_align1(
-; CHECK-NEXT:    [[ALIGNED:%.*]] = call ptr @llvm.ptrmask.p0.i64(ptr [[PTR:%.*]], i64 -4)
+; CHECK-NEXT:    [[ALIGNED:%.*]] = call align

[clang] 71be514 - [Clang][CodeGen] Emit `llvm.ptrmask` for `align_up` and `align_down`

2023-11-04 Thread Noah Goldstein via cfe-commits

Author: Noah Goldstein
Date: 2023-11-04T14:20:54-05:00
New Revision: 71be514fa0af996745186816735d69fa8a26f3c9

URL: 
https://github.com/llvm/llvm-project/commit/71be514fa0af996745186816735d69fa8a26f3c9
DIFF: 
https://github.com/llvm/llvm-project/commit/71be514fa0af996745186816735d69fa8a26f3c9.diff

LOG: [Clang][CodeGen] Emit `llvm.ptrmask` for `align_up` and `align_down`

Since PRs #69343 and #67166, we probably have enough support for
`llvm.ptrmask` to make it preferable to the GEP strategy.
Closes #71238
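
For reference, the integer arithmetic behind the comment retained in the
diff below ("(x & ~(y-1))" to align down, "((x+(y-1)) & ~(y-1))" to align
up), as a small self-contained check (illustrative only, not from the
patch):

  #include <cstdint>

  // align_down(44, 16): clear the low four bits.
  static_assert((uint64_t{44} & ~uint64_t{15}) == 32, "align_down(44, 16)");
  // align_up(44, 16): add the mask to step over the boundary, then clear.
  static_assert(((uint64_t{44} + 15) & ~uint64_t{15}) == 48, "align_up(44, 16)");
  // Adding the mask keeps an already-aligned value unchanged:
  static_assert(((uint64_t{48} + 15) & ~uint64_t{15}) == 48, "already aligned");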

Added: 


Modified: 
clang/lib/CodeGen/CGBuiltin.cpp
clang/test/CodeGen/builtin-align-array.c
clang/test/CodeGen/builtin-align.c

Removed: 




diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp
index 972aa1c708e5f65..978f6ffd145741d 100644
--- a/clang/lib/CodeGen/CGBuiltin.cpp
+++ b/clang/lib/CodeGen/CGBuiltin.cpp
@@ -19668,44 +19668,40 @@ RValue CodeGenFunction::EmitBuiltinIsAligned(const CallExpr *E) {
 /// Generate (x & ~(y-1)) to align down or ((x+(y-1)) & ~(y-1)) to align up.
 /// Note: For pointer types we can avoid ptrtoint/inttoptr pairs by using the
 /// llvm.ptrmask intrinsic (with a GEP before in the align_up case).
-/// TODO: actually use ptrmask once most optimization passes know about it.
 RValue CodeGenFunction::EmitBuiltinAlignTo(const CallExpr *E, bool AlignUp) {
   BuiltinAlignArgs Args(E, *this);
-  llvm::Value *SrcAddr = Args.Src;
-  if (Args.Src->getType()->isPointerTy())
-    SrcAddr = Builder.CreatePtrToInt(Args.Src, Args.IntType, "intptr");
-  llvm::Value *SrcForMask = SrcAddr;
+  llvm::Value *SrcForMask = Args.Src;
   if (AlignUp) {
     // When aligning up we have to first add the mask to ensure we go over the
     // next alignment value and then align down to the next valid multiple.
     // By adding the mask, we ensure that align_up on an already aligned
     // value will not change the value.
-    SrcForMask = Builder.CreateAdd(SrcForMask, Args.Mask, "over_boundary");
+    if (Args.Src->getType()->isPointerTy()) {
+      if (getLangOpts().isSignedOverflowDefined())
+        SrcForMask =
+            Builder.CreateGEP(Int8Ty, SrcForMask, Args.Mask, "over_boundary");
+      else
+        SrcForMask = EmitCheckedInBoundsGEP(Int8Ty, SrcForMask, Args.Mask,
+                                            /*SignedIndices=*/true,
+                                            /*isSubtraction=*/false,
+                                            E->getExprLoc(), "over_boundary");
+    } else {
+      SrcForMask = Builder.CreateAdd(SrcForMask, Args.Mask, "over_boundary");
+    }
   }
   // Invert the mask to only clear the lower bits.
   llvm::Value *InvertedMask = Builder.CreateNot(Args.Mask, "inverted_mask");
-  llvm::Value *Result =
-      Builder.CreateAnd(SrcForMask, InvertedMask, "aligned_result");
+  llvm::Value *Result = nullptr;
   if (Args.Src->getType()->isPointerTy()) {
-    /// TODO: Use ptrmask instead of ptrtoint+gep once it is optimized well.
-    // Result = Builder.CreateIntrinsic(
-    //     Intrinsic::ptrmask, {Args.SrcType, SrcForMask->getType(), Args.IntType},
-    //     {SrcForMask, NegatedMask}, nullptr, "aligned_result");
-    Result->setName("aligned_intptr");
-    llvm::Value *Difference = Builder.CreateSub(Result, SrcAddr, "diff");
-    // The result must point to the same underlying allocation. This means we
-    // can use an inbounds GEP to enable better optimization.
-    if (getLangOpts().isSignedOverflowDefined())
-      Result =
-          Builder.CreateGEP(Int8Ty, Args.Src, Difference, "aligned_result");
-    else
-      Result = EmitCheckedInBoundsGEP(Int8Ty, Args.Src, Difference,
-                                      /*SignedIndices=*/true,
-                                      /*isSubtraction=*/!AlignUp,
-                                      E->getExprLoc(), "aligned_result");
+    Result = Builder.CreateIntrinsic(
+        Intrinsic::ptrmask, {Args.SrcType, Args.IntType},
+        {SrcForMask, InvertedMask}, nullptr, "aligned_result");
+
     // Emit an alignment assumption to ensure that the new alignment is
     // propagated to loads/stores, etc.
     emitAlignmentAssumption(Result, E, E->getExprLoc(), Args.Alignment);
+  } else {
+    Result = Builder.CreateAnd(SrcForMask, InvertedMask, "aligned_result");
   }
   assert(Result->getType() == Args.SrcType);
   return RValue::get(Result);

diff --git a/clang/test/CodeGen/builtin-align-array.c b/clang/test/CodeGen/builtin-align-array.c
index 5d1377b98d2814f..18a77b9a710db40 100644
--- a/clang/test/CodeGen/builtin-align-array.c
+++ b/clang/test/CodeGen/builtin-align-array.c
@@ -8,22 +8,16 @@ extern int func(char *c);
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[BUF:%.*]] = alloca [1024 x i8], align 16
 // CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds [1024 x i8], ptr [[BUF]], i64 0, i64 44
-// CHECK-NEXT:    [[INTPTR:%.*]] = ptrtoint ptr [[ARRAYIDX]] to

[clang] d80d5b9 - [InstCombine] Canonicalize `(sitofp x)` -> `(uitofp x)` if `x >= 0`

2024-03-13 Thread Noah Goldstein via cfe-commits

Author: Noah Goldstein
Date: 2024-03-13T18:26:21-05:00
New Revision: d80d5b923c6f611590a12543bdb33e0c16044d44

URL: 
https://github.com/llvm/llvm-project/commit/d80d5b923c6f611590a12543bdb33e0c16044d44
DIFF: 
https://github.com/llvm/llvm-project/commit/d80d5b923c6f611590a12543bdb33e0c16044d44.diff

LOG: [InstCombine] Canonicalize `(sitofp x)` -> `(uitofp x)` if `x >= 0`

Just a standard canonicalization.

Proofs: https://alive2.llvm.org/ce/z/9W4VFm

Closes #82404
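
A standalone way to see the equivalence the Alive2 proof formalizes
(illustrative C++, not part of the patch): for a value known to be
non-negative (in the tests below, `[[MUL_I]]` is `shl nuw nsw`), signed
and unsigned conversion to floating point agree.

  #include <cassert>
  #include <cstdint>

  int main() {
    int32_t x = 1234;  // stands in for a value proven non-negative
    assert(static_cast<float>(x) ==
           static_cast<float>(static_cast<uint32_t>(x)));
    return 0;
  }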

Added: 


Modified: 
clang/test/Headers/__clang_hip_math.hip
llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp
llvm/test/Transforms/InstCombine/add-sitofp.ll
llvm/test/Transforms/InstCombine/binop-itofp.ll
llvm/test/Transforms/InstCombine/clamp-to-minmax.ll
llvm/test/Transforms/InstCombine/fpcast.ll
llvm/test/Transforms/InstCombine/minmax-fold.ll
llvm/test/Transforms/InstCombine/minmax-fp.ll
llvm/test/Transforms/InstCombine/pr27236.ll
llvm/test/Transforms/LoopVectorize/X86/float-induction-x86.ll
llvm/test/Transforms/LoopVectorize/float-induction.ll

Removed: 




diff --git a/clang/test/Headers/__clang_hip_math.hip b/clang/test/Headers/__clang_hip_math.hip
index 37099de74fb8ec..701f93853ab93c 100644
--- a/clang/test/Headers/__clang_hip_math.hip
+++ b/clang/test/Headers/__clang_hip_math.hip
@@ -1685,7 +1685,7 @@ extern "C" __device__ double test_j1(double x) {
 // DEFAULT-NEXT:    [[__X1_0_I3:%.*]] = phi float [ [[SUB_I:%.*]], [[FOR_BODY_I]] ], [ [[CALL_I21_I]], [[IF_END4_I]] ]
 // DEFAULT-NEXT:    [[__X0_0_I2:%.*]] = phi float [ [[__X1_0_I3]], [[FOR_BODY_I]] ], [ [[CALL_I_I]], [[IF_END4_I]] ]
 // DEFAULT-NEXT:    [[MUL_I:%.*]] = shl nuw nsw i32 [[__I_0_I4]], 1
-// DEFAULT-NEXT:    [[CONV_I:%.*]] = sitofp i32 [[MUL_I]] to float
+// DEFAULT-NEXT:    [[CONV_I:%.*]] = uitofp i32 [[MUL_I]] to float
 // DEFAULT-NEXT:    [[DIV_I:%.*]] = fdiv contract float [[CONV_I]], [[Y]]
 // DEFAULT-NEXT:    [[MUL8_I:%.*]] = fmul contract float [[__X1_0_I3]], [[DIV_I]]
 // DEFAULT-NEXT:    [[SUB_I]] = fsub contract float [[MUL8_I]], [[__X0_0_I2]]
@@ -1718,7 +1718,7 @@ extern "C" __device__ double test_j1(double x) {
 // FINITEONLY-NEXT:    [[__X1_0_I3:%.*]] = phi float [ [[SUB_I:%.*]], [[FOR_BODY_I]] ], [ [[CALL_I21_I]], [[IF_END4_I]] ]
 // FINITEONLY-NEXT:    [[__X0_0_I2:%.*]] = phi float [ [[__X1_0_I3]], [[FOR_BODY_I]] ], [ [[CALL_I_I]], [[IF_END4_I]] ]
 // FINITEONLY-NEXT:    [[MUL_I:%.*]] = shl nuw nsw i32 [[__I_0_I4]], 1
-// FINITEONLY-NEXT:    [[CONV_I:%.*]] = sitofp i32 [[MUL_I]] to float
+// FINITEONLY-NEXT:    [[CONV_I:%.*]] = uitofp i32 [[MUL_I]] to float
 // FINITEONLY-NEXT:    [[DIV_I:%.*]] = fdiv nnan ninf contract float [[CONV_I]], [[Y]]
 // FINITEONLY-NEXT:    [[MUL8_I:%.*]] = fmul nnan ninf contract float [[__X1_0_I3]], [[DIV_I]]
 // FINITEONLY-NEXT:    [[SUB_I]] = fsub nnan ninf contract float [[MUL8_I]], [[__X0_0_I2]]
@@ -1751,7 +1751,7 @@ extern "C" __device__ double test_j1(double x) {
 // APPROX-NEXT:    [[__X1_0_I3:%.*]] = phi float [ [[SUB_I:%.*]], [[FOR_BODY_I]] ], [ [[CALL_I21_I]], [[IF_END4_I]] ]
 // APPROX-NEXT:    [[__X0_0_I2:%.*]] = phi float [ [[__X1_0_I3]], [[FOR_BODY_I]] ], [ [[CALL_I_I]], [[IF_END4_I]] ]
 // APPROX-NEXT:    [[MUL_I:%.*]] = shl nuw nsw i32 [[__I_0_I4]], 1
-// APPROX-NEXT:    [[CONV_I:%.*]] = sitofp i32 [[MUL_I]] to float
+// APPROX-NEXT:    [[CONV_I:%.*]] = uitofp i32 [[MUL_I]] to float
 // APPROX-NEXT:    [[DIV_I:%.*]] = fdiv contract float [[CONV_I]], [[Y]]
 // APPROX-NEXT:    [[MUL8_I:%.*]] = fmul contract float [[__X1_0_I3]], [[DIV_I]]
 // APPROX-NEXT:    [[SUB_I]] = fsub contract float [[MUL8_I]], [[__X0_0_I2]]
@@ -1788,7 +1788,7 @@ extern "C" __device__ float test_jnf(int x, float y) {
 // DEFAULT-NEXT:    [[__X1_0_I3:%.*]] = phi double [ [[SUB_I:%.*]], [[FOR_BODY_I]] ], [ [[CALL_I21_I]], [[IF_END4_I]] ]
 // DEFAULT-NEXT:    [[__X0_0_I2:%.*]] = phi double [ [[__X1_0_I3]], [[FOR_BODY_I]] ], [ [[CALL_I_I]], [[IF_END4_I]] ]
 // DEFAULT-NEXT:    [[MUL_I:%.*]] = shl nuw nsw i32 [[__I_0_I4]], 1
-// DEFAULT-NEXT:    [[CONV_I:%.*]] = sitofp i32 [[MUL_I]] to double
+// DEFAULT-NEXT:    [[CONV_I:%.*]] = uitofp i32 [[MUL_I]] to double
 // DEFAULT-NEXT:    [[DIV_I:%.*]] = fdiv contract double [[CONV_I]], [[Y]]
 // DEFAULT-NEXT:    [[MUL8_I:%.*]] = fmul contract double [[__X1_0_I3]], [[DIV_I]]
 // DEFAULT-NEXT:    [[SUB_I]] = fsub contract double [[MUL8_I]], [[__X0_0_I2]]
@@ -1821,7 +1821,7 @@ extern "C" __device__ float test_jnf(int x, float y) {
 // FINITEONLY-NEXT:    [[__X1_0_I3:%.*]] = phi double [ [[SUB_I:%.*]], [[FOR_BODY_I]] ], [ [[CALL_I21_I]], [[IF_END4_I]] ]
 // FINITEONLY-NEXT:    [[__X0_0_I2:%.*]] = phi double [ [[__X1_0_I3]], [[FOR_BODY_I]] ], [ [[CALL_I_I]], [[IF_END4_I]] ]
 // FINITEONLY-NEXT:    [[MUL_I:%.*]] = shl nuw nsw i32 [[__I_0_I4]], 1
-// FINITEONLY-NEXT:    [[CONV_I:%.*]] = uito

[clang] 6960ace - Revert "[InstCombine] Canonicalize `(sitofp x)` -> `(uitofp x)` if `x >= 0`"

2024-03-19 Thread Noah Goldstein via cfe-commits

Author: Noah Goldstein
Date: 2024-03-20T00:50:45-05:00
New Revision: 6960ace534c4021301dd5a9933ca06ba96edea23

URL: 
https://github.com/llvm/llvm-project/commit/6960ace534c4021301dd5a9933ca06ba96edea23
DIFF: 
https://github.com/llvm/llvm-project/commit/6960ace534c4021301dd5a9933ca06ba96edea23.diff

LOG: Revert "[InstCombine] Canonicalize `(sitofp x)` -> `(uitofp x)` if `x >= 0`"

This reverts commit d80d5b923c6f611590a12543bdb33e0c16044d44.

It wasn't a particularly important transform to begin with, and it
caused some codegen regressions on targets that prefer `sitofp`, so it
is being dropped.

We might revisit this along with adding an `nneg` flag to `uitofp` so
the transform is easily reversible for the backend.

Added: 


Modified: 
clang/test/Headers/__clang_hip_math.hip
llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp
llvm/test/Transforms/InstCombine/add-sitofp.ll
llvm/test/Transforms/InstCombine/binop-itofp.ll
llvm/test/Transforms/InstCombine/clamp-to-minmax.ll
llvm/test/Transforms/InstCombine/fpcast.ll
llvm/test/Transforms/InstCombine/minmax-fold.ll
llvm/test/Transforms/InstCombine/minmax-fp.ll
llvm/test/Transforms/InstCombine/pr27236.ll
llvm/test/Transforms/LoopVectorize/X86/float-induction-x86.ll
llvm/test/Transforms/LoopVectorize/float-induction.ll

Removed: 




diff --git a/clang/test/Headers/__clang_hip_math.hip b/clang/test/Headers/__clang_hip_math.hip
index 701f93853ab93c..37099de74fb8ec 100644
--- a/clang/test/Headers/__clang_hip_math.hip
+++ b/clang/test/Headers/__clang_hip_math.hip
@@ -1685,7 +1685,7 @@ extern "C" __device__ double test_j1(double x) {
 // DEFAULT-NEXT:    [[__X1_0_I3:%.*]] = phi float [ [[SUB_I:%.*]], [[FOR_BODY_I]] ], [ [[CALL_I21_I]], [[IF_END4_I]] ]
 // DEFAULT-NEXT:    [[__X0_0_I2:%.*]] = phi float [ [[__X1_0_I3]], [[FOR_BODY_I]] ], [ [[CALL_I_I]], [[IF_END4_I]] ]
 // DEFAULT-NEXT:    [[MUL_I:%.*]] = shl nuw nsw i32 [[__I_0_I4]], 1
-// DEFAULT-NEXT:    [[CONV_I:%.*]] = uitofp i32 [[MUL_I]] to float
+// DEFAULT-NEXT:    [[CONV_I:%.*]] = sitofp i32 [[MUL_I]] to float
 // DEFAULT-NEXT:    [[DIV_I:%.*]] = fdiv contract float [[CONV_I]], [[Y]]
 // DEFAULT-NEXT:    [[MUL8_I:%.*]] = fmul contract float [[__X1_0_I3]], [[DIV_I]]
 // DEFAULT-NEXT:    [[SUB_I]] = fsub contract float [[MUL8_I]], [[__X0_0_I2]]
@@ -1718,7 +1718,7 @@ extern "C" __device__ double test_j1(double x) {
 // FINITEONLY-NEXT:    [[__X1_0_I3:%.*]] = phi float [ [[SUB_I:%.*]], [[FOR_BODY_I]] ], [ [[CALL_I21_I]], [[IF_END4_I]] ]
 // FINITEONLY-NEXT:    [[__X0_0_I2:%.*]] = phi float [ [[__X1_0_I3]], [[FOR_BODY_I]] ], [ [[CALL_I_I]], [[IF_END4_I]] ]
 // FINITEONLY-NEXT:    [[MUL_I:%.*]] = shl nuw nsw i32 [[__I_0_I4]], 1
-// FINITEONLY-NEXT:    [[CONV_I:%.*]] = uitofp i32 [[MUL_I]] to float
+// FINITEONLY-NEXT:    [[CONV_I:%.*]] = sitofp i32 [[MUL_I]] to float
 // FINITEONLY-NEXT:    [[DIV_I:%.*]] = fdiv nnan ninf contract float [[CONV_I]], [[Y]]
 // FINITEONLY-NEXT:    [[MUL8_I:%.*]] = fmul nnan ninf contract float [[__X1_0_I3]], [[DIV_I]]
 // FINITEONLY-NEXT:    [[SUB_I]] = fsub nnan ninf contract float [[MUL8_I]], [[__X0_0_I2]]
@@ -1751,7 +1751,7 @@ extern "C" __device__ double test_j1(double x) {
 // APPROX-NEXT:    [[__X1_0_I3:%.*]] = phi float [ [[SUB_I:%.*]], [[FOR_BODY_I]] ], [ [[CALL_I21_I]], [[IF_END4_I]] ]
 // APPROX-NEXT:    [[__X0_0_I2:%.*]] = phi float [ [[__X1_0_I3]], [[FOR_BODY_I]] ], [ [[CALL_I_I]], [[IF_END4_I]] ]
 // APPROX-NEXT:    [[MUL_I:%.*]] = shl nuw nsw i32 [[__I_0_I4]], 1
-// APPROX-NEXT:    [[CONV_I:%.*]] = uitofp i32 [[MUL_I]] to float
+// APPROX-NEXT:    [[CONV_I:%.*]] = sitofp i32 [[MUL_I]] to float
 // APPROX-NEXT:    [[DIV_I:%.*]] = fdiv contract float [[CONV_I]], [[Y]]
 // APPROX-NEXT:    [[MUL8_I:%.*]] = fmul contract float [[__X1_0_I3]], [[DIV_I]]
 // APPROX-NEXT:    [[SUB_I]] = fsub contract float [[MUL8_I]], [[__X0_0_I2]]
@@ -1788,7 +1788,7 @@ extern "C" __device__ float test_jnf(int x, float y) {
 // DEFAULT-NEXT:    [[__X1_0_I3:%.*]] = phi double [ [[SUB_I:%.*]], [[FOR_BODY_I]] ], [ [[CALL_I21_I]], [[IF_END4_I]] ]
 // DEFAULT-NEXT:    [[__X0_0_I2:%.*]] = phi double [ [[__X1_0_I3]], [[FOR_BODY_I]] ], [ [[CALL_I_I]], [[IF_END4_I]] ]
 // DEFAULT-NEXT:    [[MUL_I:%.*]] = shl nuw nsw i32 [[__I_0_I4]], 1
-// DEFAULT-NEXT:    [[CONV_I:%.*]] = uitofp i32 [[MUL_I]] to double
+// DEFAULT-NEXT:    [[CONV_I:%.*]] = sitofp i32 [[MUL_I]] to double
 // DEFAULT-NEXT:    [[DIV_I:%.*]] = fdiv contract double [[CONV_I]], [[Y]]
 // DEFAULT-NEXT:    [[MUL8_I:%.*]] = fmul contract double [[__X1_0_I3]], [[DIV_I]]
 // DEFAULT-NEXT:    [[SUB_I]] = fsub contract double [[MUL8_I]], [[__X0_0_I2]]
@@ -1821,7 +1821,7 @@ extern "C" __device__ float test_jnf(int x, float y) {
 // FINITEONLY-NEXT:    [[__X1_0_I3:%.*]] = phi double [ [[SUB_I:%.*]], [[FOR_BODY_I]] ], [ [[CALL_I21_I]], [[IF_END4_I]] ]
 // FINITEONLY-NEXT:    [[__X0_0_I2:%.*]] = phi double [ [[__X1_0_I3]], [[FOR_BODY_I]] ], [

[clang] b6bd41d - [InstCombine] Add canonicalization of `sitofp` -> `uitofp nneg`

2024-04-16 Thread Noah Goldstein via cfe-commits

Author: Noah Goldstein
Date: 2024-04-16T15:26:25-05:00
New Revision: b6bd41db31c798f3fc82368381fad6d42795f512

URL: 
https://github.com/llvm/llvm-project/commit/b6bd41db31c798f3fc82368381fad6d42795f512
DIFF: 
https://github.com/llvm/llvm-project/commit/b6bd41db31c798f3fc82368381fad6d42795f512.diff

LOG: [InstCombine] Add canonicalization of `sitofp` -> `uitofp nneg`

This is essentially the same as #82404, but with the `nneg` flag, which
allows the backend to reliably undo the transform.

Closes #88299
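
To see why `nneg` is what makes the rewrite reversible (illustrative
C++, not from the patch): the two conversions agree only on non-negative
inputs, so the flag licenses the backend to turn `uitofp nneg` back into
`sitofp` on targets where the signed conversion is cheaper.

  #include <cassert>
  #include <cstdint>

  int main() {
    int32_t neg = -1;
    // For a negative input the unsigned conversion sees 4294967295.0,
    // not -1.0, so the rewrite would be wrong without the nneg guarantee.
    assert(static_cast<double>(neg) !=
           static_cast<double>(static_cast<uint32_t>(neg)));
    return 0;
  }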

Added: 


Modified: 
clang/test/Headers/__clang_hip_math.hip
llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp
llvm/test/Transforms/InstCombine/add-sitofp.ll
llvm/test/Transforms/InstCombine/binop-itofp.ll
llvm/test/Transforms/InstCombine/clamp-to-minmax.ll
llvm/test/Transforms/InstCombine/fpcast.ll
llvm/test/Transforms/InstCombine/minmax-fold.ll
llvm/test/Transforms/InstCombine/minmax-fp.ll
llvm/test/Transforms/InstCombine/pr27236.ll
llvm/test/Transforms/InstCombine/sitofp.ll
llvm/test/Transforms/LoopVectorize/X86/float-induction-x86.ll
llvm/test/Transforms/LoopVectorize/float-induction.ll

Removed: 




diff --git a/clang/test/Headers/__clang_hip_math.hip b/clang/test/Headers/__clang_hip_math.hip
index 2e5f521a5feaed..1271868a53b866 100644
--- a/clang/test/Headers/__clang_hip_math.hip
+++ b/clang/test/Headers/__clang_hip_math.hip
@@ -1685,7 +1685,7 @@ extern "C" __device__ double test_j1(double x) {
 // DEFAULT-NEXT:    [[__X1_0_I3:%.*]] = phi float [ [[SUB_I:%.*]], [[FOR_BODY_I]] ], [ [[CALL_I21_I]], [[IF_END4_I]] ]
 // DEFAULT-NEXT:    [[__X0_0_I2:%.*]] = phi float [ [[__X1_0_I3]], [[FOR_BODY_I]] ], [ [[CALL_I_I]], [[IF_END4_I]] ]
 // DEFAULT-NEXT:    [[MUL_I:%.*]] = shl nuw nsw i32 [[__I_0_I4]], 1
-// DEFAULT-NEXT:    [[CONV_I:%.*]] = sitofp i32 [[MUL_I]] to float
+// DEFAULT-NEXT:    [[CONV_I:%.*]] = uitofp nneg i32 [[MUL_I]] to float
 // DEFAULT-NEXT:    [[DIV_I:%.*]] = fdiv contract float [[CONV_I]], [[Y]]
 // DEFAULT-NEXT:    [[MUL8_I:%.*]] = fmul contract float [[__X1_0_I3]], [[DIV_I]]
 // DEFAULT-NEXT:    [[SUB_I]] = fsub contract float [[MUL8_I]], [[__X0_0_I2]]
@@ -1718,7 +1718,7 @@ extern "C" __device__ double test_j1(double x) {
 // FINITEONLY-NEXT:    [[__X1_0_I3:%.*]] = phi float [ [[SUB_I:%.*]], [[FOR_BODY_I]] ], [ [[CALL_I21_I]], [[IF_END4_I]] ]
 // FINITEONLY-NEXT:    [[__X0_0_I2:%.*]] = phi float [ [[__X1_0_I3]], [[FOR_BODY_I]] ], [ [[CALL_I_I]], [[IF_END4_I]] ]
 // FINITEONLY-NEXT:    [[MUL_I:%.*]] = shl nuw nsw i32 [[__I_0_I4]], 1
-// FINITEONLY-NEXT:    [[CONV_I:%.*]] = sitofp i32 [[MUL_I]] to float
+// FINITEONLY-NEXT:    [[CONV_I:%.*]] = uitofp nneg i32 [[MUL_I]] to float
 // FINITEONLY-NEXT:    [[DIV_I:%.*]] = fdiv nnan ninf contract float [[CONV_I]], [[Y]]
 // FINITEONLY-NEXT:    [[MUL8_I:%.*]] = fmul nnan ninf contract float [[__X1_0_I3]], [[DIV_I]]
 // FINITEONLY-NEXT:    [[SUB_I]] = fsub nnan ninf contract float [[MUL8_I]], [[__X0_0_I2]]
@@ -1751,7 +1751,7 @@ extern "C" __device__ double test_j1(double x) {
 // APPROX-NEXT:    [[__X1_0_I3:%.*]] = phi float [ [[SUB_I:%.*]], [[FOR_BODY_I]] ], [ [[CALL_I21_I]], [[IF_END4_I]] ]
 // APPROX-NEXT:    [[__X0_0_I2:%.*]] = phi float [ [[__X1_0_I3]], [[FOR_BODY_I]] ], [ [[CALL_I_I]], [[IF_END4_I]] ]
 // APPROX-NEXT:    [[MUL_I:%.*]] = shl nuw nsw i32 [[__I_0_I4]], 1
-// APPROX-NEXT:    [[CONV_I:%.*]] = sitofp i32 [[MUL_I]] to float
+// APPROX-NEXT:    [[CONV_I:%.*]] = uitofp nneg i32 [[MUL_I]] to float
 // APPROX-NEXT:    [[DIV_I:%.*]] = fdiv contract float [[CONV_I]], [[Y]]
 // APPROX-NEXT:    [[MUL8_I:%.*]] = fmul contract float [[__X1_0_I3]], [[DIV_I]]
 // APPROX-NEXT:    [[SUB_I]] = fsub contract float [[MUL8_I]], [[__X0_0_I2]]
@@ -1788,7 +1788,7 @@ extern "C" __device__ float test_jnf(int x, float y) {
 // DEFAULT-NEXT:    [[__X1_0_I3:%.*]] = phi double [ [[SUB_I:%.*]], [[FOR_BODY_I]] ], [ [[CALL_I21_I]], [[IF_END4_I]] ]
 // DEFAULT-NEXT:    [[__X0_0_I2:%.*]] = phi double [ [[__X1_0_I3]], [[FOR_BODY_I]] ], [ [[CALL_I_I]], [[IF_END4_I]] ]
 // DEFAULT-NEXT:    [[MUL_I:%.*]] = shl nuw nsw i32 [[__I_0_I4]], 1
-// DEFAULT-NEXT:    [[CONV_I:%.*]] = sitofp i32 [[MUL_I]] to double
+// DEFAULT-NEXT:    [[CONV_I:%.*]] = uitofp nneg i32 [[MUL_I]] to double
 // DEFAULT-NEXT:    [[DIV_I:%.*]] = fdiv contract double [[CONV_I]], [[Y]]
 // DEFAULT-NEXT:    [[MUL8_I:%.*]] = fmul contract double [[__X1_0_I3]], [[DIV_I]]
 // DEFAULT-NEXT:    [[SUB_I]] = fsub contract double [[MUL8_I]], [[__X0_0_I2]]
@@ -1821,7 +1821,7 @@ extern "C" __device__ float test_jnf(int x, float y) {
 // FINITEONLY-NEXT:    [[__X1_0_I3:%.*]] = phi double [ [[SUB_I:%.*]], [[FOR_BODY_I]] ], [ [[CALL_I21_I]], [[IF_END4_I]] ]
 // FINITEONLY-NEXT:    [[__X0_0_I2:%.*]] = phi double [ [[__X1_0_I3]], [[FOR_BODY_I]] ], [ [[CALL_I_I]], [[IF_END4_I]] ]
 // FINITEONLY-NEXT:    [[MUL_I:%.*]] = shl nuw nsw i32 [[__I_0_I4]], 1
-// FINITEONL