hliao updated this revision to Diff 272621.
hliao added a comment.

Rebased to trunk. All prerequisites have landed.
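
For quick context: a minimal before/after sketch of the rewrite this patch enables, taken from the `noop_ptrint_pair` test added below (AMDGPU, where casting a global pointer to the flat address space is a no-op):

  ; Input: a no-op ptrtoint/inttoptr pair hides the global address space
  ; from the store through the flat pointer.
  define void @noop_ptrint_pair(i32 addrspace(1)* %x.coerce) {
    %1 = ptrtoint i32 addrspace(1)* %x.coerce to i64
    %2 = inttoptr i64 %1 to i32*
    store i32 0, i32* %2
    ret void
  }

  ; After -infer-address-spaces, the pair is folded away and the store
  ; addresses the global pointer directly (see the CHECK lines below):
  define void @noop_ptrint_pair(i32 addrspace(1)* %x.coerce) {
    store i32 0, i32 addrspace(1)* %x.coerce
    ret void
  }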


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D81938/new/

https://reviews.llvm.org/D81938

Files:
  clang/test/CodeGenCUDA/amdgpu-kernel-arg-pointer-type.cu
  llvm/lib/Transforms/Scalar/InferAddressSpaces.cpp
  llvm/test/Transforms/InferAddressSpaces/noop-ptrint-pair.ll

Index: llvm/test/Transforms/InferAddressSpaces/noop-ptrint-pair.ll
===================================================================
--- /dev/null
+++ llvm/test/Transforms/InferAddressSpaces/noop-ptrint-pair.ll
@@ -0,0 +1,45 @@
+; RUN: opt -S -o - -infer-address-spaces %s | FileCheck %s
+
+target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-ni:7"
+target triple = "amdgcn-amd-amdhsa"
+
+; CHECK-LABEL: @noop_ptrint_pair(
+; CHECK-NEXT: store i32 0, i32 addrspace(1)* %{{.*}}
+; CHECK-NEXT: ret void
+define void @noop_ptrint_pair(i32 addrspace(1)* %x.coerce) {
+  %1 = ptrtoint i32 addrspace(1)* %x.coerce to i64
+  %2 = inttoptr i64 %1 to i32*
+  store i32 0, i32* %2
+  ret void
+}
+
+; CHECK-LABEL: @non_noop_ptrint_pair(
+; CHECK-NEXT: ptrtoint i32 addrspace(3)* %{{.*}} to i64
+; CHECK-NEXT: inttoptr i64 %{{.*}} to i32*
+; CHECK-NEXT: store i32 0, i32* %{{.*}}
+; CHECK-NEXT: ret void
+define void @non_noop_ptrint_pair(i32 addrspace(3)* %x.coerce) {
+  %1 = ptrtoint i32 addrspace(3)* %x.coerce to i64
+  %2 = inttoptr i64 %1 to i32*
+  store i32 0, i32* %2
+  ret void
+}
+
+@g = addrspace(1) global i32 0, align 4
+@l = addrspace(3) global i32 0, align 4
+
+; CHECK-LABEL: @noop_ptrint_pair_ce(
+; CHECK-NEXT: store i32 0, i32 addrspace(1)* @g
+; CHECK-NEXT: ret void
+define void @noop_ptrint_pair_ce() {
+  store i32 0, i32* inttoptr (i64 ptrtoint (i32 addrspace(1)* @g to i64) to i32*)
+  ret void
+}
+
+; CHECK-LABEL: @non_noop_ptrint_pair_ce(
+; CHECK-NEXT: store i32 0, i32* inttoptr (i64 ptrtoint (i32 addrspace(3)* @l to i64) to i32*)
+; CHECK-NEXT: ret void
+define void @non_noop_ptrint_pair_ce() {
+  store i32 0, i32* inttoptr (i64 ptrtoint (i32 addrspace(3)* @l to i64) to i32*)
+  ret void
+}
Index: llvm/lib/Transforms/Scalar/InferAddressSpaces.cpp
===================================================================
--- llvm/lib/Transforms/Scalar/InferAddressSpaces.cpp
+++ llvm/lib/Transforms/Scalar/InferAddressSpaces.cpp
@@ -143,6 +143,7 @@
 /// InferAddressSpaces
 class InferAddressSpaces : public FunctionPass {
   const TargetTransformInfo *TTI = nullptr;
+  const DataLayout *DL = nullptr;
 
   /// Target-specific address space whose uses should be replaced if
   /// possible.
@@ -219,10 +220,30 @@
 INITIALIZE_PASS(InferAddressSpaces, DEBUG_TYPE, "Infer address spaces",
                 false, false)
 
+static bool isNoopPtrIntCastPair(const Operator *I2P, const DataLayout *DL,
+                                 const TargetTransformInfo *TTI) {
+  assert(I2P->getOpcode() == Instruction::IntToPtr);
+  auto *P2I = dyn_cast<Operator>(I2P->getOperand(0));
+  if (!P2I || P2I->getOpcode() != Instruction::PtrToInt)
+    return false;
+  // The pair of `ptrtoint`/`inttoptr` is a no-op cast if both casts are no-ops
+  // and the target treats the source-to-result address space cast as a no-op.
+  return CastInst::isNoopCast(Instruction::CastOps(I2P->getOpcode()),
+                              I2P->getOperand(0)->getType(), I2P->getType(),
+                              *DL) &&
+         CastInst::isNoopCast(Instruction::CastOps(P2I->getOpcode()),
+                              P2I->getOperand(0)->getType(), P2I->getType(),
+                              *DL) &&
+         TTI->isNoopAddrSpaceCast(
+             P2I->getOperand(0)->getType()->getPointerAddressSpace(),
+             I2P->getType()->getPointerAddressSpace());
+}
+
 // Returns true if V is an address expression.
 // TODO: Currently, we consider only phi, bitcast, addrspacecast, and
 // getelementptr operators.
-static bool isAddressExpression(const Value &V) {
+static bool isAddressExpression(const Value &V, const DataLayout *DL,
+                                const TargetTransformInfo *TTI) {
   const Operator *Op = dyn_cast<Operator>(&V);
   if (!Op)
     return false;
@@ -241,6 +262,8 @@
     const IntrinsicInst *II = dyn_cast<IntrinsicInst>(&V);
     return II && II->getIntrinsicID() == Intrinsic::ptrmask;
   }
+  case Instruction::IntToPtr:
+    return isNoopPtrIntCastPair(Op, DL, TTI);
   default:
     return false;
   }
@@ -249,7 +272,9 @@
 // Returns the pointer operands of V.
 //
 // Precondition: V is an address expression.
-static SmallVector<Value *, 2> getPointerOperands(const Value &V) {
+static SmallVector<Value *, 2>
+getPointerOperands(const Value &V, const DataLayout *DL,
+                   const TargetTransformInfo *TTI) {
   const Operator &Op = cast<Operator>(V);
   switch (Op.getOpcode()) {
   case Instruction::PHI: {
@@ -269,6 +294,11 @@
            "unexpected intrinsic call");
     return {II.getArgOperand(0)};
   }
+  case Instruction::IntToPtr: {
+    assert(isNoopPtrIntCastPair(&Op, DL, TTI));
+    auto *P2I = cast<Operator>(Op.getOperand(0));
+    return {P2I->getOperand(0)};
+  }
   default:
     llvm_unreachable("Unexpected instruction type.");
   }
@@ -337,13 +367,13 @@
   // expressions.
   if (ConstantExpr *CE = dyn_cast<ConstantExpr>(V)) {
     // TODO: Look in non-address parts, like icmp operands.
-    if (isAddressExpression(*CE) && Visited.insert(CE).second)
+    if (isAddressExpression(*CE, DL, TTI) && Visited.insert(CE).second)
       PostorderStack.emplace_back(CE, false);
 
     return;
   }
 
-  if (isAddressExpression(*V) &&
+  if (isAddressExpression(*V, DL, TTI) &&
       V->getType()->getPointerAddressSpace() == FlatAddrSpace) {
     if (Visited.insert(V).second) {
       PostorderStack.emplace_back(V, false);
@@ -351,7 +381,7 @@
       Operator *Op = cast<Operator>(V);
       for (unsigned I = 0, E = Op->getNumOperands(); I != E; ++I) {
         if (ConstantExpr *CE = dyn_cast<ConstantExpr>(Op->getOperand(I))) {
-          if (isAddressExpression(*CE) && Visited.insert(CE).second)
+          if (isAddressExpression(*CE, DL, TTI) && Visited.insert(CE).second)
             PostorderStack.emplace_back(CE, false);
         }
       }
@@ -407,6 +437,10 @@
     } else if (auto *ASC = dyn_cast<AddrSpaceCastInst>(&I)) {
       if (!ASC->getType()->isVectorTy())
         PushPtrOperand(ASC->getPointerOperand());
+    } else if (auto *I2P = dyn_cast<IntToPtrInst>(&I)) {
+      if (isNoopPtrIntCastPair(cast<Operator>(I2P), DL, TTI))
+        PushPtrOperand(
+            cast<PtrToIntInst>(I2P->getOperand(0))->getPointerOperand());
     }
   }
 
@@ -423,7 +457,7 @@
     }
     // Otherwise, adds its operands to the stack and explores them.
     PostorderStack.back().setInt(true);
-    for (Value *PtrOperand : getPointerOperands(*TopVal)) {
+    for (Value *PtrOperand : getPointerOperands(*TopVal, DL, TTI)) {
       appendsFlatAddressExpressionToPostorderStack(PtrOperand, PostorderStack,
                                                    Visited);
     }
@@ -536,6 +570,14 @@
     assert(I->getType()->isPointerTy());
     return SelectInst::Create(I->getOperand(0), NewPointerOperands[1],
                               NewPointerOperands[2], "", nullptr, I);
+  case Instruction::IntToPtr: {
+    assert(isNoopPtrIntCastPair(cast<Operator>(I), DL, TTI));
+    Value *Src = cast<Operator>(I->getOperand(0))->getOperand(0);
+    assert(Src->getType()->getPointerAddressSpace() == NewAddrSpace);
+    if (Src->getType() != NewPtrType)
+      return new BitCastInst(Src, NewPtrType);
+    return Src;
+  }
   default:
     llvm_unreachable("Unexpected opcode");
   }
@@ -545,8 +587,9 @@
 // constant expression `CE` with its operands replaced as specified in
 // ValueWithNewAddrSpace.
 static Value *cloneConstantExprWithNewAddressSpace(
-  ConstantExpr *CE, unsigned NewAddrSpace,
-  const ValueToValueMapTy &ValueWithNewAddrSpace) {
+    ConstantExpr *CE, unsigned NewAddrSpace,
+    const ValueToValueMapTy &ValueWithNewAddrSpace, const DataLayout *DL,
+    const TargetTransformInfo *TTI) {
   Type *TargetType =
     CE->getType()->getPointerElementType()->getPointerTo(NewAddrSpace);
 
@@ -577,6 +620,13 @@
     }
   }
 
+  if (CE->getOpcode() == Instruction::IntToPtr) {
+    assert(isNoopPtrIntCastPair(cast<Operator>(CE), DL, TTI));
+    Constant *Src = cast<ConstantExpr>(CE->getOperand(0))->getOperand(0);
+    assert(Src->getType()->getPointerAddressSpace() == NewAddrSpace);
+    return ConstantExpr::getBitCast(Src, TargetType);
+  }
+
   // Computes the operands of the new constant expression.
   bool IsNew = false;
   SmallVector<Constant *, 4> NewOperands;
@@ -594,7 +644,7 @@
     }
     if (auto CExpr = dyn_cast<ConstantExpr>(Operand))
       if (Value *NewOperand = cloneConstantExprWithNewAddressSpace(
-              CExpr, NewAddrSpace, ValueWithNewAddrSpace)) {
+              CExpr, NewAddrSpace, ValueWithNewAddrSpace, DL, TTI)) {
         IsNew = true;
         NewOperands.push_back(cast<Constant>(NewOperand));
         continue;
@@ -629,7 +679,7 @@
   const ValueToValueMapTy &ValueWithNewAddrSpace,
   SmallVectorImpl<const Use *> *UndefUsesToFix) const {
   // All values in Postorder are flat address expressions.
-  assert(isAddressExpression(*V) &&
+  assert(isAddressExpression(*V, DL, TTI) &&
          V->getType()->getPointerAddressSpace() == FlatAddrSpace);
 
   if (Instruction *I = dyn_cast<Instruction>(V)) {
@@ -645,7 +695,7 @@
   }
 
   return cloneConstantExprWithNewAddressSpace(
-    cast<ConstantExpr>(V), NewAddrSpace, ValueWithNewAddrSpace);
+      cast<ConstantExpr>(V), NewAddrSpace, ValueWithNewAddrSpace, DL, TTI);
 }
 
 // Defines the join operation on the address space lattice (see the file header
@@ -669,6 +719,7 @@
     return false;
 
   TTI = &getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
+  DL = &F.getParent()->getDataLayout();
 
   if (FlatAddrSpace == UninitializedAddressSpace) {
     FlatAddrSpace = TTI->getFlatAddressSpace();
@@ -773,7 +824,7 @@
     else
       NewAS = joinAddressSpaces(Src0AS, Src1AS);
   } else {
-    for (Value *PtrOperand : getPointerOperands(V)) {
+    for (Value *PtrOperand : getPointerOperands(V, DL, TTI)) {
       auto I = InferredAddrSpace.find(PtrOperand);
       unsigned OperandAS = I != InferredAddrSpace.end() ?
         I->second : PtrOperand->getType()->getPointerAddressSpace();
Index: clang/test/CodeGenCUDA/amdgpu-kernel-arg-pointer-type.cu
===================================================================
--- clang/test/CodeGenCUDA/amdgpu-kernel-arg-pointer-type.cu
+++ clang/test/CodeGenCUDA/amdgpu-kernel-arg-pointer-type.cu
@@ -1,37 +1,52 @@
-// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -fcuda-is-device -emit-llvm -x hip %s -o - | FileCheck %s
+// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -fcuda-is-device -emit-llvm -x hip %s -o - | FileCheck --check-prefixes=COMMON,CHECK %s
+// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -fcuda-is-device -emit-llvm -x hip %s -disable-O0-optnone -o - | opt -S -O2 | FileCheck %s --check-prefixes=COMMON,OPT
 // RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -emit-llvm -x hip %s -o - | FileCheck -check-prefix=HOST %s
 
 #include "Inputs/cuda.h"
 
 // Coerced struct from `struct S` with all generic pointers lowered into
 // global ones.
-// CHECK: %struct.S.coerce = type { i32 addrspace(1)*, float addrspace(1)* }
-// CHECK: %struct.T.coerce = type { [2 x float addrspace(1)*] }
+// COMMON: %struct.S.coerce = type { i32 addrspace(1)*, float addrspace(1)* }
+// COMMON: %struct.T.coerce = type { [2 x float addrspace(1)*] }
 
 // On the host-side compilation, generic pointer won't be coerced.
 // HOST-NOT: %struct.S.coerce
 // HOST-NOT: %struct.T.coerce
 
-// CHECK: define amdgpu_kernel void  @_Z7kernel1Pi(i32 addrspace(1)* %x.coerce)
 // HOST: define void @_Z22__device_stub__kernel1Pi(i32* %x)
+// COMMON-LABEL: define amdgpu_kernel void @_Z7kernel1Pi(i32 addrspace(1)*{{.*}} %x.coerce)
+// CHECK:     = addrspacecast [[TYPE:.*]] addrspace(1)* %{{.*}} to [[TYPE]]*
+// CHECK-NOT: = addrspacecast [[TYPE:.*]] addrspace(1)* %{{.*}} to [[TYPE]]*
+// OPT-NOT: alloca
+// OPT-NOT: inttoptr
 __global__ void kernel1(int *x) {
   x[0]++;
 }
 
-// CHECK: define amdgpu_kernel void  @_Z7kernel2Ri(i32 addrspace(1)* nonnull align 4 dereferenceable(4) %x.coerce)
 // HOST: define void @_Z22__device_stub__kernel2Ri(i32* nonnull align 4 dereferenceable(4) %x)
+// COMMON-LABEL: define amdgpu_kernel void @_Z7kernel2Ri(i32 addrspace(1)*{{.*}} nonnull align 4 dereferenceable(4) %x.coerce)
+// CHECK:     = addrspacecast [[TYPE:.*]] addrspace(1)* %{{.*}} to [[TYPE]]*
+// CHECK-NOT: = addrspacecast [[TYPE:.*]] addrspace(1)* %{{.*}} to [[TYPE]]*
+// OPT-NOT: alloca
+// OPT-NOT: ptrtoint
+// OPT-NOT: inttoptr
 __global__ void kernel2(int &x) {
   x++;
 }
 
-// CHECK: define amdgpu_kernel void  @_Z7kernel3PU3AS2iPU3AS1i(i32 addrspace(2)* %x, i32 addrspace(1)* %y)
 // HOST: define void @_Z22__device_stub__kernel3PU3AS2iPU3AS1i(i32 addrspace(2)* %x, i32 addrspace(1)* %y)
+// CHECK-LABEL: define amdgpu_kernel void @_Z7kernel3PU3AS2iPU3AS1i(i32 addrspace(2)*{{.*}} %x, i32 addrspace(1)*{{.*}} %y)
+// CHECK-NOT: = addrspacecast [[TYPE:.*]] addrspace(1)* %{{.*}} to [[TYPE]]*
 __global__ void kernel3(__attribute__((address_space(2))) int *x,
                         __attribute__((address_space(1))) int *y) {
   y[0] = x[0];
 }
 
-// CHECK: define void @_Z4funcPi(i32* %x)
+// COMMON-LABEL: define void @_Z4funcPi(i32*{{.*}} %x)
+// CHECK-NOT: = addrspacecast [[TYPE:.*]] addrspace(1)* %{{.*}} to [[TYPE]]*
+// OPT-NOT: alloca
+// OPT-NOT: ptrtoint
+// OPT-NOT: inttoptr
 __device__ void func(int *x) {
   x[0]++;
 }
@@ -42,16 +57,22 @@
 };
 // `by-val` struct will be coerced into a similar struct with all generic
 // pointers lowered into global ones.
-// CHECK: define amdgpu_kernel void @_Z7kernel41S(%struct.S.coerce %s.coerce)
 // HOST: define void @_Z22__device_stub__kernel41S(i32* %s.coerce0, float* %s.coerce1)
+// COMMON-LABEL: define amdgpu_kernel void @_Z7kernel41S(%struct.S.coerce %s.coerce)
+// OPT-NOT: alloca
+// OPT-NOT: ptrtoint
+// OPT-NOT: inttoptr
 __global__ void kernel4(struct S s) {
   s.x[0]++;
   s.y[0] += 1.f;
 }
 
 // If a pointer to struct is passed, only the pointer itself is coerced into the global one.
-// CHECK: define amdgpu_kernel void @_Z7kernel5P1S(%struct.S addrspace(1)* %s.coerce)
 // HOST: define void @_Z22__device_stub__kernel5P1S(%struct.S* %s)
+// COMMON-LABEL: define amdgpu_kernel void @_Z7kernel5P1S(%struct.S addrspace(1)*{{.*}} %s.coerce)
+// OPT-NOT: alloca
+// OPT-NOT: ptrtoint
+// OPT-NOT: inttoptr
 __global__ void kernel5(struct S *s) {
   s->x[0]++;
   s->y[0] += 1.f;
@@ -61,16 +82,22 @@
   float *x[2];
 };
 // `by-val` array is also coerced.
-// CHECK: define amdgpu_kernel void @_Z7kernel61T(%struct.T.coerce %t.coerce)
 // HOST: define void @_Z22__device_stub__kernel61T(float* %t.coerce0, float* %t.coerce1)
+// COMMON-LABEL: define amdgpu_kernel void @_Z7kernel61T(%struct.T.coerce %t.coerce)
+// OPT-NOT: alloca
+// OPT-NOT: ptrtoint
+// OPT-NOT: inttoptr
 __global__ void kernel6(struct T t) {
   t.x[0][0] += 1.f;
   t.x[1][0] += 2.f;
 }
 
 // Check that coerced pointers retain the noalias attribute when qualified with __restrict.
-// CHECK: define amdgpu_kernel void @_Z7kernel7Pi(i32 addrspace(1)* noalias %x.coerce)
 // HOST: define void @_Z22__device_stub__kernel7Pi(i32* noalias %x)
+// COMMON-LABEL: define amdgpu_kernel void @_Z7kernel7Pi(i32 addrspace(1)* noalias{{.*}} %x.coerce)
+// OPT-NOT: alloca
+// OPT-NOT: ptrtoint
+// OPT-NOT: inttoptr
 __global__ void kernel7(int *__restrict x) {
   x[0]++;
 }