hliao updated this revision to Diff 227795.
hliao added a comment.
- revise member function name.
- add the test case for by-val array types.
Repository:
rG LLVM Github Monorepo
CHANGES SINCE LAST ACTION
https://reviews.llvm.org/D69826/new/
https://reviews.llvm.org/D69826
Files:
clang/lib/CodeGen/CGCall.cpp
clang/lib/CodeGen/TargetInfo.cpp
clang/test/CodeGenCUDA/amdgpu-kernel-arg-pointer-type.cu
Index: clang/test/CodeGenCUDA/amdgpu-kernel-arg-pointer-type.cu
===================================================================
--- /dev/null
+++ clang/test/CodeGenCUDA/amdgpu-kernel-arg-pointer-type.cu
@@ -0,0 +1,58 @@
+// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -fcuda-is-device \
+// RUN: -emit-llvm -x hip %s -o - | FileCheck %s
+#include "Inputs/cuda.h"
+
+// Coerced struct from `struct S` without all generic pointers lowered into
+// global ones.
+// CHECK: %struct.S.coerce = type { i32 addrspace(1)*, float addrspace(1)* }
+// CHECK: %struct.T.coerce = type { [2 x float addrspace(1)*] }
+
+// CHECK: define amdgpu_kernel void @_Z7kernel1Pi(i32 addrspace(1)* %x.coerce)
+__global__ void kernel1(int *x) {
+ x[0]++;
+}
+
+// CHECK: define amdgpu_kernel void @_Z7kernel2Ri(i32 addrspace(1)* dereferenceable(4) %x.coerce)
+__global__ void kernel2(int &x) {
+ x++;
+}
+
+// CHECK: define amdgpu_kernel void @_Z7kernel3PU3AS2iPU3AS1i(i32 addrspace(2)* %x, i32 addrspace(1)* %y)
+__global__ void kernel3(__attribute__((address_space(2))) int *x,
+ __attribute__((address_space(1))) int *y) {
+ y[0] = x[0];
+}
+
+// CHECK: define void @_Z4funcPi(i32* %x)
+__device__ void func(int *x) {
+ x[0]++;
+}
+
+struct S {
+ int *x;
+ float *y;
+};
+// `by-val` struct will be coerced into a similar struct with all generic
+// pointers lowerd into global ones.
+// CHECK: define amdgpu_kernel void @_Z7kernel41S(%struct.S.coerce %s.coerce)
+__global__ void kernel4(struct S s) {
+ s.x[0]++;
+ s.y[0] += 1.f;
+}
+
+// If a pointer to struct is passed, only the pointer itself is coerced into the global one.
+// CHECK: define amdgpu_kernel void @_Z7kernel5P1S(%struct.S addrspace(1)* %s.coerce)
+__global__ void kernel5(struct S *s) {
+ s->x[0]++;
+ s->y[0] += 1.f;
+}
+
+struct T {
+ float *x[2];
+};
+// `by-val` array is also coerced.
+// CHECK: define amdgpu_kernel void @_Z7kernel61T(%struct.T.coerce %t.coerce)
+__global__ void kernel6(struct T t) {
+ t.x[0][0] += 1.f;
+ t.x[1][0] += 2.f;
+}
Index: clang/lib/CodeGen/TargetInfo.cpp
===================================================================
--- clang/lib/CodeGen/TargetInfo.cpp
+++ clang/lib/CodeGen/TargetInfo.cpp
@@ -7685,6 +7685,42 @@
bool isHomogeneousAggregateSmallEnough(const Type *Base,
uint64_t Members) const override;
+ // Coerce HIP pointer arguments from generic pointers to global ones.
+ llvm::Type *coerceKernelArgumentType(llvm::Type *Ty, unsigned DefaultAS,
+ unsigned GlobalAS) const {
+ // Structure types.
+ if (auto STy = dyn_cast<llvm::StructType>(Ty)) {
+ SmallVector<llvm::Type *, 8> EltTys;
+ bool Changed = false;
+ for (auto T : STy->elements()) {
+ auto NT = coerceKernelArgumentType(T, DefaultAS, GlobalAS);
+ EltTys.push_back(NT);
+ Changed |= (NT != T);
+ }
+ // Skip if there is no change in element types.
+ if (!Changed)
+ return STy;
+ if (STy->hasName())
+ return llvm::StructType::create(
+ EltTys, (STy->getName() + ".coerce").str(), STy->isPacked());
+ return llvm::StructType::get(getVMContext(), EltTys, STy->isPacked());
+ }
+ // Arrary types.
+ if (auto ATy = dyn_cast<llvm::ArrayType>(Ty)) {
+ auto T = ATy->getElementType();
+ auto NT = coerceKernelArgumentType(T, DefaultAS, GlobalAS);
+ // Skip if there is no change in that element type.
+ if (NT == T)
+ return ATy;
+ return llvm::ArrayType::get(NT, ATy->getNumElements());
+ }
+ // Single value types.
+ if (Ty->isPointerTy() && Ty->getPointerAddressSpace() == DefaultAS)
+ return llvm::PointerType::get(
+ cast<llvm::PointerType>(Ty)->getElementType(), GlobalAS);
+ return Ty;
+ }
+
public:
explicit AMDGPUABIInfo(CodeGen::CodeGenTypes &CGT) :
DefaultABIInfo(CGT) {}
@@ -7812,14 +7848,22 @@
// TODO: Can we omit empty structs?
- // Coerce single element structs to its element.
+ llvm::Type *LTy = nullptr;
if (const Type *SeltTy = isSingleElementStruct(Ty, getContext()))
- return ABIArgInfo::getDirect(CGT.ConvertType(QualType(SeltTy, 0)));
+ LTy = CGT.ConvertType(QualType(SeltTy, 0));
+
+ if (getContext().getLangOpts().HIP) {
+ if (!LTy)
+ LTy = CGT.ConvertType(Ty);
+ LTy = coerceKernelArgumentType(
+ LTy, getContext().getTargetAddressSpace(LangAS::Default),
+ getContext().getTargetAddressSpace(LangAS::cuda_device));
+ }
// If we set CanBeFlattened to true, CodeGen will expand the struct to its
// individual elements, which confuses the Clover OpenCL backend; therefore we
// have to set it to false here. Other args of getDirect() are just defaults.
- return ABIArgInfo::getDirect(nullptr, 0, nullptr, false);
+ return ABIArgInfo::getDirect(LTy, 0, nullptr, false);
}
ABIArgInfo AMDGPUABIInfo::classifyArgumentType(QualType Ty,
Index: clang/lib/CodeGen/CGCall.cpp
===================================================================
--- clang/lib/CodeGen/CGCall.cpp
+++ clang/lib/CodeGen/CGCall.cpp
@@ -1305,6 +1305,15 @@
DstTy = Dst.getType()->getElementType();
}
+ llvm::PointerType *SrcPtrTy = llvm::dyn_cast<llvm::PointerType>(SrcTy);
+ llvm::PointerType *DstPtrTy = llvm::dyn_cast<llvm::PointerType>(DstTy);
+ if (SrcPtrTy && DstPtrTy &&
+ SrcPtrTy->getAddressSpace() != DstPtrTy->getAddressSpace()) {
+ Src = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(Src, DstTy);
+ CGF.Builder.CreateStore(Src, Dst, DstIsVolatile);
+ return;
+ }
+
// If the source and destination are integer or pointer types, just do an
// extension or truncation to the desired type.
if ((isa<llvm::IntegerType>(SrcTy) || isa<llvm::PointerType>(SrcTy)) &&
_______________________________________________
cfe-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits