https://github.com/AlexVlx updated https://github.com/llvm/llvm-project/pull/128166
>From 77db8a15d472e112be93cb7566cf56e26dc80501 Mon Sep 17 00:00:00 2001 From: Alex Voicu <alexandru.vo...@amd.com> Date: Fri, 21 Feb 2025 13:47:42 +0200 Subject: [PATCH 1/5] Handle HLL casts on sret args, remove ill advised cast strip and replace it with later AS cast. --- clang/lib/CodeGen/CGCall.cpp | 18 +++++++-- clang/lib/CodeGen/CGExprAgg.cpp | 11 +----- clang/lib/CodeGen/CGExprScalar.cpp | 17 +++++++++ .../sret_cast_with_nonzero_alloca_as.cpp | 32 ++++++++++++++++ clang/test/OpenMP/amdgcn_sret_ctor.cpp | 38 +++++++++++++++++++ 5 files changed, 104 insertions(+), 12 deletions(-) create mode 100644 clang/test/CodeGenCXX/sret_cast_with_nonzero_alloca_as.cpp create mode 100644 clang/test/OpenMP/amdgcn_sret_ctor.cpp diff --git a/clang/lib/CodeGen/CGCall.cpp b/clang/lib/CodeGen/CGCall.cpp index 47bfd470dbafb..b5a4435e75ea0 100644 --- a/clang/lib/CodeGen/CGCall.cpp +++ b/clang/lib/CodeGen/CGCall.cpp @@ -5163,6 +5163,20 @@ RValue CodeGenFunction::EmitCall(const CGFunctionInfo &CallInfo, } } if (IRFunctionArgs.hasSRetArg()) { + // A mismatch between the allocated return value's AS and the target's + // chosen IndirectAS can happen e.g. when passing the this pointer through + // a chain involving stores to / loads from the DefaultAS; we address this + // here, symmetrically with the handling we have for normal pointer args. + if (SRetPtr.getAddressSpace() != RetAI.getIndirectAddrSpace()) + SRetPtr = SRetPtr.withPointer( + getTargetHooks().performAddrSpaceCast( + *this, SRetPtr.getBasePointer(), + getLangASFromTargetAS(SRetPtr.getAddressSpace()), + getLangASFromTargetAS(RetAI.getIndirectAddrSpace()), + llvm::PointerType::get(getLLVMContext(), + RetAI.getIndirectAddrSpace()), + true), + SRetPtr.isKnownNonNull()); IRCallArgs[IRFunctionArgs.getSRetArgNo()] = getAsNaturalPointerTo(SRetPtr, RetTy); } else if (RetAI.isInAlloca()) { @@ -5394,9 +5408,7 @@ RValue CodeGenFunction::EmitCall(const CGFunctionInfo &CallInfo, V->getType()->isIntegerTy()) V = Builder.CreateZExt(V, ArgInfo.getCoerceToType()); - // The only plausible mismatch here would be for pointer address spaces, - // which can happen e.g. when passing a sret arg that is in the AllocaAS - // to a function that takes a pointer to and argument in the DefaultAS. + // The only plausible mismatch here would be for pointer address spaces. // We assume that the target has a reasonable mapping for the DefaultAS // (it can be casted to from incoming specific ASes), and insert an AS // cast to address the mismatch. diff --git a/clang/lib/CodeGen/CGExprAgg.cpp b/clang/lib/CodeGen/CGExprAgg.cpp index 625ca363d9019..200fb7fd756fd 100644 --- a/clang/lib/CodeGen/CGExprAgg.cpp +++ b/clang/lib/CodeGen/CGExprAgg.cpp @@ -302,15 +302,8 @@ void AggExprEmitter::withReturnValueSlot( llvm::Value *LifetimeSizePtr = nullptr; llvm::IntrinsicInst *LifetimeStartInst = nullptr; if (!UseTemp) { - // It is possible for the existing slot we are using directly to have been - // allocated in the correct AS for an indirect return, and then cast to - // the default AS (this is the behaviour of CreateMemTemp), however we know - // that the return address is expected to point to the uncasted AS, hence we - // strip possible pointer casts here. - if (Dest.getAddress().isValid()) - RetAddr = Dest.getAddress().withPointer( - Dest.getAddress().getBasePointer()->stripPointerCasts(), - Dest.getAddress().isKnownNonNull()); + RetAddr = Dest.getAddress(); + RawAddress RetAllocaAddr = RawAddress::invalid(); } else { RetAddr = CGF.CreateMemTempWithoutCast(RetTy, "tmp"); llvm::TypeSize Size = diff --git a/clang/lib/CodeGen/CGExprScalar.cpp b/clang/lib/CodeGen/CGExprScalar.cpp index 5ee8a1bfa8175..7f8d438e36d6a 100644 --- a/clang/lib/CodeGen/CGExprScalar.cpp +++ b/clang/lib/CodeGen/CGExprScalar.cpp @@ -30,6 +30,7 @@ #include "clang/Basic/CodeGenOptions.h" #include "clang/Basic/TargetInfo.h" #include "llvm/ADT/APFixedPoint.h" +#include "llvm/IR/Argument.h" #include "llvm/IR/CFG.h" #include "llvm/IR/Constants.h" #include "llvm/IR/DataLayout.h" @@ -2352,6 +2353,22 @@ Value *ScalarExprEmitter::VisitCastExpr(CastExpr *CE) { Value *Src = Visit(const_cast<Expr*>(E)); llvm::Type *SrcTy = Src->getType(); llvm::Type *DstTy = ConvertType(DestTy); + + // FIXME: this is a gross but seemingly necessary workaround for an issue + // manifesting when a target uses a non-default AS for indirect sret args, + // but the source HLL is generic, wherein a valid C-cast or reinterpret_cast + // on the address of a local struct that gets returned by value yields an + // invalid bitcast from the a pointer to the IndirectAS to a pointer to the + // DefaultAS. We can only do this subversive thing because sret args are + // manufactured and them residing in the IndirectAS is a target specific + // detail, and doing an AS cast here still retains the semantics the user + // expects. It is desirable to remove this iff a better solution is found. + if (SrcTy != DstTy && isa<llvm::Argument>(Src) && + cast<llvm::Argument>(Src)->hasStructRetAttr()) + return CGF.CGM.getTargetCodeGenInfo().performAddrSpaceCast( + CGF, Src, E->getType().getAddressSpace(), DestTy.getAddressSpace(), + DstTy); + assert( (!SrcTy->isPtrOrPtrVectorTy() || !DstTy->isPtrOrPtrVectorTy() || SrcTy->getPointerAddressSpace() == DstTy->getPointerAddressSpace()) && diff --git a/clang/test/CodeGenCXX/sret_cast_with_nonzero_alloca_as.cpp b/clang/test/CodeGenCXX/sret_cast_with_nonzero_alloca_as.cpp new file mode 100644 index 0000000000000..320c712b665de --- /dev/null +++ b/clang/test/CodeGenCXX/sret_cast_with_nonzero_alloca_as.cpp @@ -0,0 +1,32 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 5 +// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa-gnu -target-cpu gfx900 -emit-llvm -o - %s | FileCheck %s + +struct X { int z[17]; }; + +// CHECK-LABEL: define dso_local void @_Z3foocc( +// CHECK-SAME: ptr addrspace(5) dead_on_unwind noalias writable sret([[STRUCT_X:%.*]]) align 4 [[AGG_RESULT:%.*]], i8 noundef signext [[X:%.*]], i8 noundef signext [[Y:%.*]]) #[[ATTR0:[0-9]+]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[X_ADDR:%.*]] = alloca i8, align 1, addrspace(5) +// CHECK-NEXT: [[Y_ADDR:%.*]] = alloca i8, align 1, addrspace(5) +// CHECK-NEXT: [[X_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[X_ADDR]] to ptr +// CHECK-NEXT: [[Y_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[Y_ADDR]] to ptr +// CHECK-NEXT: store i8 [[X]], ptr [[X_ADDR_ASCAST]], align 1 +// CHECK-NEXT: store i8 [[Y]], ptr [[Y_ADDR_ASCAST]], align 1 +// CHECK-NEXT: [[TMP0:%.*]] = load i8, ptr [[X_ADDR_ASCAST]], align 1 +// CHECK-NEXT: [[AGG_RESULT_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[AGG_RESULT]] to ptr +// CHECK-NEXT: [[ADD_PTR:%.*]] = getelementptr inbounds i8, ptr [[AGG_RESULT_ASCAST]], i64 1 +// CHECK-NEXT: store i8 [[TMP0]], ptr [[ADD_PTR]], align 1 +// CHECK-NEXT: [[TMP1:%.*]] = load i8, ptr [[Y_ADDR_ASCAST]], align 1 +// CHECK-NEXT: [[AGG_RESULT_ASCAST1:%.*]] = addrspacecast ptr addrspace(5) [[AGG_RESULT]] to ptr +// CHECK-NEXT: [[ADD_PTR2:%.*]] = getelementptr inbounds i8, ptr [[AGG_RESULT_ASCAST1]], i64 2 +// CHECK-NEXT: store i8 [[TMP1]], ptr [[ADD_PTR2]], align 1 +// CHECK-NEXT: ret void +// +X foo(char x, char y) { + X r; + + *(((char*)&r) + 1) = x; + *(reinterpret_cast<char*>(&r) + 2) = y; + + return r; +} diff --git a/clang/test/OpenMP/amdgcn_sret_ctor.cpp b/clang/test/OpenMP/amdgcn_sret_ctor.cpp new file mode 100644 index 0000000000000..0eb4748571d56 --- /dev/null +++ b/clang/test/OpenMP/amdgcn_sret_ctor.cpp @@ -0,0 +1,38 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --include-generated-funcs --version 5 +// RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -triple amdgcn-amd-amdhsa -fopenmp-targets=amdgcn-amd-amdhsa -emit-llvm %s -fopenmp-is-target-device -o - | FileCheck %s + +#pragma omp begin declare target +struct S { + ~S(); +}; +S s(); +struct E { + S foo; + E() noexcept; +}; +E::E() noexcept : foo(s()) {} +#pragma omp end declare target +// CHECK-LABEL: define hidden void @_ZN1EC2Ev( +// CHECK-SAME: ptr noundef nonnull align 1 dereferenceable(1) [[THIS:%.*]]) unnamed_addr #[[ATTR0:[0-9]+]] align 2 { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) +// CHECK-NEXT: [[THIS_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[THIS_ADDR]] to ptr +// CHECK-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR_ASCAST]], align 8 +// CHECK-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR_ASCAST]], align 8 +// CHECK-NEXT: [[THIS1_ASCAST:%.*]] = addrspacecast ptr [[THIS1]] to ptr addrspace(5) +// CHECK-NEXT: call void @_Z1sv(ptr addrspace(5) dead_on_unwind writable sret([[STRUCT_S:%.*]]) align 1 [[THIS1_ASCAST]]) #[[ATTR2:[0-9]+]] +// CHECK-NEXT: ret void +// +// CHECK-LABEL: declare void @_Z1sv( +// CHECK-SAME: ptr addrspace(5) dead_on_unwind writable sret([[STRUCT_S]]) align 1) #[[ATTR1:[0-9]+]] +// +// CHECK-LABEL: define hidden void @_ZN1EC1Ev( +// CHECK-SAME: ptr noundef nonnull align 1 dereferenceable(1) [[THIS:%.*]]) unnamed_addr #[[ATTR0]] align 2 { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) +// CHECK-NEXT: [[THIS_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[THIS_ADDR]] to ptr +// CHECK-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR_ASCAST]], align 8 +// CHECK-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR_ASCAST]], align 8 +// CHECK-NEXT: call void @_ZN1EC2Ev(ptr noundef nonnull align 1 dereferenceable(1) [[THIS1]]) #[[ATTR3:[0-9]+]] +// CHECK-NEXT: ret void +// >From 5b186a3f00b83631476e5c131ab73ec6a4345350 Mon Sep 17 00:00:00 2001 From: Alex Voicu <alexandru.vo...@amd.com> Date: Fri, 21 Feb 2025 14:54:27 +0200 Subject: [PATCH 2/5] Fix test. --- clang/test/CodeGen/partial-reinitialization2.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/clang/test/CodeGen/partial-reinitialization2.c b/clang/test/CodeGen/partial-reinitialization2.c index 7949a69555031..8d8e04f24541a 100644 --- a/clang/test/CodeGen/partial-reinitialization2.c +++ b/clang/test/CodeGen/partial-reinitialization2.c @@ -91,8 +91,8 @@ void test5(void) // CHECK-LABEL: test6 void test6(void) { - // CHECK: [[VAR:%[a-z0-9]+]] = alloca - // CHECK: call {{.*}}get456789(ptr {{.*}}sret{{.*}} [[VAR]]) + // CHECK: [[LP:%[a-z0-9]+]] = getelementptr{{.*}}%struct.LLP2P2, ptr{{.*}}, i32 0, i32 0 + // CHECK: call {{.*}}get456789(ptr {{.*}}sret{{.*}} [[LP]]) // CHECK: [[CALL:%[a-z0-9]+]] = call {{.*}}@get235() // CHECK: store{{.*}}[[CALL]], {{.*}}[[TMP0:%[a-z0-9.]+]] >From 2dd8052e82ad721d059bddf7f356aba25f5fd408 Mon Sep 17 00:00:00 2001 From: Alex Voicu <alexandru.vo...@amd.com> Date: Fri, 21 Feb 2025 14:59:19 +0200 Subject: [PATCH 3/5] Unwrap/expand. --- clang/lib/CodeGen/CGCall.cpp | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) diff --git a/clang/lib/CodeGen/CGCall.cpp b/clang/lib/CodeGen/CGCall.cpp index b5a4435e75ea0..88b1a8aebfa50 100644 --- a/clang/lib/CodeGen/CGCall.cpp +++ b/clang/lib/CodeGen/CGCall.cpp @@ -5167,16 +5167,17 @@ RValue CodeGenFunction::EmitCall(const CGFunctionInfo &CallInfo, // chosen IndirectAS can happen e.g. when passing the this pointer through // a chain involving stores to / loads from the DefaultAS; we address this // here, symmetrically with the handling we have for normal pointer args. - if (SRetPtr.getAddressSpace() != RetAI.getIndirectAddrSpace()) + if (SRetPtr.getAddressSpace() != RetAI.getIndirectAddrSpace()) { + llvm::Value *V = SRetPtr.getBasePointer(); + LangAS SAS = getLangASFromTargetAS(SRetPtr.getAddressSpace()); + LangAS DAS = getLangASFromTargetAS(RetAI.getIndirectAddrSpace()); + llvm::Type *Ty = llvm::PointerType::get(getLLVMContext(), + RetAI.getIndirectAddrSpace()); + SRetPtr = SRetPtr.withPointer( - getTargetHooks().performAddrSpaceCast( - *this, SRetPtr.getBasePointer(), - getLangASFromTargetAS(SRetPtr.getAddressSpace()), - getLangASFromTargetAS(RetAI.getIndirectAddrSpace()), - llvm::PointerType::get(getLLVMContext(), - RetAI.getIndirectAddrSpace()), - true), + getTargetHooks().performAddrSpaceCast(*this, V, SAS, DAS, Ty, true), SRetPtr.isKnownNonNull()); + } IRCallArgs[IRFunctionArgs.getSRetArgNo()] = getAsNaturalPointerTo(SRetPtr, RetTy); } else if (RetAI.isInAlloca()) { >From b845baaa2cc922f96b315acd35725fdc58c3ba97 Mon Sep 17 00:00:00 2001 From: Alex Voicu <alexandru.vo...@amd.com> Date: Fri, 21 Feb 2025 15:13:18 +0200 Subject: [PATCH 4/5] Clean up `-cc1` invocation in OMP test. --- clang/test/OpenMP/amdgcn_sret_ctor.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/clang/test/OpenMP/amdgcn_sret_ctor.cpp b/clang/test/OpenMP/amdgcn_sret_ctor.cpp index 0eb4748571d56..99ca31b78e1fc 100644 --- a/clang/test/OpenMP/amdgcn_sret_ctor.cpp +++ b/clang/test/OpenMP/amdgcn_sret_ctor.cpp @@ -1,5 +1,5 @@ // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --include-generated-funcs --version 5 -// RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -triple amdgcn-amd-amdhsa -fopenmp-targets=amdgcn-amd-amdhsa -emit-llvm %s -fopenmp-is-target-device -o - | FileCheck %s +// RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -triple amdgcn-amd-amdhsa -emit-llvm %s -fopenmp-is-target-device -o - | FileCheck %s #pragma omp begin declare target struct S { >From 10e75d6255a0f139e3c5c1199073b4954a804ce1 Mon Sep 17 00:00:00 2001 From: Alex Voicu <alexandru.vo...@amd.com> Date: Wed, 26 Feb 2025 16:50:22 +0000 Subject: [PATCH 5/5] Use exciting and modern C++17. --- clang/lib/CodeGen/CGExprScalar.cpp | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/clang/lib/CodeGen/CGExprScalar.cpp b/clang/lib/CodeGen/CGExprScalar.cpp index 7f8d438e36d6a..6646057b9d772 100644 --- a/clang/lib/CodeGen/CGExprScalar.cpp +++ b/clang/lib/CodeGen/CGExprScalar.cpp @@ -2363,8 +2363,7 @@ Value *ScalarExprEmitter::VisitCastExpr(CastExpr *CE) { // manufactured and them residing in the IndirectAS is a target specific // detail, and doing an AS cast here still retains the semantics the user // expects. It is desirable to remove this iff a better solution is found. - if (SrcTy != DstTy && isa<llvm::Argument>(Src) && - cast<llvm::Argument>(Src)->hasStructRetAttr()) + if (auto A = dyn_cast<llvm::Argument>(Src); A && A->hasStructRetAttr()) return CGF.CGM.getTargetCodeGenInfo().performAddrSpaceCast( CGF, Src, E->getType().getAddressSpace(), DestTy.getAddressSpace(), DstTy); _______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits