https://github.com/AlexVlx updated https://github.com/llvm/llvm-project/pull/114062
>From d2d2d3d5db3f639aab178f9ca9a20db2842d2b65 Mon Sep 17 00:00:00 2001 From: Alex Voicu <alexandru.vo...@amd.com> Date: Tue, 29 Oct 2024 14:20:44 +0000 Subject: [PATCH 01/10] `sret` args should always point to the `alloca` AS, so we can use that. --- clang/lib/CodeGen/CGCall.cpp | 15 ++++++++------- clang/test/CodeGen/partial-reinitialization2.c | 4 ++-- clang/test/CodeGen/sret.c | 11 +++++++++++ 3 files changed, 21 insertions(+), 9 deletions(-) diff --git a/clang/lib/CodeGen/CGCall.cpp b/clang/lib/CodeGen/CGCall.cpp index 8f4f5d3ed81601..56acfae7ae9e51 100644 --- a/clang/lib/CodeGen/CGCall.cpp +++ b/clang/lib/CodeGen/CGCall.cpp @@ -1672,8 +1672,7 @@ CodeGenTypes::GetFunctionType(const CGFunctionInfo &FI) { // Add type for sret argument. if (IRFunctionArgs.hasSRetArg()) { - QualType Ret = FI.getReturnType(); - unsigned AddressSpace = CGM.getTypes().getTargetAddressSpace(Ret); + unsigned AddressSpace = CGM.getDataLayout().getAllocaAddrSpace(); ArgTypes[IRFunctionArgs.getSRetArgNo()] = llvm::PointerType::get(getLLVMContext(), AddressSpace); } @@ -5145,7 +5144,6 @@ RValue CodeGenFunction::EmitCall(const CGFunctionInfo &CallInfo, // If the call returns a temporary with struct return, create a temporary // alloca to hold the result, unless one is given to us. 
Address SRetPtr = Address::invalid(); - RawAddress SRetAlloca = RawAddress::invalid(); llvm::Value *UnusedReturnSizePtr = nullptr; if (RetAI.isIndirect() || RetAI.isInAlloca() || RetAI.isCoerceAndExpand()) { // For virtual function pointer thunks and musttail calls, we must always @@ -5159,16 +5157,19 @@ RValue CodeGenFunction::EmitCall(const CGFunctionInfo &CallInfo, } else if (!ReturnValue.isNull()) { SRetPtr = ReturnValue.getAddress(); } else { - SRetPtr = CreateMemTemp(RetTy, "tmp", &SRetAlloca); + SRetPtr = CreateMemTempWithoutCast(RetTy, "tmp"); if (HaveInsertPoint() && ReturnValue.isUnused()) { llvm::TypeSize size = CGM.getDataLayout().getTypeAllocSize(ConvertTypeForMem(RetTy)); - UnusedReturnSizePtr = EmitLifetimeStart(size, SRetAlloca.getPointer()); + UnusedReturnSizePtr = EmitLifetimeStart(size, SRetPtr.getBasePointer()); } } if (IRFunctionArgs.hasSRetArg()) { + // If the caller allocated the return slot, it is possible that the + // alloca was AS casted to the default as, so we ensure the cast is + // stripped before binding to the sret arg, which is in the allocaAS. IRCallArgs[IRFunctionArgs.getSRetArgNo()] = - getAsNaturalPointerTo(SRetPtr, RetTy); + getAsNaturalPointerTo(SRetPtr, RetTy)->stripPointerCasts(); } else if (RetAI.isInAlloca()) { Address Addr = Builder.CreateStructGEP(ArgMemory, RetAI.getInAllocaFieldIndex()); @@ -5740,7 +5741,7 @@ RValue CodeGenFunction::EmitCall(const CGFunctionInfo &CallInfo, // pop this cleanup later on. Being eager about this is OK, since this // temporary is 'invisible' outside of the callee. if (UnusedReturnSizePtr) - pushFullExprCleanup<CallLifetimeEnd>(NormalEHLifetimeMarker, SRetAlloca, + pushFullExprCleanup<CallLifetimeEnd>(NormalEHLifetimeMarker, SRetPtr, UnusedReturnSizePtr); llvm::BasicBlock *InvokeDest = CannotThrow ? 
nullptr : getInvokeDest(); diff --git a/clang/test/CodeGen/partial-reinitialization2.c b/clang/test/CodeGen/partial-reinitialization2.c index e709c1d4ad1ee1..7949a69555031e 100644 --- a/clang/test/CodeGen/partial-reinitialization2.c +++ b/clang/test/CodeGen/partial-reinitialization2.c @@ -91,8 +91,8 @@ void test5(void) // CHECK-LABEL: test6 void test6(void) { - // CHECK: [[LP:%[a-z0-9]+]] = getelementptr{{.*}}%struct.LLP2P2, ptr{{.*}}, i32 0, i32 0 - // CHECK: call {{.*}}get456789(ptr {{.*}}[[LP]]) + // CHECK: [[VAR:%[a-z0-9]+]] = alloca + // CHECK: call {{.*}}get456789(ptr {{.*}}sret{{.*}} [[VAR]]) // CHECK: [[CALL:%[a-z0-9]+]] = call {{.*}}@get235() // CHECK: store{{.*}}[[CALL]], {{.*}}[[TMP0:%[a-z0-9.]+]] diff --git a/clang/test/CodeGen/sret.c b/clang/test/CodeGen/sret.c index 6d905e89b2c6fd..3b4914f29d2bfe 100644 --- a/clang/test/CodeGen/sret.c +++ b/clang/test/CodeGen/sret.c @@ -1,4 +1,5 @@ // RUN: %clang_cc1 %s -Wno-strict-prototypes -emit-llvm -o - | FileCheck %s +// RUN: %clang_cc1 %s -Wno-strict-prototypes -triple amdgcn-amd-amdhsa -emit-llvm -o - | FileCheck --check-prefix=NONZEROALLOCAAS %s struct abc { long a; @@ -6,18 +7,28 @@ struct abc { long c; long d; long e; + long f; + long g; + long h; + long i; + long j; }; struct abc foo1(void); // CHECK-DAG: declare {{.*}} @foo1(ptr dead_on_unwind writable sret(%struct.abc) +// NONZEROALLOCAAS-DAG: declare {{.*}} @foo1(ptr addrspace(5) dead_on_unwind writable sret(%struct.abc) struct abc foo2(); // CHECK-DAG: declare {{.*}} @foo2(ptr dead_on_unwind writable sret(%struct.abc) +// NONZEROALLOCAAS-DAG: declare {{.*}} @foo2(ptr addrspace(5) dead_on_unwind writable sret(%struct.abc) struct abc foo3(void){} // CHECK-DAG: define {{.*}} @foo3(ptr dead_on_unwind noalias writable sret(%struct.abc) +// NONZEROALLOCAAS-DAG: define {{.*}} @foo3(ptr addrspace(5) dead_on_unwind noalias writable sret(%struct.abc) void bar(void) { struct abc dummy1 = foo1(); // CHECK-DAG: call {{.*}} @foo1(ptr dead_on_unwind writable 
sret(%struct.abc) + // NONZEROALLOCAAS-DAG: call {{.*}} @foo1(ptr addrspace(5) dead_on_unwind writable sret(%struct.abc) struct abc dummy2 = foo2(); // CHECK-DAG: call {{.*}} @foo2(ptr dead_on_unwind writable sret(%struct.abc) + // NONZEROALLOCAAS-DAG: call {{.*}} @foo2(ptr addrspace(5) dead_on_unwind writable sret(%struct.abc) } >From b5a7df0a771cb70d60e58a8727a5d856219dacb3 Mon Sep 17 00:00:00 2001 From: Alex Voicu <alexandru.vo...@amd.com> Date: Tue, 29 Oct 2024 17:16:17 +0000 Subject: [PATCH 02/10] Fix broken tests. --- clang/test/CodeGenOpenCL/addr-space-struct-arg.cl | 4 ++-- clang/test/CodeGenOpenCL/amdgpu-abi-struct-arg-byref.cl | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/clang/test/CodeGenOpenCL/addr-space-struct-arg.cl b/clang/test/CodeGenOpenCL/addr-space-struct-arg.cl index 57d056b0ff9d51..4a1db2c3564a57 100644 --- a/clang/test/CodeGenOpenCL/addr-space-struct-arg.cl +++ b/clang/test/CodeGenOpenCL/addr-space-struct-arg.cl @@ -250,7 +250,7 @@ kernel void ker(global Mat3X3 *in, global Mat4X4 *out) { // AMDGCN-NEXT: ret void // // AMDGCN20-LABEL: define dso_local void @foo_large( -// AMDGCN20-SAME: ptr dead_on_unwind noalias writable sret([[STRUCT_MAT64X64:%.*]]) align 4 [[AGG_RESULT:%.*]], ptr addrspace(5) noundef byref([[STRUCT_MAT32X32:%.*]]) align 4 [[TMP0:%.*]]) #[[ATTR0]] { +// AMDGCN20-SAME: ptr addrspace(5) dead_on_unwind noalias writable sret([[STRUCT_MAT64X64:%.*]]) align 4 [[AGG_RESULT:%.*]], ptr addrspace(5) noundef byref([[STRUCT_MAT32X32:%.*]]) align 4 [[TMP0:%.*]]) #[[ATTR0]] { // AMDGCN20-NEXT: [[ENTRY:.*:]] // AMDGCN20-NEXT: [[COERCE:%.*]] = alloca [[STRUCT_MAT32X32]], align 4, addrspace(5) // AMDGCN20-NEXT: [[IN:%.*]] = addrspacecast ptr addrspace(5) [[COERCE]] to ptr @@ -335,7 +335,7 @@ Mat64X64 __attribute__((noinline)) foo_large(Mat32X32 in) { // AMDGCN20-NEXT: [[TMP1:%.*]] = load ptr addrspace(1), ptr [[IN_ADDR_ASCAST]], align 8 // AMDGCN20-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds [[STRUCT_MAT32X32]], 
ptr addrspace(1) [[TMP1]], i64 1 // AMDGCN20-NEXT: call void @llvm.memcpy.p5.p1.i64(ptr addrspace(5) align 4 [[BYVAL_TEMP]], ptr addrspace(1) align 4 [[ARRAYIDX1]], i64 4096, i1 false) -// AMDGCN20-NEXT: call void @foo_large(ptr dead_on_unwind writable sret([[STRUCT_MAT64X64]]) align 4 [[TMP_ASCAST]], ptr addrspace(5) noundef byref([[STRUCT_MAT32X32]]) align 4 [[BYVAL_TEMP]]) #[[ATTR3]] +// AMDGCN20-NEXT: call void @foo_large(ptr addrspace(5) dead_on_unwind writable sret([[STRUCT_MAT64X64]]) align 4 [[TMP]], ptr addrspace(5) noundef byref([[STRUCT_MAT32X32]]) align 4 [[BYVAL_TEMP]]) #[[ATTR3]] // AMDGCN20-NEXT: call void @llvm.memcpy.p1.p0.i64(ptr addrspace(1) align 4 [[ARRAYIDX]], ptr align 4 [[TMP_ASCAST]], i64 16384, i1 false) // AMDGCN20-NEXT: ret void // diff --git a/clang/test/CodeGenOpenCL/amdgpu-abi-struct-arg-byref.cl b/clang/test/CodeGenOpenCL/amdgpu-abi-struct-arg-byref.cl index 084281a8cada46..c2b2e00d15e13f 100644 --- a/clang/test/CodeGenOpenCL/amdgpu-abi-struct-arg-byref.cl +++ b/clang/test/CodeGenOpenCL/amdgpu-abi-struct-arg-byref.cl @@ -91,7 +91,7 @@ kernel void ker(global Mat3X3 *in, global Mat4X4 *out) { } // AMDGCN-LABEL: define dso_local void @foo_large( -// AMDGCN-SAME: ptr dead_on_unwind noalias writable sret([[STRUCT_MAT64X64:%.*]]) align 4 [[AGG_RESULT:%.*]], ptr addrspace(5) noundef byref([[STRUCT_MAT32X32:%.*]]) align 4 [[TMP0:%.*]]) #[[ATTR0]] { +// AMDGCN-SAME: ptr addrspace(5) dead_on_unwind noalias writable sret([[STRUCT_MAT64X64:%.*]]) align 4 [[AGG_RESULT:%.*]], ptr addrspace(5) noundef byref([[STRUCT_MAT32X32:%.*]]) align 4 [[TMP0:%.*]]) #[[ATTR0]] { // AMDGCN-NEXT: [[ENTRY:.*:]] // AMDGCN-NEXT: [[COERCE:%.*]] = alloca [[STRUCT_MAT32X32]], align 4, addrspace(5) // AMDGCN-NEXT: [[IN:%.*]] = addrspacecast ptr addrspace(5) [[COERCE]] to ptr @@ -120,7 +120,7 @@ Mat64X64 __attribute__((noinline)) foo_large(Mat32X32 in) { // AMDGCN-NEXT: [[TMP1:%.*]] = load ptr addrspace(1), ptr [[IN_ADDR_ASCAST]], align 8 // AMDGCN-NEXT: 
[[ARRAYIDX1:%.*]] = getelementptr inbounds [[STRUCT_MAT32X32]], ptr addrspace(1) [[TMP1]], i64 1 // AMDGCN-NEXT: call void @llvm.memcpy.p5.p1.i64(ptr addrspace(5) align 4 [[BYVAL_TEMP]], ptr addrspace(1) align 4 [[ARRAYIDX1]], i64 4096, i1 false) -// AMDGCN-NEXT: call void @foo_large(ptr dead_on_unwind writable sret([[STRUCT_MAT64X64]]) align 4 [[TMP_ASCAST]], ptr addrspace(5) noundef byref([[STRUCT_MAT32X32]]) align 4 [[BYVAL_TEMP]]) #[[ATTR3]] +// AMDGCN-NEXT: call void @foo_large(ptr addrspace(5) dead_on_unwind writable sret([[STRUCT_MAT64X64]]) align 4 [[TMP]], ptr addrspace(5) noundef byref([[STRUCT_MAT32X32]]) align 4 [[BYVAL_TEMP]]) #[[ATTR3]] // AMDGCN-NEXT: call void @llvm.memcpy.p1.p0.i64(ptr addrspace(1) align 4 [[ARRAYIDX]], ptr align 4 [[TMP_ASCAST]], i64 16384, i1 false) // AMDGCN-NEXT: ret void // >From 2de33d4cfb210dc50a55b9ba87fa0d086d4b8d9f Mon Sep 17 00:00:00 2001 From: Alex Voicu <alexandru.vo...@amd.com> Date: Wed, 30 Oct 2024 00:10:59 +0000 Subject: [PATCH 03/10] Handle passing an `alloca`ed `sret` arg directly to a callee that expects a pointer to the default AS. --- clang/lib/CodeGen/CGCall.cpp | 16 ++++++++++++---- clang/test/CodeGenCXX/no-elide-constructors.cpp | 4 ++++ 2 files changed, 16 insertions(+), 4 deletions(-) diff --git a/clang/lib/CodeGen/CGCall.cpp b/clang/lib/CodeGen/CGCall.cpp index 56acfae7ae9e51..7171d85b0d0ab0 100644 --- a/clang/lib/CodeGen/CGCall.cpp +++ b/clang/lib/CodeGen/CGCall.cpp @@ -5391,11 +5391,19 @@ RValue CodeGenFunction::EmitCall(const CGFunctionInfo &CallInfo, V->getType()->isIntegerTy()) V = Builder.CreateZExt(V, ArgInfo.getCoerceToType()); - // If the argument doesn't match, perform a bitcast to coerce it. This - // can happen due to trivial type mismatches. 
+ // If the argument doesn't match, we are either trying to pass an + // alloca-ed sret argument directly, and the alloca AS does not match + // the default AS, case in which we AS cast it, or we have a trivial + // type mismatch, and thus perform a bitcast to coerce it. if (FirstIRArg < IRFuncTy->getNumParams() && - V->getType() != IRFuncTy->getParamType(FirstIRArg)) - V = Builder.CreateBitCast(V, IRFuncTy->getParamType(FirstIRArg)); + V->getType() != IRFuncTy->getParamType(FirstIRArg)) { + auto IRTy = IRFuncTy->getParamType(FirstIRArg); + auto MaybeSRetArg = dyn_cast_or_null<llvm::Argument>(V); + if (MaybeSRetArg && MaybeSRetArg->hasStructRetAttr()) + V = Builder.CreateAddrSpaceCast(V, IRTy); + else + V = Builder.CreateBitCast(V, IRTy); + } if (ArgHasMaybeUndefAttr) V = Builder.CreateFreeze(V); diff --git a/clang/test/CodeGenCXX/no-elide-constructors.cpp b/clang/test/CodeGenCXX/no-elide-constructors.cpp index 750392a43e05cc..098163f957f759 100644 --- a/clang/test/CodeGenCXX/no-elide-constructors.cpp +++ b/clang/test/CodeGenCXX/no-elide-constructors.cpp @@ -1,7 +1,9 @@ // RUN: %clang_cc1 -std=c++98 -triple i386-unknown-unknown -fno-elide-constructors -emit-llvm -o - %s | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-CXX98 // RUN: %clang_cc1 -std=c++11 -triple i386-unknown-unknown -fno-elide-constructors -emit-llvm -o - %s | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-CXX11 +// RUN: %clang_cc1 -std=c++11 -triple amdgcn-amd-amdhsa -fno-elide-constructors -emit-llvm -o - %s | FileCheck %s --check-prefixes=CHECK --check-prefix=CHECK-CXX11-NONZEROALLOCAAS // RUN: %clang_cc1 -std=c++98 -triple i386-unknown-unknown -emit-llvm -o - %s | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-CXX98-ELIDE // RUN: %clang_cc1 -std=c++11 -triple i386-unknown-unknown -emit-llvm -o - %s | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-CXX11-ELIDE +// RUN: %clang_cc1 -std=c++11 -triple amdgcn-amd-amdhsa -emit-llvm -o - %s | FileCheck %s 
--check-prefix=CHECK --check-prefix=CHECK-CXX11-NONZEROALLOCAAS-ELIDE // Reduced from PR12208 class X { @@ -23,8 +25,10 @@ X Test() // sret argument. // CHECK-CXX98: call void @_ZN1XC1ERKS_( // CHECK-CXX11: call void @_ZN1XC1EOS_( + // CHECK-CXX11-NONZEROALLOCAAS: call void @_ZN1XC1EOS_( // CHECK-CXX98-ELIDE-NOT: call void @_ZN1XC1ERKS_( // CHECK-CXX11-ELIDE-NOT: call void @_ZN1XC1EOS_( + // CHECK-CXX11-NONZEROALLOCAAS-ELIDE-NOT: call void @_ZN1XC1EOS_( // Make sure that the destructor for X is called. // FIXME: This call is present even in the -ELIDE runs, but is guarded by a >From b209d6779cccaa9c2f272d839263cf7ca139b945 Mon Sep 17 00:00:00 2001 From: Alex Voicu <alexandru.vo...@amd.com> Date: Sat, 2 Nov 2024 00:57:17 +0000 Subject: [PATCH 04/10] Add query for a possible target specific indirect arg AS. --- clang/include/clang/Basic/TargetInfo.h | 8 ++++++++ clang/lib/CodeGen/CGCall.cpp | 6 ++++-- 2 files changed, 12 insertions(+), 2 deletions(-) diff --git a/clang/include/clang/Basic/TargetInfo.h b/clang/include/clang/Basic/TargetInfo.h index 25eda907d20a7b..fa5021baf667b5 100644 --- a/clang/include/clang/Basic/TargetInfo.h +++ b/clang/include/clang/Basic/TargetInfo.h @@ -1780,6 +1780,14 @@ class TargetInfo : public TransferrableTargetInfo, return 0; } + /// \returns Target specific address space for indirect (e.g. sret) arguments. + /// If such an address space exists, it must be convertible to and from the + /// alloca address space. If it does not, std::nullopt is returned and the + /// alloca address space will be used. + virtual std::optional<unsigned> getIndirectArgAddressSpace() const { + return std::nullopt; + } + /// \returns If a target requires an address within a target specific address /// space \p AddressSpace to be converted in order to be used, then return the /// corresponding target specific DWARF address space. 
diff --git a/clang/lib/CodeGen/CGCall.cpp b/clang/lib/CodeGen/CGCall.cpp index 7171d85b0d0ab0..87e70df795a986 100644 --- a/clang/lib/CodeGen/CGCall.cpp +++ b/clang/lib/CodeGen/CGCall.cpp @@ -1672,9 +1672,11 @@ CodeGenTypes::GetFunctionType(const CGFunctionInfo &FI) { // Add type for sret argument. if (IRFunctionArgs.hasSRetArg()) { - unsigned AddressSpace = CGM.getDataLayout().getAllocaAddrSpace(); + auto AddressSpace = CGM.getTarget().getIndirectArgAddressSpace(); + if (!AddressSpace) + AddressSpace = getDataLayout().getAllocaAddrSpace(); ArgTypes[IRFunctionArgs.getSRetArgNo()] = - llvm::PointerType::get(getLLVMContext(), AddressSpace); + llvm::PointerType::get(getLLVMContext(), *AddressSpace); } // Add type for inalloca argument. >From ac6367be734abec8f2c46f4fe8a13e950e13578f Mon Sep 17 00:00:00 2001 From: Alex Voicu <alexandru.vo...@amd.com> Date: Sat, 2 Nov 2024 01:20:12 +0000 Subject: [PATCH 05/10] Add more context to test. --- clang/test/CodeGenCXX/no-elide-constructors.cpp | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/clang/test/CodeGenCXX/no-elide-constructors.cpp b/clang/test/CodeGenCXX/no-elide-constructors.cpp index 098163f957f759..994282debb0d08 100644 --- a/clang/test/CodeGenCXX/no-elide-constructors.cpp +++ b/clang/test/CodeGenCXX/no-elide-constructors.cpp @@ -17,6 +17,7 @@ class X { }; // CHECK-LABEL: define{{.*}} void @_Z4Testv( +// CHECK-SAME: ptr {{.*}}dead_on_unwind noalias writable sret([[CLASS_X:%.*]]) align 1 [[AGG_RESULT:%.*]]) X Test() { X x; @@ -25,7 +26,8 @@ X Test() // sret argument. 
// CHECK-CXX98: call void @_ZN1XC1ERKS_( // CHECK-CXX11: call void @_ZN1XC1EOS_( - // CHECK-CXX11-NONZEROALLOCAAS: call void @_ZN1XC1EOS_( + // CHECK-CXX11-NONZEROALLOCAAS: [[TMP0:%.*]] = addrspacecast ptr addrspace(5) [[AGG_RESULT]] to ptr + // CHECK-CXX11-NONZEROALLOCAAS-NEXT: call void @_ZN1XC1EOS_(ptr noundef nonnull align 1 dereferenceable(1) [[TMP0]] // CHECK-CXX98-ELIDE-NOT: call void @_ZN1XC1ERKS_( // CHECK-CXX11-ELIDE-NOT: call void @_ZN1XC1EOS_( // CHECK-CXX11-NONZEROALLOCAAS-ELIDE-NOT: call void @_ZN1XC1EOS_( >From 9ff1d0dd16bbda206753348ab9671dcfe0b5eb7b Mon Sep 17 00:00:00 2001 From: Alex Voicu <alexandru.vo...@amd.com> Date: Wed, 6 Nov 2024 13:16:03 +0200 Subject: [PATCH 06/10] Extend Indirect Args to carry an address space. --- clang/include/clang/CodeGen/CGFunctionInfo.h | 11 ++++++----- clang/lib/CodeGen/ABIInfo.cpp | 2 +- clang/lib/CodeGen/ItaniumCXXABI.cpp | 2 +- clang/lib/CodeGen/MicrosoftCXXABI.cpp | 2 +- clang/lib/CodeGen/SwiftCallingConv.cpp | 4 ++-- clang/lib/CodeGen/Targets/AMDGPU.cpp | 5 +++++ clang/lib/CodeGen/Targets/ARC.cpp | 2 +- clang/lib/CodeGen/Targets/ARM.cpp | 4 ++-- clang/lib/CodeGen/Targets/Lanai.cpp | 2 +- clang/lib/CodeGen/Targets/PPC.cpp | 4 ++-- clang/lib/CodeGen/Targets/X86.cpp | 16 ++++++++-------- 11 files changed, 30 insertions(+), 24 deletions(-) diff --git a/clang/include/clang/CodeGen/CGFunctionInfo.h b/clang/include/clang/CodeGen/CGFunctionInfo.h index 9d785d878b61dc..4ca5d2b6548124 100644 --- a/clang/include/clang/CodeGen/CGFunctionInfo.h +++ b/clang/include/clang/CodeGen/CGFunctionInfo.h @@ -206,8 +206,8 @@ class ABIArgInfo { static ABIArgInfo getIgnore() { return ABIArgInfo(Ignore); } - static ABIArgInfo getIndirect(CharUnits Alignment, bool ByVal = true, - bool Realign = false, + static ABIArgInfo getIndirect(CharUnits Alignment, unsigned AddrSpace = 0, + bool ByVal = true, bool Realign = false, llvm::Type *Padding = nullptr) { auto AI = ABIArgInfo(Indirect); AI.setIndirectAlign(Alignment); @@ -215,6 +215,7 @@ 
class ABIArgInfo { AI.setIndirectRealign(Realign); AI.setSRetAfterThis(false); AI.setPaddingType(Padding); + AI.setIndirectAddrSpace(AddrSpace); return AI; } @@ -232,7 +233,7 @@ class ABIArgInfo { static ABIArgInfo getIndirectInReg(CharUnits Alignment, bool ByVal = true, bool Realign = false) { - auto AI = getIndirect(Alignment, ByVal, Realign); + auto AI = getIndirect(Alignment, 0, ByVal, Realign); AI.setInReg(true); return AI; } @@ -422,12 +423,12 @@ class ABIArgInfo { } unsigned getIndirectAddrSpace() const { - assert(isIndirectAliased() && "Invalid kind!"); + assert((isIndirect() || isIndirectAliased()) && "Invalid kind!"); return IndirectAttr.AddrSpace; } void setIndirectAddrSpace(unsigned AddrSpace) { - assert(isIndirectAliased() && "Invalid kind!"); + assert((isIndirect() || isIndirectAliased()) && "Invalid kind!"); IndirectAttr.AddrSpace = AddrSpace; } diff --git a/clang/lib/CodeGen/ABIInfo.cpp b/clang/lib/CodeGen/ABIInfo.cpp index edd7146dc1ac76..7ab9f0aeb60993 100644 --- a/clang/lib/CodeGen/ABIInfo.cpp +++ b/clang/lib/CodeGen/ABIInfo.cpp @@ -174,7 +174,7 @@ bool ABIInfo::isPromotableIntegerTypeForABI(QualType Ty) const { ABIArgInfo ABIInfo::getNaturalAlignIndirect(QualType Ty, bool ByVal, bool Realign, llvm::Type *Padding) const { - return ABIArgInfo::getIndirect(getContext().getTypeAlignInChars(Ty), ByVal, + return ABIArgInfo::getIndirect(getContext().getTypeAlignInChars(Ty), 0, ByVal, Realign, Padding); } diff --git a/clang/lib/CodeGen/ItaniumCXXABI.cpp b/clang/lib/CodeGen/ItaniumCXXABI.cpp index 9b3c2f1b2af677..f5e2b096212f4d 100644 --- a/clang/lib/CodeGen/ItaniumCXXABI.cpp +++ b/clang/lib/CodeGen/ItaniumCXXABI.cpp @@ -1351,7 +1351,7 @@ bool ItaniumCXXABI::classifyReturnType(CGFunctionInfo &FI) const { // If C++ prohibits us from making a copy, return by address. 
if (!RD->canPassInRegisters()) { auto Align = CGM.getContext().getTypeAlignInChars(FI.getReturnType()); - FI.getReturnInfo() = ABIArgInfo::getIndirect(Align, /*ByVal=*/false); + FI.getReturnInfo() = ABIArgInfo::getIndirect(Align, 0, /*ByVal=*/false); return true; } return false; diff --git a/clang/lib/CodeGen/MicrosoftCXXABI.cpp b/clang/lib/CodeGen/MicrosoftCXXABI.cpp index 3802dc8bcafc49..3b5b860a1b087f 100644 --- a/clang/lib/CodeGen/MicrosoftCXXABI.cpp +++ b/clang/lib/CodeGen/MicrosoftCXXABI.cpp @@ -1171,7 +1171,7 @@ bool MicrosoftCXXABI::classifyReturnType(CGFunctionInfo &FI) const { if (isIndirectReturn) { CharUnits Align = CGM.getContext().getTypeAlignInChars(FI.getReturnType()); - FI.getReturnInfo() = ABIArgInfo::getIndirect(Align, /*ByVal=*/false); + FI.getReturnInfo() = ABIArgInfo::getIndirect(Align, 0, /*ByVal=*/false); // MSVC always passes `this` before the `sret` parameter. FI.getReturnInfo().setSRetAfterThis(FI.isInstanceMethod()); diff --git a/clang/lib/CodeGen/SwiftCallingConv.cpp b/clang/lib/CodeGen/SwiftCallingConv.cpp index ab2e2bd0b30646..e178c0fab5910d 100644 --- a/clang/lib/CodeGen/SwiftCallingConv.cpp +++ b/clang/lib/CodeGen/SwiftCallingConv.cpp @@ -801,7 +801,7 @@ static ABIArgInfo classifyExpandedType(SwiftAggLowering &lowering, if (lowering.empty()) { return ABIArgInfo::getIgnore(); } else if (lowering.shouldPassIndirectly(forReturn)) { - return ABIArgInfo::getIndirect(alignmentForIndirect, /*byval*/ false); + return ABIArgInfo::getIndirect(alignmentForIndirect, 0, /*byval*/ false); } else { auto types = lowering.getCoerceAndExpandTypes(); return ABIArgInfo::getCoerceAndExpand(types.first, types.second); @@ -815,7 +815,7 @@ static ABIArgInfo classifyType(CodeGenModule &CGM, CanQualType type, auto &layout = CGM.getContext().getASTRecordLayout(record); if (mustPassRecordIndirectly(CGM, record)) - return ABIArgInfo::getIndirect(layout.getAlignment(), /*byval*/ false); + return ABIArgInfo::getIndirect(layout.getAlignment(), 0, /*byval*/ false); 
SwiftAggLowering lowering(CGM); lowering.addTypedData(recordType->getDecl(), CharUnits::Zero(), layout); diff --git a/clang/lib/CodeGen/Targets/AMDGPU.cpp b/clang/lib/CodeGen/Targets/AMDGPU.cpp index 56ad0503a11ab2..c45e7020de3f52 100644 --- a/clang/lib/CodeGen/Targets/AMDGPU.cpp +++ b/clang/lib/CodeGen/Targets/AMDGPU.cpp @@ -105,6 +105,11 @@ void AMDGPUABIInfo::computeInfo(CGFunctionInfo &FI) const { if (!getCXXABI().classifyReturnType(FI)) FI.getReturnInfo() = classifyReturnType(FI.getReturnType()); + // srets / indirect returns are unconditionally in the alloca AS. + if (FI.getReturnInfo().isIndirect()) + FI.getReturnInfo().setIndirectAddrSpace( + getDataLayout().getAllocaAddrSpace()); + unsigned ArgumentIndex = 0; const unsigned numFixedArguments = FI.getNumRequiredArgs(); diff --git a/clang/lib/CodeGen/Targets/ARC.cpp b/clang/lib/CodeGen/Targets/ARC.cpp index 1904e8fdb3888a..ee0db9778bdcb0 100644 --- a/clang/lib/CodeGen/Targets/ARC.cpp +++ b/clang/lib/CodeGen/Targets/ARC.cpp @@ -77,7 +77,7 @@ ABIArgInfo ARCABIInfo::getIndirectByValue(QualType Ty) const { // Compute the byval alignment. const unsigned MinABIStackAlignInBytes = 4; unsigned TypeAlign = getContext().getTypeAlign(Ty) / 8; - return ABIArgInfo::getIndirect(CharUnits::fromQuantity(4), /*ByVal=*/true, + return ABIArgInfo::getIndirect(CharUnits::fromQuantity(4), 0, /*ByVal=*/true, TypeAlign > MinABIStackAlignInBytes); } diff --git a/clang/lib/CodeGen/Targets/ARM.cpp b/clang/lib/CodeGen/Targets/ARM.cpp index 2d858fa2f3c3a3..d89a0bdff56a35 100644 --- a/clang/lib/CodeGen/Targets/ARM.cpp +++ b/clang/lib/CodeGen/Targets/ARM.cpp @@ -397,7 +397,7 @@ ABIArgInfo ARMABIInfo::classifyArgumentType(QualType Ty, bool isVariadic, // bigger than 128-bits, they get placed in space allocated by the caller, // and a pointer is passed. 
return ABIArgInfo::getIndirect( - CharUnits::fromQuantity(getContext().getTypeAlign(Ty) / 8), false); + CharUnits::fromQuantity(getContext().getTypeAlign(Ty) / 8), 0, false); } // Support byval for ARM. @@ -415,7 +415,7 @@ ABIArgInfo ARMABIInfo::classifyArgumentType(QualType Ty, bool isVariadic, } if (getContext().getTypeSizeInChars(Ty) > CharUnits::fromQuantity(64)) { assert(getABIKind() != ARMABIKind::AAPCS16_VFP && "unexpected byval"); - return ABIArgInfo::getIndirect(CharUnits::fromQuantity(ABIAlign), + return ABIArgInfo::getIndirect(CharUnits::fromQuantity(ABIAlign), 0, /*ByVal=*/true, /*Realign=*/TyAlign > ABIAlign); } diff --git a/clang/lib/CodeGen/Targets/Lanai.cpp b/clang/lib/CodeGen/Targets/Lanai.cpp index 2578fc0291e760..ffacb0ccbea53f 100644 --- a/clang/lib/CodeGen/Targets/Lanai.cpp +++ b/clang/lib/CodeGen/Targets/Lanai.cpp @@ -78,7 +78,7 @@ ABIArgInfo LanaiABIInfo::getIndirectResult(QualType Ty, bool ByVal, // Compute the byval alignment. const unsigned MinABIStackAlignInBytes = 4; unsigned TypeAlign = getContext().getTypeAlign(Ty) / 8; - return ABIArgInfo::getIndirect(CharUnits::fromQuantity(4), /*ByVal=*/true, + return ABIArgInfo::getIndirect(CharUnits::fromQuantity(4), 0, /*ByVal=*/true, /*Realign=*/TypeAlign > MinABIStackAlignInBytes); } diff --git a/clang/lib/CodeGen/Targets/PPC.cpp b/clang/lib/CodeGen/Targets/PPC.cpp index 989e46f4b66a7d..c8796036b214f5 100644 --- a/clang/lib/CodeGen/Targets/PPC.cpp +++ b/clang/lib/CodeGen/Targets/PPC.cpp @@ -213,7 +213,7 @@ ABIArgInfo AIXABIInfo::classifyArgumentType(QualType Ty) const { CharUnits CCAlign = getParamTypeAlignment(Ty); CharUnits TyAlign = getContext().getTypeAlignInChars(Ty); - return ABIArgInfo::getIndirect(CCAlign, /*ByVal*/ true, + return ABIArgInfo::getIndirect(CCAlign, 0, /*ByVal*/ true, /*Realign*/ TyAlign > CCAlign); } @@ -887,7 +887,7 @@ PPC64_SVR4_ABIInfo::classifyArgumentType(QualType Ty) const { } // All other aggregates are passed ByVal. 
- return ABIArgInfo::getIndirect(CharUnits::fromQuantity(ABIAlign), + return ABIArgInfo::getIndirect(CharUnits::fromQuantity(ABIAlign), 0, /*ByVal=*/true, /*Realign=*/TyAlign > ABIAlign); } diff --git a/clang/lib/CodeGen/Targets/X86.cpp b/clang/lib/CodeGen/Targets/X86.cpp index 7f73bf2a65266e..f097c27bd89478 100644 --- a/clang/lib/CodeGen/Targets/X86.cpp +++ b/clang/lib/CodeGen/Targets/X86.cpp @@ -606,12 +606,12 @@ ABIArgInfo X86_32ABIInfo::getIndirectResult(QualType Ty, bool ByVal, unsigned TypeAlign = getContext().getTypeAlign(Ty) / 8; unsigned StackAlign = getTypeStackAlignInBytes(Ty, TypeAlign); if (StackAlign == 0) - return ABIArgInfo::getIndirect(CharUnits::fromQuantity(4), /*ByVal=*/true); + return ABIArgInfo::getIndirect(CharUnits::fromQuantity(4), 0, /*ByVal=*/true); // If the stack alignment is less than the type alignment, realign the // argument. bool Realign = TypeAlign > StackAlign; - return ABIArgInfo::getIndirect(CharUnits::fromQuantity(StackAlign), + return ABIArgInfo::getIndirect(CharUnits::fromQuantity(StackAlign), 0, /*ByVal=*/true, Realign); } @@ -2247,7 +2247,7 @@ ABIArgInfo X86_64ABIInfo::getIndirectResult(QualType Ty, Size)); } - return ABIArgInfo::getIndirect(CharUnits::fromQuantity(Align)); + return ABIArgInfo::getIndirect(CharUnits::fromQuantity(Align), 0); } /// The ABI specifies that a value should be passed in a full vector XMM/YMM @@ -3304,7 +3304,7 @@ ABIArgInfo WinX86_64ABIInfo::classify(QualType Ty, unsigned &FreeSSERegs, return ABIArgInfo::getDirect(); return ABIArgInfo::getExpand(); } - return ABIArgInfo::getIndirect(Align, /*ByVal=*/false); + return ABIArgInfo::getIndirect(Align, 0, /*ByVal=*/false); } else if (IsVectorCall) { if (FreeSSERegs >= NumElts && (IsReturnType || Ty->isBuiltinType() || Ty->isVectorType())) { @@ -3314,7 +3314,7 @@ ABIArgInfo WinX86_64ABIInfo::classify(QualType Ty, unsigned &FreeSSERegs, return ABIArgInfo::getExpand(); } else if (!Ty->isBuiltinType() && !Ty->isVectorType()) { // HVAs are delayed and 
reclassified in the 2nd step. - return ABIArgInfo::getIndirect(Align, /*ByVal=*/false); + return ABIArgInfo::getIndirect(Align, 0, /*ByVal=*/false); } } } @@ -3350,7 +3350,7 @@ ABIArgInfo WinX86_64ABIInfo::classify(QualType Ty, unsigned &FreeSSERegs, if (IsMingw64) { const llvm::fltSemantics *LDF = &getTarget().getLongDoubleFormat(); if (LDF == &llvm::APFloat::x87DoubleExtended()) - return ABIArgInfo::getIndirect(Align, /*ByVal=*/false); + return ABIArgInfo::getIndirect(Align, 0, /*ByVal=*/false); } break; @@ -3360,7 +3360,7 @@ ABIArgInfo WinX86_64ABIInfo::classify(QualType Ty, unsigned &FreeSSERegs, // than 8 bytes are passed indirectly. GCC follows it. We follow it too, // even though it isn't particularly efficient. if (!IsReturnType) - return ABIArgInfo::getIndirect(Align, /*ByVal=*/false); + return ABIArgInfo::getIndirect(Align, 0, /*ByVal=*/false); // Mingw64 GCC returns i128 in XMM0. Coerce to v2i64 to handle that. // Clang matches them for compatibility. @@ -3380,7 +3380,7 @@ ABIArgInfo WinX86_64ABIInfo::classify(QualType Ty, unsigned &FreeSSERegs, // the power of 2. if (Width <= 64) return ABIArgInfo::getDirect(); - return ABIArgInfo::getIndirect(Align, /*ByVal=*/false); + return ABIArgInfo::getIndirect(Align, 0, /*ByVal=*/false); } return ABIArgInfo::getDirect(); >From 1c3e67cdebf6025aacd1900c22f033504d8e7963 Mon Sep 17 00:00:00 2001 From: Alex Voicu <alexandru.vo...@amd.com> Date: Wed, 6 Nov 2024 13:21:51 +0200 Subject: [PATCH 07/10] Fix formatting. 
--- clang/lib/CodeGen/Targets/X86.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/clang/lib/CodeGen/Targets/X86.cpp b/clang/lib/CodeGen/Targets/X86.cpp index f097c27bd89478..6e5b46d5f91c8a 100644 --- a/clang/lib/CodeGen/Targets/X86.cpp +++ b/clang/lib/CodeGen/Targets/X86.cpp @@ -606,7 +606,8 @@ ABIArgInfo X86_32ABIInfo::getIndirectResult(QualType Ty, bool ByVal, unsigned TypeAlign = getContext().getTypeAlign(Ty) / 8; unsigned StackAlign = getTypeStackAlignInBytes(Ty, TypeAlign); if (StackAlign == 0) - return ABIArgInfo::getIndirect(CharUnits::fromQuantity(4), 0, /*ByVal=*/true); + return ABIArgInfo::getIndirect(CharUnits::fromQuantity(4), 0, + /*ByVal=*/true); // If the stack alignment is less than the type alignment, realign the // argument. >From c9288fc9d38c603ef120714343b2a57611fda424 Mon Sep 17 00:00:00 2001 From: Alex Voicu <alexandru.vo...@amd.com> Date: Fri, 8 Nov 2024 01:13:11 +0200 Subject: [PATCH 08/10] Drop vestigial target hook. --- clang/include/clang/Basic/TargetInfo.h | 8 -------- clang/lib/CodeGen/CGCall.cpp | 7 ++----- 2 files changed, 2 insertions(+), 13 deletions(-) diff --git a/clang/include/clang/Basic/TargetInfo.h b/clang/include/clang/Basic/TargetInfo.h index fa5021baf667b5..25eda907d20a7b 100644 --- a/clang/include/clang/Basic/TargetInfo.h +++ b/clang/include/clang/Basic/TargetInfo.h @@ -1780,14 +1780,6 @@ class TargetInfo : public TransferrableTargetInfo, return 0; } - /// \returns Target specific address space for indirect (e.g. sret) arguments. - /// If such an address space exists, it must be convertible to and from the - /// alloca address space. If it does not, std::nullopt is returned and the - /// alloca address space will be used. 
- virtual std::optional<unsigned> getIndirectArgAddressSpace() const { - return std::nullopt; - } - /// \returns If a target requires an address within a target specific address /// space \p AddressSpace to be converted in order to be used, then return the /// corresponding target specific DWARF address space. diff --git a/clang/lib/CodeGen/CGCall.cpp b/clang/lib/CodeGen/CGCall.cpp index 87e70df795a986..32200ada7cf7de 100644 --- a/clang/lib/CodeGen/CGCall.cpp +++ b/clang/lib/CodeGen/CGCall.cpp @@ -1672,11 +1672,8 @@ CodeGenTypes::GetFunctionType(const CGFunctionInfo &FI) { // Add type for sret argument. if (IRFunctionArgs.hasSRetArg()) { - auto AddressSpace = CGM.getTarget().getIndirectArgAddressSpace(); - if (!AddressSpace) - AddressSpace = getDataLayout().getAllocaAddrSpace(); - ArgTypes[IRFunctionArgs.getSRetArgNo()] = - llvm::PointerType::get(getLLVMContext(), *AddressSpace); + ArgTypes[IRFunctionArgs.getSRetArgNo()] = llvm::PointerType::get( + getLLVMContext(), FI.getReturnInfo().getIndirectAddrSpace()); } // Add type for inalloca argument. >From 013790c1ad46cbcc143fb30fccc1ead25947da24 Mon Sep 17 00:00:00 2001 From: Alex Voicu <alexandru.vo...@amd.com> Date: Fri, 15 Nov 2024 23:04:53 +0000 Subject: [PATCH 09/10] Tweak handling potential AS mismatches. --- clang/lib/CodeGen/CGCall.cpp | 24 ++++++++++--------- clang/lib/CodeGen/CGExprAgg.cpp | 19 ++++++++++----- .../CodeGenOpenCL/addr-space-struct-arg.cl | 10 ++++---- .../amdgpu-abi-struct-arg-byref.cl | 10 ++++---- 4 files changed, 34 insertions(+), 29 deletions(-) diff --git a/clang/lib/CodeGen/CGCall.cpp b/clang/lib/CodeGen/CGCall.cpp index 32200ada7cf7de..41105262b5f14d 100644 --- a/clang/lib/CodeGen/CGCall.cpp +++ b/clang/lib/CodeGen/CGCall.cpp @@ -5168,7 +5168,7 @@ RValue CodeGenFunction::EmitCall(const CGFunctionInfo &CallInfo, // alloca was AS casted to the default as, so we ensure the cast is // stripped before binding to the sret arg, which is in the allocaAS. 
IRCallArgs[IRFunctionArgs.getSRetArgNo()] = - getAsNaturalPointerTo(SRetPtr, RetTy)->stripPointerCasts(); + getAsNaturalPointerTo(SRetPtr, RetTy); } else if (RetAI.isInAlloca()) { Address Addr = Builder.CreateStructGEP(ArgMemory, RetAI.getInAllocaFieldIndex()); @@ -5390,18 +5390,20 @@ RValue CodeGenFunction::EmitCall(const CGFunctionInfo &CallInfo, V->getType()->isIntegerTy()) V = Builder.CreateZExt(V, ArgInfo.getCoerceToType()); - // If the argument doesn't match, we are either trying to pass an - // alloca-ed sret argument directly, and the alloca AS does not match - // the default AS, case in which we AS cast it, or we have a trivial - // type mismatch, and thus perform a bitcast to coerce it. + // The only plausible mismatch here would be for pointer address spaces, + // which can happen e.g. when passing a sret arg that is in the AllocaAS + // to a function that takes a pointer to an argument in the DefaultAS. + // We assume that the target has a reasonable mapping for the DefaultAS + // (it can be cast to from incoming specific ASes), and insert an AS + // cast to address the mismatch. 
if (FirstIRArg < IRFuncTy->getNumParams() && V->getType() != IRFuncTy->getParamType(FirstIRArg)) { - auto IRTy = IRFuncTy->getParamType(FirstIRArg); - auto MaybeSRetArg = dyn_cast_or_null<llvm::Argument>(V); - if (MaybeSRetArg && MaybeSRetArg->hasStructRetAttr()) - V = Builder.CreateAddrSpaceCast(V, IRTy); - else - V = Builder.CreateBitCast(V, IRTy); + assert(V->getType()->isPointerTy() && "Only pointers can mismatch!"); + auto FormalAS = + CallInfo.arguments()[ArgNo].type.getQualifiers().getAddressSpace(); + auto ActualAS = I->Ty.getAddressSpace(); + V = getTargetHooks().performAddrSpaceCast( + *this, V, ActualAS, FormalAS, IRFuncTy->getParamType(FirstIRArg)); } if (ArgHasMaybeUndefAttr) diff --git a/clang/lib/CodeGen/CGExprAgg.cpp b/clang/lib/CodeGen/CGExprAgg.cpp index 2ad6587089f101..f9c9c5df801631 100644 --- a/clang/lib/CodeGen/CGExprAgg.cpp +++ b/clang/lib/CodeGen/CGExprAgg.cpp @@ -296,18 +296,25 @@ void AggExprEmitter::withReturnValueSlot( (RequiresDestruction && Dest.isIgnored()); Address RetAddr = Address::invalid(); - RawAddress RetAllocaAddr = RawAddress::invalid(); EHScopeStack::stable_iterator LifetimeEndBlock; llvm::Value *LifetimeSizePtr = nullptr; llvm::IntrinsicInst *LifetimeStartInst = nullptr; if (!UseTemp) { - RetAddr = Dest.getAddress(); + // It is possible for the existing slot we are using directly to have been + // allocated in the correct AS for an indirect return, and then cast to + // the default AS (this is the behaviour of CreateMemTemp), however we know + // that the return address is expected to point to the uncasted AS, hence we + // strip possible pointer casts here. 
+ if (Dest.getAddress().isValid()) + RetAddr = Dest.getAddress().withPointer( + Dest.getAddress().getBasePointer()->stripPointerCasts(), + Dest.getAddress().isKnownNonNull()); } else { - RetAddr = CGF.CreateMemTemp(RetTy, "tmp", &RetAllocaAddr); + RetAddr = CGF.CreateMemTempWithoutCast(RetTy, "tmp"); llvm::TypeSize Size = CGF.CGM.getDataLayout().getTypeAllocSize(CGF.ConvertTypeForMem(RetTy)); - LifetimeSizePtr = CGF.EmitLifetimeStart(Size, RetAllocaAddr.getPointer()); + LifetimeSizePtr = CGF.EmitLifetimeStart(Size, RetAddr.getBasePointer()); if (LifetimeSizePtr) { LifetimeStartInst = cast<llvm::IntrinsicInst>(std::prev(Builder.GetInsertPoint())); @@ -316,7 +323,7 @@ void AggExprEmitter::withReturnValueSlot( "Last insertion wasn't a lifetime.start?"); CGF.pushFullExprCleanup<CodeGenFunction::CallLifetimeEnd>( - NormalEHLifetimeMarker, RetAllocaAddr, LifetimeSizePtr); + NormalEHLifetimeMarker, RetAddr, LifetimeSizePtr); LifetimeEndBlock = CGF.EHStack.stable_begin(); } } @@ -337,7 +344,7 @@ void AggExprEmitter::withReturnValueSlot( // Since we're not guaranteed to be in an ExprWithCleanups, clean up // eagerly. 
CGF.DeactivateCleanupBlock(LifetimeEndBlock, LifetimeStartInst); - CGF.EmitLifetimeEnd(LifetimeSizePtr, RetAllocaAddr.getPointer()); + CGF.EmitLifetimeEnd(LifetimeSizePtr, RetAddr.getBasePointer()); } } diff --git a/clang/test/CodeGenOpenCL/addr-space-struct-arg.cl b/clang/test/CodeGenOpenCL/addr-space-struct-arg.cl index 4a1db2c3564a57..effdeb9546800d 100644 --- a/clang/test/CodeGenOpenCL/addr-space-struct-arg.cl +++ b/clang/test/CodeGenOpenCL/addr-space-struct-arg.cl @@ -154,7 +154,6 @@ Mat4X4 __attribute__((noinline)) foo(Mat3X3 in) { // AMDGCN20-NEXT: [[TMP:%.*]] = alloca [[STRUCT_MAT4X4:%.*]], align 4, addrspace(5) // AMDGCN20-NEXT: [[IN_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[IN_ADDR]] to ptr // AMDGCN20-NEXT: [[OUT_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[OUT_ADDR]] to ptr -// AMDGCN20-NEXT: [[TMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TMP]] to ptr // AMDGCN20-NEXT: store ptr addrspace(1) [[IN]], ptr [[IN_ADDR_ASCAST]], align 8 // AMDGCN20-NEXT: store ptr addrspace(1) [[OUT]], ptr [[OUT_ADDR_ASCAST]], align 8 // AMDGCN20-NEXT: [[TMP0:%.*]] = load ptr addrspace(1), ptr [[OUT_ADDR_ASCAST]], align 8 @@ -164,10 +163,10 @@ Mat4X4 __attribute__((noinline)) foo(Mat3X3 in) { // AMDGCN20-NEXT: [[TMP2:%.*]] = getelementptr inbounds nuw [[STRUCT_MAT3X3]], ptr addrspace(1) [[ARRAYIDX1]], i32 0, i32 0 // AMDGCN20-NEXT: [[TMP3:%.*]] = load [9 x i32], ptr addrspace(1) [[TMP2]], align 4 // AMDGCN20-NEXT: [[CALL:%.*]] = call [[STRUCT_MAT4X4]] @[[FOO:[a-zA-Z0-9_$\"\\.-]*[a-zA-Z_$\"\\.-][a-zA-Z0-9_$\"\\.-]*]]([9 x i32] [[TMP3]]) #[[ATTR3:[0-9]+]] -// AMDGCN20-NEXT: [[TMP4:%.*]] = getelementptr inbounds nuw [[STRUCT_MAT4X4]], ptr [[TMP_ASCAST]], i32 0, i32 0 +// AMDGCN20-NEXT: [[TMP4:%.*]] = getelementptr inbounds nuw [[STRUCT_MAT4X4]], ptr addrspace(5) [[TMP]], i32 0, i32 0 // AMDGCN20-NEXT: [[TMP5:%.*]] = extractvalue [[STRUCT_MAT4X4]] [[CALL]], 0 -// AMDGCN20-NEXT: store [16 x i32] [[TMP5]], ptr [[TMP4]], align 4 -// AMDGCN20-NEXT: call void 
@llvm.memcpy.p1.p0.i64(ptr addrspace(1) align 4 [[ARRAYIDX]], ptr align 4 [[TMP_ASCAST]], i64 64, i1 false) +// AMDGCN20-NEXT: store [16 x i32] [[TMP5]], ptr addrspace(5) [[TMP4]], align 4 +// AMDGCN20-NEXT: call void @llvm.memcpy.p1.p5.i64(ptr addrspace(1) align 4 [[ARRAYIDX]], ptr addrspace(5) align 4 [[TMP]], i64 64, i1 false) // AMDGCN20-NEXT: ret void // // SPIR-LABEL: define dso_local spir_kernel void @ker( @@ -327,7 +326,6 @@ Mat64X64 __attribute__((noinline)) foo_large(Mat32X32 in) { // AMDGCN20-NEXT: [[BYVAL_TEMP:%.*]] = alloca [[STRUCT_MAT32X32:%.*]], align 4, addrspace(5) // AMDGCN20-NEXT: [[IN_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[IN_ADDR]] to ptr // AMDGCN20-NEXT: [[OUT_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[OUT_ADDR]] to ptr -// AMDGCN20-NEXT: [[TMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TMP]] to ptr // AMDGCN20-NEXT: store ptr addrspace(1) [[IN]], ptr [[IN_ADDR_ASCAST]], align 8 // AMDGCN20-NEXT: store ptr addrspace(1) [[OUT]], ptr [[OUT_ADDR_ASCAST]], align 8 // AMDGCN20-NEXT: [[TMP0:%.*]] = load ptr addrspace(1), ptr [[OUT_ADDR_ASCAST]], align 8 @@ -336,7 +334,7 @@ Mat64X64 __attribute__((noinline)) foo_large(Mat32X32 in) { // AMDGCN20-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds [[STRUCT_MAT32X32]], ptr addrspace(1) [[TMP1]], i64 1 // AMDGCN20-NEXT: call void @llvm.memcpy.p5.p1.i64(ptr addrspace(5) align 4 [[BYVAL_TEMP]], ptr addrspace(1) align 4 [[ARRAYIDX1]], i64 4096, i1 false) // AMDGCN20-NEXT: call void @foo_large(ptr addrspace(5) dead_on_unwind writable sret([[STRUCT_MAT64X64]]) align 4 [[TMP]], ptr addrspace(5) noundef byref([[STRUCT_MAT32X32]]) align 4 [[BYVAL_TEMP]]) #[[ATTR3]] -// AMDGCN20-NEXT: call void @llvm.memcpy.p1.p0.i64(ptr addrspace(1) align 4 [[ARRAYIDX]], ptr align 4 [[TMP_ASCAST]], i64 16384, i1 false) +// AMDGCN20-NEXT: call void @llvm.memcpy.p1.p5.i64(ptr addrspace(1) align 4 [[ARRAYIDX]], ptr addrspace(5) align 4 [[TMP]], i64 16384, i1 false) // AMDGCN20-NEXT: ret void // // 
SPIR-LABEL: define dso_local spir_kernel void @ker_large( diff --git a/clang/test/CodeGenOpenCL/amdgpu-abi-struct-arg-byref.cl b/clang/test/CodeGenOpenCL/amdgpu-abi-struct-arg-byref.cl index c2b2e00d15e13f..2f8ba99a3e4160 100644 --- a/clang/test/CodeGenOpenCL/amdgpu-abi-struct-arg-byref.cl +++ b/clang/test/CodeGenOpenCL/amdgpu-abi-struct-arg-byref.cl @@ -70,7 +70,6 @@ Mat4X4 __attribute__((noinline)) foo(Mat3X3 in) { // AMDGCN-NEXT: [[TMP:%.*]] = alloca [[STRUCT_MAT4X4:%.*]], align 4, addrspace(5) // AMDGCN-NEXT: [[IN_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[IN_ADDR]] to ptr // AMDGCN-NEXT: [[OUT_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[OUT_ADDR]] to ptr -// AMDGCN-NEXT: [[TMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TMP]] to ptr // AMDGCN-NEXT: store ptr addrspace(1) [[IN]], ptr [[IN_ADDR_ASCAST]], align 8 // AMDGCN-NEXT: store ptr addrspace(1) [[OUT]], ptr [[OUT_ADDR_ASCAST]], align 8 // AMDGCN-NEXT: [[TMP0:%.*]] = load ptr addrspace(1), ptr [[OUT_ADDR_ASCAST]], align 8 @@ -80,10 +79,10 @@ Mat4X4 __attribute__((noinline)) foo(Mat3X3 in) { // AMDGCN-NEXT: [[TMP2:%.*]] = getelementptr inbounds nuw [[STRUCT_MAT3X3]], ptr addrspace(1) [[ARRAYIDX1]], i32 0, i32 0 // AMDGCN-NEXT: [[TMP3:%.*]] = load [9 x i32], ptr addrspace(1) [[TMP2]], align 4 // AMDGCN-NEXT: [[CALL:%.*]] = call [[STRUCT_MAT4X4]] @[[FOO:[a-zA-Z0-9_$\"\\.-]*[a-zA-Z_$\"\\.-][a-zA-Z0-9_$\"\\.-]*]]([9 x i32] [[TMP3]]) #[[ATTR3:[0-9]+]] -// AMDGCN-NEXT: [[TMP4:%.*]] = getelementptr inbounds nuw [[STRUCT_MAT4X4]], ptr [[TMP_ASCAST]], i32 0, i32 0 +// AMDGCN-NEXT: [[TMP4:%.*]] = getelementptr inbounds nuw [[STRUCT_MAT4X4]], ptr addrspace(5) [[TMP]], i32 0, i32 0 // AMDGCN-NEXT: [[TMP5:%.*]] = extractvalue [[STRUCT_MAT4X4]] [[CALL]], 0 -// AMDGCN-NEXT: store [16 x i32] [[TMP5]], ptr [[TMP4]], align 4 -// AMDGCN-NEXT: call void @llvm.memcpy.p1.p0.i64(ptr addrspace(1) align 4 [[ARRAYIDX]], ptr align 4 [[TMP_ASCAST]], i64 64, i1 false) +// AMDGCN-NEXT: store [16 x i32] [[TMP5]], 
ptr addrspace(5) [[TMP4]], align 4 +// AMDGCN-NEXT: call void @llvm.memcpy.p1.p5.i64(ptr addrspace(1) align 4 [[ARRAYIDX]], ptr addrspace(5) align 4 [[TMP]], i64 64, i1 false) // AMDGCN-NEXT: ret void // kernel void ker(global Mat3X3 *in, global Mat4X4 *out) { @@ -112,7 +111,6 @@ Mat64X64 __attribute__((noinline)) foo_large(Mat32X32 in) { // AMDGCN-NEXT: [[BYVAL_TEMP:%.*]] = alloca [[STRUCT_MAT32X32:%.*]], align 4, addrspace(5) // AMDGCN-NEXT: [[IN_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[IN_ADDR]] to ptr // AMDGCN-NEXT: [[OUT_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[OUT_ADDR]] to ptr -// AMDGCN-NEXT: [[TMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TMP]] to ptr // AMDGCN-NEXT: store ptr addrspace(1) [[IN]], ptr [[IN_ADDR_ASCAST]], align 8 // AMDGCN-NEXT: store ptr addrspace(1) [[OUT]], ptr [[OUT_ADDR_ASCAST]], align 8 // AMDGCN-NEXT: [[TMP0:%.*]] = load ptr addrspace(1), ptr [[OUT_ADDR_ASCAST]], align 8 @@ -121,7 +119,7 @@ Mat64X64 __attribute__((noinline)) foo_large(Mat32X32 in) { // AMDGCN-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds [[STRUCT_MAT32X32]], ptr addrspace(1) [[TMP1]], i64 1 // AMDGCN-NEXT: call void @llvm.memcpy.p5.p1.i64(ptr addrspace(5) align 4 [[BYVAL_TEMP]], ptr addrspace(1) align 4 [[ARRAYIDX1]], i64 4096, i1 false) // AMDGCN-NEXT: call void @foo_large(ptr addrspace(5) dead_on_unwind writable sret([[STRUCT_MAT64X64]]) align 4 [[TMP]], ptr addrspace(5) noundef byref([[STRUCT_MAT32X32]]) align 4 [[BYVAL_TEMP]]) #[[ATTR3]] -// AMDGCN-NEXT: call void @llvm.memcpy.p1.p0.i64(ptr addrspace(1) align 4 [[ARRAYIDX]], ptr align 4 [[TMP_ASCAST]], i64 16384, i1 false) +// AMDGCN-NEXT: call void @llvm.memcpy.p1.p5.i64(ptr addrspace(1) align 4 [[ARRAYIDX]], ptr addrspace(5) align 4 [[TMP]], i64 16384, i1 false) // AMDGCN-NEXT: ret void // kernel void ker_large(global Mat32X32 *in, global Mat64X64 *out) { >From c4bdeabaddd3b8fd6fbd269644ce6dfb8bd49739 Mon Sep 17 00:00:00 2001 From: Alex Voicu <alexandru.vo...@amd.com> Date: 
Fri, 15 Nov 2024 23:12:40 +0000 Subject: [PATCH 10/10] Fix formatting. --- clang/lib/CodeGen/CGCall.cpp | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/clang/lib/CodeGen/CGCall.cpp b/clang/lib/CodeGen/CGCall.cpp index 41105262b5f14d..40c41e0895d66f 100644 --- a/clang/lib/CodeGen/CGCall.cpp +++ b/clang/lib/CodeGen/CGCall.cpp @@ -5399,8 +5399,9 @@ RValue CodeGenFunction::EmitCall(const CGFunctionInfo &CallInfo, if (FirstIRArg < IRFuncTy->getNumParams() && V->getType() != IRFuncTy->getParamType(FirstIRArg)) { assert(V->getType()->isPointerTy() && "Only pointers can mismatch!"); - auto FormalAS = - CallInfo.arguments()[ArgNo].type.getQualifiers().getAddressSpace(); + auto FormalAS = CallInfo.arguments()[ArgNo] + .type.getQualifiers() + .getAddressSpace(); auto ActualAS = I->Ty.getAddressSpace(); V = getTargetHooks().performAddrSpaceCast( *this, V, ActualAS, FormalAS, IRFuncTy->getParamType(FirstIRArg)); _______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits