https://github.com/AlexVlx updated 
https://github.com/llvm/llvm-project/pull/128166

>From 77db8a15d472e112be93cb7566cf56e26dc80501 Mon Sep 17 00:00:00 2001
From: Alex Voicu <alexandru.vo...@amd.com>
Date: Fri, 21 Feb 2025 13:47:42 +0200
Subject: [PATCH 1/3] Handle HLL casts on sret args, remove ill advised cast
 strip and replace it with later AS cast.

---
 clang/lib/CodeGen/CGCall.cpp                  | 18 +++++++--
 clang/lib/CodeGen/CGExprAgg.cpp               | 11 +-----
 clang/lib/CodeGen/CGExprScalar.cpp            | 17 +++++++++
 .../sret_cast_with_nonzero_alloca_as.cpp      | 32 ++++++++++++++++
 clang/test/OpenMP/amdgcn_sret_ctor.cpp        | 38 +++++++++++++++++++
 5 files changed, 104 insertions(+), 12 deletions(-)
 create mode 100644 clang/test/CodeGenCXX/sret_cast_with_nonzero_alloca_as.cpp
 create mode 100644 clang/test/OpenMP/amdgcn_sret_ctor.cpp

diff --git a/clang/lib/CodeGen/CGCall.cpp b/clang/lib/CodeGen/CGCall.cpp
index 47bfd470dbafb..b5a4435e75ea0 100644
--- a/clang/lib/CodeGen/CGCall.cpp
+++ b/clang/lib/CodeGen/CGCall.cpp
@@ -5163,6 +5163,20 @@ RValue CodeGenFunction::EmitCall(const CGFunctionInfo 
&CallInfo,
       }
     }
     if (IRFunctionArgs.hasSRetArg()) {
+      // A mismatch between the allocated return value's AS and the target's
+      // chosen IndirectAS can happen e.g. when passing the this pointer 
through
+      // a chain involving stores to / loads from the DefaultAS; we address 
this
+      // here, symmetrically with the handling we have for normal pointer args.
+      if (SRetPtr.getAddressSpace() != RetAI.getIndirectAddrSpace())
+        SRetPtr = SRetPtr.withPointer(
+            getTargetHooks().performAddrSpaceCast(
+                *this, SRetPtr.getBasePointer(),
+                getLangASFromTargetAS(SRetPtr.getAddressSpace()),
+                getLangASFromTargetAS(RetAI.getIndirectAddrSpace()),
+                llvm::PointerType::get(getLLVMContext(),
+                                       RetAI.getIndirectAddrSpace()),
+                true),
+            SRetPtr.isKnownNonNull());
       IRCallArgs[IRFunctionArgs.getSRetArgNo()] =
           getAsNaturalPointerTo(SRetPtr, RetTy);
     } else if (RetAI.isInAlloca()) {
@@ -5394,9 +5408,7 @@ RValue CodeGenFunction::EmitCall(const CGFunctionInfo 
&CallInfo,
             V->getType()->isIntegerTy())
           V = Builder.CreateZExt(V, ArgInfo.getCoerceToType());
 
-        // The only plausible mismatch here would be for pointer address 
spaces,
-        // which can happen e.g. when passing a sret arg that is in the 
AllocaAS
-        // to a function that takes a pointer to and argument in the DefaultAS.
+        // The only plausible mismatch here would be for pointer address 
spaces.
         // We assume that the target has a reasonable mapping for the DefaultAS
         // (it can be casted to from incoming specific ASes), and insert an AS
         // cast to address the mismatch.
diff --git a/clang/lib/CodeGen/CGExprAgg.cpp b/clang/lib/CodeGen/CGExprAgg.cpp
index 625ca363d9019..200fb7fd756fd 100644
--- a/clang/lib/CodeGen/CGExprAgg.cpp
+++ b/clang/lib/CodeGen/CGExprAgg.cpp
@@ -302,15 +302,8 @@ void AggExprEmitter::withReturnValueSlot(
   llvm::Value *LifetimeSizePtr = nullptr;
   llvm::IntrinsicInst *LifetimeStartInst = nullptr;
   if (!UseTemp) {
-    // It is possible for the existing slot we are using directly to have been
-    // allocated in the correct AS for an indirect return, and then cast to
-    // the default AS (this is the behaviour of CreateMemTemp), however we know
-    // that the return address is expected to point to the uncasted AS, hence 
we
-    // strip possible pointer casts here.
-    if (Dest.getAddress().isValid())
-      RetAddr = Dest.getAddress().withPointer(
-          Dest.getAddress().getBasePointer()->stripPointerCasts(),
-          Dest.getAddress().isKnownNonNull());
+    RetAddr = Dest.getAddress();
+    RawAddress RetAllocaAddr = RawAddress::invalid();
   } else {
     RetAddr = CGF.CreateMemTempWithoutCast(RetTy, "tmp");
     llvm::TypeSize Size =
diff --git a/clang/lib/CodeGen/CGExprScalar.cpp 
b/clang/lib/CodeGen/CGExprScalar.cpp
index 5ee8a1bfa8175..7f8d438e36d6a 100644
--- a/clang/lib/CodeGen/CGExprScalar.cpp
+++ b/clang/lib/CodeGen/CGExprScalar.cpp
@@ -30,6 +30,7 @@
 #include "clang/Basic/CodeGenOptions.h"
 #include "clang/Basic/TargetInfo.h"
 #include "llvm/ADT/APFixedPoint.h"
+#include "llvm/IR/Argument.h"
 #include "llvm/IR/CFG.h"
 #include "llvm/IR/Constants.h"
 #include "llvm/IR/DataLayout.h"
@@ -2352,6 +2353,22 @@ Value *ScalarExprEmitter::VisitCastExpr(CastExpr *CE) {
     Value *Src = Visit(const_cast<Expr*>(E));
     llvm::Type *SrcTy = Src->getType();
     llvm::Type *DstTy = ConvertType(DestTy);
+
+    // FIXME: this is a gross but seemingly necessary workaround for an issue
+    // manifesting when a target uses a non-default AS for indirect sret args,
+    // but the source HLL is generic, wherein a valid C-cast or 
reinterpret_cast
+    // on the address of a local struct that gets returned by value yields an
+    // invalid bitcast from the a pointer to the IndirectAS to a pointer to the
+    // DefaultAS. We can only do this subversive thing because sret args are
+    // manufactured and them residing in the IndirectAS is a target specific
+    // detail, and doing an AS cast here still retains the semantics the user
+    // expects. It is desirable to remove this iff a better solution is found.
+    if (SrcTy != DstTy && isa<llvm::Argument>(Src) &&
+        cast<llvm::Argument>(Src)->hasStructRetAttr())
+      return CGF.CGM.getTargetCodeGenInfo().performAddrSpaceCast(
+          CGF, Src, E->getType().getAddressSpace(), DestTy.getAddressSpace(),
+          DstTy);
+
     assert(
         (!SrcTy->isPtrOrPtrVectorTy() || !DstTy->isPtrOrPtrVectorTy() ||
          SrcTy->getPointerAddressSpace() == DstTy->getPointerAddressSpace()) &&
diff --git a/clang/test/CodeGenCXX/sret_cast_with_nonzero_alloca_as.cpp 
b/clang/test/CodeGenCXX/sret_cast_with_nonzero_alloca_as.cpp
new file mode 100644
index 0000000000000..320c712b665de
--- /dev/null
+++ b/clang/test/CodeGenCXX/sret_cast_with_nonzero_alloca_as.cpp
@@ -0,0 +1,32 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py 
UTC_ARGS: --version 5
+// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa-gnu -target-cpu gfx900 -emit-llvm 
-o - %s | FileCheck %s
+
+struct X { int z[17]; };
+
+// CHECK-LABEL: define dso_local void @_Z3foocc(
+// CHECK-SAME: ptr addrspace(5) dead_on_unwind noalias writable 
sret([[STRUCT_X:%.*]]) align 4 [[AGG_RESULT:%.*]], i8 noundef signext 
[[X:%.*]], i8 noundef signext [[Y:%.*]]) #[[ATTR0:[0-9]+]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[X_ADDR:%.*]] = alloca i8, align 1, addrspace(5)
+// CHECK-NEXT:    [[Y_ADDR:%.*]] = alloca i8, align 1, addrspace(5)
+// CHECK-NEXT:    [[X_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) 
[[X_ADDR]] to ptr
+// CHECK-NEXT:    [[Y_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) 
[[Y_ADDR]] to ptr
+// CHECK-NEXT:    store i8 [[X]], ptr [[X_ADDR_ASCAST]], align 1
+// CHECK-NEXT:    store i8 [[Y]], ptr [[Y_ADDR_ASCAST]], align 1
+// CHECK-NEXT:    [[TMP0:%.*]] = load i8, ptr [[X_ADDR_ASCAST]], align 1
+// CHECK-NEXT:    [[AGG_RESULT_ASCAST:%.*]] = addrspacecast ptr addrspace(5) 
[[AGG_RESULT]] to ptr
+// CHECK-NEXT:    [[ADD_PTR:%.*]] = getelementptr inbounds i8, ptr 
[[AGG_RESULT_ASCAST]], i64 1
+// CHECK-NEXT:    store i8 [[TMP0]], ptr [[ADD_PTR]], align 1
+// CHECK-NEXT:    [[TMP1:%.*]] = load i8, ptr [[Y_ADDR_ASCAST]], align 1
+// CHECK-NEXT:    [[AGG_RESULT_ASCAST1:%.*]] = addrspacecast ptr addrspace(5) 
[[AGG_RESULT]] to ptr
+// CHECK-NEXT:    [[ADD_PTR2:%.*]] = getelementptr inbounds i8, ptr 
[[AGG_RESULT_ASCAST1]], i64 2
+// CHECK-NEXT:    store i8 [[TMP1]], ptr [[ADD_PTR2]], align 1
+// CHECK-NEXT:    ret void
+//
+X foo(char x, char y) {
+    X r;
+
+    *(((char*)&r) + 1) = x;
+    *(reinterpret_cast<char*>(&r) + 2) = y;
+
+    return r;
+}
diff --git a/clang/test/OpenMP/amdgcn_sret_ctor.cpp 
b/clang/test/OpenMP/amdgcn_sret_ctor.cpp
new file mode 100644
index 0000000000000..0eb4748571d56
--- /dev/null
+++ b/clang/test/OpenMP/amdgcn_sret_ctor.cpp
@@ -0,0 +1,38 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py 
UTC_ARGS: --include-generated-funcs --version 5
+// RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -triple amdgcn-amd-amdhsa 
-fopenmp-targets=amdgcn-amd-amdhsa -emit-llvm %s -fopenmp-is-target-device -o - 
| FileCheck %s
+
+#pragma omp begin declare target
+struct S {
+  ~S();
+};
+S s();
+struct E {
+  S foo;
+  E() noexcept;
+};
+E::E() noexcept : foo(s()) {}
+#pragma omp end declare target
+// CHECK-LABEL: define hidden void @_ZN1EC2Ev(
+// CHECK-SAME: ptr noundef nonnull align 1 dereferenceable(1) [[THIS:%.*]]) 
unnamed_addr #[[ATTR0:[0-9]+]] align 2 {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[THIS_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK-NEXT:    [[THIS_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) 
[[THIS_ADDR]] to ptr
+// CHECK-NEXT:    store ptr [[THIS]], ptr [[THIS_ADDR_ASCAST]], align 8
+// CHECK-NEXT:    [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR_ASCAST]], align 8
+// CHECK-NEXT:    [[THIS1_ASCAST:%.*]] = addrspacecast ptr [[THIS1]] to ptr 
addrspace(5)
+// CHECK-NEXT:    call void @_Z1sv(ptr addrspace(5) dead_on_unwind writable 
sret([[STRUCT_S:%.*]]) align 1 [[THIS1_ASCAST]]) #[[ATTR2:[0-9]+]]
+// CHECK-NEXT:    ret void
+//
+// CHECK-LABEL: declare void @_Z1sv(
+// CHECK-SAME: ptr addrspace(5) dead_on_unwind writable sret([[STRUCT_S]]) 
align 1) #[[ATTR1:[0-9]+]]
+//
+// CHECK-LABEL: define hidden void @_ZN1EC1Ev(
+// CHECK-SAME: ptr noundef nonnull align 1 dereferenceable(1) [[THIS:%.*]]) 
unnamed_addr #[[ATTR0]] align 2 {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[THIS_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK-NEXT:    [[THIS_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) 
[[THIS_ADDR]] to ptr
+// CHECK-NEXT:    store ptr [[THIS]], ptr [[THIS_ADDR_ASCAST]], align 8
+// CHECK-NEXT:    [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR_ASCAST]], align 8
+// CHECK-NEXT:    call void @_ZN1EC2Ev(ptr noundef nonnull align 1 
dereferenceable(1) [[THIS1]]) #[[ATTR3:[0-9]+]]
+// CHECK-NEXT:    ret void
+//

>From 5b186a3f00b83631476e5c131ab73ec6a4345350 Mon Sep 17 00:00:00 2001
From: Alex Voicu <alexandru.vo...@amd.com>
Date: Fri, 21 Feb 2025 14:54:27 +0200
Subject: [PATCH 2/3] Fix test.

---
 clang/test/CodeGen/partial-reinitialization2.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/clang/test/CodeGen/partial-reinitialization2.c 
b/clang/test/CodeGen/partial-reinitialization2.c
index 7949a69555031..8d8e04f24541a 100644
--- a/clang/test/CodeGen/partial-reinitialization2.c
+++ b/clang/test/CodeGen/partial-reinitialization2.c
@@ -91,8 +91,8 @@ void test5(void)
 // CHECK-LABEL: test6
 void test6(void)
 {
-  // CHECK: [[VAR:%[a-z0-9]+]] = alloca
-  // CHECK: call {{.*}}get456789(ptr {{.*}}sret{{.*}} [[VAR]])
+  // CHECK: [[LP:%[a-z0-9]+]] = getelementptr{{.*}}%struct.LLP2P2, ptr{{.*}}, 
i32 0, i32 0
+  // CHECK: call {{.*}}get456789(ptr {{.*}}sret{{.*}} [[LP]])
 
   // CHECK: [[CALL:%[a-z0-9]+]] = call {{.*}}@get235()
   // CHECK: store{{.*}}[[CALL]], {{.*}}[[TMP0:%[a-z0-9.]+]]

>From 2dd8052e82ad721d059bddf7f356aba25f5fd408 Mon Sep 17 00:00:00 2001
From: Alex Voicu <alexandru.vo...@amd.com>
Date: Fri, 21 Feb 2025 14:59:19 +0200
Subject: [PATCH 3/3] Unwrap/expand.

---
 clang/lib/CodeGen/CGCall.cpp | 17 +++++++++--------
 1 file changed, 9 insertions(+), 8 deletions(-)

diff --git a/clang/lib/CodeGen/CGCall.cpp b/clang/lib/CodeGen/CGCall.cpp
index b5a4435e75ea0..88b1a8aebfa50 100644
--- a/clang/lib/CodeGen/CGCall.cpp
+++ b/clang/lib/CodeGen/CGCall.cpp
@@ -5167,16 +5167,17 @@ RValue CodeGenFunction::EmitCall(const CGFunctionInfo 
&CallInfo,
       // chosen IndirectAS can happen e.g. when passing the this pointer 
through
       // a chain involving stores to / loads from the DefaultAS; we address 
this
       // here, symmetrically with the handling we have for normal pointer args.
-      if (SRetPtr.getAddressSpace() != RetAI.getIndirectAddrSpace())
+      if (SRetPtr.getAddressSpace() != RetAI.getIndirectAddrSpace()) {
+        llvm::Value *V = SRetPtr.getBasePointer();
+        LangAS SAS = getLangASFromTargetAS(SRetPtr.getAddressSpace());
+        LangAS DAS = getLangASFromTargetAS(RetAI.getIndirectAddrSpace());
+        llvm::Type *Ty = llvm::PointerType::get(getLLVMContext(),
+                                                RetAI.getIndirectAddrSpace());
+
         SRetPtr = SRetPtr.withPointer(
-            getTargetHooks().performAddrSpaceCast(
-                *this, SRetPtr.getBasePointer(),
-                getLangASFromTargetAS(SRetPtr.getAddressSpace()),
-                getLangASFromTargetAS(RetAI.getIndirectAddrSpace()),
-                llvm::PointerType::get(getLLVMContext(),
-                                       RetAI.getIndirectAddrSpace()),
-                true),
+            getTargetHooks().performAddrSpaceCast(*this, V, SAS, DAS, Ty, 
true),
             SRetPtr.isKnownNonNull());
+      }
       IRCallArgs[IRFunctionArgs.getSRetArgNo()] =
           getAsNaturalPointerTo(SRetPtr, RetTy);
     } else if (RetAI.isInAlloca()) {

_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to