https://github.com/macurtis-amd updated 
https://github.com/llvm/llvm-project/pull/154380

>From d82e5d4fb72083f0f288a7be1fc371483b3b2e18 Mon Sep 17 00:00:00 2001
From: Matthew Curtis <macur...@amd.com>
Date: Mon, 18 Aug 2025 06:26:49 -0500
Subject: [PATCH 1/3] [clang][CodeGen] add addr space cast if needed when
 storing ptrs

---
 clang/lib/CodeGen/CGExpr.cpp               | 12 ++++++
 clang/test/CodeGenCXX/amdgcn-func-arg.cpp  |  7 ++--
 clang/test/CodeGenHIP/store-addr-space.hip | 47 ++++++++++++++++++++++
 3 files changed, 63 insertions(+), 3 deletions(-)
 create mode 100644 clang/test/CodeGenHIP/store-addr-space.hip

diff --git a/clang/lib/CodeGen/CGExpr.cpp b/clang/lib/CodeGen/CGExpr.cpp
index d229d81d6b934..c02e84eb753e9 100644
--- a/clang/lib/CodeGen/CGExpr.cpp
+++ b/clang/lib/CodeGen/CGExpr.cpp
@@ -2209,6 +2209,18 @@ void CodeGenFunction::EmitStoreOfScalar(llvm::Value *Value, Address Addr,
     }
   }
 
+  // When storing a pointer, perform address space cast if needed.
+  if (auto *ValueTy = dyn_cast<llvm::PointerType>(Value->getType())) {
+    if (auto *MemTy = dyn_cast<llvm::PointerType>(Addr.getElementType())) {
+      LangAS ValueAS = getLangASFromTargetAS(ValueTy->getAddressSpace());
+      LangAS MemAS = getLangASFromTargetAS(MemTy->getAddressSpace());
+      if (ValueAS != MemAS) {
+        Value =
+            getTargetHooks().performAddrSpaceCast(*this, Value, ValueAS, MemTy);
+      }
+    }
+  }
+
   Value = EmitToMemory(Value, Ty);
 
   LValue AtomicLValue =
diff --git a/clang/test/CodeGenCXX/amdgcn-func-arg.cpp b/clang/test/CodeGenCXX/amdgcn-func-arg.cpp
index a5f83dc91b038..21945bfc36677 100644
--- a/clang/test/CodeGenCXX/amdgcn-func-arg.cpp
+++ b/clang/test/CodeGenCXX/amdgcn-func-arg.cpp
@@ -24,9 +24,10 @@ void func_with_ref_arg(B &b);
 // CHECK-NEXT:    [[P:%.*]] = alloca ptr, align 8, addrspace(5)
 // CHECK-NEXT:    [[A_INDIRECT_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[A_INDIRECT_ADDR]] to ptr
 // CHECK-NEXT:    [[P_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[P]] to ptr
-// CHECK-NEXT:    store ptr addrspace(5) [[A:%.*]], ptr [[A_INDIRECT_ADDR_ASCAST]], align 8
-// CHECK-NEXT:    [[A_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[A]] to ptr
-// CHECK-NEXT:    store ptr [[A_ASCAST]], ptr [[P_ASCAST]], align 8
+// CHECK-NEXT:    [[A_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[A:%.*]] to ptr
+// CHECK-NEXT:    store ptr [[A_ASCAST]], ptr [[A_INDIRECT_ADDR_ASCAST]], align 8
+// CHECK-NEXT:    [[A_ASCAST1:%.*]] = addrspacecast ptr addrspace(5) [[A]] to ptr
+// CHECK-NEXT:    store ptr [[A_ASCAST1]], ptr [[P_ASCAST]], align 8
 // CHECK-NEXT:    ret void
 //
 void func_with_indirect_arg(A a) {
diff --git a/clang/test/CodeGenHIP/store-addr-space.hip b/clang/test/CodeGenHIP/store-addr-space.hip
new file mode 100644
index 0000000000000..46ab1157d0704
--- /dev/null
+++ b/clang/test/CodeGenHIP/store-addr-space.hip
@@ -0,0 +1,47 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --functions "bar" --version 5
+// REQUIRES: amdgpu-registered-target
+// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -x hip -emit-llvm -fcuda-is-device \
+// RUN:   -o - %s | FileCheck --check-prefix=AMDGCN --enable-var-scope %s
+
+struct Foo {
+  unsigned long long val;
+//
+  __attribute__((device)) inline Foo() { val = 0; }
+  __attribute__((device)) inline Foo(const Foo &src) { val = src.val; }
+  __attribute__((device)) inline Foo(const volatile Foo &src) { val = src.val; }
+};
+
+// AMDGCN-LABEL: define dso_local void @_Z3barPK3Foo(
+// AMDGCN-SAME: ptr addrspace(5) dead_on_unwind noalias writable sret([[STRUCT_FOO:%.*]]) align 8 [[AGG_RESULT:%.*]], ptr noundef [[SRC_PTR:%.*]]) #[[ATTR0:[0-9]+]] {
+// AMDGCN-NEXT:  [[ENTRY:.*:]]
+// AMDGCN-NEXT:    [[RESULT_PTR:%.*]] = alloca ptr addrspace(5), align 4, addrspace(5)
+// AMDGCN-NEXT:    [[SRC_PTR_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// AMDGCN-NEXT:    [[DST:%.*]] = alloca [[UNION_ANON:%.*]], align 8, addrspace(5)
+// AMDGCN-NEXT:    [[RESULT_PTR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RESULT_PTR]] to ptr
+// AMDGCN-NEXT:    [[SRC_PTR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[SRC_PTR_ADDR]] to ptr
+// AMDGCN-NEXT:    [[DST_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DST]] to ptr
+// AMDGCN-NEXT:    store ptr addrspace(5) [[AGG_RESULT]], ptr [[RESULT_PTR_ASCAST]], align 4
+// AMDGCN-NEXT:    store ptr [[SRC_PTR]], ptr [[SRC_PTR_ADDR_ASCAST]], align 8
+// AMDGCN-NEXT:    [[AGG_RESULT_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[AGG_RESULT]] to ptr
+// AMDGCN-NEXT:    call void @_ZN3FooC1Ev(ptr noundef nonnull align 8 dereferenceable(8) [[AGG_RESULT_ASCAST]]) #[[ATTR1:[0-9]+]]
+// AMDGCN-NEXT:    [[AGG_RESULT_ASCAST1:%.*]] = addrspacecast ptr addrspace(5) [[AGG_RESULT]] to ptr
+// AMDGCN-NEXT:    store ptr [[AGG_RESULT_ASCAST1]], ptr [[DST_ASCAST]], align 8
+// AMDGCN-NEXT:    [[TMP0:%.*]] = load ptr, ptr [[SRC_PTR_ADDR_ASCAST]], align 8
+// AMDGCN-NEXT:    [[VAL:%.*]] = getelementptr inbounds nuw [[STRUCT_FOO]], ptr [[TMP0]], i32 0, i32 0
+// AMDGCN-NEXT:    [[TMP1:%.*]] = load i64, ptr [[VAL]], align 8
+// AMDGCN-NEXT:    [[TMP2:%.*]] = load ptr, ptr [[DST_ASCAST]], align 8
+// AMDGCN-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i64, ptr [[TMP2]], i64 0
+// AMDGCN-NEXT:    store i64 [[TMP1]], ptr [[ARRAYIDX]], align 8
+// AMDGCN-NEXT:    ret void
+//
+__attribute__((device)) Foo bar(const Foo *const src_ptr) {
+  Foo result;
+
+  union {
+    Foo* const ptr;
+    unsigned long long * const ptr64;
+  } dst = {&result};
+
+  dst.ptr64[0] = src_ptr->val;
+  return result;
+}

>From e05801316e555d401e9f86c3fa78cdd531f6354d Mon Sep 17 00:00:00 2001
From: Matthew Curtis <macur...@amd.com>
Date: Wed, 20 Aug 2025 13:49:51 -0500
Subject: [PATCH 2/3] fixup! [clang][CodeGen] add addr space cast if needed
 when storing ptrs

---
 clang/lib/CodeGen/CGDecl.cpp                  |  3 +-
 clang/lib/CodeGen/CGExpr.cpp                  | 35 ++++++++-----------
 clang/lib/CodeGen/CodeGenFunction.h           |  7 ++++
 clang/test/CodeGenCXX/amdgcn-func-arg.cpp     |  7 ++--
 .../sret_cast_with_nonzero_alloca_as.cpp      |  7 ++--
 clang/test/CodeGenHIP/store-addr-space.hip    |  5 ++-
 .../CodeGenOpenCL/addr-space-struct-arg.cl    |  1 +
 .../amdgpu-abi-struct-arg-byref.cl            |  1 +
 8 files changed, 33 insertions(+), 33 deletions(-)

diff --git a/clang/lib/CodeGen/CGDecl.cpp b/clang/lib/CodeGen/CGDecl.cpp
index 9df1220c78623..8e8fa416ecbba 100644
--- a/clang/lib/CodeGen/CGDecl.cpp
+++ b/clang/lib/CodeGen/CGDecl.cpp
@@ -1563,11 +1563,10 @@ CodeGenFunction::EmitAutoVarAlloca(const VarDecl &D) {
       // The named return value optimization: allocate this variable in the
       // return slot, so that we can elide the copy when returning this
       // variable (C++0x [class.copy]p34).
-      address = ReturnValue;
       AllocaAddr =
           RawAddress(ReturnValue.emitRawPointer(*this),
                      ReturnValue.getElementType(), ReturnValue.getAlignment());
-      ;
+      address = MaybeCastAllocaAddressSpace(AllocaAddr, Ty.getAddressSpace());
 
       if (const RecordType *RecordTy = Ty->getAs<RecordType>()) {
         const auto *RD = RecordTy->getOriginalDecl()->getDefinitionOrSelf();
diff --git a/clang/lib/CodeGen/CGExpr.cpp b/clang/lib/CodeGen/CGExpr.cpp
index c02e84eb753e9..079129dfe495f 100644
--- a/clang/lib/CodeGen/CGExpr.cpp
+++ b/clang/lib/CodeGen/CGExpr.cpp
@@ -108,13 +108,9 @@ CodeGenFunction::CreateTempAllocaWithoutCast(llvm::Type *Ty, CharUnits Align,
   return RawAddress(Alloca, Ty, Align, KnownNonNull);
 }
 
-RawAddress CodeGenFunction::CreateTempAlloca(llvm::Type *Ty, LangAS DestLangAS,
-                                             CharUnits Align, const Twine &Name,
-                                             llvm::Value *ArraySize,
-                                             RawAddress *AllocaAddr) {
-  RawAddress Alloca = CreateTempAllocaWithoutCast(Ty, Align, Name, ArraySize);
-  if (AllocaAddr)
-    *AllocaAddr = Alloca;
+RawAddress CodeGenFunction::MaybeCastAllocaAddressSpace(
+    RawAddress Alloca, LangAS DestLangAS, llvm::Value *ArraySize) {
+
   llvm::Value *V = Alloca.getPointer();
   // Alloca always returns a pointer in alloca address space, which may
   // be different from the type defined by the language. For example,
@@ -134,7 +130,18 @@ RawAddress CodeGenFunction::CreateTempAlloca(llvm::Type *Ty, LangAS DestLangAS,
         /*IsNonNull=*/true);
   }
 
-  return RawAddress(V, Ty, Align, KnownNonNull);
+  return RawAddress(V, Alloca.getElementType(), Alloca.getAlignment(),
+                    KnownNonNull);
+}
+
+RawAddress CodeGenFunction::CreateTempAlloca(llvm::Type *Ty, LangAS DestLangAS,
+                                             CharUnits Align, const Twine &Name,
+                                             llvm::Value *ArraySize,
+                                             RawAddress *AllocaAddr) {
+  RawAddress Alloca = CreateTempAllocaWithoutCast(Ty, Align, Name, ArraySize);
+  if (AllocaAddr)
+    *AllocaAddr = Alloca;
+  return MaybeCastAllocaAddressSpace(Alloca, DestLangAS, ArraySize);
 }
 
 /// CreateTempAlloca - This creates an alloca and inserts it into the entry
@@ -2209,18 +2216,6 @@ void CodeGenFunction::EmitStoreOfScalar(llvm::Value *Value, Address Addr,
     }
   }
 
-  // When storing a pointer, perform address space cast if needed.
-  if (auto *ValueTy = dyn_cast<llvm::PointerType>(Value->getType())) {
-    if (auto *MemTy = dyn_cast<llvm::PointerType>(Addr.getElementType())) {
-      LangAS ValueAS = getLangASFromTargetAS(ValueTy->getAddressSpace());
-      LangAS MemAS = getLangASFromTargetAS(MemTy->getAddressSpace());
-      if (ValueAS != MemAS) {
-        Value =
-            getTargetHooks().performAddrSpaceCast(*this, Value, ValueAS, MemTy);
-      }
-    }
-  }
-
   Value = EmitToMemory(Value, Ty);
 
   LValue AtomicLValue =
diff --git a/clang/lib/CodeGen/CodeGenFunction.h b/clang/lib/CodeGen/CodeGenFunction.h
index ad318f289ee83..7ac21025917ea 100644
--- a/clang/lib/CodeGen/CodeGenFunction.h
+++ b/clang/lib/CodeGen/CodeGenFunction.h
@@ -2804,6 +2804,13 @@ class CodeGenFunction : public CodeGenTypeCache {
     AllocaTracker Tracker;
   };
 
+private:
+  /// If \p Alloca is not in the same address space as \p DestLangAS, insert an
+  /// address space cast and return a new RawAddress based on this value.
+  RawAddress MaybeCastAllocaAddressSpace(RawAddress Alloca, LangAS DestLangAS,
+                                         llvm::Value *ArraySize = nullptr);
+
+public:
   /// CreateTempAlloca - This creates an alloca and inserts it into the entry
   /// block if \p ArraySize is nullptr, otherwise inserts it at the current
   /// insertion point of the builder. The caller is responsible for setting an
diff --git a/clang/test/CodeGenCXX/amdgcn-func-arg.cpp b/clang/test/CodeGenCXX/amdgcn-func-arg.cpp
index 21945bfc36677..a5f83dc91b038 100644
--- a/clang/test/CodeGenCXX/amdgcn-func-arg.cpp
+++ b/clang/test/CodeGenCXX/amdgcn-func-arg.cpp
@@ -24,10 +24,9 @@ void func_with_ref_arg(B &b);
 // CHECK-NEXT:    [[P:%.*]] = alloca ptr, align 8, addrspace(5)
 // CHECK-NEXT:    [[A_INDIRECT_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[A_INDIRECT_ADDR]] to ptr
 // CHECK-NEXT:    [[P_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[P]] to ptr
-// CHECK-NEXT:    [[A_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[A:%.*]] to ptr
-// CHECK-NEXT:    store ptr [[A_ASCAST]], ptr [[A_INDIRECT_ADDR_ASCAST]], align 8
-// CHECK-NEXT:    [[A_ASCAST1:%.*]] = addrspacecast ptr addrspace(5) [[A]] to ptr
-// CHECK-NEXT:    store ptr [[A_ASCAST1]], ptr [[P_ASCAST]], align 8
+// CHECK-NEXT:    store ptr addrspace(5) [[A:%.*]], ptr [[A_INDIRECT_ADDR_ASCAST]], align 8
+// CHECK-NEXT:    [[A_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[A]] to ptr
+// CHECK-NEXT:    store ptr [[A_ASCAST]], ptr [[P_ASCAST]], align 8
 // CHECK-NEXT:    ret void
 //
 void func_with_indirect_arg(A a) {
diff --git a/clang/test/CodeGenCXX/sret_cast_with_nonzero_alloca_as.cpp b/clang/test/CodeGenCXX/sret_cast_with_nonzero_alloca_as.cpp
index 320c712b665de..a0ee54dc16ba3 100644
--- a/clang/test/CodeGenCXX/sret_cast_with_nonzero_alloca_as.cpp
+++ b/clang/test/CodeGenCXX/sret_cast_with_nonzero_alloca_as.cpp
@@ -10,16 +10,15 @@ struct X { int z[17]; };
 // CHECK-NEXT:    [[Y_ADDR:%.*]] = alloca i8, align 1, addrspace(5)
 // CHECK-NEXT:    [[X_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[X_ADDR]] to ptr
 // CHECK-NEXT:    [[Y_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[Y_ADDR]] to ptr
+// CHECK-NEXT:    [[AGG_RESULT_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[AGG_RESULT]] to ptr
 // CHECK-NEXT:    store i8 [[X]], ptr [[X_ADDR_ASCAST]], align 1
 // CHECK-NEXT:    store i8 [[Y]], ptr [[Y_ADDR_ASCAST]], align 1
 // CHECK-NEXT:    [[TMP0:%.*]] = load i8, ptr [[X_ADDR_ASCAST]], align 1
-// CHECK-NEXT:    [[AGG_RESULT_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[AGG_RESULT]] to ptr
 // CHECK-NEXT:    [[ADD_PTR:%.*]] = getelementptr inbounds i8, ptr [[AGG_RESULT_ASCAST]], i64 1
 // CHECK-NEXT:    store i8 [[TMP0]], ptr [[ADD_PTR]], align 1
 // CHECK-NEXT:    [[TMP1:%.*]] = load i8, ptr [[Y_ADDR_ASCAST]], align 1
-// CHECK-NEXT:    [[AGG_RESULT_ASCAST1:%.*]] = addrspacecast ptr addrspace(5) [[AGG_RESULT]] to ptr
-// CHECK-NEXT:    [[ADD_PTR2:%.*]] = getelementptr inbounds i8, ptr [[AGG_RESULT_ASCAST1]], i64 2
-// CHECK-NEXT:    store i8 [[TMP1]], ptr [[ADD_PTR2]], align 1
+// CHECK-NEXT:    [[ADD_PTR1:%.*]] = getelementptr inbounds i8, ptr [[AGG_RESULT_ASCAST]], i64 2
+// CHECK-NEXT:    store i8 [[TMP1]], ptr [[ADD_PTR1]], align 1
 // CHECK-NEXT:    ret void
 //
 X foo(char x, char y) {
diff --git a/clang/test/CodeGenHIP/store-addr-space.hip b/clang/test/CodeGenHIP/store-addr-space.hip
index 46ab1157d0704..6103edba46274 100644
--- a/clang/test/CodeGenHIP/store-addr-space.hip
+++ b/clang/test/CodeGenHIP/store-addr-space.hip
@@ -19,13 +19,12 @@ struct Foo {
 // AMDGCN-NEXT:    [[DST:%.*]] = alloca [[UNION_ANON:%.*]], align 8, addrspace(5)
 // AMDGCN-NEXT:    [[RESULT_PTR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RESULT_PTR]] to ptr
 // AMDGCN-NEXT:    [[SRC_PTR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[SRC_PTR_ADDR]] to ptr
+// AMDGCN-NEXT:    [[AGG_RESULT_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[AGG_RESULT]] to ptr
 // AMDGCN-NEXT:    [[DST_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DST]] to ptr
 // AMDGCN-NEXT:    store ptr addrspace(5) [[AGG_RESULT]], ptr [[RESULT_PTR_ASCAST]], align 4
 // AMDGCN-NEXT:    store ptr [[SRC_PTR]], ptr [[SRC_PTR_ADDR_ASCAST]], align 8
-// AMDGCN-NEXT:    [[AGG_RESULT_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[AGG_RESULT]] to ptr
 // AMDGCN-NEXT:    call void @_ZN3FooC1Ev(ptr noundef nonnull align 8 dereferenceable(8) [[AGG_RESULT_ASCAST]]) #[[ATTR1:[0-9]+]]
-// AMDGCN-NEXT:    [[AGG_RESULT_ASCAST1:%.*]] = addrspacecast ptr addrspace(5) [[AGG_RESULT]] to ptr
-// AMDGCN-NEXT:    store ptr [[AGG_RESULT_ASCAST1]], ptr [[DST_ASCAST]], align 8
+// AMDGCN-NEXT:    store ptr [[AGG_RESULT_ASCAST]], ptr [[DST_ASCAST]], align 8
 // AMDGCN-NEXT:    [[TMP0:%.*]] = load ptr, ptr [[SRC_PTR_ADDR_ASCAST]], align 8
 // AMDGCN-NEXT:    [[VAL:%.*]] = getelementptr inbounds nuw [[STRUCT_FOO]], ptr [[TMP0]], i32 0, i32 0
 // AMDGCN-NEXT:    [[TMP1:%.*]] = load i64, ptr [[VAL]], align 8
diff --git a/clang/test/CodeGenOpenCL/addr-space-struct-arg.cl b/clang/test/CodeGenOpenCL/addr-space-struct-arg.cl
index a70e9af75fa38..85157bdcf43f9 100644
--- a/clang/test/CodeGenOpenCL/addr-space-struct-arg.cl
+++ b/clang/test/CodeGenOpenCL/addr-space-struct-arg.cl
@@ -647,6 +647,7 @@ kernel void KernelLargeTwoMember(struct LargeStructTwoMember u) {
 // AMDGCN20-NEXT:    [[IN:%.*]] = alloca [[STRUCT_MAT3X3:%.*]], align 4, addrspace(5)
 // AMDGCN20-NEXT:    [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
 // AMDGCN20-NEXT:    [[IN1:%.*]] = addrspacecast ptr addrspace(5) [[IN]] to ptr
+// AMDGCN20-NEXT:    [[RETVAL_ASCAST_ASCAST:%.*]] = addrspacecast ptr [[RETVAL_ASCAST]] to ptr addrspace(5)
 // AMDGCN20-NEXT:    [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw [[STRUCT_MAT3X3]], ptr [[IN1]], i32 0, i32 0
 // AMDGCN20-NEXT:    store [9 x i32] [[IN_COERCE]], ptr [[COERCE_DIVE]], align 4
 // AMDGCN20-NEXT:    [[TMP0:%.*]] = load [[STRUCT_MAT4X4]], ptr [[RETVAL_ASCAST]], align 4
diff --git a/clang/test/CodeGenOpenCL/amdgpu-abi-struct-arg-byref.cl b/clang/test/CodeGenOpenCL/amdgpu-abi-struct-arg-byref.cl
index a1a114ef129a1..bc65788c17352 100644
--- a/clang/test/CodeGenOpenCL/amdgpu-abi-struct-arg-byref.cl
+++ b/clang/test/CodeGenOpenCL/amdgpu-abi-struct-arg-byref.cl
@@ -121,6 +121,7 @@ kernel void KernelLargeTwoMember(struct LargeStructTwoMember u) {
 // AMDGCN-NEXT:    [[IN:%.*]] = alloca [[STRUCT_MAT3X3:%.*]], align 4, addrspace(5)
 // AMDGCN-NEXT:    [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
 // AMDGCN-NEXT:    [[IN1:%.*]] = addrspacecast ptr addrspace(5) [[IN]] to ptr
+// AMDGCN-NEXT:    [[RETVAL_ASCAST_ASCAST:%.*]] = addrspacecast ptr [[RETVAL_ASCAST]] to ptr addrspace(5)
 // AMDGCN-NEXT:    [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw [[STRUCT_MAT3X3]], ptr [[IN1]], i32 0, i32 0
 // AMDGCN-NEXT:    store [9 x i32] [[IN_COERCE]], ptr [[COERCE_DIVE]], align 4
 // AMDGCN-NEXT:    [[TMP0:%.*]] = load [[STRUCT_MAT4X4]], ptr [[RETVAL_ASCAST]], align 4

>From 35c1f1c8a65c00a030ae38e7e58cefae3063d3e3 Mon Sep 17 00:00:00 2001
From: Matthew Curtis <macur...@amd.com>
Date: Thu, 21 Aug 2025 04:11:25 -0500
Subject: [PATCH 3/3] fixup! [clang][CodeGen] add addr space cast if needed
 when storing ptrs

---
 clang/lib/CodeGen/CGDecl.cpp        | 2 +-
 clang/lib/CodeGen/CGExpr.cpp        | 7 ++++---
 clang/lib/CodeGen/CodeGenFunction.h | 4 ++--
 3 files changed, 7 insertions(+), 6 deletions(-)

diff --git a/clang/lib/CodeGen/CGDecl.cpp b/clang/lib/CodeGen/CGDecl.cpp
index 8e8fa416ecbba..8a1675848e13c 100644
--- a/clang/lib/CodeGen/CGDecl.cpp
+++ b/clang/lib/CodeGen/CGDecl.cpp
@@ -1566,7 +1566,7 @@ CodeGenFunction::EmitAutoVarAlloca(const VarDecl &D) {
       AllocaAddr =
           RawAddress(ReturnValue.emitRawPointer(*this),
                      ReturnValue.getElementType(), ReturnValue.getAlignment());
-      address = MaybeCastAllocaAddressSpace(AllocaAddr, Ty.getAddressSpace());
+      address = MaybeCastStackAddressSpace(AllocaAddr, Ty.getAddressSpace());
 
       if (const RecordType *RecordTy = Ty->getAs<RecordType>()) {
         const auto *RD = RecordTy->getOriginalDecl()->getDefinitionOrSelf();
diff --git a/clang/lib/CodeGen/CGExpr.cpp b/clang/lib/CodeGen/CGExpr.cpp
index 079129dfe495f..2329fa20a2530 100644
--- a/clang/lib/CodeGen/CGExpr.cpp
+++ b/clang/lib/CodeGen/CGExpr.cpp
@@ -108,8 +108,9 @@ CodeGenFunction::CreateTempAllocaWithoutCast(llvm::Type *Ty, CharUnits Align,
   return RawAddress(Alloca, Ty, Align, KnownNonNull);
 }
 
-RawAddress CodeGenFunction::MaybeCastAllocaAddressSpace(
-    RawAddress Alloca, LangAS DestLangAS, llvm::Value *ArraySize) {
+RawAddress CodeGenFunction::MaybeCastStackAddressSpace(RawAddress Alloca,
+                                                       LangAS DestLangAS,
+                                                       llvm::Value *ArraySize) {
 
   llvm::Value *V = Alloca.getPointer();
   // Alloca always returns a pointer in alloca address space, which may
@@ -141,7 +142,7 @@ RawAddress CodeGenFunction::CreateTempAlloca(llvm::Type *Ty, LangAS DestLangAS,
   RawAddress Alloca = CreateTempAllocaWithoutCast(Ty, Align, Name, ArraySize);
   if (AllocaAddr)
     *AllocaAddr = Alloca;
-  return MaybeCastAllocaAddressSpace(Alloca, DestLangAS, ArraySize);
+  return MaybeCastStackAddressSpace(Alloca, DestLangAS, ArraySize);
 }
 
 /// CreateTempAlloca - This creates an alloca and inserts it into the entry
diff --git a/clang/lib/CodeGen/CodeGenFunction.h b/clang/lib/CodeGen/CodeGenFunction.h
index 7ac21025917ea..fc65199a0f154 100644
--- a/clang/lib/CodeGen/CodeGenFunction.h
+++ b/clang/lib/CodeGen/CodeGenFunction.h
@@ -2807,8 +2807,8 @@ class CodeGenFunction : public CodeGenTypeCache {
 private:
   /// If \p Alloca is not in the same address space as \p DestLangAS, insert an
   /// address space cast and return a new RawAddress based on this value.
-  RawAddress MaybeCastAllocaAddressSpace(RawAddress Alloca, LangAS DestLangAS,
-                                         llvm::Value *ArraySize = nullptr);
+  RawAddress MaybeCastStackAddressSpace(RawAddress Alloca, LangAS DestLangAS,
+                                        llvm::Value *ArraySize = nullptr);
 
 public:
   /// CreateTempAlloca - This creates an alloca and inserts it into the entry

_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to