https://github.com/BStott6 updated 
https://github.com/llvm/llvm-project/pull/171173

>From 5f99cd51f08af43229c41b04f1865884b4e38d3a Mon Sep 17 00:00:00 2001
From: BStott <[email protected]>
Date: Fri, 5 Dec 2025 15:49:51 +0000
Subject: [PATCH] Introduce TBAA metadata for some structs returned via direct
 coerced store

---
 clang/lib/CodeGen/CGCall.cpp                | 74 ++++++++++++++++++++-
 clang/lib/CodeGen/CGExpr.cpp                | 37 ++---------
 clang/lib/CodeGen/CodeGenFunction.h         |  3 +-
 clang/lib/CodeGen/CodeGenModule.cpp         | 42 ++++++++++++
 clang/lib/CodeGen/CodeGenModule.h           |  5 ++
 clang/test/CodeGen/tbaa-returned-struct.cpp | 37 +++++++++++
 6 files changed, 162 insertions(+), 36 deletions(-)
 create mode 100644 clang/test/CodeGen/tbaa-returned-struct.cpp

diff --git a/clang/lib/CodeGen/CGCall.cpp b/clang/lib/CodeGen/CGCall.cpp
index efacb3cc04c01..a35cab83fd286 100644
--- a/clang/lib/CodeGen/CGCall.cpp
+++ b/clang/lib/CodeGen/CGCall.cpp
@@ -44,12 +44,49 @@
 #include "llvm/IR/Intrinsics.h"
 #include "llvm/IR/Type.h"
 #include "llvm/Transforms/Utils/Local.h"
+#include <algorithm>
 #include <optional>
 using namespace clang;
 using namespace CodeGen;
 
 /***/
 
+namespace {
+/// Creates a table of `FieldDecl` pointers for each `llvm::StructType` element
+/// number, by working backwards from the `CGRecordLayout`.
+class LLVMToClangFieldLookup {
+public:
+  LLVMToClangFieldLookup(const llvm::StructType *LLVMType,
+                         const RecordDecl *RDecl, const CGRecordLayout 
&RLayout)
+      : Table(LLVMType->getNumElements(), nullptr) {
+    for (const auto *FDecl : RDecl->fields()) {
+      if (!isa<FieldDecl>(FDecl))
+        continue;
+      if (!RLayout.containsFieldDecl(FDecl))
+        continue;
+
+      unsigned FieldIndex = RLayout.getLLVMFieldNo(FDecl);
+      assert(FieldIndex < Table.size() &&
+             "Field index should not exceed num elements");
+
+      if (!Table[FieldIndex]) {
+        // If several Clang FieldDecls correspond to the same LLVM field,
+        // arbitrarily pick the first.
+        Table[FieldIndex] = FDecl;
+      }
+    }
+  }
+
+  const FieldDecl *getFieldDeclForFieldNo(unsigned FieldNo) {
+    assert(FieldNo < Table.size());
+    return Table[FieldNo];
+  }
+
+private:
+  SmallVector<const FieldDecl *, 16> Table;
+};
+} // namespace
+
 unsigned CodeGenTypes::ClangCallConvToLLVMCallConv(CallingConv CC) {
   switch (CC) {
   default:
@@ -1398,7 +1435,8 @@ static llvm::Value *CreateCoercedLoad(Address Src, 
llvm::Type *Ty,
 
 void CodeGenFunction::CreateCoercedStore(llvm::Value *Src, Address Dst,
                                          llvm::TypeSize DstSize,
-                                         bool DstIsVolatile) {
+                                         bool DstIsVolatile,
+                                         std::optional<QualType> QTy) {
   if (!DstSize)
     return;
 
@@ -1426,6 +1464,22 @@ void CodeGenFunction::CreateCoercedStore(llvm::Value 
*Src, Address Dst,
       addInstToCurrentSourceAtom(I, Src);
     } else if (llvm::StructType *STy =
                    dyn_cast<llvm::StructType>(Src->getType())) {
+      // For TBAA metadata, get the record layout
+      std::optional<LLVMToClangFieldLookup> FieldLookupForTBAA;
+      if (QTy && CGM.shouldUseTBAA()) {
+        if (const RecordDecl *RDecl = (*QTy)->getAsRecordDecl()) {
+          const CGRecordLayout &RLayout =
+              CGM.getTypes().getCGRecordLayout(RDecl);
+
+          if (RLayout.getLLVMType()->isLayoutIdentical(STy)) {
+            // There are cases where the returned LLVM struct type does not
+            // match the LLVM type corresponding to the record's layout, so we
+            // can't use it to work out the correct TBAA metadata.
+            FieldLookupForTBAA.emplace(STy, RDecl, RLayout);
+          }
+        }
+      }
+
       // Prefer scalar stores to first-class aggregate stores.
       Dst = Dst.withElementType(SrcTy);
       for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) {
@@ -1433,6 +1487,21 @@ void CodeGenFunction::CreateCoercedStore(llvm::Value 
*Src, Address Dst,
         llvm::Value *Elt = Builder.CreateExtractValue(Src, i);
         auto *I = Builder.CreateStore(Elt, EltPtr, DstIsVolatile);
         addInstToCurrentSourceAtom(I, Elt);
+
+        if (FieldLookupForTBAA) {
+          // Try to find the field declaration corresponding to this struct
+          // element no.
+          const FieldDecl *FDecl =
+              FieldLookupForTBAA->getFieldDeclForFieldNo(i);
+
+          if (FDecl && FDecl->getType()->isScalarType()) {
+            // FIXME Decide on a way to add TBAA MD for store to an aggregate
+            // type. Currently, TBAA MD requires that the access type is a
+            // scalar.
+            CGM.DecorateInstructionWithTBAA(
+                I, CGM.getTBAAInfoForField(TBAAAccessInfo(), *QTy, FDecl));
+          }
+        }
       }
     } else {
       auto *I =
@@ -6235,9 +6304,8 @@ RValue CodeGenFunction::EmitCall(const CGFunctionInfo 
&CallInfo,
           CreateCoercedStore(
               CI, StorePtr,
               llvm::TypeSize::getFixed(DestSize - RetAI.getDirectOffset()),
-              DestIsVolatile);
+              DestIsVolatile, RetTy);
         }
-
         return convertTempToRValue(DestPtr, RetTy, SourceLocation());
       }
 
diff --git a/clang/lib/CodeGen/CGExpr.cpp b/clang/lib/CodeGen/CGExpr.cpp
index c8f669b69d991..65200a9a3dec7 100644
--- a/clang/lib/CodeGen/CGExpr.cpp
+++ b/clang/lib/CodeGen/CGExpr.cpp
@@ -5390,38 +5390,8 @@ LValue CodeGenFunction::EmitLValueForField(LValue base, 
const FieldDecl *field,
   // and unions.
   QualType FieldType = field->getType();
   const RecordDecl *rec = field->getParent();
-  AlignmentSource BaseAlignSource = BaseInfo.getAlignmentSource();
-  LValueBaseInfo FieldBaseInfo(getFieldAlignmentSource(BaseAlignSource));
-  TBAAAccessInfo FieldTBAAInfo;
-  if (base.getTBAAInfo().isMayAlias() ||
-          rec->hasAttr<MayAliasAttr>() || FieldType->isVectorType()) {
-    FieldTBAAInfo = TBAAAccessInfo::getMayAliasInfo();
-  } else if (rec->isUnion()) {
-    // TODO: Support TBAA for unions.
-    FieldTBAAInfo = TBAAAccessInfo::getMayAliasInfo();
-  } else {
-    // If no base type been assigned for the base access, then try to generate
-    // one for this base lvalue.
-    FieldTBAAInfo = base.getTBAAInfo();
-    if (!FieldTBAAInfo.BaseType) {
-        FieldTBAAInfo.BaseType = CGM.getTBAABaseTypeInfo(base.getType());
-        assert(!FieldTBAAInfo.Offset &&
-               "Nonzero offset for an access with no base type!");
-    }
-
-    // Adjust offset to be relative to the base type.
-    const ASTRecordLayout &Layout =
-        getContext().getASTRecordLayout(field->getParent());
-    unsigned CharWidth = getContext().getCharWidth();
-    if (FieldTBAAInfo.BaseType)
-      FieldTBAAInfo.Offset +=
-          Layout.getFieldOffset(field->getFieldIndex()) / CharWidth;
-
-    // Update the final access type and size.
-    FieldTBAAInfo.AccessType = CGM.getTBAATypeInfo(FieldType);
-    FieldTBAAInfo.Size =
-        getContext().getTypeSizeInChars(FieldType).getQuantity();
-  }
+  TBAAAccessInfo FieldTBAAInfo =
+      CGM.getTBAAInfoForField(base.getTBAAInfo(), base.getType(), field);
 
   Address addr = base.getAddress();
   if (hasBPFPreserveStaticOffset(rec))
@@ -5472,6 +5442,9 @@ LValue CodeGenFunction::EmitLValueForField(LValue base, 
const FieldDecl *field,
       addr = emitPreserveStructAccess(*this, base, addr, field);
   }
 
+  AlignmentSource BaseAlignSource = base.getBaseInfo().getAlignmentSource();
+  LValueBaseInfo FieldBaseInfo(getFieldAlignmentSource(BaseAlignSource));
+
   // If this is a reference field, load the reference right now.
   if (FieldType->isReferenceType()) {
     LValue RefLVal =
diff --git a/clang/lib/CodeGen/CodeGenFunction.h 
b/clang/lib/CodeGen/CodeGenFunction.h
index 8c4c1c8c2dc95..b7fbec3152be9 100644
--- a/clang/lib/CodeGen/CodeGenFunction.h
+++ b/clang/lib/CodeGen/CodeGenFunction.h
@@ -5038,7 +5038,8 @@ class CodeGenFunction : public CodeGenTypeCache {
   /// Create a store to \arg DstPtr from \arg Src, truncating the stored value
   /// to at most \arg DstSize bytes.
   void CreateCoercedStore(llvm::Value *Src, Address Dst, llvm::TypeSize 
DstSize,
-                          bool DstIsVolatile);
+                          bool DstIsVolatile,
+                          std::optional<QualType> QTy = std::nullopt);
 
   /// EmitExtendGCLifetime - Given a pointer to an Objective-C object,
   /// make sure it survives garbage collection until this point.
diff --git a/clang/lib/CodeGen/CodeGenModule.cpp 
b/clang/lib/CodeGen/CodeGenModule.cpp
index 4789c6b26797f..ec421eb91f0c4 100644
--- a/clang/lib/CodeGen/CodeGenModule.cpp
+++ b/clang/lib/CodeGen/CodeGenModule.cpp
@@ -1720,6 +1720,48 @@ 
CodeGenModule::mergeTBAAInfoForMemoryTransfer(TBAAAccessInfo DestInfo,
   return TBAA->mergeTBAAInfoForConditionalOperator(DestInfo, SrcInfo);
 }
 
+TBAAAccessInfo CodeGenModule::getTBAAInfoForField(TBAAAccessInfo BaseTBAAInfo,
+                                                  QualType BaseType,
+                                                  const FieldDecl *Field) {
+  // Fields of may-alias structures are may-alias themselves.
+  // FIXME: this should get propagated down through anonymous structs
+  // and unions.
+  const RecordDecl *Rec = Field->getParent();
+  QualType FieldType = Field->getType();
+  TBAAAccessInfo FieldTBAAInfo;
+  if (BaseTBAAInfo.isMayAlias() || Rec->hasAttr<MayAliasAttr>() ||
+      FieldType->isVectorType()) {
+    FieldTBAAInfo = TBAAAccessInfo::getMayAliasInfo();
+  } else if (Rec->isUnion()) {
+    // TODO: Support TBAA for unions.
+    FieldTBAAInfo = TBAAAccessInfo::getMayAliasInfo();
+  } else {
+    // If no base type has been assigned for the base access, then try to
+    // generate one for this base lvalue.
+    FieldTBAAInfo = BaseTBAAInfo;
+    if (!FieldTBAAInfo.BaseType) {
+      FieldTBAAInfo.BaseType = getTBAABaseTypeInfo(BaseType);
+      assert(!FieldTBAAInfo.Offset &&
+             "Nonzero offset for an access with no base type!");
+    }
+
+    // Adjust offset to be relative to the base type.
+    const ASTRecordLayout &Layout =
+        getContext().getASTRecordLayout(Field->getParent());
+    unsigned CharWidth = getContext().getCharWidth();
+    if (FieldTBAAInfo.BaseType)
+      FieldTBAAInfo.Offset +=
+          Layout.getFieldOffset(Field->getFieldIndex()) / CharWidth;
+
+    // Update the final access type and size.
+    FieldTBAAInfo.AccessType = getTBAATypeInfo(FieldType);
+    FieldTBAAInfo.Size =
+        getContext().getTypeSizeInChars(FieldType).getQuantity();
+  }
+
+  return FieldTBAAInfo;
+}
+
 void CodeGenModule::DecorateInstructionWithTBAA(llvm::Instruction *Inst,
                                                 TBAAAccessInfo TBAAInfo) {
   if (llvm::MDNode *Tag = getTBAAAccessTagInfo(TBAAInfo))
diff --git a/clang/lib/CodeGen/CodeGenModule.h 
b/clang/lib/CodeGen/CodeGenModule.h
index a253bcda2d06c..1dc53c792230a 100644
--- a/clang/lib/CodeGen/CodeGenModule.h
+++ b/clang/lib/CodeGen/CodeGenModule.h
@@ -909,6 +909,11 @@ class CodeGenModule : public CodeGenTypeCache {
     return getTBAAAccessInfo(AccessType);
   }
 
+  /// getTBAAInfoForField - Get TBAA information for an access to a field in
+  /// a record.
+  TBAAAccessInfo getTBAAInfoForField(TBAAAccessInfo BaseTBAAInfo,
+                                     QualType BaseType, const FieldDecl 
*Field);
+
   bool isPaddedAtomicType(QualType type);
   bool isPaddedAtomicType(const AtomicType *type);
 
diff --git a/clang/test/CodeGen/tbaa-returned-struct.cpp 
b/clang/test/CodeGen/tbaa-returned-struct.cpp
new file mode 100644
index 0000000000000..e276488b63c46
--- /dev/null
+++ b/clang/test/CodeGen/tbaa-returned-struct.cpp
@@ -0,0 +1,37 @@
+// RUN: %clang_cc1 -triple x86_64-linux-unknown -emit-llvm -o - -O1 
-disable-llvm-passes %s | FileCheck %s --check-prefixes=CHECK
+
+// Checking that we generate TBAA metadata for returned aggregates.
+// Currently, TBAA metadata is only emitted when structs are returned directly 
and the returned LLVM struct exactly matches the LLVM struct representation of 
the type.
+// We should update this test when TBAA metadata is added for more cases. 
Cases which aren't covered include:
+//  - Direct return as scalar (e.g. { int x; int y; } returned as i64)
+//  - Indirect return via sret pointer
+
+struct S1 {
+    // Currently, only structs small enough to be returned directly, but large 
enough not to be returned as a scalar, will get TBAA metadata.
+    long x;
+    double y;
+};
+
+S1 returns_s1() {
+    return S1 {1, 2};
+}
+
+void receives_s1() {
+    S1 x = returns_s1();
+// CHECK: define dso_local void @_Z11receives_s1v()
+// CHECK: %call = call { i64, double } @_Z10returns_s1v()
+// CHECK-NEXT: %0 = getelementptr inbounds nuw { i64, double }, ptr %x, i32 0, 
i32 0
+// CHECK-NEXT: %1 = extractvalue { i64, double } %call, 0
+// CHECK-NEXT: store i64 %1, ptr %0, align 8, !tbaa ![[TBAA_LONG_IN_S1:[0-9]+]]
+// CHECK-NEXT: %2 = getelementptr inbounds nuw { i64, double }, ptr %x, i32 0, 
i32 1
+// CHECK-NEXT: %3 = extractvalue { i64, double } %call, 1
+// CHECK-NEXT: store double %3, ptr %2, align 8, !tbaa 
![[TBAA_DOUBLE_IN_S1:[0-9]+]]
+}
+
+// Validate TBAA MD
+// CHECK-DAG: ![[TBAA_CHAR:[0-9]+]] = !{!"omnipotent char",
+// CHECK-DAG: ![[TBAA_LONG:[0-9]+]] = !{!"long", ![[TBAA_CHAR]], i64 0}
+// CHECK-DAG: ![[TBAA_DOUBLE:[0-9]+]] = !{!"double", ![[TBAA_CHAR]], i64 0}
+// CHECK-DAG: ![[TBAA_S1:[0-9]+]] = !{!"_ZTS2S1", ![[TBAA_LONG]], i64 0, 
![[TBAA_DOUBLE]], i64 8}
+// CHECK-DAG: ![[TBAA_LONG_IN_S1]] = !{![[TBAA_S1]], ![[TBAA_LONG]], i64 0}
+// CHECK-DAG: ![[TBAA_DOUBLE_IN_S1]] = !{![[TBAA_S1]], ![[TBAA_DOUBLE]], i64 8}

_______________________________________________
cfe-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to