Author: Bruno De Fraine
Date: 2025-06-05T13:37:18+02:00
New Revision: c3b8a15eab06fceb6f4d0f2a0f505d5290ff208a

URL: 
https://github.com/llvm/llvm-project/commit/c3b8a15eab06fceb6f4d0f2a0f505d5290ff208a
DIFF: 
https://github.com/llvm/llvm-project/commit/c3b8a15eab06fceb6f4d0f2a0f505d5290ff208a.diff

LOG: [CodeGen] Add TBAA struct path info for array members (#137719)

This enables the LLVM optimizer to view accesses to distinct struct
members as independent, also for array members. For example, the
following two stores no longer alias:

    struct S { int a[10]; int b; };
    void test(S *p, int i) {
      p->a[i] = ...;
      p->b = ...;
    }

Array members were already added to TBAA struct type nodes in commit
57493e29. Here, we extend a path tag for an array subscript expression.

Added: 
    

Modified: 
    clang/lib/CodeGen/CGExpr.cpp
    clang/lib/CodeGen/CodeGenTBAA.cpp
    clang/test/CodeGen/tbaa-array.cpp

Removed: 
    


################################################################################
diff  --git a/clang/lib/CodeGen/CGExpr.cpp b/clang/lib/CodeGen/CGExpr.cpp
index eb569c04b047c..5fc98b6a692cc 100644
--- a/clang/lib/CodeGen/CGExpr.cpp
+++ b/clang/lib/CodeGen/CGExpr.cpp
@@ -4595,7 +4595,32 @@ LValue CodeGenFunction::EmitArraySubscriptExpr(const 
ArraySubscriptExpr *E,
         E->getType(), !getLangOpts().PointerOverflowDefined, SignedIndices,
         E->getExprLoc(), &arrayType, E->getBase());
     EltBaseInfo = ArrayLV.getBaseInfo();
-    EltTBAAInfo = CGM.getTBAAInfoForSubobject(ArrayLV, E->getType());
+    if (!CGM.getCodeGenOpts().NewStructPathTBAA) {
+      // Since CodeGenTBAA::getTypeInfoHelper only handles array types for
+      // new struct path TBAA, we must a use a plain access.
+      EltTBAAInfo = CGM.getTBAAInfoForSubobject(ArrayLV, E->getType());
+    } else if (ArrayLV.getTBAAInfo().isMayAlias()) {
+      EltTBAAInfo = TBAAAccessInfo::getMayAliasInfo();
+    } else if (ArrayLV.getTBAAInfo().isIncomplete()) {
+      // The array element is complete, even if the array is not.
+      EltTBAAInfo = CGM.getTBAAAccessInfo(E->getType());
+    } else {
+      // The TBAA access info from the array (base) lvalue is ordinary. We will
+      // adapt it to create access info for the element.
+      EltTBAAInfo = ArrayLV.getTBAAInfo();
+
+      // We retain the TBAA struct path (BaseType and Offset members) from the
+      // array. In the TBAA representation, we map any array access to the
+      // element at index 0, as the index is generally a runtime value. This
+      // element has the same offset in the base type as the array itself.
+      // If the array lvalue had no base type, there is no point trying to
+      // generate one, since an array itself is not a valid base type.
+
+      // We also retain the access type from the base lvalue, but the access
+      // size must be updated to the size of an individual element.
+      EltTBAAInfo.Size =
+          getContext().getTypeSizeInChars(E->getType()).getQuantity();
+    }
   } else {
     // The base must be a pointer; emit it with an estimate of its alignment.
     Address BaseAddr =

diff  --git a/clang/lib/CodeGen/CodeGenTBAA.cpp 
b/clang/lib/CodeGen/CodeGenTBAA.cpp
index 818b6dabaa144..a02a009158d12 100644
--- a/clang/lib/CodeGen/CodeGenTBAA.cpp
+++ b/clang/lib/CodeGen/CodeGenTBAA.cpp
@@ -130,6 +130,13 @@ static bool TypeHasMayAlias(QualType QTy) {
       return true;
     QTy = TT->desugar();
   }
+
+  // Also consider an array type as may_alias when its element type (at
+  // any level) is marked as such.
+  if (auto *ArrayTy = QTy->getAsArrayTypeUnsafe())
+    if (TypeHasMayAlias(ArrayTy->getElementType()))
+      return true;
+
   return false;
 }
 

diff  --git a/clang/test/CodeGen/tbaa-array.cpp 
b/clang/test/CodeGen/tbaa-array.cpp
index 4a6576e2eeb7f..35e525cfa67ca 100644
--- a/clang/test/CodeGen/tbaa-array.cpp
+++ b/clang/test/CodeGen/tbaa-array.cpp
@@ -1,6 +1,6 @@
-// RUN: %clang_cc1 -triple x86_64-linux -O1 -disable-llvm-passes %s \
+// RUN: %clang_cc1 -triple x86_64-linux -O1 %s \
 // RUN:     -emit-llvm -o - | FileCheck %s
-// RUN: %clang_cc1 -triple x86_64-linux -O1 -disable-llvm-passes %s \
+// RUN: %clang_cc1 -triple x86_64-linux -O1 %s \
 // RUN:     -new-struct-path-tbaa -emit-llvm -o - | \
 // RUN:     FileCheck -check-prefix=CHECK-NEW %s
 //
@@ -10,6 +10,12 @@
 struct A { int i; };
 struct B { A a[1]; };
 struct C { int i; int x[3]; };
+struct D { int n; int arr[]; }; // flexible array member
+extern int AA[];                // incomplete array type
+
+typedef int __attribute__((may_alias)) aliasing_int;
+typedef int __attribute__((may_alias)) aliasing_array[10];
+struct E { aliasing_int x[4]; aliasing_array y; };
 
 int foo(B *b) {
 // CHECK-LABEL: _Z3fooP1B
@@ -28,16 +34,42 @@ int bar(C *c) {
 
 int bar2(C *c) {
 // CHECK-NEW-LABEL: _Z4bar2P1C
-// CHECK-NEW: load i32, {{.*}}, !tbaa [[TAG_int:!.*]]
+// CHECK-NEW: load i32, {{.*}}, !tbaa [[TAG_C_x:!.*]]
   return c->x[2];
 }
 
 int bar3(C *c, int j) {
 // CHECK-NEW-LABEL: _Z4bar3P1Ci
-// CHECK-NEW: load i32, {{.*}}, !tbaa [[TAG_int:!.*]]
+// CHECK-NEW: load i32, {{.*}}, !tbaa [[TAG_C_x]]
   return c->x[j];
 }
 
+int bar4(D *d) {
+// CHECK-NEW-LABEL: _Z4bar4P1D
+// CHECK-NEW: load i32, {{.*}}, !tbaa [[TAG_int:!.*]]
+// CHECK-NEW: load i32, {{.*}}, !tbaa [[TAG_int:!.*]]
+    return d->arr[d->n];
+}
+
+int bar5(int j) {
+// CHECK-NEW-LABEL: _Z4bar5i
+// CHECK-NEW: load i32, {{.*}}, !tbaa [[TAG_int:!.*]]
+// CHECK-NEW: load i32, {{.*}}, !tbaa [[TAG_int:!.*]]
+    return AA[2] + AA[j];
+}
+
+int bar6(E *e, int j) {
+// CHECK-NEW-LABEL: _Z4bar6P1Ei
+// CHECK-NEW: load i32, {{.*}}, !tbaa [[TAG_E_x:!.*]]
+    return e->x[j];
+}
+
+int bar7(E *e, int j) {
+// CHECK-NEW-LABEL: _Z4bar7P1Ei
+// CHECK-NEW: load i32, {{.*}}, !tbaa [[TAG_E_y:!.*]]
+    return e->y[j];
+}
+
 // CHECK-DAG: [[TAG_A_i]] = !{[[TYPE_A:!.*]], [[TYPE_int:!.*]], i64 0}
 // CHECK-DAG: [[TYPE_A]] = !{!"_ZTS1A", !{{.*}}, i64 0}
 // CHECK-DAG: [[TYPE_int]] = !{!"int", !{{.*}}, i64 0}
@@ -45,8 +77,11 @@ int bar3(C *c, int j) {
 // CHECK-NEW-DAG: [[TYPE_char:!.*]] = !{{{.*}}, i64 1, !"omnipotent char"}
 // CHECK-NEW-DAG: [[TYPE_int:!.*]] = !{[[TYPE_char]], i64 4, !"int"}
 // CHECK-NEW-DAG: [[TAG_int]] = !{[[TYPE_int]], [[TYPE_int]], i64 0, i64 4}
-// CHECK-NEW-DAG: [[TYPE_pointer:!.*]] = !{[[TYPE_char]], i64 8, !"any 
pointer"}
 // CHECK-NEW-DAG: [[TYPE_A:!.*]] = !{[[TYPE_char]], i64 4, !"_ZTS1A", 
[[TYPE_int]], i64 0, i64 4}
 // CHECK-NEW-DAG: [[TAG_A_i]] = !{[[TYPE_A]], [[TYPE_int]], i64 0, i64 4}
 // CHECK-NEW-DAG: [[TYPE_C:!.*]] = !{[[TYPE_char]], i64 16, !"_ZTS1C", 
[[TYPE_int]], i64 0, i64 4, [[TYPE_int]], i64 4, i64 12}
-// CHECK-NEW-DAG: [[TAG_C_i]] = !{[[TYPE_C:!.*]], [[TYPE_int:!.*]], i64 0, i64 
4}
+// CHECK-NEW-DAG: [[TAG_C_i]] = !{[[TYPE_C]], [[TYPE_int]], i64 0, i64 4}
+// CHECK-NEW-DAG: [[TAG_C_x]] = !{[[TYPE_C]], [[TYPE_int]], i64 4, i64 4}
+// CHECK-NEW-DAG: [[TYPE_E:!.*]] = !{[[TYPE_char]], i64 56, !"_ZTS1E", 
[[TYPE_char]], i64 0, i64 16, [[TYPE_char]], i64 16, i64 40}
+// CHECK-NEW-DAG: [[TAG_E_x]] = !{[[TYPE_E]], [[TYPE_char]], i64 0, i64 4}
+// CHECK-NEW-DAG: [[TAG_E_y]] = !{[[TYPE_E]], [[TYPE_char]], i64 16, i64 4}


        
_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to