https://github.com/dtcxzyw updated 
https://github.com/llvm/llvm-project/pull/130734

>From b9876822acdca4eaea6496a1d6471e2c0f4ad2e1 Mon Sep 17 00:00:00 2001
From: Yingwei Zheng <dtcxzyw2...@gmail.com>
Date: Tue, 11 Mar 2025 16:20:08 +0800
Subject: [PATCH 1/6] [Clang][CodeGen] Do not set inbounds flag for struct GEP
 with null base pointers

---
 clang/lib/CodeGen/CGBuilder.h                     | 15 ++++++++++-----
 ...nullptr-and-nonzero-offset-in-offsetof-idiom.c |  2 +-
 ...llptr-and-nonzero-offset-in-offsetof-idiom.cpp |  2 +-
 3 files changed, 12 insertions(+), 7 deletions(-)

diff --git a/clang/lib/CodeGen/CGBuilder.h b/clang/lib/CodeGen/CGBuilder.h
index b8036cf6e6a30..11e8818b33397 100644
--- a/clang/lib/CodeGen/CGBuilder.h
+++ b/clang/lib/CodeGen/CGBuilder.h
@@ -223,11 +223,16 @@ class CGBuilderTy : public CGBuilderBaseTy {
     const llvm::StructLayout *Layout = DL.getStructLayout(ElTy);
     auto Offset = CharUnits::fromQuantity(Layout->getElementOffset(Index));
 
-    return Address(CreateStructGEP(Addr.getElementType(), 
Addr.getBasePointer(),
-                                   Index, Name),
-                   ElTy->getElementType(Index),
-                   Addr.getAlignment().alignmentAtOffset(Offset),
-                   Addr.isKnownNonNull());
+    // Specially, we don't add inbounds flags if the base pointer is null.
+    // This is a workaround for old-style offsetof macros.
+    llvm::GEPNoWrapFlags NWFlags = llvm::GEPNoWrapFlags::noUnsignedWrap();
+    if (!isa<llvm::ConstantPointerNull>(Addr.getBasePointer()))
+      NWFlags |= llvm::GEPNoWrapFlags::inBounds();
+    return Address(
+        CreateConstGEP2_32(Addr.getElementType(), Addr.getBasePointer(), 0,
+                           Index, Name, NWFlags),
+        ElTy->getElementType(Index),
+        Addr.getAlignment().alignmentAtOffset(Offset), Addr.isKnownNonNull());
   }
 
   /// Given
diff --git 
a/clang/test/CodeGen/catch-nullptr-and-nonzero-offset-in-offsetof-idiom.c 
b/clang/test/CodeGen/catch-nullptr-and-nonzero-offset-in-offsetof-idiom.c
index 68c0ee3a3a885..a7cfd77766712 100644
--- a/clang/test/CodeGen/catch-nullptr-and-nonzero-offset-in-offsetof-idiom.c
+++ b/clang/test/CodeGen/catch-nullptr-and-nonzero-offset-in-offsetof-idiom.c
@@ -17,7 +17,7 @@ struct S {
 
 // CHECK-LABEL: @get_offset_of_y_naively(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    ret i64 ptrtoint (ptr getelementptr inbounds nuw 
([[STRUCT_S:%.*]], ptr null, i32 0, i32 1) to i64)
+// CHECK-NEXT:    ret i64 ptrtoint (ptr getelementptr nuw ([[STRUCT_S:%.*]], 
ptr null, i32 0, i32 1) to i64)
 //
 uintptr_t get_offset_of_y_naively(void) {
   return ((uintptr_t)(&(((struct S *)0)->y)));
diff --git 
a/clang/test/CodeGenCXX/catch-nullptr-and-nonzero-offset-in-offsetof-idiom.cpp 
b/clang/test/CodeGenCXX/catch-nullptr-and-nonzero-offset-in-offsetof-idiom.cpp
index 34d4f4c9e34eb..f00a2c486574c 100644
--- 
a/clang/test/CodeGenCXX/catch-nullptr-and-nonzero-offset-in-offsetof-idiom.cpp
+++ 
b/clang/test/CodeGenCXX/catch-nullptr-and-nonzero-offset-in-offsetof-idiom.cpp
@@ -10,7 +10,7 @@ struct S {
 
 // CHECK-LABEL: @_Z23get_offset_of_y_naivelyv(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    ret i64 ptrtoint (ptr getelementptr inbounds nuw 
([[STRUCT_S:%.*]], ptr null, i32 0, i32 1) to i64)
+// CHECK-NEXT:    ret i64 ptrtoint (ptr getelementptr nuw ([[STRUCT_S:%.*]], 
ptr null, i32 0, i32 1) to i64)
 //
 uintptr_t get_offset_of_y_naively() {
   return ((uintptr_t)(&(((S *)nullptr)->y)));

>From 4b6cc2ba52c958468643d50a06e61ec3c04ab454 Mon Sep 17 00:00:00 2001
From: Yingwei Zheng <dtcxzyw2...@gmail.com>
Date: Fri, 14 Mar 2025 14:28:55 +0800
Subject: [PATCH 2/6] [Clang][CodeGen] Address review comments.

---
 clang/lib/CodeGen/CGBuilder.h       | 15 +++++----------
 clang/lib/CodeGen/CGExpr.cpp        | 27 +++++++++++++++++++--------
 clang/lib/CodeGen/CodeGenFunction.h |  3 ++-
 3 files changed, 26 insertions(+), 19 deletions(-)

diff --git a/clang/lib/CodeGen/CGBuilder.h b/clang/lib/CodeGen/CGBuilder.h
index 11e8818b33397..b8036cf6e6a30 100644
--- a/clang/lib/CodeGen/CGBuilder.h
+++ b/clang/lib/CodeGen/CGBuilder.h
@@ -223,16 +223,11 @@ class CGBuilderTy : public CGBuilderBaseTy {
     const llvm::StructLayout *Layout = DL.getStructLayout(ElTy);
     auto Offset = CharUnits::fromQuantity(Layout->getElementOffset(Index));
 
-    // Specially, we don't add inbounds flags if the base pointer is null.
-    // This is a workaround for old-style offsetof macros.
-    llvm::GEPNoWrapFlags NWFlags = llvm::GEPNoWrapFlags::noUnsignedWrap();
-    if (!isa<llvm::ConstantPointerNull>(Addr.getBasePointer()))
-      NWFlags |= llvm::GEPNoWrapFlags::inBounds();
-    return Address(
-        CreateConstGEP2_32(Addr.getElementType(), Addr.getBasePointer(), 0,
-                           Index, Name, NWFlags),
-        ElTy->getElementType(Index),
-        Addr.getAlignment().alignmentAtOffset(Offset), Addr.isKnownNonNull());
+    return Address(CreateStructGEP(Addr.getElementType(), 
Addr.getBasePointer(),
+                                   Index, Name),
+                   ElTy->getElementType(Index),
+                   Addr.getAlignment().alignmentAtOffset(Offset),
+                   Addr.isKnownNonNull());
   }
 
   /// Given
diff --git a/clang/lib/CodeGen/CGExpr.cpp b/clang/lib/CodeGen/CGExpr.cpp
index 5943ff9294e1a..a55cee2f274af 100644
--- a/clang/lib/CodeGen/CGExpr.cpp
+++ b/clang/lib/CodeGen/CGExpr.cpp
@@ -4778,6 +4778,10 @@ LValue CodeGenFunction::EmitMemberExpr(const MemberExpr 
*E) {
   }
 
   Expr *BaseExpr = E->getBase();
+  Expr *UnderlyingBaseExpr = BaseExpr;
+  while (auto *BaseMemberExpr = dyn_cast<MemberExpr>(UnderlyingBaseExpr))
+    UnderlyingBaseExpr = BaseMemberExpr->getBase();
+  bool IsBaseConstantNull = 
getContext().isSentinelNullExpr(UnderlyingBaseExpr);
   // If this is s.x, emit s as an lvalue.  If it is s->x, emit s as a scalar.
   LValue BaseLV;
   if (E->isArrow()) {
@@ -4799,7 +4803,7 @@ LValue CodeGenFunction::EmitMemberExpr(const MemberExpr 
*E) {
 
   NamedDecl *ND = E->getMemberDecl();
   if (auto *Field = dyn_cast<FieldDecl>(ND)) {
-    LValue LV = EmitLValueForField(BaseLV, Field);
+    LValue LV = EmitLValueForField(BaseLV, Field, IsBaseConstantNull);
     setObjCGCLValueClass(getContext(), E, LV);
     if (getLangOpts().OpenMP) {
       // If the member was explicitly marked as nontemporal, mark it as
@@ -4885,12 +4889,15 @@ unsigned CodeGenFunction::getDebugInfoFIndex(const 
RecordDecl *Rec,
 /// Get the address of a zero-sized field within a record. The resulting
 /// address doesn't necessarily have the right type.
 static Address emitAddrOfZeroSizeField(CodeGenFunction &CGF, Address Base,
-                                       const FieldDecl *Field) {
+                                       const FieldDecl *Field,
+                                       bool IsBaseConstantNull) {
   CharUnits Offset = CGF.getContext().toCharUnitsFromBits(
       CGF.getContext().getFieldOffset(Field));
   if (Offset.isZero())
     return Base;
   Base = Base.withElementType(CGF.Int8Ty);
+  if (IsBaseConstantNull)
+    return CGF.Builder.CreateConstByteGEP(Base, Offset);
   return CGF.Builder.CreateConstInBoundsByteGEP(Base, Offset);
 }
 
@@ -4899,15 +4906,18 @@ static Address emitAddrOfZeroSizeField(CodeGenFunction 
&CGF, Address Base,
 ///
 /// The resulting address doesn't necessarily have the right type.
 static Address emitAddrOfFieldStorage(CodeGenFunction &CGF, Address base,
-                                      const FieldDecl *field) {
+                                      const FieldDecl *field,
+                                      bool IsBaseConstantNull) {
   if (isEmptyFieldForLayout(CGF.getContext(), field))
-    return emitAddrOfZeroSizeField(CGF, base, field);
+    return emitAddrOfZeroSizeField(CGF, base, field, IsBaseConstantNull);
 
   const RecordDecl *rec = field->getParent();
 
   unsigned idx =
     CGF.CGM.getTypes().getCGRecordLayout(rec).getLLVMFieldNo(field);
 
+  if (IsBaseConstantNull)
+    return CGF.Builder.CreateConstGEP(base, idx, field->getName());
   return CGF.Builder.CreateStructGEP(base, idx, field->getName());
 }
 
@@ -4943,8 +4953,8 @@ static bool hasAnyVptr(const QualType Type, const 
ASTContext &Context) {
   return false;
 }
 
-LValue CodeGenFunction::EmitLValueForField(LValue base,
-                                           const FieldDecl *field) {
+LValue CodeGenFunction::EmitLValueForField(LValue base, const FieldDecl *field,
+                                           bool IsBaseConstantNull) {
   LValueBaseInfo BaseInfo = base.getBaseInfo();
 
   if (field->isBitField()) {
@@ -5076,7 +5086,7 @@ LValue CodeGenFunction::EmitLValueForField(LValue base,
     if (!IsInPreservedAIRegion &&
         (!getDebugInfo() || !rec->hasAttr<BPFPreserveAccessIndexAttr>()))
       // For structs, we GEP to the field that the record layout suggests.
-      addr = emitAddrOfFieldStorage(*this, addr, field);
+      addr = emitAddrOfFieldStorage(*this, addr, field, IsBaseConstantNull);
     else
       // Remember the original struct field index
       addr = emitPreserveStructAccess(*this, base, addr, field);
@@ -5120,7 +5130,8 @@ CodeGenFunction::EmitLValueForFieldInitialization(LValue 
Base,
   if (!FieldType->isReferenceType())
     return EmitLValueForField(Base, Field);
 
-  Address V = emitAddrOfFieldStorage(*this, Base.getAddress(), Field);
+  Address V = emitAddrOfFieldStorage(*this, Base.getAddress(), Field,
+                                     /*IsBaseConstantNull=*/false);
 
   // Make sure that the address is pointing to the right type.
   llvm::Type *llvmType = ConvertTypeForMem(FieldType);
diff --git a/clang/lib/CodeGen/CodeGenFunction.h 
b/clang/lib/CodeGen/CodeGenFunction.h
index ca00a0e8c6cf4..46e0dde6a1090 100644
--- a/clang/lib/CodeGen/CodeGenFunction.h
+++ b/clang/lib/CodeGen/CodeGenFunction.h
@@ -4472,7 +4472,8 @@ class CodeGenFunction : public CodeGenTypeCache {
                               const ObjCIvarDecl *Ivar);
   llvm::Value *EmitIvarOffsetAsPointerDiff(const ObjCInterfaceDecl *Interface,
                                            const ObjCIvarDecl *Ivar);
-  LValue EmitLValueForField(LValue Base, const FieldDecl *Field);
+  LValue EmitLValueForField(LValue Base, const FieldDecl *Field,
+                            bool IsBaseConstantNull = false);
   LValue EmitLValueForLambdaField(const FieldDecl *Field);
   LValue EmitLValueForLambdaField(const FieldDecl *Field,
                                   llvm::Value *ThisValue);

>From bf0d1f22e60f7c572104a3cd501e5df8a75b1f51 Mon Sep 17 00:00:00 2001
From: Yingwei Zheng <dtcxzyw2...@gmail.com>
Date: Fri, 14 Mar 2025 15:41:22 +0800
Subject: [PATCH 3/6] [Clang][CodeGen] Add more tests.

---
 clang/lib/CodeGen/CGBuilder.h                 | 18 ++++++++-----
 clang/lib/CodeGen/CGExpr.cpp                  |  8 +++---
 ...r-and-nonzero-offset-in-offsetof-idiom.cpp | 26 +++++++++++++++++++
 3 files changed, 43 insertions(+), 9 deletions(-)

diff --git a/clang/lib/CodeGen/CGBuilder.h b/clang/lib/CodeGen/CGBuilder.h
index b8036cf6e6a30..88ec55b322cf5 100644
--- a/clang/lib/CodeGen/CGBuilder.h
+++ b/clang/lib/CodeGen/CGBuilder.h
@@ -215,19 +215,25 @@ class CGBuilderTy : public CGBuilderBaseTy {
   ///
   /// This API assumes that drilling into a struct like this is always an
   /// inbounds and nuw operation.
+  /// Specifically, inbounds flag will not be set if \p IsBaseConstantNull is
+  /// true.
   using CGBuilderBaseTy::CreateStructGEP;
   Address CreateStructGEP(Address Addr, unsigned Index,
-                          const llvm::Twine &Name = "") {
+                          const llvm::Twine &Name = "",
+                          bool IsBaseConstantNull = false) {
     llvm::StructType *ElTy = cast<llvm::StructType>(Addr.getElementType());
     const llvm::DataLayout &DL = BB->getDataLayout();
     const llvm::StructLayout *Layout = DL.getStructLayout(ElTy);
     auto Offset = CharUnits::fromQuantity(Layout->getElementOffset(Index));
 
-    return Address(CreateStructGEP(Addr.getElementType(), 
Addr.getBasePointer(),
-                                   Index, Name),
-                   ElTy->getElementType(Index),
-                   Addr.getAlignment().alignmentAtOffset(Offset),
-                   Addr.isKnownNonNull());
+    llvm::GEPNoWrapFlags NWFlags = llvm::GEPNoWrapFlags::noUnsignedWrap();
+    if (!IsBaseConstantNull)
+      NWFlags |= llvm::GEPNoWrapFlags::inBounds();
+    return Address(
+        CreateConstGEP2_32(Addr.getElementType(), Addr.getBasePointer(), 0,
+                           Index, Name, NWFlags),
+        ElTy->getElementType(Index),
+        Addr.getAlignment().alignmentAtOffset(Offset), Addr.isKnownNonNull());
   }
 
   /// Given
diff --git a/clang/lib/CodeGen/CGExpr.cpp b/clang/lib/CodeGen/CGExpr.cpp
index a55cee2f274af..53070231e00ad 100644
--- a/clang/lib/CodeGen/CGExpr.cpp
+++ b/clang/lib/CodeGen/CGExpr.cpp
@@ -4778,6 +4778,9 @@ LValue CodeGenFunction::EmitMemberExpr(const MemberExpr 
*E) {
   }
 
   Expr *BaseExpr = E->getBase();
+  // Check whether the underlying base pointer is a constant null.
+  // If so, we do not set inbounds flag for GEP to avoid breaking some 
old-style
+  // offsetof idioms.
   Expr *UnderlyingBaseExpr = BaseExpr;
   while (auto *BaseMemberExpr = dyn_cast<MemberExpr>(UnderlyingBaseExpr))
     UnderlyingBaseExpr = BaseMemberExpr->getBase();
@@ -4916,9 +4919,8 @@ static Address emitAddrOfFieldStorage(CodeGenFunction 
&CGF, Address base,
   unsigned idx =
     CGF.CGM.getTypes().getCGRecordLayout(rec).getLLVMFieldNo(field);
 
-  if (IsBaseConstantNull)
-    return CGF.Builder.CreateConstGEP(base, idx, field->getName());
-  return CGF.Builder.CreateStructGEP(base, idx, field->getName());
+  return CGF.Builder.CreateStructGEP(base, idx, field->getName(),
+                                     IsBaseConstantNull);
 }
 
 static Address emitPreserveStructAccess(CodeGenFunction &CGF, LValue base,
diff --git 
a/clang/test/CodeGenCXX/catch-nullptr-and-nonzero-offset-in-offsetof-idiom.cpp 
b/clang/test/CodeGenCXX/catch-nullptr-and-nonzero-offset-in-offsetof-idiom.cpp
index f00a2c486574c..ac45d2e0da4fc 100644
--- 
a/clang/test/CodeGenCXX/catch-nullptr-and-nonzero-offset-in-offsetof-idiom.cpp
+++ 
b/clang/test/CodeGenCXX/catch-nullptr-and-nonzero-offset-in-offsetof-idiom.cpp
@@ -16,6 +16,32 @@ uintptr_t get_offset_of_y_naively() {
   return ((uintptr_t)(&(((S *)nullptr)->y)));
 }
 
+struct Empty {};
+
+struct T {
+  int a;
+  S s;
+  [[no_unique_address]] Empty e1;
+  int b;
+  [[no_unique_address]] Empty e2;
+};
+
+// CHECK-LABEL: @_Z30get_offset_of_y_naively_nestedv(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    ret i64 ptrtoint (ptr getelementptr nuw ([[STRUCT_S:%.*]], 
ptr getelementptr nuw ([[STRUCT_T:%.*]], ptr null, i32 0, i32 1), i32 0, i32 1) 
to i64)
+//
+uintptr_t get_offset_of_y_naively_nested() {
+  return ((uintptr_t)(&(((T *)nullptr)->s.y)));
+}
+
+// CHECK-LABEL: @_Z26get_offset_of_zero_storagev(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    ret i64 ptrtoint (ptr getelementptr (i8, ptr null, i64 16) 
to i64)
+//
+uintptr_t get_offset_of_zero_storage() {
+  return ((uintptr_t)(&(((T *)nullptr)->e2)));
+}
+
 // CHECK-LABEL: @_Z27get_offset_of_y_via_builtinv(
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    ret i64 4

>From 9189eaaaeeb875e94e564ad7301c4ec61e34b639 Mon Sep 17 00:00:00 2001
From: Yingwei Zheng <dtcxzyw2...@gmail.com>
Date: Thu, 20 Mar 2025 17:16:26 +0800
Subject: [PATCH 4/6] [Clang][CodeGen] Handle parens

---
 clang/lib/CodeGen/CGExpr.cpp                              | 4 ++--
 ...catch-nullptr-and-nonzero-offset-in-offsetof-idiom.cpp | 8 ++++++++
 2 files changed, 10 insertions(+), 2 deletions(-)

diff --git a/clang/lib/CodeGen/CGExpr.cpp b/clang/lib/CodeGen/CGExpr.cpp
index 53070231e00ad..a85d6f537d25b 100644
--- a/clang/lib/CodeGen/CGExpr.cpp
+++ b/clang/lib/CodeGen/CGExpr.cpp
@@ -4781,9 +4781,9 @@ LValue CodeGenFunction::EmitMemberExpr(const MemberExpr 
*E) {
   // Check whether the underlying base pointer is a constant null.
   // If so, we do not set inbounds flag for GEP to avoid breaking some 
old-style
   // offsetof idioms.
-  Expr *UnderlyingBaseExpr = BaseExpr;
+  Expr *UnderlyingBaseExpr = BaseExpr->IgnoreParens();
   while (auto *BaseMemberExpr = dyn_cast<MemberExpr>(UnderlyingBaseExpr))
-    UnderlyingBaseExpr = BaseMemberExpr->getBase();
+    UnderlyingBaseExpr = BaseMemberExpr->getBase()->IgnoreParens();
   bool IsBaseConstantNull = 
getContext().isSentinelNullExpr(UnderlyingBaseExpr);
   // If this is s.x, emit s as an lvalue.  If it is s->x, emit s as a scalar.
   LValue BaseLV;
diff --git 
a/clang/test/CodeGenCXX/catch-nullptr-and-nonzero-offset-in-offsetof-idiom.cpp 
b/clang/test/CodeGenCXX/catch-nullptr-and-nonzero-offset-in-offsetof-idiom.cpp
index ac45d2e0da4fc..2df1df53a3448 100644
--- 
a/clang/test/CodeGenCXX/catch-nullptr-and-nonzero-offset-in-offsetof-idiom.cpp
+++ 
b/clang/test/CodeGenCXX/catch-nullptr-and-nonzero-offset-in-offsetof-idiom.cpp
@@ -34,6 +34,14 @@ uintptr_t get_offset_of_y_naively_nested() {
   return ((uintptr_t)(&(((T *)nullptr)->s.y)));
 }
 
+// CHECK-LABEL: @_Z42get_offset_of_y_naively_nested_with_parensv(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    ret i64 ptrtoint (ptr getelementptr nuw ([[STRUCT_S:%.*]], 
ptr getelementptr nuw ([[STRUCT_T:%.*]], ptr null, i32 0, i32 1), i32 0, i32 1) 
to i64)
+//
+uintptr_t get_offset_of_y_naively_nested_with_parens() {
+  return ((uintptr_t)(&((((T *)nullptr)->s).y)));
+}
+
 // CHECK-LABEL: @_Z26get_offset_of_zero_storagev(
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    ret i64 ptrtoint (ptr getelementptr (i8, ptr null, i64 16) 
to i64)

>From 06cd8a744ecf33177574349fae27e902f419e057 Mon Sep 17 00:00:00 2001
From: Yingwei Zheng <dtcxzyw2...@gmail.com>
Date: Thu, 20 Mar 2025 17:57:59 +0800
Subject: [PATCH 5/6] [Clang][CodeGen] Use `createConstGEP2_32`

---
 clang/lib/CodeGen/CGBuilder.h                 | 42 ++++++++++---------
 clang/lib/CodeGen/CGExpr.cpp                  |  5 ++-
 ...ptr-and-nonzero-offset-in-offsetof-idiom.c |  2 +-
 ...r-and-nonzero-offset-in-offsetof-idiom.cpp |  6 +--
 4 files changed, 29 insertions(+), 26 deletions(-)

diff --git a/clang/lib/CodeGen/CGBuilder.h b/clang/lib/CodeGen/CGBuilder.h
index 88ec55b322cf5..3a133447c62e1 100644
--- a/clang/lib/CodeGen/CGBuilder.h
+++ b/clang/lib/CodeGen/CGBuilder.h
@@ -64,21 +64,27 @@ class CGBuilderTy : public CGBuilderBaseTy {
   Address createConstGEP2_32(Address Addr, unsigned Idx0, unsigned Idx1,
                              const llvm::Twine &Name) {
     const llvm::DataLayout &DL = BB->getDataLayout();
-    llvm::GetElementPtrInst *GEP;
+    llvm::Value *V;
     if (IsInBounds)
-      GEP = cast<llvm::GetElementPtrInst>(CreateConstInBoundsGEP2_32(
-          Addr.getElementType(), emitRawPointerFromAddress(Addr), Idx0, Idx1,
-          Name));
+      V = CreateConstInBoundsGEP2_32(Addr.getElementType(),
+                                     emitRawPointerFromAddress(Addr), Idx0,
+                                     Idx1, Name);
     else
-      GEP = cast<llvm::GetElementPtrInst>(CreateConstGEP2_32(
-          Addr.getElementType(), emitRawPointerFromAddress(Addr), Idx0, Idx1,
-          Name));
+      V = CreateConstGEP2_32(Addr.getElementType(),
+                             emitRawPointerFromAddress(Addr), Idx0, Idx1, 
Name);
     llvm::APInt Offset(
         DL.getIndexSizeInBits(Addr.getType()->getPointerAddressSpace()), 0,
         /*isSigned=*/true);
-    if (!GEP->accumulateConstantOffset(DL, Offset))
-      llvm_unreachable("offset of GEP with constants is always computable");
-    return Address(GEP, GEP->getResultElementType(),
+    llvm::Type *ElementTy = nullptr;
+    if (auto *GEP = dyn_cast<llvm::GEPOperator>(V)) {
+      if (!GEP->accumulateConstantOffset(DL, Offset))
+        llvm_unreachable("offset of GEP with constants is always computable");
+      ElementTy = GEP->getResultElementType();
+    } else {
+      ElementTy = 
llvm::GetElementPtrInst::getIndexedType(Addr.getElementType(),
+                                                          {Idx0, Idx1});
+    }
+    return Address(V, ElementTy,
                    Addr.getAlignment().alignmentAtOffset(
                        CharUnits::fromQuantity(Offset.getSExtValue())),
                    IsInBounds ? Addr.isKnownNonNull() : NotKnownNonNull);
@@ -219,21 +225,17 @@ class CGBuilderTy : public CGBuilderBaseTy {
   /// true.
   using CGBuilderBaseTy::CreateStructGEP;
   Address CreateStructGEP(Address Addr, unsigned Index,
-                          const llvm::Twine &Name = "",
-                          bool IsBaseConstantNull = false) {
+                          const llvm::Twine &Name = "") {
     llvm::StructType *ElTy = cast<llvm::StructType>(Addr.getElementType());
     const llvm::DataLayout &DL = BB->getDataLayout();
     const llvm::StructLayout *Layout = DL.getStructLayout(ElTy);
     auto Offset = CharUnits::fromQuantity(Layout->getElementOffset(Index));
 
-    llvm::GEPNoWrapFlags NWFlags = llvm::GEPNoWrapFlags::noUnsignedWrap();
-    if (!IsBaseConstantNull)
-      NWFlags |= llvm::GEPNoWrapFlags::inBounds();
-    return Address(
-        CreateConstGEP2_32(Addr.getElementType(), Addr.getBasePointer(), 0,
-                           Index, Name, NWFlags),
-        ElTy->getElementType(Index),
-        Addr.getAlignment().alignmentAtOffset(Offset), Addr.isKnownNonNull());
+    return Address(CreateStructGEP(Addr.getElementType(), 
Addr.getBasePointer(),
+                                   Index, Name),
+                   ElTy->getElementType(Index),
+                   Addr.getAlignment().alignmentAtOffset(Offset),
+                   Addr.isKnownNonNull());
   }
 
   /// Given
diff --git a/clang/lib/CodeGen/CGExpr.cpp b/clang/lib/CodeGen/CGExpr.cpp
index a85d6f537d25b..9d90e96a48d0d 100644
--- a/clang/lib/CodeGen/CGExpr.cpp
+++ b/clang/lib/CodeGen/CGExpr.cpp
@@ -4919,8 +4919,9 @@ static Address emitAddrOfFieldStorage(CodeGenFunction 
&CGF, Address base,
   unsigned idx =
     CGF.CGM.getTypes().getCGRecordLayout(rec).getLLVMFieldNo(field);
 
-  return CGF.Builder.CreateStructGEP(base, idx, field->getName(),
-                                     IsBaseConstantNull);
+  if (IsBaseConstantNull)
+    return CGF.Builder.CreateConstGEP2_32(base, 0, idx, field->getName());
+  return CGF.Builder.CreateStructGEP(base, idx, field->getName());
 }
 
 static Address emitPreserveStructAccess(CodeGenFunction &CGF, LValue base,
diff --git 
a/clang/test/CodeGen/catch-nullptr-and-nonzero-offset-in-offsetof-idiom.c 
b/clang/test/CodeGen/catch-nullptr-and-nonzero-offset-in-offsetof-idiom.c
index a7cfd77766712..46e22fbdb38ac 100644
--- a/clang/test/CodeGen/catch-nullptr-and-nonzero-offset-in-offsetof-idiom.c
+++ b/clang/test/CodeGen/catch-nullptr-and-nonzero-offset-in-offsetof-idiom.c
@@ -17,7 +17,7 @@ struct S {
 
 // CHECK-LABEL: @get_offset_of_y_naively(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    ret i64 ptrtoint (ptr getelementptr nuw ([[STRUCT_S:%.*]], 
ptr null, i32 0, i32 1) to i64)
+// CHECK-NEXT:    ret i64 ptrtoint (ptr getelementptr ([[STRUCT_S:%.*]], ptr 
null, i32 0, i32 1) to i64)
 //
 uintptr_t get_offset_of_y_naively(void) {
   return ((uintptr_t)(&(((struct S *)0)->y)));
diff --git 
a/clang/test/CodeGenCXX/catch-nullptr-and-nonzero-offset-in-offsetof-idiom.cpp 
b/clang/test/CodeGenCXX/catch-nullptr-and-nonzero-offset-in-offsetof-idiom.cpp
index 2df1df53a3448..7fa81e0986b21 100644
--- 
a/clang/test/CodeGenCXX/catch-nullptr-and-nonzero-offset-in-offsetof-idiom.cpp
+++ 
b/clang/test/CodeGenCXX/catch-nullptr-and-nonzero-offset-in-offsetof-idiom.cpp
@@ -10,7 +10,7 @@ struct S {
 
 // CHECK-LABEL: @_Z23get_offset_of_y_naivelyv(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    ret i64 ptrtoint (ptr getelementptr nuw ([[STRUCT_S:%.*]], 
ptr null, i32 0, i32 1) to i64)
+// CHECK-NEXT:    ret i64 ptrtoint (ptr getelementptr ([[STRUCT_S:%.*]], ptr 
null, i32 0, i32 1) to i64)
 //
 uintptr_t get_offset_of_y_naively() {
   return ((uintptr_t)(&(((S *)nullptr)->y)));
@@ -28,7 +28,7 @@ struct T {
 
 // CHECK-LABEL: @_Z30get_offset_of_y_naively_nestedv(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    ret i64 ptrtoint (ptr getelementptr nuw ([[STRUCT_S:%.*]], 
ptr getelementptr nuw ([[STRUCT_T:%.*]], ptr null, i32 0, i32 1), i32 0, i32 1) 
to i64)
+// CHECK-NEXT:    ret i64 ptrtoint (ptr getelementptr ([[STRUCT_S:%.*]], ptr 
getelementptr ([[STRUCT_T:%.*]], ptr null, i32 0, i32 1), i32 0, i32 1) to i64)
 //
 uintptr_t get_offset_of_y_naively_nested() {
   return ((uintptr_t)(&(((T *)nullptr)->s.y)));
@@ -36,7 +36,7 @@ uintptr_t get_offset_of_y_naively_nested() {
 
 // CHECK-LABEL: @_Z42get_offset_of_y_naively_nested_with_parensv(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    ret i64 ptrtoint (ptr getelementptr nuw ([[STRUCT_S:%.*]], 
ptr getelementptr nuw ([[STRUCT_T:%.*]], ptr null, i32 0, i32 1), i32 0, i32 1) 
to i64)
+// CHECK-NEXT:    ret i64 ptrtoint (ptr getelementptr ([[STRUCT_S:%.*]], ptr 
getelementptr ([[STRUCT_T:%.*]], ptr null, i32 0, i32 1), i32 0, i32 1) to i64)
 //
 uintptr_t get_offset_of_y_naively_nested_with_parens() {
   return ((uintptr_t)(&((((T *)nullptr)->s).y)));

>From 670ab42f245dfeccc17b4caa37b5b119a54e13ba Mon Sep 17 00:00:00 2001
From: Yingwei Zheng <dtcxzyw2...@gmail.com>
Date: Thu, 20 Mar 2025 18:11:06 +0800
Subject: [PATCH 6/6] [Clang][CodeGen] Add release note.

---
 clang/docs/ReleaseNotes.rst | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst
index 4258d0d72c950..94c865f309547 100644
--- a/clang/docs/ReleaseNotes.rst
+++ b/clang/docs/ReleaseNotes.rst
@@ -42,6 +42,11 @@ Potentially Breaking Changes
 C/C++ Language Potentially Breaking Changes
 -------------------------------------------
 
+- Some old-style offsetof idioms like ``((int)(&(((struct S *)0)->field)))`` are treated
+  as UB. To avoid breaking existing code, Clang does not set the ``inbounds`` flag on the GEPs it emits for such patterns.
+  However, using the UB-free builtin ``__builtin_offsetof`` is still strongly recommended.
+  (#GH130734)
+
 C++ Specific Potentially Breaking Changes
 -----------------------------------------
 

_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to