https://github.com/dtcxzyw updated https://github.com/llvm/llvm-project/pull/130734
>From 173ba1595cef80e2affd3ed57feabb2feb0856a7 Mon Sep 17 00:00:00 2001 From: Yingwei Zheng <dtcxzyw2...@gmail.com> Date: Tue, 11 Mar 2025 16:20:08 +0800 Subject: [PATCH 01/11] [Clang][CodeGen] Do not set inbounds flag for struct GEP with null base pointers --- clang/lib/CodeGen/CGBuilder.h | 15 ++++++++++----- ...nullptr-and-nonzero-offset-in-offsetof-idiom.c | 2 +- ...llptr-and-nonzero-offset-in-offsetof-idiom.cpp | 2 +- 3 files changed, 12 insertions(+), 7 deletions(-) diff --git a/clang/lib/CodeGen/CGBuilder.h b/clang/lib/CodeGen/CGBuilder.h index b8036cf6e6a30..11e8818b33397 100644 --- a/clang/lib/CodeGen/CGBuilder.h +++ b/clang/lib/CodeGen/CGBuilder.h @@ -223,11 +223,16 @@ class CGBuilderTy : public CGBuilderBaseTy { const llvm::StructLayout *Layout = DL.getStructLayout(ElTy); auto Offset = CharUnits::fromQuantity(Layout->getElementOffset(Index)); - return Address(CreateStructGEP(Addr.getElementType(), Addr.getBasePointer(), - Index, Name), - ElTy->getElementType(Index), - Addr.getAlignment().alignmentAtOffset(Offset), - Addr.isKnownNonNull()); + // Specially, we don't add inbounds flags if the base pointer is null. + // This is a workaround for old-style offsetof macros. + llvm::GEPNoWrapFlags NWFlags = llvm::GEPNoWrapFlags::noUnsignedWrap(); + if (!isa<llvm::ConstantPointerNull>(Addr.getBasePointer())) + NWFlags |= llvm::GEPNoWrapFlags::inBounds(); + return Address( + CreateConstGEP2_32(Addr.getElementType(), Addr.getBasePointer(), 0, + Index, Name, NWFlags), + ElTy->getElementType(Index), + Addr.getAlignment().alignmentAtOffset(Offset), Addr.isKnownNonNull()); } /// Given diff --git a/clang/test/CodeGen/catch-nullptr-and-nonzero-offset-in-offsetof-idiom.c b/clang/test/CodeGen/catch-nullptr-and-nonzero-offset-in-offsetof-idiom.c index 68c0ee3a3a885..a7cfd77766712 100644 --- a/clang/test/CodeGen/catch-nullptr-and-nonzero-offset-in-offsetof-idiom.c +++ b/clang/test/CodeGen/catch-nullptr-and-nonzero-offset-in-offsetof-idiom.c @@ -17,7 +17,7 @@ struct S { // CHECK-LABEL: @get_offset_of_y_naively( // CHECK-NEXT: entry: -// CHECK-NEXT: ret i64 ptrtoint (ptr getelementptr inbounds nuw ([[STRUCT_S:%.*]], ptr null, i32 0, i32 1) to i64) +// CHECK-NEXT: ret i64 ptrtoint (ptr getelementptr nuw ([[STRUCT_S:%.*]], ptr null, i32 0, i32 1) to i64) // uintptr_t get_offset_of_y_naively(void) { return ((uintptr_t)(&(((struct S *)0)->y))); diff --git a/clang/test/CodeGenCXX/catch-nullptr-and-nonzero-offset-in-offsetof-idiom.cpp b/clang/test/CodeGenCXX/catch-nullptr-and-nonzero-offset-in-offsetof-idiom.cpp index 34d4f4c9e34eb..f00a2c486574c 100644 --- a/clang/test/CodeGenCXX/catch-nullptr-and-nonzero-offset-in-offsetof-idiom.cpp +++ b/clang/test/CodeGenCXX/catch-nullptr-and-nonzero-offset-in-offsetof-idiom.cpp @@ -10,7 +10,7 @@ struct S { // CHECK-LABEL: @_Z23get_offset_of_y_naivelyv( // CHECK-NEXT: entry: -// CHECK-NEXT: ret i64 ptrtoint (ptr getelementptr inbounds nuw ([[STRUCT_S:%.*]], ptr null, i32 0, i32 1) to i64) +// CHECK-NEXT: ret i64 ptrtoint (ptr getelementptr nuw ([[STRUCT_S:%.*]], ptr null, i32 0, i32 1) to i64) // uintptr_t get_offset_of_y_naively() { return ((uintptr_t)(&(((S *)nullptr)->y))); >From fe733d081be378f713ab0169831cc63eec65a7dc Mon Sep 17 00:00:00 2001 From: Yingwei Zheng <dtcxzyw2...@gmail.com> Date: Fri, 14 Mar 2025 14:28:55 +0800 Subject: [PATCH 02/11] [Clang][CodeGen] Address review comments. --- clang/lib/CodeGen/CGBuilder.h | 15 +++++---------- clang/lib/CodeGen/CGExpr.cpp | 27 +++++++++++++++++++-------- clang/lib/CodeGen/CodeGenFunction.h | 3 ++- 3 files changed, 26 insertions(+), 19 deletions(-) diff --git a/clang/lib/CodeGen/CGBuilder.h b/clang/lib/CodeGen/CGBuilder.h index 11e8818b33397..b8036cf6e6a30 100644 --- a/clang/lib/CodeGen/CGBuilder.h +++ b/clang/lib/CodeGen/CGBuilder.h @@ -223,16 +223,11 @@ class CGBuilderTy : public CGBuilderBaseTy { const llvm::StructLayout *Layout = DL.getStructLayout(ElTy); auto Offset = CharUnits::fromQuantity(Layout->getElementOffset(Index)); - // Specially, we don't add inbounds flags if the base pointer is null. - // This is a workaround for old-style offsetof macros. - llvm::GEPNoWrapFlags NWFlags = llvm::GEPNoWrapFlags::noUnsignedWrap(); - if (!isa<llvm::ConstantPointerNull>(Addr.getBasePointer())) - NWFlags |= llvm::GEPNoWrapFlags::inBounds(); - return Address( - CreateConstGEP2_32(Addr.getElementType(), Addr.getBasePointer(), 0, - Index, Name, NWFlags), - ElTy->getElementType(Index), - Addr.getAlignment().alignmentAtOffset(Offset), Addr.isKnownNonNull()); + return Address(CreateStructGEP(Addr.getElementType(), Addr.getBasePointer(), + Index, Name), + ElTy->getElementType(Index), + Addr.getAlignment().alignmentAtOffset(Offset), + Addr.isKnownNonNull()); } /// Given diff --git a/clang/lib/CodeGen/CGExpr.cpp b/clang/lib/CodeGen/CGExpr.cpp index 3d3a111f0514a..0b32d03e143c5 100644 --- a/clang/lib/CodeGen/CGExpr.cpp +++ b/clang/lib/CodeGen/CGExpr.cpp @@ -4792,6 +4792,10 @@ LValue CodeGenFunction::EmitMemberExpr(const MemberExpr *E) { } Expr *BaseExpr = E->getBase(); + Expr *UnderlyingBaseExpr = BaseExpr; + while (auto *BaseMemberExpr = dyn_cast<MemberExpr>(UnderlyingBaseExpr)) + UnderlyingBaseExpr = BaseMemberExpr->getBase(); + bool IsBaseConstantNull = getContext().isSentinelNullExpr(UnderlyingBaseExpr); // If this is s.x, emit s as an lvalue. If it is s->x, emit s as a scalar. LValue BaseLV; if (E->isArrow()) { @@ -4813,7 +4817,7 @@ LValue CodeGenFunction::EmitMemberExpr(const MemberExpr *E) { NamedDecl *ND = E->getMemberDecl(); if (auto *Field = dyn_cast<FieldDecl>(ND)) { - LValue LV = EmitLValueForField(BaseLV, Field); + LValue LV = EmitLValueForField(BaseLV, Field, IsBaseConstantNull); setObjCGCLValueClass(getContext(), E, LV); if (getLangOpts().OpenMP) { // If the member was explicitly marked as nontemporal, mark it as @@ -4899,12 +4903,15 @@ unsigned CodeGenFunction::getDebugInfoFIndex(const RecordDecl *Rec, /// Get the address of a zero-sized field within a record. The resulting /// address doesn't necessarily have the right type. static Address emitAddrOfZeroSizeField(CodeGenFunction &CGF, Address Base, - const FieldDecl *Field) { + const FieldDecl *Field, + bool IsBaseConstantNull) { CharUnits Offset = CGF.getContext().toCharUnitsFromBits( CGF.getContext().getFieldOffset(Field)); if (Offset.isZero()) return Base; Base = Base.withElementType(CGF.Int8Ty); + if (IsBaseConstantNull) + return CGF.Builder.CreateConstByteGEP(Base, Offset); return CGF.Builder.CreateConstInBoundsByteGEP(Base, Offset); } @@ -4913,15 +4920,18 @@ static Address emitAddrOfZeroSizeField(CodeGenFunction &CGF, Address Base, /// /// The resulting address doesn't necessarily have the right type. static Address emitAddrOfFieldStorage(CodeGenFunction &CGF, Address base, - const FieldDecl *field) { + const FieldDecl *field, + bool IsBaseConstantNull) { if (isEmptyFieldForLayout(CGF.getContext(), field)) - return emitAddrOfZeroSizeField(CGF, base, field); + return emitAddrOfZeroSizeField(CGF, base, field, IsBaseConstantNull); const RecordDecl *rec = field->getParent(); unsigned idx = CGF.CGM.getTypes().getCGRecordLayout(rec).getLLVMFieldNo(field); + if (IsBaseConstantNull) + return CGF.Builder.CreateConstGEP(base, idx, field->getName()); return CGF.Builder.CreateStructGEP(base, idx, field->getName()); } @@ -4957,8 +4967,8 @@ static bool hasAnyVptr(const QualType Type, const ASTContext &Context) { return false; } -LValue CodeGenFunction::EmitLValueForField(LValue base, - const FieldDecl *field) { +LValue CodeGenFunction::EmitLValueForField(LValue base, const FieldDecl *field, + bool IsBaseConstantNull) { LValueBaseInfo BaseInfo = base.getBaseInfo(); if (field->isBitField()) { @@ -5090,7 +5100,7 @@ LValue CodeGenFunction::EmitLValueForField(LValue base, if (!IsInPreservedAIRegion && (!getDebugInfo() || !rec->hasAttr<BPFPreserveAccessIndexAttr>())) // For structs, we GEP to the field that the record layout suggests. - addr = emitAddrOfFieldStorage(*this, addr, field); + addr = emitAddrOfFieldStorage(*this, addr, field, IsBaseConstantNull); else // Remember the original struct field index addr = emitPreserveStructAccess(*this, base, addr, field); @@ -5134,7 +5144,8 @@ CodeGenFunction::EmitLValueForFieldInitialization(LValue Base, if (!FieldType->isReferenceType()) return EmitLValueForField(Base, Field); - Address V = emitAddrOfFieldStorage(*this, Base.getAddress(), Field); + Address V = emitAddrOfFieldStorage(*this, Base.getAddress(), Field, + /*IsBaseConstantNull=*/false); // Make sure that the address is pointing to the right type. llvm::Type *llvmType = ConvertTypeForMem(FieldType); diff --git a/clang/lib/CodeGen/CodeGenFunction.h b/clang/lib/CodeGen/CodeGenFunction.h index ca00a0e8c6cf4..46e0dde6a1090 100644 --- a/clang/lib/CodeGen/CodeGenFunction.h +++ b/clang/lib/CodeGen/CodeGenFunction.h @@ -4472,7 +4472,8 @@ class CodeGenFunction : public CodeGenTypeCache { const ObjCIvarDecl *Ivar); llvm::Value *EmitIvarOffsetAsPointerDiff(const ObjCInterfaceDecl *Interface, const ObjCIvarDecl *Ivar); - LValue EmitLValueForField(LValue Base, const FieldDecl *Field); + LValue EmitLValueForField(LValue Base, const FieldDecl *Field, + bool IsBaseConstantNull = false); LValue EmitLValueForLambdaField(const FieldDecl *Field); LValue EmitLValueForLambdaField(const FieldDecl *Field, llvm::Value *ThisValue); >From 586bc15ed811805e159f2d18d436ccf42cb789c5 Mon Sep 17 00:00:00 2001 From: Yingwei Zheng <dtcxzyw2...@gmail.com> Date: Fri, 14 Mar 2025 15:41:22 +0800 Subject: [PATCH 03/11] [Clang][CodeGen] Add more tests. --- clang/lib/CodeGen/CGBuilder.h | 18 ++++++++----- clang/lib/CodeGen/CGExpr.cpp | 8 +++--- ...r-and-nonzero-offset-in-offsetof-idiom.cpp | 26 +++++++++++++++++++ 3 files changed, 43 insertions(+), 9 deletions(-) diff --git a/clang/lib/CodeGen/CGBuilder.h b/clang/lib/CodeGen/CGBuilder.h index b8036cf6e6a30..88ec55b322cf5 100644 --- a/clang/lib/CodeGen/CGBuilder.h +++ b/clang/lib/CodeGen/CGBuilder.h @@ -215,19 +215,25 @@ class CGBuilderTy : public CGBuilderBaseTy { /// /// This API assumes that drilling into a struct like this is always an /// inbounds and nuw operation. + /// Specifically, inbounds flag will not be set if \p IsBaseConstantNull is + /// true. using CGBuilderBaseTy::CreateStructGEP; Address CreateStructGEP(Address Addr, unsigned Index, - const llvm::Twine &Name = "") { + const llvm::Twine &Name = "", + bool IsBaseConstantNull = false) { llvm::StructType *ElTy = cast<llvm::StructType>(Addr.getElementType()); const llvm::DataLayout &DL = BB->getDataLayout(); const llvm::StructLayout *Layout = DL.getStructLayout(ElTy); auto Offset = CharUnits::fromQuantity(Layout->getElementOffset(Index)); - return Address(CreateStructGEP(Addr.getElementType(), Addr.getBasePointer(), - Index, Name), - ElTy->getElementType(Index), - Addr.getAlignment().alignmentAtOffset(Offset), - Addr.isKnownNonNull()); + llvm::GEPNoWrapFlags NWFlags = llvm::GEPNoWrapFlags::noUnsignedWrap(); + if (!IsBaseConstantNull) + NWFlags |= llvm::GEPNoWrapFlags::inBounds(); + return Address( + CreateConstGEP2_32(Addr.getElementType(), Addr.getBasePointer(), 0, + Index, Name, NWFlags), + ElTy->getElementType(Index), + Addr.getAlignment().alignmentAtOffset(Offset), Addr.isKnownNonNull()); } /// Given diff --git a/clang/lib/CodeGen/CGExpr.cpp b/clang/lib/CodeGen/CGExpr.cpp index 0b32d03e143c5..b10dbee783541 100644 --- a/clang/lib/CodeGen/CGExpr.cpp +++ b/clang/lib/CodeGen/CGExpr.cpp @@ -4792,6 +4792,9 @@ LValue CodeGenFunction::EmitMemberExpr(const MemberExpr *E) { } Expr *BaseExpr = E->getBase(); + // Check whether the underlying base pointer is a constant null. + // If so, we do not set inbounds flag for GEP to avoid breaking some old-style + // offsetof idioms. Expr *UnderlyingBaseExpr = BaseExpr; while (auto *BaseMemberExpr = dyn_cast<MemberExpr>(UnderlyingBaseExpr)) UnderlyingBaseExpr = BaseMemberExpr->getBase(); @@ -4930,9 +4933,8 @@ static Address emitAddrOfFieldStorage(CodeGenFunction &CGF, Address base, unsigned idx = CGF.CGM.getTypes().getCGRecordLayout(rec).getLLVMFieldNo(field); - if (IsBaseConstantNull) - return CGF.Builder.CreateConstGEP(base, idx, field->getName()); - return CGF.Builder.CreateStructGEP(base, idx, field->getName()); + return CGF.Builder.CreateStructGEP(base, idx, field->getName(), + IsBaseConstantNull); } static Address emitPreserveStructAccess(CodeGenFunction &CGF, LValue base, diff --git a/clang/test/CodeGenCXX/catch-nullptr-and-nonzero-offset-in-offsetof-idiom.cpp b/clang/test/CodeGenCXX/catch-nullptr-and-nonzero-offset-in-offsetof-idiom.cpp index f00a2c486574c..ac45d2e0da4fc 100644 --- a/clang/test/CodeGenCXX/catch-nullptr-and-nonzero-offset-in-offsetof-idiom.cpp +++ b/clang/test/CodeGenCXX/catch-nullptr-and-nonzero-offset-in-offsetof-idiom.cpp @@ -16,6 +16,32 @@ uintptr_t get_offset_of_y_naively() { return ((uintptr_t)(&(((S *)nullptr)->y))); } +struct Empty {}; + +struct T { + int a; + S s; + [[no_unique_address]] Empty e1; + int b; + [[no_unique_address]] Empty e2; +}; + +// CHECK-LABEL: @_Z30get_offset_of_y_naively_nestedv( +// CHECK-NEXT: entry: +// CHECK-NEXT: ret i64 ptrtoint (ptr getelementptr nuw ([[STRUCT_S:%.*]], ptr getelementptr nuw ([[STRUCT_T:%.*]], ptr null, i32 0, i32 1), i32 0, i32 1) to i64) +// +uintptr_t get_offset_of_y_naively_nested() { + return ((uintptr_t)(&(((T *)nullptr)->s.y))); +} + +// CHECK-LABEL: @_Z26get_offset_of_zero_storagev( +// CHECK-NEXT: entry: +// CHECK-NEXT: ret i64 ptrtoint (ptr getelementptr (i8, ptr null, i64 16) to i64) +// +uintptr_t get_offset_of_zero_storage() { + return ((uintptr_t)(&(((T *)nullptr)->e2))); +} + // CHECK-LABEL: @_Z27get_offset_of_y_via_builtinv( // CHECK-NEXT: entry: // CHECK-NEXT: ret i64 4 >From b11610211414fc4aa615a669bf41872ef34f85ab Mon Sep 17 00:00:00 2001 From: Yingwei Zheng <dtcxzyw2...@gmail.com> Date: Thu, 20 Mar 2025 17:16:26 +0800 Subject: [PATCH 04/11] [Clang][CodeGen] Handle parens --- clang/lib/CodeGen/CGExpr.cpp | 4 ++-- ...catch-nullptr-and-nonzero-offset-in-offsetof-idiom.cpp | 8 ++++++++ 2 files changed, 10 insertions(+), 2 deletions(-) diff --git a/clang/lib/CodeGen/CGExpr.cpp b/clang/lib/CodeGen/CGExpr.cpp index b10dbee783541..6b5c4e7291031 100644 --- a/clang/lib/CodeGen/CGExpr.cpp +++ b/clang/lib/CodeGen/CGExpr.cpp @@ -4795,9 +4795,9 @@ LValue CodeGenFunction::EmitMemberExpr(const MemberExpr *E) { // Check whether the underlying base pointer is a constant null. // If so, we do not set inbounds flag for GEP to avoid breaking some old-style // offsetof idioms. - Expr *UnderlyingBaseExpr = BaseExpr; + Expr *UnderlyingBaseExpr = BaseExpr->IgnoreParens(); while (auto *BaseMemberExpr = dyn_cast<MemberExpr>(UnderlyingBaseExpr)) - UnderlyingBaseExpr = BaseMemberExpr->getBase(); + UnderlyingBaseExpr = BaseMemberExpr->getBase()->IgnoreParens(); bool IsBaseConstantNull = getContext().isSentinelNullExpr(UnderlyingBaseExpr); // If this is s.x, emit s as an lvalue. If it is s->x, emit s as a scalar. LValue BaseLV; diff --git a/clang/test/CodeGenCXX/catch-nullptr-and-nonzero-offset-in-offsetof-idiom.cpp b/clang/test/CodeGenCXX/catch-nullptr-and-nonzero-offset-in-offsetof-idiom.cpp index ac45d2e0da4fc..2df1df53a3448 100644 --- a/clang/test/CodeGenCXX/catch-nullptr-and-nonzero-offset-in-offsetof-idiom.cpp +++ b/clang/test/CodeGenCXX/catch-nullptr-and-nonzero-offset-in-offsetof-idiom.cpp @@ -34,6 +34,14 @@ uintptr_t get_offset_of_y_naively_nested() { return ((uintptr_t)(&(((T *)nullptr)->s.y))); } +// CHECK-LABEL: @_Z42get_offset_of_y_naively_nested_with_parensv( +// CHECK-NEXT: entry: +// CHECK-NEXT: ret i64 ptrtoint (ptr getelementptr nuw ([[STRUCT_S:%.*]], ptr getelementptr nuw ([[STRUCT_T:%.*]], ptr null, i32 0, i32 1), i32 0, i32 1) to i64) +// +uintptr_t get_offset_of_y_naively_nested_with_parens() { + return ((uintptr_t)(&((((T *)nullptr)->s).y))); +} + // CHECK-LABEL: @_Z26get_offset_of_zero_storagev( // CHECK-NEXT: entry: // CHECK-NEXT: ret i64 ptrtoint (ptr getelementptr (i8, ptr null, i64 16) to i64) >From 78f88f09c57aecda41a9f910ec2fb4462ad8313e Mon Sep 17 00:00:00 2001 From: Yingwei Zheng <dtcxzyw2...@gmail.com> Date: Thu, 20 Mar 2025 17:57:59 +0800 Subject: [PATCH 05/11] [Clang][CodeGen] Use `createConstGEP2_32` --- clang/lib/CodeGen/CGBuilder.h | 42 ++++++++++--------- clang/lib/CodeGen/CGExpr.cpp | 5 ++- ...ptr-and-nonzero-offset-in-offsetof-idiom.c | 2 +- ...r-and-nonzero-offset-in-offsetof-idiom.cpp | 6 +-- 4 files changed, 29 insertions(+), 26 deletions(-) diff --git a/clang/lib/CodeGen/CGBuilder.h b/clang/lib/CodeGen/CGBuilder.h index 88ec55b322cf5..3a133447c62e1 100644 --- a/clang/lib/CodeGen/CGBuilder.h +++ b/clang/lib/CodeGen/CGBuilder.h @@ -64,21 +64,27 @@ class CGBuilderTy : public CGBuilderBaseTy { Address createConstGEP2_32(Address Addr, unsigned Idx0, unsigned Idx1, const llvm::Twine &Name) { const llvm::DataLayout &DL = BB->getDataLayout(); - llvm::GetElementPtrInst *GEP; + llvm::Value *V; if (IsInBounds) - GEP = cast<llvm::GetElementPtrInst>(CreateConstInBoundsGEP2_32( - Addr.getElementType(), emitRawPointerFromAddress(Addr), Idx0, Idx1, - Name)); + V = CreateConstInBoundsGEP2_32(Addr.getElementType(), + emitRawPointerFromAddress(Addr), Idx0, + Idx1, Name); else - GEP = cast<llvm::GetElementPtrInst>(CreateConstGEP2_32( - Addr.getElementType(), emitRawPointerFromAddress(Addr), Idx0, Idx1, - Name)); + V = CreateConstGEP2_32(Addr.getElementType(), + emitRawPointerFromAddress(Addr), Idx0, Idx1, Name); llvm::APInt Offset( DL.getIndexSizeInBits(Addr.getType()->getPointerAddressSpace()), 0, /*isSigned=*/true); - if (!GEP->accumulateConstantOffset(DL, Offset)) - llvm_unreachable("offset of GEP with constants is always computable"); - return Address(GEP, GEP->getResultElementType(), + llvm::Type *ElementTy = nullptr; + if (auto *GEP = dyn_cast<llvm::GEPOperator>(V)) { + if (!GEP->accumulateConstantOffset(DL, Offset)) + llvm_unreachable("offset of GEP with constants is always computable"); + ElementTy = GEP->getResultElementType(); + } else { + ElementTy = llvm::GetElementPtrInst::getIndexedType(Addr.getElementType(), + {Idx0, Idx1}); + } + return Address(V, ElementTy, Addr.getAlignment().alignmentAtOffset( CharUnits::fromQuantity(Offset.getSExtValue())), IsInBounds ? Addr.isKnownNonNull() : NotKnownNonNull); @@ -219,21 +225,17 @@ class CGBuilderTy : public CGBuilderBaseTy { /// true. using CGBuilderBaseTy::CreateStructGEP; Address CreateStructGEP(Address Addr, unsigned Index, - const llvm::Twine &Name = "", - bool IsBaseConstantNull = false) { + const llvm::Twine &Name = "") { llvm::StructType *ElTy = cast<llvm::StructType>(Addr.getElementType()); const llvm::DataLayout &DL = BB->getDataLayout(); const llvm::StructLayout *Layout = DL.getStructLayout(ElTy); auto Offset = CharUnits::fromQuantity(Layout->getElementOffset(Index)); - llvm::GEPNoWrapFlags NWFlags = llvm::GEPNoWrapFlags::noUnsignedWrap(); - if (!IsBaseConstantNull) - NWFlags |= llvm::GEPNoWrapFlags::inBounds(); - return Address( - CreateConstGEP2_32(Addr.getElementType(), Addr.getBasePointer(), 0, - Index, Name, NWFlags), - ElTy->getElementType(Index), - Addr.getAlignment().alignmentAtOffset(Offset), Addr.isKnownNonNull()); + return Address(CreateStructGEP(Addr.getElementType(), Addr.getBasePointer(), + Index, Name), + ElTy->getElementType(Index), + Addr.getAlignment().alignmentAtOffset(Offset), + Addr.isKnownNonNull()); } /// Given diff --git a/clang/lib/CodeGen/CGExpr.cpp b/clang/lib/CodeGen/CGExpr.cpp index 6b5c4e7291031..2b12cecaf3b43 100644 --- a/clang/lib/CodeGen/CGExpr.cpp +++ b/clang/lib/CodeGen/CGExpr.cpp @@ -4933,8 +4933,9 @@ static Address emitAddrOfFieldStorage(CodeGenFunction &CGF, Address base, unsigned idx = CGF.CGM.getTypes().getCGRecordLayout(rec).getLLVMFieldNo(field); - return CGF.Builder.CreateStructGEP(base, idx, field->getName(), - IsBaseConstantNull); + if (IsBaseConstantNull) + return CGF.Builder.CreateConstGEP2_32(base, 0, idx, field->getName()); + return CGF.Builder.CreateStructGEP(base, idx, field->getName()); } static Address emitPreserveStructAccess(CodeGenFunction &CGF, LValue base, diff --git a/clang/test/CodeGen/catch-nullptr-and-nonzero-offset-in-offsetof-idiom.c b/clang/test/CodeGen/catch-nullptr-and-nonzero-offset-in-offsetof-idiom.c index a7cfd77766712..46e22fbdb38ac 100644 --- a/clang/test/CodeGen/catch-nullptr-and-nonzero-offset-in-offsetof-idiom.c +++ b/clang/test/CodeGen/catch-nullptr-and-nonzero-offset-in-offsetof-idiom.c @@ -17,7 +17,7 @@ struct S { // CHECK-LABEL: @get_offset_of_y_naively( // CHECK-NEXT: entry: -// CHECK-NEXT: ret i64 ptrtoint (ptr getelementptr nuw ([[STRUCT_S:%.*]], ptr null, i32 0, i32 1) to i64) +// CHECK-NEXT: ret i64 ptrtoint (ptr getelementptr ([[STRUCT_S:%.*]], ptr null, i32 0, i32 1) to i64) // uintptr_t get_offset_of_y_naively(void) { return ((uintptr_t)(&(((struct S *)0)->y))); diff --git a/clang/test/CodeGenCXX/catch-nullptr-and-nonzero-offset-in-offsetof-idiom.cpp b/clang/test/CodeGenCXX/catch-nullptr-and-nonzero-offset-in-offsetof-idiom.cpp index 2df1df53a3448..7fa81e0986b21 100644 --- a/clang/test/CodeGenCXX/catch-nullptr-and-nonzero-offset-in-offsetof-idiom.cpp +++ b/clang/test/CodeGenCXX/catch-nullptr-and-nonzero-offset-in-offsetof-idiom.cpp @@ -10,7 +10,7 @@ struct S { // CHECK-LABEL: @_Z23get_offset_of_y_naivelyv( // CHECK-NEXT: entry: -// CHECK-NEXT: ret i64 ptrtoint (ptr getelementptr nuw ([[STRUCT_S:%.*]], ptr null, i32 0, i32 1) to i64) +// CHECK-NEXT: ret i64 ptrtoint (ptr getelementptr ([[STRUCT_S:%.*]], ptr null, i32 0, i32 1) to i64) // uintptr_t get_offset_of_y_naively() { return ((uintptr_t)(&(((S *)nullptr)->y))); @@ -28,7 +28,7 @@ struct T { // CHECK-LABEL: @_Z30get_offset_of_y_naively_nestedv( // CHECK-NEXT: entry: -// CHECK-NEXT: ret i64 ptrtoint (ptr getelementptr nuw ([[STRUCT_S:%.*]], ptr getelementptr nuw ([[STRUCT_T:%.*]], ptr null, i32 0, i32 1), i32 0, i32 1) to i64) +// CHECK-NEXT: ret i64 ptrtoint (ptr getelementptr ([[STRUCT_S:%.*]], ptr getelementptr ([[STRUCT_T:%.*]], ptr null, i32 0, i32 1), i32 0, i32 1) to i64) // uintptr_t get_offset_of_y_naively_nested() { return ((uintptr_t)(&(((T *)nullptr)->s.y))); @@ -36,7 +36,7 @@ uintptr_t get_offset_of_y_naively_nested() { // CHECK-LABEL: @_Z42get_offset_of_y_naively_nested_with_parensv( // CHECK-NEXT: entry: -// CHECK-NEXT: ret i64 ptrtoint (ptr getelementptr nuw ([[STRUCT_S:%.*]], ptr getelementptr nuw ([[STRUCT_T:%.*]], ptr null, i32 0, i32 1), i32 0, i32 1) to i64) +// CHECK-NEXT: ret i64 ptrtoint (ptr getelementptr ([[STRUCT_S:%.*]], ptr getelementptr ([[STRUCT_T:%.*]], ptr null, i32 0, i32 1), i32 0, i32 1) to i64) // uintptr_t get_offset_of_y_naively_nested_with_parens() { return ((uintptr_t)(&((((T *)nullptr)->s).y))); >From a5e4bc66c00a63fb354e99c3d15d0cae8eb2b8dc Mon Sep 17 00:00:00 2001 From: Yingwei Zheng <dtcxzyw2...@gmail.com> Date: Thu, 20 Mar 2025 18:11:06 +0800 Subject: [PATCH 06/11] [Clang][CodeGen] Add release note. --- clang/docs/ReleaseNotes.rst | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst index e409f206f6eae..536b477ed02a3 100644 --- a/clang/docs/ReleaseNotes.rst +++ b/clang/docs/ReleaseNotes.rst @@ -42,6 +42,11 @@ Potentially Breaking Changes C/C++ Language Potentially Breaking Changes ------------------------------------------- +- Some old-style offsetof idioms like ``((int)(&(((struct S *)0)->field)))`` are treated + as UB. To avoid breaking existing code, ``inbounds`` flags will not be set for such patterns. + However, it is still highly recommended to use the UB-free builtin ``__builtin_offsetof``. + (#GH130734) + C++ Specific Potentially Breaking Changes ----------------------------------------- >From 7807c12628b8687f2cb4db42d58157b565122688 Mon Sep 17 00:00:00 2001 From: Yingwei Zheng <dtcxzyw2...@gmail.com> Date: Thu, 20 Mar 2025 18:20:40 +0800 Subject: [PATCH 07/11] [Clang][CodeGen] Remove comment. --- clang/lib/CodeGen/CGBuilder.h | 2 -- 1 file changed, 2 deletions(-) diff --git a/clang/lib/CodeGen/CGBuilder.h b/clang/lib/CodeGen/CGBuilder.h index 3a133447c62e1..c5e25d9f8cae5 100644 --- a/clang/lib/CodeGen/CGBuilder.h +++ b/clang/lib/CodeGen/CGBuilder.h @@ -221,8 +221,6 @@ class CGBuilderTy : public CGBuilderBaseTy { /// /// This API assumes that drilling into a struct like this is always an /// inbounds and nuw operation. - /// Specifically, inbounds flag will not be set if \p IsBaseConstantNull is - /// true. using CGBuilderBaseTy::CreateStructGEP; Address CreateStructGEP(Address Addr, unsigned Index, const llvm::Twine &Name = "") { >From 668cabe2b05ac336ab7c4f14c7878e88337d4071 Mon Sep 17 00:00:00 2001 From: Yingwei Zheng <dtcxzyw2...@gmail.com> Date: Fri, 21 Mar 2025 21:54:16 +0800 Subject: [PATCH 08/11] [Clang][CodeGen] Add more tests. NFC. --- ...r-and-nonzero-offset-in-offsetof-idiom.cpp | 36 +++++++++++++++++++ 1 file changed, 36 insertions(+) diff --git a/clang/test/CodeGenCXX/catch-nullptr-and-nonzero-offset-in-offsetof-idiom.cpp b/clang/test/CodeGenCXX/catch-nullptr-and-nonzero-offset-in-offsetof-idiom.cpp index 7fa81e0986b21..6a0d6265eff96 100644 --- a/clang/test/CodeGenCXX/catch-nullptr-and-nonzero-offset-in-offsetof-idiom.cpp +++ b/clang/test/CodeGenCXX/catch-nullptr-and-nonzero-offset-in-offsetof-idiom.cpp @@ -50,6 +50,42 @@ uintptr_t get_offset_of_zero_storage() { return ((uintptr_t)(&(((T *)nullptr)->e2))); } +namespace std { typedef decltype(__nullptr) nullptr_t; } +// CHECK-LABEL: @_Z29get_offset_of_y_integral_zerov( +// CHECK-NEXT: entry: +// CHECK-NEXT: ret i64 ptrtoint (ptr getelementptr ([[STRUCT_S:%.*]], ptr null, i32 0, i32 1) to i64) +// +uintptr_t get_offset_of_y_integral_zero() { + return ((uintptr_t)(&(((S *)0)->y))); +} + +// CHECK-LABEL: @_Z37get_offset_of_y_integral_zero_voidptrv( +// CHECK-NEXT: entry: +// CHECK-NEXT: ret i64 ptrtoint (ptr getelementptr ([[STRUCT_S:%.*]], ptr null, i32 0, i32 1) to i64) +// +uintptr_t get_offset_of_y_integral_zero_voidptr() { + return ((uintptr_t)(&(((S *)(void*)0)->y))); +} + +// CHECK-LABEL: @_Z25get_offset_of_y_nullptr_tv( +// CHECK-NEXT: entry: +// CHECK-NEXT: ret i64 ptrtoint (ptr getelementptr ([[STRUCT_S:%.*]], ptr null, i32 0, i32 1) to i64) +// +uintptr_t get_offset_of_y_nullptr_t() { + return ((uintptr_t)(&(((S *)std::nullptr_t{})->y))); +} + +// CHECK-LABEL: @_Z32get_offset_of_y_nullptr_constantv( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[NULL:%.*]] = alloca ptr, align 8 +// CHECK-NEXT: store ptr null, ptr [[NULL]], align 8 +// CHECK-NEXT: ret i64 ptrtoint (ptr getelementptr inbounds nuw ([[STRUCT_S:%.*]], ptr null, i32 0, i32 1) to i64) +// +uintptr_t get_offset_of_y_nullptr_constant() { + constexpr void *null = nullptr; + return ((uintptr_t)(&(((S *)null)->y))); +} + // CHECK-LABEL: @_Z27get_offset_of_y_via_builtinv( // CHECK-NEXT: entry: // CHECK-NEXT: ret i64 4 >From 32cff3eae16eb5c4be42e830823ef8d754fd4c61 Mon Sep 17 00:00:00 2001 From: Yingwei Zheng <dtcxzyw2...@gmail.com> Date: Sat, 22 Mar 2025 16:35:38 +0800 Subject: [PATCH 09/11] [Clang] Update release notes. NFC. --- clang/docs/ReleaseNotes.rst | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst index 536b477ed02a3..90f435c9f1e9b 100644 --- a/clang/docs/ReleaseNotes.rst +++ b/clang/docs/ReleaseNotes.rst @@ -44,8 +44,9 @@ C/C++ Language Potentially Breaking Changes - Some old-style offsetof idioms like ``((int)(&(((struct S *)0)->field)))`` are treated as UB. To avoid breaking existing code, ``inbounds`` flags will not be set for such patterns. - However, it is still highly recommended to use the UB-free builtin ``__builtin_offsetof``. - (#GH130734) + However, it is still highly recommended to use the UB-free macro ``offsetof`` or clang builtin + function ``__builtin_offsetof``. It is also possible to use ``-fwrapv-pointer`` or + ``-fno-delete-null-pointer-checks`` to make this behavior well-defined. (#GH130734, #GH130742) C++ Specific Potentially Breaking Changes ----------------------------------------- >From b8a45206155aabff8f52b11fb5fd570fc010fa0b Mon Sep 17 00:00:00 2001 From: Yingwei Zheng <dtcxzyw2...@gmail.com> Date: Mon, 31 Mar 2025 16:31:15 +0800 Subject: [PATCH 10/11] [Clang][Docs] Clarify release notes. NFC. --- clang/docs/ReleaseNotes.rst | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst index 90f435c9f1e9b..d67183aa82637 100644 --- a/clang/docs/ReleaseNotes.rst +++ b/clang/docs/ReleaseNotes.rst @@ -42,11 +42,12 @@ Potentially Breaking Changes C/C++ Language Potentially Breaking Changes ------------------------------------------- -- Some old-style offsetof idioms like ``((int)(&(((struct S *)0)->field)))`` are treated - as UB. To avoid breaking existing code, ``inbounds`` flags will not be set for such patterns. - However, it is still highly recommended to use the UB-free macro ``offsetof`` or clang builtin - function ``__builtin_offsetof``. It is also possible to use ``-fwrapv-pointer`` or - ``-fno-delete-null-pointer-checks`` to make this behavior well-defined. (#GH130734, #GH130742) +- New LLVM optimizations have been implemented that optimize pointer arithmetic on + null pointers more aggressively. As part of this, clang has implemented a special + case for old-style offsetof idioms like ``((int)(&(((struct S *)0)->field)))``, to + ensure they are not caught by these optimizations. It is also possible to use + ``-fwrapv-pointer`` or ``-fno-delete-null-pointer-checks`` to make pointer arithmetic + on null pointers well-defined. (#GH130734, #GH130742) C++ Specific Potentially Breaking Changes ----------------------------------------- >From ad576ca9cf47df080e6211d09ab96c7fa21ec746 Mon Sep 17 00:00:00 2001 From: Yingwei Zheng <dtcxzyw2...@gmail.com> Date: Mon, 31 Mar 2025 17:03:52 +0800 Subject: [PATCH 11/11] [Clang] Do not use the GEP result to infer offset --- clang/lib/CodeGen/CGBuilder.h | 16 +++++++--------- 1 file changed, 7 insertions(+), 9 deletions(-) diff --git a/clang/lib/CodeGen/CGBuilder.h b/clang/lib/CodeGen/CGBuilder.h index c5e25d9f8cae5..090f75d3b5d3c 100644 --- a/clang/lib/CodeGen/CGBuilder.h +++ b/clang/lib/CodeGen/CGBuilder.h @@ -75,15 +75,13 @@ class CGBuilderTy : public CGBuilderBaseTy { llvm::APInt Offset( DL.getIndexSizeInBits(Addr.getType()->getPointerAddressSpace()), 0, /*isSigned=*/true); - llvm::Type *ElementTy = nullptr; - if (auto *GEP = dyn_cast<llvm::GEPOperator>(V)) { - if (!GEP->accumulateConstantOffset(DL, Offset)) - llvm_unreachable("offset of GEP with constants is always computable"); - ElementTy = GEP->getResultElementType(); - } else { - ElementTy = llvm::GetElementPtrInst::getIndexedType(Addr.getElementType(), - {Idx0, Idx1}); - } + if (!llvm::GEPOperator::accumulateConstantOffset( + Addr.getElementType(), {getInt32(Idx0), getInt32(Idx1)}, DL, + Offset)) + llvm_unreachable( + "accumulateConstantOffset with constant indices should not fail."); + llvm::Type *ElementTy = llvm::GetElementPtrInst::getIndexedType( + Addr.getElementType(), {Idx0, Idx1}); return Address(V, ElementTy, Addr.getAlignment().alignmentAtOffset( CharUnits::fromQuantity(Offset.getSExtValue())), _______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits