https://github.com/serge-sans-paille updated https://github.com/llvm/llvm-project/pull/79502
>From e9151018b7a086b167db394caadb74e20dc27711 Mon Sep 17 00:00:00 2001 From: serge-sans-paille <sguel...@mozilla.com> Date: Thu, 25 Jan 2024 22:12:55 +0100 Subject: [PATCH] [clang] Only set the trailing bytes to zero when filling a partially initialized array Fix #79500 --- clang/lib/CodeGen/CGDecl.cpp | 60 ++++++++++++++++++- .../test/CodeGenCXX/trivial-auto-var-init.cpp | 6 +- .../test/CodeGenOpenCL/partial_initializer.cl | 3 +- 3 files changed, 64 insertions(+), 5 deletions(-) diff --git a/clang/lib/CodeGen/CGDecl.cpp b/clang/lib/CodeGen/CGDecl.cpp index bbe14ef4c17244f..9e7f2f15c6e0982 100644 --- a/clang/lib/CodeGen/CGDecl.cpp +++ b/clang/lib/CodeGen/CGDecl.cpp @@ -905,6 +905,47 @@ void CodeGenFunction::EmitScalarInit(const Expr *init, const ValueDecl *D, EmitStoreOfScalar(value, lvalue, /* isInitialization */ true); } +__attribute__((optnone)) static size_t +CountLeadingNonNullBytes(const llvm::DataLayout &DL, llvm::Constant *Init) { + // Zero and Undef never requires any extra stores. + if (Init->isNullValue() || isa<llvm::ConstantAggregateZero>(Init) || + isa<llvm::ConstantPointerNull>(Init) || isa<llvm::UndefValue>(Init)) + return 0u; + if (isa<llvm::ConstantInt>(Init) || isa<llvm::ConstantFP>(Init) || + isa<llvm::ConstantVector>(Init) || isa<llvm::BlockAddress>(Init) || + isa<llvm::ConstantExpr>(Init)) + return DL.getTypeAllocSize(Init->getType()); + + // See if we can emit each element. + if (isa<llvm::ConstantArray>(Init) || isa<llvm::ConstantStruct>(Init)) { + size_t LeadingNonNullBytes = 0; + for (unsigned i = 0, e = Init->getNumOperands(); i != e; ++i) { + llvm::Constant *Elt = cast<llvm::Constant>(Init->getOperand(i)); + size_t ExtraBytes = CountLeadingNonNullBytes(DL, Elt); + if (!ExtraBytes) + return LeadingNonNullBytes; + LeadingNonNullBytes += ExtraBytes; + } + return LeadingNonNullBytes; + } + + if (llvm::ConstantDataSequential *CDS = + dyn_cast<llvm::ConstantDataSequential>(Init)) { + size_t LeadingNonNullBytes = 0; + for (unsigned i = 0, e = CDS->getNumElements(); i != e; ++i) { + llvm::Constant *Elt = CDS->getElementAsConstant(i); + size_t ExtraBytes = CountLeadingNonNullBytes(DL, Elt); + if (!ExtraBytes) + return LeadingNonNullBytes; + LeadingNonNullBytes += ExtraBytes; + } + return LeadingNonNullBytes; + } + + // Anything else is hard and scary. + return 0; +} + /// Decide whether we can emit the non-zero parts of the specified initializer /// with equal or fewer than NumStores scalar stores. static bool canEmitInitWithFewStoresAfterBZero(llvm::Constant *Init, @@ -1209,8 +1250,23 @@ static void emitStoresForConstant(CodeGenModule &CGM, const VarDecl &D, // If the initializer is all or mostly the same, codegen with bzero / memset // then do a few stores afterward. if (shouldUseBZeroPlusStoresToInitialize(constant, ConstantSize)) { - auto *I = Builder.CreateMemSet(Loc, llvm::ConstantInt::get(CGM.Int8Ty, 0), - SizeVal, isVolatile); + size_t LeadingNonNullBytes = + CountLeadingNonNullBytes(CGM.getDataLayout(), constant); + // llvm::errs() << LeadingNonNullBytes << " out of " << ConstantSize << + // "\n"; + + llvm::Constant *Z8 = llvm::ConstantInt::get(CGM.Int8Ty, 0); + Address AdjustedLoc = + LeadingNonNullBytes ? Builder.CreateConstInBoundsByteGEP( + Loc.withElementType(CGM.Int8Ty), + CharUnits::fromQuantity(LeadingNonNullBytes)) + : Loc; + auto *I = Builder.CreateMemSet( + AdjustedLoc, Z8, + llvm::ConstantInt::get(CGM.IntPtrTy, + ConstantSize - LeadingNonNullBytes), + isVolatile); + if (IsAutoInit) I->addAnnotationMetadata("auto-init"); diff --git a/clang/test/CodeGenCXX/trivial-auto-var-init.cpp b/clang/test/CodeGenCXX/trivial-auto-var-init.cpp index eed9868cad07f84..b0deb8149ed936f 100644 --- a/clang/test/CodeGenCXX/trivial-auto-var-init.cpp +++ b/clang/test/CodeGenCXX/trivial-auto-var-init.cpp @@ -288,14 +288,16 @@ void test_huge_uninit() { // UNINIT-LABEL: test_huge_small_init( // ZERO-LABEL: test_huge_small_init( -// ZERO: call void @llvm.memset{{.*}}, i8 0, i64 65536, +// ZERO: %[[v0:.*]] = getelementptr inbounds i8, ptr %{{.*}}, i64 4 +// ZERO: call void @llvm.memset{{.*}}(ptr {{.*}} %[[v0]], i8 0, i64 65532, // ZERO-NOT: !annotation // ZERO: store i8 97, // ZERO: store i8 98, // ZERO: store i8 99, // ZERO: store i8 100, // PATTERN-LABEL: test_huge_small_init( -// PATTERN: call void @llvm.memset{{.*}}, i8 0, i64 65536, +// PATTERN: %[[v0:.*]] = getelementptr inbounds i8, ptr %{{.*}}, i64 4 +// PATTERN: call void @llvm.memset{{.*}}(ptr {{.*}} %[[v0]], i8 0, i64 65532, // PATTERN-NOT: !annotation // PATTERN: store i8 97, // PATTERN: store i8 98, diff --git a/clang/test/CodeGenOpenCL/partial_initializer.cl b/clang/test/CodeGenOpenCL/partial_initializer.cl index 5cc4e2b246003a1..7c01c750d1afef2 100644 --- a/clang/test/CodeGenOpenCL/partial_initializer.cl +++ b/clang/test/CodeGenOpenCL/partial_initializer.cl @@ -35,7 +35,8 @@ void f(void) { // CHECK: %[[compoundliteral1:.*]] = alloca <2 x i32>, align 8 // CHECK: %[[V2:.*]] = alloca <4 x i32>, align 16 - // CHECK: call void @llvm.memset.p0.i32(ptr align 4 %A, i8 0, i32 144, i1 false) + // CHECK: %[[v0:.*]] = getelementptr inbounds i8, ptr %A, i32 8 + // CHECK: call void @llvm.memset.p0.i32(ptr align 4 %[[v0]], i8 0, i32 136, i1 false) // CHECK: %[[v2:.*]] = getelementptr inbounds [6 x [6 x float]], ptr %A, i32 0, i32 0 // CHECK: %[[v3:.*]] = getelementptr inbounds [6 x float], ptr %[[v2]], i32 0, i32 0 // CHECK: store float 1.000000e+00, ptr %[[v3]], align 4 _______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits