https://github.com/serge-sans-paille updated https://github.com/llvm/llvm-project/pull/71677
>From d5934a4112166ce0375295b2347e7d5c43fdf5ed Mon Sep 17 00:00:00 2001 From: serge-sans-paille <sguel...@mozilla.com> Date: Wed, 8 Nov 2023 10:26:33 +0100 Subject: [PATCH 1/3] [clang] Avoid memcopy for small structure with padding under -ftrivial-auto-var-init --- clang/lib/CodeGen/CGDecl.cpp | 21 ++++++------- clang/test/CodeGenCXX/auto-var-init.cpp | 39 ++++++++++++------------- 2 files changed, 30 insertions(+), 30 deletions(-) diff --git a/clang/lib/CodeGen/CGDecl.cpp b/clang/lib/CodeGen/CGDecl.cpp index e5795d811c76de7..070a5e0a43e121b 100644 --- a/clang/lib/CodeGen/CGDecl.cpp +++ b/clang/lib/CodeGen/CGDecl.cpp @@ -1244,17 +1244,18 @@ static void emitStoresForConstant(CodeGenModule &CGM, const VarDecl &D, // If the initializer is small, use a handful of stores. if (shouldSplitConstantStore(CGM, ConstantSize)) { if (auto *STy = dyn_cast<llvm::StructType>(Ty)) { - // FIXME: handle the case when STy != Loc.getElementType(). - if (STy == Loc.getElementType()) { - for (unsigned i = 0; i != constant->getNumOperands(); i++) { - Address EltPtr = Builder.CreateStructGEP(Loc, i); - emitStoresForConstant( - CGM, D, EltPtr, isVolatile, Builder, - cast<llvm::Constant>(Builder.CreateExtractValue(constant, i)), - IsAutoInit); - } - return; + const llvm::StructLayout *Layout = CGM.getDataLayout().getStructLayout( + cast<llvm::StructType>(constant->getType())); + for (unsigned i = 0; i != constant->getNumOperands(); i++) { + CharUnits CurOff = CharUnits::fromQuantity(Layout->getElementOffset(i)); + Address EltPtr = Builder.CreateConstInBoundsByteGEP( + Loc.withElementType(CGM.Int8Ty), CurOff); + emitStoresForConstant( + CGM, D, EltPtr, isVolatile, Builder, + cast<llvm::Constant>(Builder.CreateExtractValue(constant, i)), + IsAutoInit); } + return; } else if (auto *ATy = dyn_cast<llvm::ArrayType>(Ty)) { // FIXME: handle the case when ATy != Loc.getElementType(). if (ATy == Loc.getElementType()) { diff --git a/clang/test/CodeGenCXX/auto-var-init.cpp b/clang/test/CodeGenCXX/auto-var-init.cpp index 6cb18528ebadcdf..75a137f461b27d1 100644 --- a/clang/test/CodeGenCXX/auto-var-init.cpp +++ b/clang/test/CodeGenCXX/auto-var-init.cpp @@ -89,22 +89,14 @@ struct padded { char c; int i; }; // PATTERN-O1-NOT: @__const.test_paddednullinit_custom.custom struct paddednullinit { char c = 0; int i = 0; }; // PATTERN-O0: @__const.test_paddedpacked_uninit.uninit = private unnamed_addr constant %struct.paddedpacked <{ i8 [[I8]], i32 [[I32]] }>, align 1 -// PATTERN: @__const.test_paddedpacked_custom.custom = private unnamed_addr constant %struct.paddedpacked <{ i8 42, i32 13371337 }>, align 1 -// ZERO: @__const.test_paddedpacked_custom.custom = private unnamed_addr constant %struct.paddedpacked <{ i8 42, i32 13371337 }>, align 1 struct paddedpacked { char c; int i; } __attribute__((packed)); // PATTERN-O0: @__const.test_paddedpackedarray_uninit.uninit = private unnamed_addr constant %struct.paddedpackedarray { [2 x %struct.paddedpacked] [%struct.paddedpacked <{ i8 [[I8]], i32 [[I32]] }>, %struct.paddedpacked <{ i8 [[I8]], i32 [[I32]] }>] }, align 1 -// PATTERN: @__const.test_paddedpackedarray_custom.custom = private unnamed_addr constant %struct.paddedpackedarray { [2 x %struct.paddedpacked] [%struct.paddedpacked <{ i8 42, i32 13371337 }>, %struct.paddedpacked <{ i8 43, i32 13371338 }>] }, align 1 -// ZERO: @__const.test_paddedpackedarray_custom.custom = private unnamed_addr constant %struct.paddedpackedarray { [2 x %struct.paddedpacked] [%struct.paddedpacked <{ i8 42, i32 13371337 }>, %struct.paddedpacked <{ i8 43, i32 13371338 }>] }, align 1 struct paddedpackedarray { struct paddedpacked p[2]; }; // PATTERN-O0: @__const.test_unpackedinpacked_uninit.uninit = private unnamed_addr constant <{ { i8, [3 x i8], i32 }, i8 }> <{ { i8, [3 x i8], i32 } { i8 [[I8]], [3 x i8] c"\[[IC]]\[[IC]]\[[IC]]", i32 [[I32]] }, i8 [[I8]] }>, align 1 struct unpackedinpacked { padded a; char b; } __attribute__((packed)); // PATTERN-O0: @__const.test_paddednested_uninit.uninit = private unnamed_addr constant { { i8, [3 x i8], i32 }, { i8, [3 x i8], i32 } } { { i8, [3 x i8], i32 } { i8 [[I8]], [3 x i8] c"\[[IC]]\[[IC]]\[[IC]]", i32 [[I32]] }, { i8, [3 x i8], i32 } { i8 [[I8]], [3 x i8] c"\[[IC]]\[[IC]]\[[IC]]", i32 [[I32]] } }, align 4 -// PATTERN: @__const.test_paddednested_custom.custom = private unnamed_addr constant { { i8, [3 x i8], i32 }, { i8, [3 x i8], i32 } } { { i8, [3 x i8], i32 } { i8 42, [3 x i8] zeroinitializer, i32 13371337 }, { i8, [3 x i8], i32 } { i8 43, [3 x i8] zeroinitializer, i32 13371338 } }, align 4 -// ZERO: @__const.test_paddednested_custom.custom = private unnamed_addr constant { { i8, [3 x i8], i32 }, { i8, [3 x i8], i32 } } { { i8, [3 x i8], i32 } { i8 42, [3 x i8] zeroinitializer, i32 13371337 }, { i8, [3 x i8], i32 } { i8 43, [3 x i8] zeroinitializer, i32 13371338 } }, align 4 struct paddednested { struct padded p1, p2; }; // PATTERN-O0: @__const.test_paddedpackednested_uninit.uninit = private unnamed_addr constant %struct.paddedpackednested { %struct.paddedpacked <{ i8 [[I8]], i32 [[I32]] }>, %struct.paddedpacked <{ i8 [[I8]], i32 [[I32]] }> }, align 1 -// PATTERN: @__const.test_paddedpackednested_custom.custom = private unnamed_addr constant %struct.paddedpackednested { %struct.paddedpacked <{ i8 42, i32 13371337 }>, %struct.paddedpacked <{ i8 43, i32 13371338 }> }, align 1 -// ZERO: @__const.test_paddedpackednested_custom.custom = private unnamed_addr constant %struct.paddedpackednested { %struct.paddedpacked <{ i8 42, i32 13371337 }>, %struct.paddedpacked <{ i8 43, i32 13371338 }> }, align 1 struct paddedpackednested { struct paddedpacked p1, p2; }; // PATTERN-O0: @__const.test_bitfield_uninit.uninit = private unnamed_addr constant %struct.bitfield { i8 [[I8]], [3 x i8] c"\[[IC]]\[[IC]]\[[IC]]" }, align 4 // PATTERN-O0: @__const.test_bitfield_custom.custom = private unnamed_addr constant %struct.bitfield { i8 20, [3 x i8] c"\[[IC]]\[[IC]]\[[IC]]" }, align 4 @@ -714,7 +706,8 @@ TEST_UNINIT(padded, padded); // CHECK-NEXT: call void @{{.*}}used{{.*}}%uninit) // PATTERN-LABEL: @test_padded_uninit() // PATTERN-O0: call void @llvm.memcpy{{.*}} @__const.test_padded_uninit.uninit{{.+}}), !annotation [[AUTO_INIT]] -// PATTERN-O1: store i64 [[I64]], ptr %uninit, align 8, !annotation [[AUTO_INIT]] +// PATTERN-O1: store i64 [[I64]], ptr %uninit, align 8 +// PATTERN-O1-NOT: !annotation // ZERO-LABEL: @test_padded_uninit() // ZERO-O0: call void @llvm.memset{{.*}}, i8 0,{{.+}}), !annotation [[AUTO_INIT]] // ZERO-O1: store i64 0, ptr %uninit, align 8, !annotation [[AUTO_INIT]] @@ -740,7 +733,8 @@ TEST_UNINIT(paddednullinit, paddednullinit); // CHECK-NEXT: call void @{{.*}}used{{.*}}%uninit) // PATTERN-LABEL: @test_paddednullinit_uninit() // PATTERN-O0: call void @llvm.memcpy{{.*}} @__const.test_paddednullinit_uninit.uninit{{.+}}), !annotation [[AUTO_INIT]] -// PATTERN-O1: store i64 [[I64]], ptr %uninit, align 8, !annotation [[AUTO_INIT]] +// PATTERN-O1: store i64 [[I64]], ptr %uninit, align 8 +// PATTERN-O1-NOT: !annotation // ZERO-LABEL: @test_paddednullinit_uninit() // ZERO-O0: call void @llvm.memset{{.*}}, i8 0, {{.*}}, !annotation [[AUTO_INIT]] // ZERO-O1: store i64 0, ptr %uninit, align 8 @@ -778,9 +772,8 @@ TEST_UNINIT(paddedpacked, paddedpacked); // PATTERN-LABEL: @test_paddedpacked_uninit() // PATTERN-O0: call void @llvm.memcpy{{.*}} @__const.test_paddedpacked_uninit.uninit{{.+}}), !annotation [[AUTO_INIT]] // PATTERN-O1: store i8 [[I8]], ptr %uninit, align {{.+}}, !annotation [[AUTO_INIT]] -// PATTERN-O1: %[[I:[^ ]*]] = getelementptr inbounds {{.*}}%uninit, i64 0, i32 1 +// PATTERN-O1: %[[I:[^ ]*]] = getelementptr inbounds {{.*}}%uninit, i64 1 // PATTERN-O1: store i32 [[I32]], ptr %[[I]], align {{.+}}, !annotation [[AUTO_INIT]] - // ZERO-LABEL: @test_paddedpacked_uninit() // ZERO: call void @llvm.memset{{.*}}, i8 0,{{.+}}), !annotation [[AUTO_INIT]] @@ -1192,7 +1185,8 @@ TEST_UNINIT(atomicpadded, _Atomic(padded)); // CHECK-NEXT: call void @{{.*}}used{{.*}}%uninit) // PATTERN-LABEL: @test_atomicpadded_uninit() // PATTERN-O0: call void @llvm.memcpy{{.*}} @__const.test_atomicpadded_uninit.uninit{{.+}}), !annotation [[AUTO_INIT]] -// PATTERN-O1: store i64 [[IPTR]], ptr %uninit, align 8, !annotation [[AUTO_INIT]] +// PATTERN-O1: store i64 [[IPTR]], ptr %uninit, align 8 +// PATTERN-O1-NOT: !annotation // ZERO-LABEL: @test_atomicpadded_uninit() // ZERO-O0: call void @llvm.memset{{.*}}, i8 0, {{.+}}), !annotation [[AUTO_INIT]] // ZERO-O1: store i64 0, ptr %uninit, align 8, !annotation [[AUTO_INIT]] @@ -1214,8 +1208,7 @@ TEST_UNINIT(complexfloat, _Complex float); // PATTERN-LABEL: @test_complexfloat_uninit() // PATTERN-O0: call void @llvm.memcpy{{.*}} @__const.test_complexfloat_uninit.uninit{{.+}}), !annotation [[AUTO_INIT]] // PATTERN-O1: store float 0xFFFFFFFFE0000000, ptr %uninit, align {{.+}}, !annotation [[AUTO_INIT]] - -// PATTERN-O1: %[[F2:[^ ]*]] = getelementptr inbounds {{.*}}%uninit, i64 0, i32 1 +// PATTERN-O1: %[[F2:[^ ]*]] = getelementptr inbounds {{.*}}%uninit, i64 4 // PATTERN-O1: store float 0xFFFFFFFFE0000000, ptr %[[F2]], align {{.+}}, !annotation [[AUTO_INIT]] // ZERO-LABEL: @test_complexfloat_uninit() @@ -1314,7 +1307,9 @@ TEST_CUSTOM(semivolatile, semivolatile, { 0x44444444, 0x44444444 }); // CHECK-O0: call void @llvm.memcpy // CHECK-NOT: !annotation // CHECK-O0: call void @{{.*}}used{{.*}}%custom) -// CHECK-O1: store i64 4919131752989213764, ptr %custom, align 8 +// CHECK-O1: store i32 1145324612, ptr %custom, align 4 +// CHECK-O1-NEXT: %[[I:[^ ]*]] = getelementptr inbounds i8, ptr %custom, i64 4 +// CHECK-O1-NEXT: store i32 1145324612, ptr %[[I]], align 4 // CHECK-NOT: !annotation TEST_UNINIT(semivolatileinit, semivolatileinit); @@ -1427,7 +1422,7 @@ TEST_CUSTOM(matching, matching, { .f = 0xf00f }); // CHECK-O0: call void @llvm.memcpy // CHECK-NOT: !annotation // CHECK-O0: call void @{{.*}}used{{.*}}%custom) -// CHECK-O1: store i32 1198526208, ptr {{.*}}, align 4 +// CHECK-O1: store float 6.145500e+04, ptr {{.*}}, align 4 // CHECK-NOT: !annotation TEST_UNINIT(matchingreverse, matchingreverse); @@ -1506,8 +1501,12 @@ TEST_CUSTOM(unmatchedreverse, unmatchedreverse, { .c = 42 }); // CHECK-O0: call void @llvm.memcpy // CHECK-NOT: !annotation // CHECK-O0: call void @{{.*}}used{{.*}}%custom) -// PATTERN-O1: store i32 -1431655894, ptr {{.*}}, align 4 -// ZERO-O1: store i32 42, ptr {{.*}}, align 4 +// PATTERN-O1: store i8 42, ptr {{.*}}, align 4 +// PATTERN-O1-NEXT: %[[I:[^ ]*]] = getelementptr inbounds i8, ptr %custom, i64 1 +// PATTERN-O1-NEXT: call void @llvm.memset.{{.*}}({{.*}}, i8 -86, i64 3, {{.*}}) +// ZERO-O1: store i8 42, ptr {{.*}}, align 4 +// ZERO-O1-NEXT: %[[I:[^ ]*]] = getelementptr inbounds i8, ptr %custom, i64 1 +// ZERO-O1-NEXT: call void @llvm.memset.{{.*}}({{.*}}, i8 0, i64 3, {{.*}}) TEST_UNINIT(unmatchedfp, unmatchedfp); // CHECK-LABEL: @test_unmatchedfp_uninit() @@ -1532,7 +1531,7 @@ TEST_CUSTOM(unmatchedfp, unmatchedfp, { .d = 3.1415926535897932384626433 }); // CHECK-O0: call void @llvm.memcpy // CHECK-NOT: !annotation // CHECK-O0: call void @{{.*}}used{{.*}}%custom) -// CHECK-O1: store i64 4614256656552045848, ptr %custom, align 8 +// CHECK-O1: store double 0x400921FB54442D18, ptr %custom, align 8 // CHECK-NOT: !annotation TEST_UNINIT(emptyenum, emptyenum); >From fcdfdbf5e2f3cc717bb43090afc7398ea0f5bc8a Mon Sep 17 00:00:00 2001 From: serge-sans-paille <sguel...@mozilla.com> Date: Wed, 8 Nov 2023 15:35:05 +0100 Subject: [PATCH 2/3] [clang] Avoid memcopy for small arrays with padding under -ftrivial-auto-var-init --- clang/lib/CodeGen/CGDecl.cpp | 18 ++++++++---------- clang/test/CodeGenCXX/auto-var-init.cpp | 10 +++++----- 2 files changed, 13 insertions(+), 15 deletions(-) diff --git a/clang/lib/CodeGen/CGDecl.cpp b/clang/lib/CodeGen/CGDecl.cpp index 070a5e0a43e121b..0ac40b30727f6b2 100644 --- a/clang/lib/CodeGen/CGDecl.cpp +++ b/clang/lib/CodeGen/CGDecl.cpp @@ -1257,17 +1257,15 @@ static void emitStoresForConstant(CodeGenModule &CGM, const VarDecl &D, } return; } else if (auto *ATy = dyn_cast<llvm::ArrayType>(Ty)) { - // FIXME: handle the case when ATy != Loc.getElementType(). - if (ATy == Loc.getElementType()) { - for (unsigned i = 0; i != ATy->getNumElements(); i++) { - Address EltPtr = Builder.CreateConstArrayGEP(Loc, i); - emitStoresForConstant( - CGM, D, EltPtr, isVolatile, Builder, - cast<llvm::Constant>(Builder.CreateExtractValue(constant, i)), - IsAutoInit); - } - return; + for (unsigned i = 0; i != ATy->getNumElements(); i++) { + Address EltPtr = Builder.CreateConstGEP( + Loc.withElementType(ATy->getElementType()), i); + emitStoresForConstant( + CGM, D, EltPtr, isVolatile, Builder, + cast<llvm::Constant>(Builder.CreateExtractValue(constant, i)), + IsAutoInit); } + return; } } diff --git a/clang/test/CodeGenCXX/auto-var-init.cpp b/clang/test/CodeGenCXX/auto-var-init.cpp index 75a137f461b27d1..e5a9d015f22f276 100644 --- a/clang/test/CodeGenCXX/auto-var-init.cpp +++ b/clang/test/CodeGenCXX/auto-var-init.cpp @@ -134,12 +134,8 @@ struct arraytail { int i; int arr[]; }; // PATTERN-O1-NOT: @__const.test_bool4_custom.custom // ZERO-O1-NOT: @__const.test_bool4_custom.custom -// PATTERN: @__const.test_intptr4_custom.custom = private unnamed_addr constant [4 x ptr] [ptr inttoptr ([[IPTRT]] 572662306 to ptr), ptr inttoptr ([[IPTRT]] 572662306 to ptr), ptr inttoptr ([[IPTRT]] 572662306 to ptr), ptr inttoptr ([[IPTRT]] 572662306 to ptr)], align -// ZERO: @__const.test_intptr4_custom.custom = private unnamed_addr constant [4 x ptr] [ptr inttoptr (i64 572662306 to ptr), ptr inttoptr (i64 572662306 to ptr), ptr inttoptr (i64 572662306 to ptr), ptr inttoptr (i64 572662306 to ptr)], align 16 // PATTERN-O0: @__const.test_tailpad4_uninit.uninit = private unnamed_addr constant [4 x { i16, i8, [1 x i8] }] [{ i16, i8, [1 x i8] } { i16 [[I16]], i8 [[I8]], [1 x i8] c"\[[IC]]" }, { i16, i8, [1 x i8] } { i16 [[I16]], i8 [[I8]], [1 x i8] c"\[[IC]]" }, { i16, i8, [1 x i8] } { i16 [[I16]], i8 [[I8]], [1 x i8] c"\[[IC]]" }, { i16, i8, [1 x i8] } { i16 [[I16]], i8 [[I8]], [1 x i8] c"\[[IC]]" }], align // PATTERN-O1-NOT: @__const.test_tailpad4_uninit.uninit -// PATTERN: @__const.test_tailpad4_custom.custom = private unnamed_addr constant [4 x { i16, i8, [1 x i8] }] [{ i16, i8, [1 x i8] } { i16 257, i8 1, [1 x i8] zeroinitializer }, { i16, i8, [1 x i8] } { i16 257, i8 1, [1 x i8] zeroinitializer }, { i16, i8, [1 x i8] } { i16 257, i8 1, [1 x i8] zeroinitializer }, { i16, i8, [1 x i8] } { i16 257, i8 1, [1 x i8] zeroinitializer }], align -// ZERO: @__const.test_tailpad4_custom.custom = private unnamed_addr constant [4 x { i16, i8, [1 x i8] }] [{ i16, i8, [1 x i8] } { i16 257, i8 1, [1 x i8] zeroinitializer }, { i16, i8, [1 x i8] } { i16 257, i8 1, [1 x i8] zeroinitializer }, { i16, i8, [1 x i8] } { i16 257, i8 1, [1 x i8] zeroinitializer }, { i16, i8, [1 x i8] } { i16 257, i8 1, [1 x i8] zeroinitializer }], align 16 struct tailpad { short s; char c; }; // PATTERN-O0: @__const.test_atomicnotlockfree_uninit.uninit = private unnamed_addr constant %struct.notlockfree { [4 x i64] {{\[}}i64 [[I64]], i64 [[I64]], i64 [[I64]], i64 [[I64]]] }, align // PATTERN-O1-NOT: @__const.test_atomicnotlockfree_uninit.uninit @@ -1503,7 +1499,11 @@ TEST_CUSTOM(unmatchedreverse, unmatchedreverse, { .c = 42 }); // CHECK-O0: call void @{{.*}}used{{.*}}%custom) // PATTERN-O1: store i8 42, ptr {{.*}}, align 4 // PATTERN-O1-NEXT: %[[I:[^ ]*]] = getelementptr inbounds i8, ptr %custom, i64 1 -// PATTERN-O1-NEXT: call void @llvm.memset.{{.*}}({{.*}}, i8 -86, i64 3, {{.*}}) +// PATTERN-O1-NEXT: store i8 -86, ptr %[[I]], align {{.*}} +// PATTERN-O1-NEXT: %[[I:[^ ]*]] = getelementptr inbounds i8, ptr %custom, i64 2 +// PATTERN-O1-NEXT: store i8 -86, ptr %[[I]], align {{.*}} +// PATTERN-O1-NEXT: %[[I:[^ ]*]] = getelementptr inbounds i8, ptr %custom, i64 3 +// PATTERN-O1-NEXT: store i8 -86, ptr %[[I]], align {{.*}} // ZERO-O1: store i8 42, ptr {{.*}}, align 4 // ZERO-O1-NEXT: %[[I:[^ ]*]] = getelementptr inbounds i8, ptr %custom, i64 1 // ZERO-O1-NEXT: call void @llvm.memset.{{.*}}({{.*}}, i8 0, i64 3, {{.*}}) >From ef500310d122954c576cd4197f0a520e52962ffc Mon Sep 17 00:00:00 2001 From: serge-sans-paille <sguel...@mozilla.com> Date: Thu, 9 Nov 2023 22:07:33 +0100 Subject: [PATCH 3/3] fixup! [clang] Avoid memcopy for small arrays with padding under -ftrivial-auto-var-init --- clang/lib/CodeGen/CGDecl.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/clang/lib/CodeGen/CGDecl.cpp b/clang/lib/CodeGen/CGDecl.cpp index 0ac40b30727f6b2..018fa7edd76b619 100644 --- a/clang/lib/CodeGen/CGDecl.cpp +++ b/clang/lib/CodeGen/CGDecl.cpp @@ -1244,8 +1244,8 @@ static void emitStoresForConstant(CodeGenModule &CGM, const VarDecl &D, // If the initializer is small, use a handful of stores. if (shouldSplitConstantStore(CGM, ConstantSize)) { if (auto *STy = dyn_cast<llvm::StructType>(Ty)) { - const llvm::StructLayout *Layout = CGM.getDataLayout().getStructLayout( - cast<llvm::StructType>(constant->getType())); + const llvm::StructLayout *Layout = + CGM.getDataLayout().getStructLayout(STy); for (unsigned i = 0; i != constant->getNumOperands(); i++) { CharUnits CurOff = CharUnits::fromQuantity(Layout->getElementOffset(i)); Address EltPtr = Builder.CreateConstInBoundsByteGEP( _______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits