tingwang created this revision. tingwang added reviewers: uweigand, wschmidt, PowerPC. tingwang added a project: clang. Herald added subscribers: shchenz, kbarton, nemanjai. Herald added a project: All. tingwang requested review of this revision. Herald added a subscriber: cfe-commits.
This is an attempt to fix issue: https://github.com/llvm/llvm-project/issues/55900 PPC64_SVR4_ABI handles those by-value aggregate fits in one register using coerced integer type. https://github.com/llvm/llvm-project/blob/51d33afcbe0a81bb8508d5685f38dc9fdb2b60c9/clang/lib/CodeGen/TargetInfo.cpp#L5351 Regarding the issue, the aggregate is passed using i8 as parameter. On big-endian, after register content stored to memory, the char locates at 7th byte. However current `PPC64_SVR4_ABIInfo::EmitVAArg()` generates argument access using the original type, so there is type mismatch between caller and callee. This patch tries to teach `PPC64_SVR4_ABIInfo::EmitVAArg()` regarding the type coerce. I'm not sure if this should be fixed in clang or backend, but I guess in the clang more likely, since there is logic taking care of argument smaller than a slot: https://github.com/llvm/llvm-project/blob/51d33afcbe0a81bb8508d5685f38dc9fdb2b60c9/clang/lib/CodeGen/TargetInfo.cpp#L356 Please help me review and let me know if any comments. Thank you! Repository: rG LLVM Github Monorepo https://reviews.llvm.org/D133338 Files: clang/lib/CodeGen/TargetInfo.cpp clang/test/CodeGen/PowerPC/ppc64-align-struct.c Index: clang/test/CodeGen/PowerPC/ppc64-align-struct.c =================================================================== --- clang/test/CodeGen/PowerPC/ppc64-align-struct.c +++ clang/test/CodeGen/PowerPC/ppc64-align-struct.c @@ -1,4 +1,5 @@ // RUN: %clang_cc1 -no-opaque-pointers -target-feature +altivec -triple powerpc64-unknown-linux-gnu -emit-llvm -o - %s | FileCheck %s +// RUN: %clang_cc1 -no-opaque-pointers -target-feature +altivec -triple powerpc64le-unknown-linux-gnu -emit-llvm -o - %s | FileCheck %s --check-prefix=CHECK-LE #include <stdarg.h> @@ -9,6 +10,7 @@ struct test5 { int x[17]; }; struct test6 { int x[17]; } __attribute__((aligned (16))); struct test7 { int x[17]; } __attribute__((aligned (32))); +struct test8 { char x; }; // CHECK: define{{.*}} void @test1(i32 noundef signext %x, i64 %y.coerce) void test1 (int x, struct test1 y) @@ -132,20 +134,17 @@ // CHECK: %[[CUR:[^ ]+]] = load i8*, i8** %ap // CHECK: %[[NEXT:[^ ]+]] = getelementptr inbounds i8, i8* %[[CUR]], i64 8 // CHECK: store i8* %[[NEXT]], i8** %ap -// CHECK: [[T0:%.*]] = bitcast i8* %[[CUR]] to %struct.test8* +// CHECK: [[SRC:%.*]] = getelementptr inbounds i8, i8* %[[CUR]], i64 7 // CHECK: [[DEST:%.*]] = bitcast %struct.test8* %[[AGG_RESULT]] to i8* -// CHECK: [[SRC:%.*]] = bitcast %struct.test8* [[T0]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 1 [[DEST]], i8* align 8 [[SRC]], i64 1, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 1 [[DEST]], i8* align 1 [[SRC]], i64 1, i1 false) // CHECK-LE: define{{.*}} i8 @test8va(i32 noundef signext %x, ...) // CHECK-LE: [[RETVAL:%.*]] = alloca %struct.test8 // CHECK-LE: %[[CUR:[^ ]+]] = load i8*, i8** %ap // CHECK-LE: %[[NEXT:[^ ]+]] = getelementptr inbounds i8, i8* %[[CUR]], i64 8 // CHECK-LE: store i8* %[[NEXT]], i8** %ap -// CHECK-LE: [[T0:%.*]] = bitcast i8* %[[CUR]] to %struct.test8* // CHECK-LE: [[DEST:%.*]] = bitcast %struct.test8* [[RETVAL]] to i8* -// CHECK-LE: [[SRC:%.*]] = bitcast %struct.test8* [[T0]] to i8* -// CHECK-LE: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 1 [[DEST]], i8* align 8 [[SRC]], i64 1, i1 false) +// CHECK-LE: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 1 [[DEST]], i8* align 8 %[[CUR]], i64 1, i1 false) // CHECK-LE: [[COERCE:%.*]] = getelementptr inbounds %struct.test8, %struct.test8* [[RETVAL]], i32 0, i32 0 // CHECK-LE: [[RET:%.*]] = load i8, i8* [[COERCE]], align 1 // CHECK-LE: ret i8 [[RET]] Index: clang/lib/CodeGen/TargetInfo.cpp =================================================================== --- clang/lib/CodeGen/TargetInfo.cpp +++ clang/lib/CodeGen/TargetInfo.cpp @@ -5451,6 +5451,22 @@ return complexTempStructure(CGF, VAListAddr, Ty, SlotSize, EltSize, CTy); } + // An aggregate may end up coerced to integer type in single register. When + // DirectSize is less than SlotSize on big-endian, need to use coerced type so + // that the argument will be right-adjusted in its slot. + ABIArgInfo AI = classifyArgumentType(Ty); + if (AI.isDirect() && AI.getCoerceToType()) { + llvm::Type *CoerceTy = AI.getCoerceToType(); + if (CoerceTy->isIntegerTy() && + llvm::alignTo(CoerceTy->getIntegerBitWidth(), 8) < GPRBits) + return emitVoidPtrDirectVAArg( + CGF, VAListAddr, CoerceTy, + CharUnits::fromQuantity( + llvm::alignTo(CoerceTy->getIntegerBitWidth(), 8) / 8), + CharUnits::fromQuantity(AI.getDirectAlign()), SlotSize, + /*AllowHigher*/ false); + } + // Otherwise, just use the general rule. return emitVoidPtrVAArg(CGF, VAListAddr, Ty, /*Indirect*/ false, TypeInfo, SlotSize, /*AllowHigher*/ true);
Index: clang/test/CodeGen/PowerPC/ppc64-align-struct.c =================================================================== --- clang/test/CodeGen/PowerPC/ppc64-align-struct.c +++ clang/test/CodeGen/PowerPC/ppc64-align-struct.c @@ -1,4 +1,5 @@ // RUN: %clang_cc1 -no-opaque-pointers -target-feature +altivec -triple powerpc64-unknown-linux-gnu -emit-llvm -o - %s | FileCheck %s +// RUN: %clang_cc1 -no-opaque-pointers -target-feature +altivec -triple powerpc64le-unknown-linux-gnu -emit-llvm -o - %s | FileCheck %s --check-prefix=CHECK-LE #include <stdarg.h> @@ -9,6 +10,7 @@ struct test5 { int x[17]; }; struct test6 { int x[17]; } __attribute__((aligned (16))); struct test7 { int x[17]; } __attribute__((aligned (32))); +struct test8 { char x; }; // CHECK: define{{.*}} void @test1(i32 noundef signext %x, i64 %y.coerce) void test1 (int x, struct test1 y) @@ -132,20 +134,17 @@ // CHECK: %[[CUR:[^ ]+]] = load i8*, i8** %ap // CHECK: %[[NEXT:[^ ]+]] = getelementptr inbounds i8, i8* %[[CUR]], i64 8 // CHECK: store i8* %[[NEXT]], i8** %ap -// CHECK: [[T0:%.*]] = bitcast i8* %[[CUR]] to %struct.test8* +// CHECK: [[SRC:%.*]] = getelementptr inbounds i8, i8* %[[CUR]], i64 7 // CHECK: [[DEST:%.*]] = bitcast %struct.test8* %[[AGG_RESULT]] to i8* -// CHECK: [[SRC:%.*]] = bitcast %struct.test8* [[T0]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 1 [[DEST]], i8* align 8 [[SRC]], i64 1, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 1 [[DEST]], i8* align 1 [[SRC]], i64 1, i1 false) // CHECK-LE: define{{.*}} i8 @test8va(i32 noundef signext %x, ...) // CHECK-LE: [[RETVAL:%.*]] = alloca %struct.test8 // CHECK-LE: %[[CUR:[^ ]+]] = load i8*, i8** %ap // CHECK-LE: %[[NEXT:[^ ]+]] = getelementptr inbounds i8, i8* %[[CUR]], i64 8 // CHECK-LE: store i8* %[[NEXT]], i8** %ap -// CHECK-LE: [[T0:%.*]] = bitcast i8* %[[CUR]] to %struct.test8* // CHECK-LE: [[DEST:%.*]] = bitcast %struct.test8* [[RETVAL]] to i8* -// CHECK-LE: [[SRC:%.*]] = bitcast %struct.test8* [[T0]] to i8* -// CHECK-LE: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 1 [[DEST]], i8* align 8 [[SRC]], i64 1, i1 false) +// CHECK-LE: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 1 [[DEST]], i8* align 8 %[[CUR]], i64 1, i1 false) // CHECK-LE: [[COERCE:%.*]] = getelementptr inbounds %struct.test8, %struct.test8* [[RETVAL]], i32 0, i32 0 // CHECK-LE: [[RET:%.*]] = load i8, i8* [[COERCE]], align 1 // CHECK-LE: ret i8 [[RET]] Index: clang/lib/CodeGen/TargetInfo.cpp =================================================================== --- clang/lib/CodeGen/TargetInfo.cpp +++ clang/lib/CodeGen/TargetInfo.cpp @@ -5451,6 +5451,22 @@ return complexTempStructure(CGF, VAListAddr, Ty, SlotSize, EltSize, CTy); } + // An aggregate may end up coerced to integer type in single register. When + // DirectSize is less than SlotSize on big-endian, need to use coerced type so + // that the argument will be right-adjusted in its slot. + ABIArgInfo AI = classifyArgumentType(Ty); + if (AI.isDirect() && AI.getCoerceToType()) { + llvm::Type *CoerceTy = AI.getCoerceToType(); + if (CoerceTy->isIntegerTy() && + llvm::alignTo(CoerceTy->getIntegerBitWidth(), 8) < GPRBits) + return emitVoidPtrDirectVAArg( + CGF, VAListAddr, CoerceTy, + CharUnits::fromQuantity( + llvm::alignTo(CoerceTy->getIntegerBitWidth(), 8) / 8), + CharUnits::fromQuantity(AI.getDirectAlign()), SlotSize, + /*AllowHigher*/ false); + } + // Otherwise, just use the general rule. return emitVoidPtrVAArg(CGF, VAListAddr, Ty, /*Indirect*/ false, TypeInfo, SlotSize, /*AllowHigher*/ true);
_______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits