[PATCH] D133338: [clang][PowerPC] PPC64 VAArg use coerced integer type for direct aggregate fits in register

Ting Wang via Phabricator via cfe-commits Mon, 05 Sep 2022 23:45:18 -0700

tingwang created this revision.
tingwang added reviewers: uweigand, wschmidt, PowerPC.
tingwang added a project: clang.
Herald added subscribers: shchenz, kbarton, nemanjai.
Herald added a project: All.
tingwang requested review of this revision.
Herald added a subscriber: cfe-commits.


This is an attempt to fix issue: 
https://github.com/llvm/llvm-project/issues/55900

PPC64_SVR4_ABI handles those by-value aggregate fits in one register using 
coerced integer type.
https://github.com/llvm/llvm-project/blob/51d33afcbe0a81bb8508d5685f38dc9fdb2b60c9/clang/lib/CodeGen/TargetInfo.cpp#L5351

Regarding the issue, the aggregate is passed using i8 as parameter. On 
big-endian, after register content stored to memory, the char locates at 7th 
byte. However current `PPC64_SVR4_ABIInfo::EmitVAArg()` generates argument 
access using the original type, so there is type mismatch between caller and 
callee.

This patch tries to teach `PPC64_SVR4_ABIInfo::EmitVAArg()` regarding the type 
coerce. I'm not sure if this should be fixed in clang or backend, but I guess 
in the clang more likely, since there is logic taking care of argument smaller 
than a slot:
https://github.com/llvm/llvm-project/blob/51d33afcbe0a81bb8508d5685f38dc9fdb2b60c9/clang/lib/CodeGen/TargetInfo.cpp#L356

Please help me review and let me know if any comments. Thank you!


Repository:
  rG LLVM Github Monorepo

https://reviews.llvm.org/D133338

Files:
  clang/lib/CodeGen/TargetInfo.cpp
  clang/test/CodeGen/PowerPC/ppc64-align-struct.c


Index: clang/test/CodeGen/PowerPC/ppc64-align-struct.c
===================================================================
--- clang/test/CodeGen/PowerPC/ppc64-align-struct.c
+++ clang/test/CodeGen/PowerPC/ppc64-align-struct.c
@@ -1,4 +1,5 @@
 // RUN: %clang_cc1 -no-opaque-pointers -target-feature +altivec -triple 
powerpc64-unknown-linux-gnu -emit-llvm -o - %s | FileCheck %s
+// RUN: %clang_cc1 -no-opaque-pointers -target-feature +altivec -triple 
powerpc64le-unknown-linux-gnu -emit-llvm -o - %s | FileCheck %s 
--check-prefix=CHECK-LE
 
 #include <stdarg.h>
 
@@ -9,6 +10,7 @@
 struct test5 { int x[17]; };
 struct test6 { int x[17]; } __attribute__((aligned (16)));
 struct test7 { int x[17]; } __attribute__((aligned (32)));
+struct test8 { char x; };
 
 // CHECK: define{{.*}} void @test1(i32 noundef signext %x, i64 %y.coerce)
 void test1 (int x, struct test1 y)
@@ -132,20 +134,17 @@
 // CHECK: %[[CUR:[^ ]+]] = load i8*, i8** %ap
 // CHECK: %[[NEXT:[^ ]+]] = getelementptr inbounds i8, i8* %[[CUR]], i64 8
 // CHECK: store i8* %[[NEXT]], i8** %ap
-// CHECK: [[T0:%.*]] = bitcast i8* %[[CUR]] to %struct.test8*
+// CHECK: [[SRC:%.*]] = getelementptr inbounds i8, i8* %[[CUR]], i64 7
 // CHECK: [[DEST:%.*]] = bitcast %struct.test8* %[[AGG_RESULT]] to i8*
-// CHECK: [[SRC:%.*]] = bitcast %struct.test8* [[T0]] to i8*
-// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 1 [[DEST]], i8* align 
8 [[SRC]], i64 1, i1 false)
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 1 [[DEST]], i8* align 
1 [[SRC]], i64 1, i1 false)
 
 // CHECK-LE: define{{.*}} i8 @test8va(i32 noundef signext %x, ...)
 // CHECK-LE: [[RETVAL:%.*]] = alloca %struct.test8
 // CHECK-LE: %[[CUR:[^ ]+]] = load i8*, i8** %ap
 // CHECK-LE: %[[NEXT:[^ ]+]] = getelementptr inbounds i8, i8* %[[CUR]], i64 8
 // CHECK-LE: store i8* %[[NEXT]], i8** %ap
-// CHECK-LE: [[T0:%.*]] = bitcast i8* %[[CUR]] to %struct.test8*
 // CHECK-LE: [[DEST:%.*]] = bitcast %struct.test8* [[RETVAL]] to i8*
-// CHECK-LE: [[SRC:%.*]] = bitcast %struct.test8* [[T0]] to i8*
-// CHECK-LE: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 1 [[DEST]], i8* 
align 8 [[SRC]], i64 1, i1 false)
+// CHECK-LE: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 1 [[DEST]], i8* 
align 8 %[[CUR]], i64 1, i1 false)
 // CHECK-LE: [[COERCE:%.*]] = getelementptr inbounds %struct.test8, 
%struct.test8* [[RETVAL]], i32 0, i32 0
 // CHECK-LE: [[RET:%.*]] = load i8, i8* [[COERCE]], align 1
 // CHECK-LE: ret i8 [[RET]]
Index: clang/lib/CodeGen/TargetInfo.cpp
===================================================================
--- clang/lib/CodeGen/TargetInfo.cpp
+++ clang/lib/CodeGen/TargetInfo.cpp
@@ -5451,6 +5451,22 @@
       return complexTempStructure(CGF, VAListAddr, Ty, SlotSize, EltSize, CTy);
   }
 
+  // An aggregate may end up coerced to integer type in single register. When
+  // DirectSize is less than SlotSize on big-endian, need to use coerced type 
so
+  // that the argument will be right-adjusted in its slot.
+  ABIArgInfo AI = classifyArgumentType(Ty);
+  if (AI.isDirect() && AI.getCoerceToType()) {
+    llvm::Type *CoerceTy = AI.getCoerceToType();
+    if (CoerceTy->isIntegerTy() &&
+        llvm::alignTo(CoerceTy->getIntegerBitWidth(), 8) < GPRBits)
+      return emitVoidPtrDirectVAArg(
+          CGF, VAListAddr, CoerceTy,
+          CharUnits::fromQuantity(
+              llvm::alignTo(CoerceTy->getIntegerBitWidth(), 8) / 8),
+          CharUnits::fromQuantity(AI.getDirectAlign()), SlotSize,
+          /*AllowHigher*/ false);
+  }
+
   // Otherwise, just use the general rule.
   return emitVoidPtrVAArg(CGF, VAListAddr, Ty, /*Indirect*/ false,
                           TypeInfo, SlotSize, /*AllowHigher*/ true);

Index: clang/test/CodeGen/PowerPC/ppc64-align-struct.c
===================================================================
--- clang/test/CodeGen/PowerPC/ppc64-align-struct.c
+++ clang/test/CodeGen/PowerPC/ppc64-align-struct.c
@@ -1,4 +1,5 @@
 // RUN: %clang_cc1 -no-opaque-pointers -target-feature +altivec -triple powerpc64-unknown-linux-gnu -emit-llvm -o - %s | FileCheck %s
+// RUN: %clang_cc1 -no-opaque-pointers -target-feature +altivec -triple powerpc64le-unknown-linux-gnu -emit-llvm -o - %s | FileCheck %s --check-prefix=CHECK-LE
 
 #include <stdarg.h>
 
@@ -9,6 +10,7 @@
 struct test5 { int x[17]; };
 struct test6 { int x[17]; } __attribute__((aligned (16)));
 struct test7 { int x[17]; } __attribute__((aligned (32)));
+struct test8 { char x; };
 
 // CHECK: define{{.*}} void @test1(i32 noundef signext %x, i64 %y.coerce)
 void test1 (int x, struct test1 y)
@@ -132,20 +134,17 @@
 // CHECK: %[[CUR:[^ ]+]] = load i8*, i8** %ap
 // CHECK: %[[NEXT:[^ ]+]] = getelementptr inbounds i8, i8* %[[CUR]], i64 8
 // CHECK: store i8* %[[NEXT]], i8** %ap
-// CHECK: [[T0:%.*]] = bitcast i8* %[[CUR]] to %struct.test8*
+// CHECK: [[SRC:%.*]] = getelementptr inbounds i8, i8* %[[CUR]], i64 7
 // CHECK: [[DEST:%.*]] = bitcast %struct.test8* %[[AGG_RESULT]] to i8*
-// CHECK: [[SRC:%.*]] = bitcast %struct.test8* [[T0]] to i8*
-// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 1 [[DEST]], i8* align 8 [[SRC]], i64 1, i1 false)
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 1 [[DEST]], i8* align 1 [[SRC]], i64 1, i1 false)
 
 // CHECK-LE: define{{.*}} i8 @test8va(i32 noundef signext %x, ...)
 // CHECK-LE: [[RETVAL:%.*]] = alloca %struct.test8
 // CHECK-LE: %[[CUR:[^ ]+]] = load i8*, i8** %ap
 // CHECK-LE: %[[NEXT:[^ ]+]] = getelementptr inbounds i8, i8* %[[CUR]], i64 8
 // CHECK-LE: store i8* %[[NEXT]], i8** %ap
-// CHECK-LE: [[T0:%.*]] = bitcast i8* %[[CUR]] to %struct.test8*
 // CHECK-LE: [[DEST:%.*]] = bitcast %struct.test8* [[RETVAL]] to i8*
-// CHECK-LE: [[SRC:%.*]] = bitcast %struct.test8* [[T0]] to i8*
-// CHECK-LE: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 1 [[DEST]], i8* align 8 [[SRC]], i64 1, i1 false)
+// CHECK-LE: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 1 [[DEST]], i8* align 8 %[[CUR]], i64 1, i1 false)
 // CHECK-LE: [[COERCE:%.*]] = getelementptr inbounds %struct.test8, %struct.test8* [[RETVAL]], i32 0, i32 0
 // CHECK-LE: [[RET:%.*]] = load i8, i8* [[COERCE]], align 1
 // CHECK-LE: ret i8 [[RET]]
Index: clang/lib/CodeGen/TargetInfo.cpp
===================================================================
--- clang/lib/CodeGen/TargetInfo.cpp
+++ clang/lib/CodeGen/TargetInfo.cpp
@@ -5451,6 +5451,22 @@
       return complexTempStructure(CGF, VAListAddr, Ty, SlotSize, EltSize, CTy);
   }
 
+  // An aggregate may end up coerced to integer type in single register. When
+  // DirectSize is less than SlotSize on big-endian, need to use coerced type so
+  // that the argument will be right-adjusted in its slot.
+  ABIArgInfo AI = classifyArgumentType(Ty);
+  if (AI.isDirect() && AI.getCoerceToType()) {
+    llvm::Type *CoerceTy = AI.getCoerceToType();
+    if (CoerceTy->isIntegerTy() &&
+        llvm::alignTo(CoerceTy->getIntegerBitWidth(), 8) < GPRBits)
+      return emitVoidPtrDirectVAArg(
+          CGF, VAListAddr, CoerceTy,
+          CharUnits::fromQuantity(
+              llvm::alignTo(CoerceTy->getIntegerBitWidth(), 8) / 8),
+          CharUnits::fromQuantity(AI.getDirectAlign()), SlotSize,
+          /*AllowHigher*/ false);
+  }
+
   // Otherwise, just use the general rule.
   return emitVoidPtrVAArg(CGF, VAListAddr, Ty, /*Indirect*/ false,
                           TypeInfo, SlotSize, /*AllowHigher*/ true);

_______________________________________________
cfe-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

[PATCH] D133338: [clang][PowerPC] PPC64 VAArg use coerced integer type for direct aggregate fits in register

Reply via email to