https://github.com/CoTinker updated 
https://github.com/llvm/llvm-project/pull/101639

>From 328ae6052894eb45c4e7541dadf63a3a04540cda Mon Sep 17 00:00:00 2001
From: Longsheng Mou <moulongsh...@huawei.com>
Date: Fri, 2 Aug 2024 16:36:31 +0800
Subject: [PATCH 1/2] [X86_64] Fix empty field error in vaarg of C++.

Such struct types:
```
struct {
  struct{} a;
  long long b;
};

stuct {
  struct{} a;
  double b;
};
```
For such structures, Lo is NoClass and Hi is Integer/SSE. And when
this structure argument is passed, the high part is passed at
offset 8 in memory. So we should do special handling for these types
in EmitVAArg.
---
 clang/lib/CodeGen/Targets/X86.cpp      |  38 +++--
 clang/test/CodeGenCXX/x86_64-vaarg.cpp | 211 ++++++++++++++++++++++++-
 2 files changed, 231 insertions(+), 18 deletions(-)

diff --git a/clang/lib/CodeGen/Targets/X86.cpp 
b/clang/lib/CodeGen/Targets/X86.cpp
index 26ff4e4ac0a3b5..0347bd01f82ef3 100644
--- a/clang/lib/CodeGen/Targets/X86.cpp
+++ b/clang/lib/CodeGen/Targets/X86.cpp
@@ -3124,26 +3124,40 @@ RValue X86_64ABIInfo::EmitVAArg(CodeGenFunction &CGF, 
Address VAListAddr,
     CGF.Builder.CreateStore(V, CGF.Builder.CreateStructGEP(Tmp, 1));
 
     RegAddr = Tmp.withElementType(LTy);
-  } else if (neededInt) {
-    RegAddr = Address(CGF.Builder.CreateGEP(CGF.Int8Ty, RegSaveArea, 
gp_offset),
-                      LTy, CharUnits::fromQuantity(8));
-
+  } else if (neededInt || neededSSE == 1) {
     // Copy to a temporary if necessary to ensure the appropriate alignment.
     auto TInfo = getContext().getTypeInfoInChars(Ty);
     uint64_t TySize = TInfo.Width.getQuantity();
     CharUnits TyAlign = TInfo.Align;
 
-    // Copy into a temporary if the type is more aligned than the
-    // register save area.
-    if (TyAlign.getQuantity() > 8) {
+    llvm::Value *GpOrFpOffset = neededInt ? gp_offset : fp_offset;
+    uint64_t Alignment = neededInt ? 8 : 16;
+    if (auto Offset = AI.getDirectOffset()) {
       Address Tmp = CGF.CreateMemTemp(Ty);
-      CGF.Builder.CreateMemCpy(Tmp, RegAddr, TySize, false);
-      RegAddr = Tmp;
+      llvm::Type *TyHi = AI.getCoerceToType();
+      llvm::Value *Addr =
+          CGF.Builder.CreateGEP(CGF.Int8Ty, RegSaveArea, GpOrFpOffset);
+      llvm::Value *Src = CGF.Builder.CreateAlignedLoad(TyHi, Addr, TyAlign);
+      llvm::Value *PtrOffset = llvm::ConstantInt::get(CGF.Int32Ty, Offset);
+      Address Dst = Address(
+          CGF.Builder.CreateGEP(CGF.Int8Ty, Tmp.getBasePointer(), PtrOffset),
+          LTy, TyAlign);
+      CGF.Builder.CreateStore(Src, Dst);
+      RegAddr = Tmp.withElementType(LTy);
+    } else {
+      RegAddr =
+          Address(CGF.Builder.CreateGEP(CGF.Int8Ty, RegSaveArea, GpOrFpOffset),
+                  LTy, CharUnits::fromQuantity(Alignment));
+
+      // Copy into a temporary if the type is more aligned than the
+      // register save area.
+      if (neededInt && TyAlign.getQuantity() > 8) {
+        Address Tmp = CGF.CreateMemTemp(Ty);
+        CGF.Builder.CreateMemCpy(Tmp, RegAddr, TySize, false);
+        RegAddr = Tmp;
+      }
     }
 
-  } else if (neededSSE == 1) {
-    RegAddr = Address(CGF.Builder.CreateGEP(CGF.Int8Ty, RegSaveArea, 
fp_offset),
-                      LTy, CharUnits::fromQuantity(16));
   } else {
     assert(neededSSE == 2 && "Invalid number of needed registers!");
     // SSE registers are spaced 16 bytes apart in the register save
diff --git a/clang/test/CodeGenCXX/x86_64-vaarg.cpp 
b/clang/test/CodeGenCXX/x86_64-vaarg.cpp
index c4616a97e20555..1703b74ad10a2f 100644
--- a/clang/test/CodeGenCXX/x86_64-vaarg.cpp
+++ b/clang/test/CodeGenCXX/x86_64-vaarg.cpp
@@ -29,6 +29,7 @@ typedef struct {
 // CHECK-NEXT:    [[RETVAL:%.*]] = alloca [[STRUCT_S1:%.*]], align 8
 // CHECK-NEXT:    [[Z_ADDR:%.*]] = alloca i32, align 4
 // CHECK-NEXT:    [[LIST:%.*]] = alloca [1 x %struct.__va_list_tag], align 16
+// CHECK-NEXT:    [[TMP:%.*]] = alloca [[STRUCT_S1]], align 8
 // CHECK-NEXT:    store i32 [[Z:%.*]], ptr [[Z_ADDR]], align 4
 // CHECK-NEXT:    [[ARRAYDECAY:%.*]] = getelementptr inbounds [1 x 
%struct.__va_list_tag], ptr [[LIST]], i64 0, i64 0
 // CHECK-NEXT:    call void @llvm.va_start.p0(ptr [[ARRAYDECAY]])
@@ -41,8 +42,11 @@ typedef struct {
 // CHECK-NEXT:    [[TMP0:%.*]] = getelementptr inbounds 
[[STRUCT___VA_LIST_TAG]], ptr [[ARRAYDECAY1]], i32 0, i32 3
 // CHECK-NEXT:    [[REG_SAVE_AREA:%.*]] = load ptr, ptr [[TMP0]], align 16
 // CHECK-NEXT:    [[TMP1:%.*]] = getelementptr i8, ptr [[REG_SAVE_AREA]], i32 
[[FP_OFFSET]]
-// CHECK-NEXT:    [[TMP2:%.*]] = add i32 [[FP_OFFSET]], 16
-// CHECK-NEXT:    store i32 [[TMP2]], ptr [[FP_OFFSET_P]], align 4
+// CHECK-NEXT:    [[TMP2:%.*]] = load double, ptr [[TMP1]], align 8
+// CHECK-NEXT:    [[TMP3:%.*]] = getelementptr i8, ptr [[TMP]], i32 8
+// CHECK-NEXT:    store double [[TMP2]], ptr [[TMP3]], align 8
+// CHECK-NEXT:    [[TMP4:%.*]] = add i32 [[FP_OFFSET]], 16
+// CHECK-NEXT:    store i32 [[TMP4]], ptr [[FP_OFFSET_P]], align 4
 // CHECK-NEXT:    br label [[VAARG_END:%.*]]
 // CHECK:       vaarg.in_mem:
 // CHECK-NEXT:    [[OVERFLOW_ARG_AREA_P:%.*]] = getelementptr inbounds 
[[STRUCT___VA_LIST_TAG]], ptr [[ARRAYDECAY1]], i32 0, i32 2
@@ -51,14 +55,209 @@ typedef struct {
 // CHECK-NEXT:    store ptr [[OVERFLOW_ARG_AREA_NEXT]], ptr 
[[OVERFLOW_ARG_AREA_P]], align 8
 // CHECK-NEXT:    br label [[VAARG_END]]
 // CHECK:       vaarg.end:
-// CHECK-NEXT:    [[VAARG_ADDR:%.*]] = phi ptr [ [[TMP1]], [[VAARG_IN_REG]] ], 
[ [[OVERFLOW_ARG_AREA]], [[VAARG_IN_MEM]] ]
+// CHECK-NEXT:    [[VAARG_ADDR:%.*]] = phi ptr [ [[TMP]], [[VAARG_IN_REG]] ], 
[ [[OVERFLOW_ARG_AREA]], [[VAARG_IN_MEM]] ]
 // CHECK-NEXT:    call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[RETVAL]], ptr 
align 8 [[VAARG_ADDR]], i64 16, i1 false)
-// CHECK-NEXT:    [[TMP3:%.*]] = getelementptr inbounds i8, ptr [[RETVAL]], 
i64 8
-// CHECK-NEXT:    [[TMP4:%.*]] = load double, ptr [[TMP3]], align 8
-// CHECK-NEXT:    ret double [[TMP4]]
+// CHECK-NEXT:    [[TMP5:%.*]] = getelementptr inbounds i8, ptr [[RETVAL]], 
i64 8
+// CHECK-NEXT:    [[TMP6:%.*]] = load double, ptr [[TMP5]], align 8
+// CHECK-NEXT:    ret double [[TMP6]]
 //
 s1 f(int z, ...) {
   __builtin_va_list list;
   __builtin_va_start(list, z);
   return __builtin_va_arg(list, s1);
 }
+
+typedef struct {
+  struct{} a[5];
+  float b;
+  float c;
+} s2;
+
+// CHECK-LABEL: @_Z2f2iz(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[RETVAL:%.*]] = alloca [[STRUCT_S2:%.*]], align 4
+// CHECK-NEXT:    [[Z_ADDR:%.*]] = alloca i32, align 4
+// CHECK-NEXT:    [[LIST:%.*]] = alloca [1 x %struct.__va_list_tag], align 16
+// CHECK-NEXT:    [[TMP:%.*]] = alloca [[STRUCT_S2]], align 4
+// CHECK-NEXT:    store i32 [[Z:%.*]], ptr [[Z_ADDR]], align 4
+// CHECK-NEXT:    [[ARRAYDECAY:%.*]] = getelementptr inbounds [1 x 
%struct.__va_list_tag], ptr [[LIST]], i64 0, i64 0
+// CHECK-NEXT:    call void @llvm.va_start.p0(ptr [[ARRAYDECAY]])
+// CHECK-NEXT:    [[ARRAYDECAY1:%.*]] = getelementptr inbounds [1 x 
%struct.__va_list_tag], ptr [[LIST]], i64 0, i64 0
+// CHECK-NEXT:    [[FP_OFFSET_P:%.*]] = getelementptr inbounds 
[[STRUCT___VA_LIST_TAG:%.*]], ptr [[ARRAYDECAY1]], i32 0, i32 1
+// CHECK-NEXT:    [[FP_OFFSET:%.*]] = load i32, ptr [[FP_OFFSET_P]], align 4
+// CHECK-NEXT:    [[FITS_IN_FP:%.*]] = icmp ule i32 [[FP_OFFSET]], 160
+// CHECK-NEXT:    br i1 [[FITS_IN_FP]], label [[VAARG_IN_REG:%.*]], label 
[[VAARG_IN_MEM:%.*]]
+// CHECK:       vaarg.in_reg:
+// CHECK-NEXT:    [[TMP0:%.*]] = getelementptr inbounds 
[[STRUCT___VA_LIST_TAG]], ptr [[ARRAYDECAY1]], i32 0, i32 3
+// CHECK-NEXT:    [[REG_SAVE_AREA:%.*]] = load ptr, ptr [[TMP0]], align 16
+// CHECK-NEXT:    [[TMP1:%.*]] = getelementptr i8, ptr [[REG_SAVE_AREA]], i32 
[[FP_OFFSET]]
+// CHECK-NEXT:    [[TMP2:%.*]] = load <2 x float>, ptr [[TMP1]], align 4
+// CHECK-NEXT:    [[TMP3:%.*]] = getelementptr i8, ptr [[TMP]], i32 8
+// CHECK-NEXT:    store <2 x float> [[TMP2]], ptr [[TMP3]], align 4
+// CHECK-NEXT:    [[TMP4:%.*]] = add i32 [[FP_OFFSET]], 16
+// CHECK-NEXT:    store i32 [[TMP4]], ptr [[FP_OFFSET_P]], align 4
+// CHECK-NEXT:    br label [[VAARG_END:%.*]]
+// CHECK:       vaarg.in_mem:
+// CHECK-NEXT:    [[OVERFLOW_ARG_AREA_P:%.*]] = getelementptr inbounds 
[[STRUCT___VA_LIST_TAG]], ptr [[ARRAYDECAY1]], i32 0, i32 2
+// CHECK-NEXT:    [[OVERFLOW_ARG_AREA:%.*]] = load ptr, ptr 
[[OVERFLOW_ARG_AREA_P]], align 8
+// CHECK-NEXT:    [[OVERFLOW_ARG_AREA_NEXT:%.*]] = getelementptr i8, ptr 
[[OVERFLOW_ARG_AREA]], i32 16
+// CHECK-NEXT:    store ptr [[OVERFLOW_ARG_AREA_NEXT]], ptr 
[[OVERFLOW_ARG_AREA_P]], align 8
+// CHECK-NEXT:    br label [[VAARG_END]]
+// CHECK:       vaarg.end:
+// CHECK-NEXT:    [[VAARG_ADDR:%.*]] = phi ptr [ [[TMP]], [[VAARG_IN_REG]] ], 
[ [[OVERFLOW_ARG_AREA]], [[VAARG_IN_MEM]] ]
+// CHECK-NEXT:    call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[RETVAL]], ptr 
align 4 [[VAARG_ADDR]], i64 16, i1 false)
+// CHECK-NEXT:    [[TMP5:%.*]] = getelementptr inbounds i8, ptr [[RETVAL]], 
i64 8
+// CHECK-NEXT:    [[TMP6:%.*]] = load <2 x float>, ptr [[TMP5]], align 4
+// CHECK-NEXT:    ret <2 x float> [[TMP6]]
+//
+s2 f2(int z, ...) {
+  __builtin_va_list list;
+  __builtin_va_start(list, z);
+  return __builtin_va_arg(list, s2);
+}
+
+typedef struct {
+  struct{} a;
+  long long b;
+} s3;
+
+// CHECK-LABEL: @_Z2f3iz(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[RETVAL:%.*]] = alloca [[STRUCT_S3:%.*]], align 8
+// CHECK-NEXT:    [[Z_ADDR:%.*]] = alloca i32, align 4
+// CHECK-NEXT:    [[LIST:%.*]] = alloca [1 x %struct.__va_list_tag], align 16
+// CHECK-NEXT:    [[TMP:%.*]] = alloca [[STRUCT_S3]], align 8
+// CHECK-NEXT:    store i32 [[Z:%.*]], ptr [[Z_ADDR]], align 4
+// CHECK-NEXT:    [[ARRAYDECAY:%.*]] = getelementptr inbounds [1 x 
%struct.__va_list_tag], ptr [[LIST]], i64 0, i64 0
+// CHECK-NEXT:    call void @llvm.va_start.p0(ptr [[ARRAYDECAY]])
+// CHECK-NEXT:    [[ARRAYDECAY1:%.*]] = getelementptr inbounds [1 x 
%struct.__va_list_tag], ptr [[LIST]], i64 0, i64 0
+// CHECK-NEXT:    [[GP_OFFSET_P:%.*]] = getelementptr inbounds 
[[STRUCT___VA_LIST_TAG:%.*]], ptr [[ARRAYDECAY1]], i32 0, i32 0
+// CHECK-NEXT:    [[GP_OFFSET:%.*]] = load i32, ptr [[GP_OFFSET_P]], align 16
+// CHECK-NEXT:    [[FITS_IN_GP:%.*]] = icmp ule i32 [[GP_OFFSET]], 40
+// CHECK-NEXT:    br i1 [[FITS_IN_GP]], label [[VAARG_IN_REG:%.*]], label 
[[VAARG_IN_MEM:%.*]]
+// CHECK:       vaarg.in_reg:
+// CHECK-NEXT:    [[TMP0:%.*]] = getelementptr inbounds 
[[STRUCT___VA_LIST_TAG]], ptr [[ARRAYDECAY1]], i32 0, i32 3
+// CHECK-NEXT:    [[REG_SAVE_AREA:%.*]] = load ptr, ptr [[TMP0]], align 16
+// CHECK-NEXT:    [[TMP1:%.*]] = getelementptr i8, ptr [[REG_SAVE_AREA]], i32 
[[GP_OFFSET]]
+// CHECK-NEXT:    [[TMP2:%.*]] = load i64, ptr [[TMP1]], align 8
+// CHECK-NEXT:    [[TMP3:%.*]] = getelementptr i8, ptr [[TMP]], i32 8
+// CHECK-NEXT:    store i64 [[TMP2]], ptr [[TMP3]], align 8
+// CHECK-NEXT:    [[TMP4:%.*]] = add i32 [[GP_OFFSET]], 8
+// CHECK-NEXT:    store i32 [[TMP4]], ptr [[GP_OFFSET_P]], align 16
+// CHECK-NEXT:    br label [[VAARG_END:%.*]]
+// CHECK:       vaarg.in_mem:
+// CHECK-NEXT:    [[OVERFLOW_ARG_AREA_P:%.*]] = getelementptr inbounds 
[[STRUCT___VA_LIST_TAG]], ptr [[ARRAYDECAY1]], i32 0, i32 2
+// CHECK-NEXT:    [[OVERFLOW_ARG_AREA:%.*]] = load ptr, ptr 
[[OVERFLOW_ARG_AREA_P]], align 8
+// CHECK-NEXT:    [[OVERFLOW_ARG_AREA_NEXT:%.*]] = getelementptr i8, ptr 
[[OVERFLOW_ARG_AREA]], i32 16
+// CHECK-NEXT:    store ptr [[OVERFLOW_ARG_AREA_NEXT]], ptr 
[[OVERFLOW_ARG_AREA_P]], align 8
+// CHECK-NEXT:    br label [[VAARG_END]]
+// CHECK:       vaarg.end:
+// CHECK-NEXT:    [[VAARG_ADDR:%.*]] = phi ptr [ [[TMP]], [[VAARG_IN_REG]] ], 
[ [[OVERFLOW_ARG_AREA]], [[VAARG_IN_MEM]] ]
+// CHECK-NEXT:    call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[RETVAL]], ptr 
align 8 [[VAARG_ADDR]], i64 16, i1 false)
+// CHECK-NEXT:    [[TMP5:%.*]] = getelementptr inbounds i8, ptr [[RETVAL]], 
i64 8
+// CHECK-NEXT:    [[TMP6:%.*]] = load i64, ptr [[TMP5]], align 8
+// CHECK-NEXT:    ret i64 [[TMP6]]
+//
+s3 f3(int z, ...) {
+  __builtin_va_list list;
+  __builtin_va_start(list, z);
+  return __builtin_va_arg(list, s3);
+}
+
+typedef struct {
+  struct{} a[7];
+  short b;
+  int c;
+} s4;
+
+// CHECK-LABEL: @_Z2f4iz(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[RETVAL:%.*]] = alloca [[STRUCT_S4:%.*]], align 4
+// CHECK-NEXT:    [[Z_ADDR:%.*]] = alloca i32, align 4
+// CHECK-NEXT:    [[LIST:%.*]] = alloca [1 x %struct.__va_list_tag], align 16
+// CHECK-NEXT:    [[TMP:%.*]] = alloca [[STRUCT_S4]], align 4
+// CHECK-NEXT:    store i32 [[Z:%.*]], ptr [[Z_ADDR]], align 4
+// CHECK-NEXT:    [[ARRAYDECAY:%.*]] = getelementptr inbounds [1 x 
%struct.__va_list_tag], ptr [[LIST]], i64 0, i64 0
+// CHECK-NEXT:    call void @llvm.va_start.p0(ptr [[ARRAYDECAY]])
+// CHECK-NEXT:    [[ARRAYDECAY1:%.*]] = getelementptr inbounds [1 x 
%struct.__va_list_tag], ptr [[LIST]], i64 0, i64 0
+// CHECK-NEXT:    [[GP_OFFSET_P:%.*]] = getelementptr inbounds 
[[STRUCT___VA_LIST_TAG:%.*]], ptr [[ARRAYDECAY1]], i32 0, i32 0
+// CHECK-NEXT:    [[GP_OFFSET:%.*]] = load i32, ptr [[GP_OFFSET_P]], align 16
+// CHECK-NEXT:    [[FITS_IN_GP:%.*]] = icmp ule i32 [[GP_OFFSET]], 40
+// CHECK-NEXT:    br i1 [[FITS_IN_GP]], label [[VAARG_IN_REG:%.*]], label 
[[VAARG_IN_MEM:%.*]]
+// CHECK:       vaarg.in_reg:
+// CHECK-NEXT:    [[TMP0:%.*]] = getelementptr inbounds 
[[STRUCT___VA_LIST_TAG]], ptr [[ARRAYDECAY1]], i32 0, i32 3
+// CHECK-NEXT:    [[REG_SAVE_AREA:%.*]] = load ptr, ptr [[TMP0]], align 16
+// CHECK-NEXT:    [[TMP1:%.*]] = getelementptr i8, ptr [[REG_SAVE_AREA]], i32 
[[GP_OFFSET]]
+// CHECK-NEXT:    [[TMP2:%.*]] = load i64, ptr [[TMP1]], align 4
+// CHECK-NEXT:    [[TMP3:%.*]] = getelementptr i8, ptr [[TMP]], i32 8
+// CHECK-NEXT:    store i64 [[TMP2]], ptr [[TMP3]], align 4
+// CHECK-NEXT:    [[TMP4:%.*]] = add i32 [[GP_OFFSET]], 8
+// CHECK-NEXT:    store i32 [[TMP4]], ptr [[GP_OFFSET_P]], align 16
+// CHECK-NEXT:    br label [[VAARG_END:%.*]]
+// CHECK:       vaarg.in_mem:
+// CHECK-NEXT:    [[OVERFLOW_ARG_AREA_P:%.*]] = getelementptr inbounds 
[[STRUCT___VA_LIST_TAG]], ptr [[ARRAYDECAY1]], i32 0, i32 2
+// CHECK-NEXT:    [[OVERFLOW_ARG_AREA:%.*]] = load ptr, ptr 
[[OVERFLOW_ARG_AREA_P]], align 8
+// CHECK-NEXT:    [[OVERFLOW_ARG_AREA_NEXT:%.*]] = getelementptr i8, ptr 
[[OVERFLOW_ARG_AREA]], i32 16
+// CHECK-NEXT:    store ptr [[OVERFLOW_ARG_AREA_NEXT]], ptr 
[[OVERFLOW_ARG_AREA_P]], align 8
+// CHECK-NEXT:    br label [[VAARG_END]]
+// CHECK:       vaarg.end:
+// CHECK-NEXT:    [[VAARG_ADDR:%.*]] = phi ptr [ [[TMP]], [[VAARG_IN_REG]] ], 
[ [[OVERFLOW_ARG_AREA]], [[VAARG_IN_MEM]] ]
+// CHECK-NEXT:    call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[RETVAL]], ptr 
align 4 [[VAARG_ADDR]], i64 16, i1 false)
+// CHECK-NEXT:    [[TMP5:%.*]] = getelementptr inbounds i8, ptr [[RETVAL]], 
i64 8
+// CHECK-NEXT:    [[TMP6:%.*]] = load i64, ptr [[TMP5]], align 4
+// CHECK-NEXT:    ret i64 [[TMP6]]
+//
+s4 f4(int z, ...) {
+  __builtin_va_list list;
+  __builtin_va_start(list, z);
+  return __builtin_va_arg(list, s4);
+}
+
+typedef struct {
+  struct{} a[5];
+  float b;
+  int c;
+} s5;
+
+// CHECK-LABEL: @_Z2f5iz(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[RETVAL:%.*]] = alloca [[STRUCT_S5:%.*]], align 4
+// CHECK-NEXT:    [[Z_ADDR:%.*]] = alloca i32, align 4
+// CHECK-NEXT:    [[LIST:%.*]] = alloca [1 x %struct.__va_list_tag], align 16
+// CHECK-NEXT:    [[TMP:%.*]] = alloca [[STRUCT_S5]], align 4
+// CHECK-NEXT:    store i32 [[Z:%.*]], ptr [[Z_ADDR]], align 4
+// CHECK-NEXT:    [[ARRAYDECAY:%.*]] = getelementptr inbounds [1 x 
%struct.__va_list_tag], ptr [[LIST]], i64 0, i64 0
+// CHECK-NEXT:    call void @llvm.va_start.p0(ptr [[ARRAYDECAY]])
+// CHECK-NEXT:    [[ARRAYDECAY1:%.*]] = getelementptr inbounds [1 x 
%struct.__va_list_tag], ptr [[LIST]], i64 0, i64 0
+// CHECK-NEXT:    [[GP_OFFSET_P:%.*]] = getelementptr inbounds 
[[STRUCT___VA_LIST_TAG:%.*]], ptr [[ARRAYDECAY1]], i32 0, i32 0
+// CHECK-NEXT:    [[GP_OFFSET:%.*]] = load i32, ptr [[GP_OFFSET_P]], align 16
+// CHECK-NEXT:    [[FITS_IN_GP:%.*]] = icmp ule i32 [[GP_OFFSET]], 40
+// CHECK-NEXT:    br i1 [[FITS_IN_GP]], label [[VAARG_IN_REG:%.*]], label 
[[VAARG_IN_MEM:%.*]]
+// CHECK:       vaarg.in_reg:
+// CHECK-NEXT:    [[TMP0:%.*]] = getelementptr inbounds 
[[STRUCT___VA_LIST_TAG]], ptr [[ARRAYDECAY1]], i32 0, i32 3
+// CHECK-NEXT:    [[REG_SAVE_AREA:%.*]] = load ptr, ptr [[TMP0]], align 16
+// CHECK-NEXT:    [[TMP1:%.*]] = getelementptr i8, ptr [[REG_SAVE_AREA]], i32 
[[GP_OFFSET]]
+// CHECK-NEXT:    [[TMP2:%.*]] = load i64, ptr [[TMP1]], align 4
+// CHECK-NEXT:    [[TMP3:%.*]] = getelementptr i8, ptr [[TMP]], i32 8
+// CHECK-NEXT:    store i64 [[TMP2]], ptr [[TMP3]], align 4
+// CHECK-NEXT:    [[TMP4:%.*]] = add i32 [[GP_OFFSET]], 8
+// CHECK-NEXT:    store i32 [[TMP4]], ptr [[GP_OFFSET_P]], align 16
+// CHECK-NEXT:    br label [[VAARG_END:%.*]]
+// CHECK:       vaarg.in_mem:
+// CHECK-NEXT:    [[OVERFLOW_ARG_AREA_P:%.*]] = getelementptr inbounds 
[[STRUCT___VA_LIST_TAG]], ptr [[ARRAYDECAY1]], i32 0, i32 2
+// CHECK-NEXT:    [[OVERFLOW_ARG_AREA:%.*]] = load ptr, ptr 
[[OVERFLOW_ARG_AREA_P]], align 8
+// CHECK-NEXT:    [[OVERFLOW_ARG_AREA_NEXT:%.*]] = getelementptr i8, ptr 
[[OVERFLOW_ARG_AREA]], i32 16
+// CHECK-NEXT:    store ptr [[OVERFLOW_ARG_AREA_NEXT]], ptr 
[[OVERFLOW_ARG_AREA_P]], align 8
+// CHECK-NEXT:    br label [[VAARG_END]]
+// CHECK:       vaarg.end:
+// CHECK-NEXT:    [[VAARG_ADDR:%.*]] = phi ptr [ [[TMP]], [[VAARG_IN_REG]] ], 
[ [[OVERFLOW_ARG_AREA]], [[VAARG_IN_MEM]] ]
+// CHECK-NEXT:    call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[RETVAL]], ptr 
align 4 [[VAARG_ADDR]], i64 16, i1 false)
+// CHECK-NEXT:    [[TMP5:%.*]] = getelementptr inbounds i8, ptr [[RETVAL]], 
i64 8
+// CHECK-NEXT:    [[TMP6:%.*]] = load i64, ptr [[TMP5]], align 4
+// CHECK-NEXT:    ret i64 [[TMP6]]
+//
+s5 f5(int z, ...) {
+  __builtin_va_list list;
+  __builtin_va_start(list, z);
+  return __builtin_va_arg(list, s5);
+}

>From f43535f9fbf0cb9f54d1db81ed7fa5e41450a180 Mon Sep 17 00:00:00 2001
From: Longsheng Mou <moulongsh...@huawei.com>
Date: Sat, 10 Aug 2024 10:47:07 +0800
Subject: [PATCH 2/2] [X86_64] Fix edge cases

Fix edge cases that may reading past the end of the register save area.
Such as:
```
struct {
  long long a;
  struct{} b;
};
```
and
```
struct {
  long long a;
  int b;
};
```
---
 clang/lib/CodeGen/Targets/X86.cpp      |  39 ++++++-
 clang/test/CodeGenCXX/x86_64-vaarg.cpp | 143 +++++++++++++++++++++++++
 2 files changed, 178 insertions(+), 4 deletions(-)

diff --git a/clang/lib/CodeGen/Targets/X86.cpp 
b/clang/lib/CodeGen/Targets/X86.cpp
index 0347bd01f82ef3..7df6619467d7ab 100644
--- a/clang/lib/CodeGen/Targets/X86.cpp
+++ b/clang/lib/CodeGen/Targets/X86.cpp
@@ -3129,16 +3129,47 @@ RValue X86_64ABIInfo::EmitVAArg(CodeGenFunction &CGF, 
Address VAListAddr,
     auto TInfo = getContext().getTypeInfoInChars(Ty);
     uint64_t TySize = TInfo.Width.getQuantity();
     CharUnits TyAlign = TInfo.Align;
+    llvm::Type *CoTy = AI.getCoerceToType();
+    uint64_t RegSize = 0;
+    // Obtain the actual register storage size.
+    if (CoTy->isStructTy()) {
+      auto ST = cast<llvm::StructType>(CoTy);
+      for (unsigned i = 0; i < ST->getNumElements(); ++i) {
+        auto elementType = ST->getElementType(i);
+        RegSize += CGF.CGM.getDataLayout().getTypeAllocSize(elementType);
+      }
+    } else {
+      RegSize = CGF.CGM.getDataLayout().getTypeAllocSize(CoTy);
+    }
 
     llvm::Value *GpOrFpOffset = neededInt ? gp_offset : fp_offset;
     uint64_t Alignment = neededInt ? 8 : 16;
-    if (auto Offset = AI.getDirectOffset()) {
+    // There are two cases require special handling:
+    // 1)
+    //    ```
+    //    struct {
+    //      struct {} a[8];
+    //      int b;
+    //    };
+    //    ```
+    //    The lower 8 bytes of the structure are not stored,
+    //    so an 8-byte offset is needed when accessing the structure.
+    // 2)
+    //   ```
+    //   struct {
+    //     long long a;
+    //     int b;
+    //   };
+    //   ```
+    //   The stored size of this structure is smaller than its actual size,
+    //   which may lead to reading past the end of the register save area.
+    if (AI.isDirect() && RegSize < TySize) {
       Address Tmp = CGF.CreateMemTemp(Ty);
-      llvm::Type *TyHi = AI.getCoerceToType();
       llvm::Value *Addr =
           CGF.Builder.CreateGEP(CGF.Int8Ty, RegSaveArea, GpOrFpOffset);
-      llvm::Value *Src = CGF.Builder.CreateAlignedLoad(TyHi, Addr, TyAlign);
-      llvm::Value *PtrOffset = llvm::ConstantInt::get(CGF.Int32Ty, Offset);
+      llvm::Value *Src = CGF.Builder.CreateAlignedLoad(CoTy, Addr, TyAlign);
+      llvm::Value *PtrOffset =
+          llvm::ConstantInt::get(CGF.Int32Ty, AI.getDirectOffset());
       Address Dst = Address(
           CGF.Builder.CreateGEP(CGF.Int8Ty, Tmp.getBasePointer(), PtrOffset),
           LTy, TyAlign);
diff --git a/clang/test/CodeGenCXX/x86_64-vaarg.cpp 
b/clang/test/CodeGenCXX/x86_64-vaarg.cpp
index 1703b74ad10a2f..80ed12362a6871 100644
--- a/clang/test/CodeGenCXX/x86_64-vaarg.cpp
+++ b/clang/test/CodeGenCXX/x86_64-vaarg.cpp
@@ -261,3 +261,146 @@ s5 f5(int z, ...) {
   __builtin_va_start(list, z);
   return __builtin_va_arg(list, s5);
 }
+
+typedef struct {
+  double a;
+  struct{} b;
+} s6;
+
+// CHECK-LABEL: @_Z2f6iz(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[RETVAL:%.*]] = alloca [[STRUCT_S6:%.*]], align 8
+// CHECK-NEXT:    [[Z_ADDR:%.*]] = alloca i32, align 4
+// CHECK-NEXT:    [[LIST:%.*]] = alloca [1 x %struct.__va_list_tag], align 16
+// CHECK-NEXT:    [[TMP:%.*]] = alloca [[STRUCT_S6]], align 8
+// CHECK-NEXT:    store i32 [[Z:%.*]], ptr [[Z_ADDR]], align 4
+// CHECK-NEXT:    [[ARRAYDECAY:%.*]] = getelementptr inbounds [1 x 
%struct.__va_list_tag], ptr [[LIST]], i64 0, i64 0
+// CHECK-NEXT:    call void @llvm.va_start.p0(ptr [[ARRAYDECAY]])
+// CHECK-NEXT:    [[ARRAYDECAY1:%.*]] = getelementptr inbounds [1 x 
%struct.__va_list_tag], ptr [[LIST]], i64 0, i64 0
+// CHECK-NEXT:    [[FP_OFFSET_P:%.*]] = getelementptr inbounds 
[[STRUCT___VA_LIST_TAG:%.*]], ptr [[ARRAYDECAY1]], i32 0, i32 1
+// CHECK-NEXT:    [[FP_OFFSET:%.*]] = load i32, ptr [[FP_OFFSET_P]], align 4
+// CHECK-NEXT:    [[FITS_IN_FP:%.*]] = icmp ule i32 [[FP_OFFSET]], 160
+// CHECK-NEXT:    br i1 [[FITS_IN_FP]], label [[VAARG_IN_REG:%.*]], label 
[[VAARG_IN_MEM:%.*]]
+// CHECK:       vaarg.in_reg:
+// CHECK-NEXT:    [[TMP0:%.*]] = getelementptr inbounds 
[[STRUCT___VA_LIST_TAG]], ptr [[ARRAYDECAY1]], i32 0, i32 3
+// CHECK-NEXT:    [[REG_SAVE_AREA:%.*]] = load ptr, ptr [[TMP0]], align 16
+// CHECK-NEXT:    [[TMP1:%.*]] = getelementptr i8, ptr [[REG_SAVE_AREA]], i32 
[[FP_OFFSET]]
+// CHECK-NEXT:    [[TMP2:%.*]] = load double, ptr [[TMP1]], align 8
+// CHECK-NEXT:    [[TMP3:%.*]] = getelementptr i8, ptr [[TMP]], i32 0
+// CHECK-NEXT:    store double [[TMP2]], ptr [[TMP3]], align 8
+// CHECK-NEXT:    [[TMP4:%.*]] = add i32 [[FP_OFFSET]], 16
+// CHECK-NEXT:    store i32 [[TMP4]], ptr [[FP_OFFSET_P]], align 4
+// CHECK-NEXT:    br label [[VAARG_END:%.*]]
+// CHECK:       vaarg.in_mem:
+// CHECK-NEXT:    [[OVERFLOW_ARG_AREA_P:%.*]] = getelementptr inbounds 
[[STRUCT___VA_LIST_TAG]], ptr [[ARRAYDECAY1]], i32 0, i32 2
+// CHECK-NEXT:    [[OVERFLOW_ARG_AREA:%.*]] = load ptr, ptr 
[[OVERFLOW_ARG_AREA_P]], align 8
+// CHECK-NEXT:    [[OVERFLOW_ARG_AREA_NEXT:%.*]] = getelementptr i8, ptr 
[[OVERFLOW_ARG_AREA]], i32 16
+// CHECK-NEXT:    store ptr [[OVERFLOW_ARG_AREA_NEXT]], ptr 
[[OVERFLOW_ARG_AREA_P]], align 8
+// CHECK-NEXT:    br label [[VAARG_END]]
+// CHECK:       vaarg.end:
+// CHECK-NEXT:    [[VAARG_ADDR:%.*]] = phi ptr [ [[TMP]], [[VAARG_IN_REG]] ], 
[ [[OVERFLOW_ARG_AREA]], [[VAARG_IN_MEM]] ]
+// CHECK-NEXT:    call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[RETVAL]], ptr 
align 8 [[VAARG_ADDR]], i64 16, i1 false)
+// CHECK-NEXT:    [[COERCE_DIVE:%.*]] = getelementptr inbounds [[STRUCT_S6]], 
ptr [[RETVAL]], i32 0, i32 0
+// CHECK-NEXT:    [[TMP5:%.*]] = load double, ptr [[COERCE_DIVE]], align 8
+// CHECK-NEXT:    ret double [[TMP5]]
+//
+s6 f6(int z, ...) {
+  __builtin_va_list list;
+  __builtin_va_start(list, z);
+  return __builtin_va_arg(list, s6);
+}
+
+typedef struct {
+  long long a;
+  struct{} b;
+} s7;
+
+// CHECK-LABEL: @_Z2f7iz(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[RETVAL:%.*]] = alloca [[STRUCT_S7:%.*]], align 8
+// CHECK-NEXT:    [[Z_ADDR:%.*]] = alloca i32, align 4
+// CHECK-NEXT:    [[LIST:%.*]] = alloca [1 x %struct.__va_list_tag], align 16
+// CHECK-NEXT:    [[TMP:%.*]] = alloca [[STRUCT_S7]], align 8
+// CHECK-NEXT:    store i32 [[Z:%.*]], ptr [[Z_ADDR]], align 4
+// CHECK-NEXT:    [[ARRAYDECAY:%.*]] = getelementptr inbounds [1 x 
%struct.__va_list_tag], ptr [[LIST]], i64 0, i64 0
+// CHECK-NEXT:    call void @llvm.va_start.p0(ptr [[ARRAYDECAY]])
+// CHECK-NEXT:    [[ARRAYDECAY1:%.*]] = getelementptr inbounds [1 x 
%struct.__va_list_tag], ptr [[LIST]], i64 0, i64 0
+// CHECK-NEXT:    [[GP_OFFSET_P:%.*]] = getelementptr inbounds 
[[STRUCT___VA_LIST_TAG:%.*]], ptr [[ARRAYDECAY1]], i32 0, i32 0
+// CHECK-NEXT:    [[GP_OFFSET:%.*]] = load i32, ptr [[GP_OFFSET_P]], align 16
+// CHECK-NEXT:    [[FITS_IN_GP:%.*]] = icmp ule i32 [[GP_OFFSET]], 40
+// CHECK-NEXT:    br i1 [[FITS_IN_GP]], label [[VAARG_IN_REG:%.*]], label 
[[VAARG_IN_MEM:%.*]]
+// CHECK:       vaarg.in_reg:
+// CHECK-NEXT:    [[TMP0:%.*]] = getelementptr inbounds 
[[STRUCT___VA_LIST_TAG]], ptr [[ARRAYDECAY1]], i32 0, i32 3
+// CHECK-NEXT:    [[REG_SAVE_AREA:%.*]] = load ptr, ptr [[TMP0]], align 16
+// CHECK-NEXT:    [[TMP1:%.*]] = getelementptr i8, ptr [[REG_SAVE_AREA]], i32 
[[GP_OFFSET]]
+// CHECK-NEXT:    [[TMP2:%.*]] = load i64, ptr [[TMP1]], align 8
+// CHECK-NEXT:    [[TMP3:%.*]] = getelementptr i8, ptr [[TMP]], i32 0
+// CHECK-NEXT:    store i64 [[TMP2]], ptr [[TMP3]], align 8
+// CHECK-NEXT:    [[TMP4:%.*]] = add i32 [[GP_OFFSET]], 8
+// CHECK-NEXT:    store i32 [[TMP4]], ptr [[GP_OFFSET_P]], align 16
+// CHECK-NEXT:    br label [[VAARG_END:%.*]]
+// CHECK:       vaarg.in_mem:
+// CHECK-NEXT:    [[OVERFLOW_ARG_AREA_P:%.*]] = getelementptr inbounds 
[[STRUCT___VA_LIST_TAG]], ptr [[ARRAYDECAY1]], i32 0, i32 2
+// CHECK-NEXT:    [[OVERFLOW_ARG_AREA:%.*]] = load ptr, ptr 
[[OVERFLOW_ARG_AREA_P]], align 8
+// CHECK-NEXT:    [[OVERFLOW_ARG_AREA_NEXT:%.*]] = getelementptr i8, ptr 
[[OVERFLOW_ARG_AREA]], i32 16
+// CHECK-NEXT:    store ptr [[OVERFLOW_ARG_AREA_NEXT]], ptr 
[[OVERFLOW_ARG_AREA_P]], align 8
+// CHECK-NEXT:    br label [[VAARG_END]]
+// CHECK:       vaarg.end:
+// CHECK-NEXT:    [[VAARG_ADDR:%.*]] = phi ptr [ [[TMP]], [[VAARG_IN_REG]] ], 
[ [[OVERFLOW_ARG_AREA]], [[VAARG_IN_MEM]] ]
+// CHECK-NEXT:    call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[RETVAL]], ptr 
align 8 [[VAARG_ADDR]], i64 16, i1 false)
+// CHECK-NEXT:    [[COERCE_DIVE:%.*]] = getelementptr inbounds [[STRUCT_S7]], 
ptr [[RETVAL]], i32 0, i32 0
+// CHECK-NEXT:    [[TMP5:%.*]] = load i64, ptr [[COERCE_DIVE]], align 8
+// CHECK-NEXT:    ret i64 [[TMP5]]
+//
+s7 f7(int z, ...) {
+  __builtin_va_list list;
+  __builtin_va_start(list, z);
+  return __builtin_va_arg(list, s7);
+}
+
+typedef struct {
+  long long a;
+  int b;
+} s8;
+
+// CHECK-LABEL: @_Z2f8iz(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[RETVAL:%.*]] = alloca [[STRUCT_S8:%.*]], align 8
+// CHECK-NEXT:    [[Z_ADDR:%.*]] = alloca i32, align 4
+// CHECK-NEXT:    [[LIST:%.*]] = alloca [1 x %struct.__va_list_tag], align 16
+// CHECK-NEXT:    [[TMP:%.*]] = alloca [[STRUCT_S8]], align 8
+// CHECK-NEXT:    store i32 [[Z:%.*]], ptr [[Z_ADDR]], align 4
+// CHECK-NEXT:    [[ARRAYDECAY:%.*]] = getelementptr inbounds [1 x 
%struct.__va_list_tag], ptr [[LIST]], i64 0, i64 0
+// CHECK-NEXT:    call void @llvm.va_start.p0(ptr [[ARRAYDECAY]])
+// CHECK-NEXT:    [[ARRAYDECAY1:%.*]] = getelementptr inbounds [1 x 
%struct.__va_list_tag], ptr [[LIST]], i64 0, i64 0
+// CHECK-NEXT:    [[GP_OFFSET_P:%.*]] = getelementptr inbounds 
[[STRUCT___VA_LIST_TAG:%.*]], ptr [[ARRAYDECAY1]], i32 0, i32 0
+// CHECK-NEXT:    [[GP_OFFSET:%.*]] = load i32, ptr [[GP_OFFSET_P]], align 16
+// CHECK-NEXT:    [[FITS_IN_GP:%.*]] = icmp ule i32 [[GP_OFFSET]], 32
+// CHECK-NEXT:    br i1 [[FITS_IN_GP]], label [[VAARG_IN_REG:%.*]], label 
[[VAARG_IN_MEM:%.*]]
+// CHECK:       vaarg.in_reg:
+// CHECK-NEXT:    [[TMP0:%.*]] = getelementptr inbounds 
[[STRUCT___VA_LIST_TAG]], ptr [[ARRAYDECAY1]], i32 0, i32 3
+// CHECK-NEXT:    [[REG_SAVE_AREA:%.*]] = load ptr, ptr [[TMP0]], align 16
+// CHECK-NEXT:    [[TMP1:%.*]] = getelementptr i8, ptr [[REG_SAVE_AREA]], i32 
[[GP_OFFSET]]
+// CHECK-NEXT:    [[TMP2:%.*]] = load { i64, i32 }, ptr [[TMP1]], align 8
+// CHECK-NEXT:    [[TMP3:%.*]] = getelementptr i8, ptr [[TMP]], i32 0
+// CHECK-NEXT:    store { i64, i32 } [[TMP2]], ptr [[TMP3]], align 8
+// CHECK-NEXT:    [[TMP4:%.*]] = add i32 [[GP_OFFSET]], 16
+// CHECK-NEXT:    store i32 [[TMP4]], ptr [[GP_OFFSET_P]], align 16
+// CHECK-NEXT:    br label [[VAARG_END:%.*]]
+// CHECK:       vaarg.in_mem:
+// CHECK-NEXT:    [[OVERFLOW_ARG_AREA_P:%.*]] = getelementptr inbounds 
[[STRUCT___VA_LIST_TAG]], ptr [[ARRAYDECAY1]], i32 0, i32 2
+// CHECK-NEXT:    [[OVERFLOW_ARG_AREA:%.*]] = load ptr, ptr 
[[OVERFLOW_ARG_AREA_P]], align 8
+// CHECK-NEXT:    [[OVERFLOW_ARG_AREA_NEXT:%.*]] = getelementptr i8, ptr 
[[OVERFLOW_ARG_AREA]], i32 16
+// CHECK-NEXT:    store ptr [[OVERFLOW_ARG_AREA_NEXT]], ptr 
[[OVERFLOW_ARG_AREA_P]], align 8
+// CHECK-NEXT:    br label [[VAARG_END]]
+// CHECK:       vaarg.end:
+// CHECK-NEXT:    [[VAARG_ADDR:%.*]] = phi ptr [ [[TMP]], [[VAARG_IN_REG]] ], 
[ [[OVERFLOW_ARG_AREA]], [[VAARG_IN_MEM]] ]
+// CHECK-NEXT:    call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[RETVAL]], ptr 
align 8 [[VAARG_ADDR]], i64 16, i1 false)
+// CHECK-NEXT:    [[TMP5:%.*]] = load { i64, i32 }, ptr [[RETVAL]], align 8
+// CHECK-NEXT:    ret { i64, i32 } [[TMP5]]
+//
+s8 f8(int z, ...) {
+  __builtin_va_list list;
+  __builtin_va_start(list, z);
+  return __builtin_va_arg(list, s8);
+}

_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to