https://github.com/CoTinker updated https://github.com/llvm/llvm-project/pull/101639
>From 328ae6052894eb45c4e7541dadf63a3a04540cda Mon Sep 17 00:00:00 2001 From: Longsheng Mou <moulongsh...@huawei.com> Date: Fri, 2 Aug 2024 16:36:31 +0800 Subject: [PATCH 1/2] [X86_64] Fix empty field error in vaarg of C++. Such struct types: ``` struct { struct{} a; long long b; }; stuct { struct{} a; double b; }; ``` For such structures, Lo is NoClass and Hi is Integer/SSE. And when this structure argument is passed, the high part is passed at offset 8 in memory. So we should do special handling for these types in EmitVAArg. --- clang/lib/CodeGen/Targets/X86.cpp | 38 +++-- clang/test/CodeGenCXX/x86_64-vaarg.cpp | 211 ++++++++++++++++++++++++- 2 files changed, 231 insertions(+), 18 deletions(-) diff --git a/clang/lib/CodeGen/Targets/X86.cpp b/clang/lib/CodeGen/Targets/X86.cpp index 26ff4e4ac0a3b5..0347bd01f82ef3 100644 --- a/clang/lib/CodeGen/Targets/X86.cpp +++ b/clang/lib/CodeGen/Targets/X86.cpp @@ -3124,26 +3124,40 @@ RValue X86_64ABIInfo::EmitVAArg(CodeGenFunction &CGF, Address VAListAddr, CGF.Builder.CreateStore(V, CGF.Builder.CreateStructGEP(Tmp, 1)); RegAddr = Tmp.withElementType(LTy); - } else if (neededInt) { - RegAddr = Address(CGF.Builder.CreateGEP(CGF.Int8Ty, RegSaveArea, gp_offset), - LTy, CharUnits::fromQuantity(8)); - + } else if (neededInt || neededSSE == 1) { // Copy to a temporary if necessary to ensure the appropriate alignment. auto TInfo = getContext().getTypeInfoInChars(Ty); uint64_t TySize = TInfo.Width.getQuantity(); CharUnits TyAlign = TInfo.Align; - // Copy into a temporary if the type is more aligned than the - // register save area. - if (TyAlign.getQuantity() > 8) { + llvm::Value *GpOrFpOffset = neededInt ? gp_offset : fp_offset; + uint64_t Alignment = neededInt ? 8 : 16; + if (auto Offset = AI.getDirectOffset()) { Address Tmp = CGF.CreateMemTemp(Ty); - CGF.Builder.CreateMemCpy(Tmp, RegAddr, TySize, false); - RegAddr = Tmp; + llvm::Type *TyHi = AI.getCoerceToType(); + llvm::Value *Addr = + CGF.Builder.CreateGEP(CGF.Int8Ty, RegSaveArea, GpOrFpOffset); + llvm::Value *Src = CGF.Builder.CreateAlignedLoad(TyHi, Addr, TyAlign); + llvm::Value *PtrOffset = llvm::ConstantInt::get(CGF.Int32Ty, Offset); + Address Dst = Address( + CGF.Builder.CreateGEP(CGF.Int8Ty, Tmp.getBasePointer(), PtrOffset), + LTy, TyAlign); + CGF.Builder.CreateStore(Src, Dst); + RegAddr = Tmp.withElementType(LTy); + } else { + RegAddr = + Address(CGF.Builder.CreateGEP(CGF.Int8Ty, RegSaveArea, GpOrFpOffset), + LTy, CharUnits::fromQuantity(Alignment)); + + // Copy into a temporary if the type is more aligned than the + // register save area. + if (neededInt && TyAlign.getQuantity() > 8) { + Address Tmp = CGF.CreateMemTemp(Ty); + CGF.Builder.CreateMemCpy(Tmp, RegAddr, TySize, false); + RegAddr = Tmp; + } } - } else if (neededSSE == 1) { - RegAddr = Address(CGF.Builder.CreateGEP(CGF.Int8Ty, RegSaveArea, fp_offset), - LTy, CharUnits::fromQuantity(16)); } else { assert(neededSSE == 2 && "Invalid number of needed registers!"); // SSE registers are spaced 16 bytes apart in the register save diff --git a/clang/test/CodeGenCXX/x86_64-vaarg.cpp b/clang/test/CodeGenCXX/x86_64-vaarg.cpp index c4616a97e20555..1703b74ad10a2f 100644 --- a/clang/test/CodeGenCXX/x86_64-vaarg.cpp +++ b/clang/test/CodeGenCXX/x86_64-vaarg.cpp @@ -29,6 +29,7 @@ typedef struct { // CHECK-NEXT: [[RETVAL:%.*]] = alloca [[STRUCT_S1:%.*]], align 8 // CHECK-NEXT: [[Z_ADDR:%.*]] = alloca i32, align 4 // CHECK-NEXT: [[LIST:%.*]] = alloca [1 x %struct.__va_list_tag], align 16 +// CHECK-NEXT: [[TMP:%.*]] = alloca [[STRUCT_S1]], align 8 // CHECK-NEXT: store i32 [[Z:%.*]], ptr [[Z_ADDR]], align 4 // CHECK-NEXT: [[ARRAYDECAY:%.*]] = getelementptr inbounds [1 x %struct.__va_list_tag], ptr [[LIST]], i64 0, i64 0 // CHECK-NEXT: call void @llvm.va_start.p0(ptr [[ARRAYDECAY]]) @@ -41,8 +42,11 @@ typedef struct { // CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT___VA_LIST_TAG]], ptr [[ARRAYDECAY1]], i32 0, i32 3 // CHECK-NEXT: [[REG_SAVE_AREA:%.*]] = load ptr, ptr [[TMP0]], align 16 // CHECK-NEXT: [[TMP1:%.*]] = getelementptr i8, ptr [[REG_SAVE_AREA]], i32 [[FP_OFFSET]] -// CHECK-NEXT: [[TMP2:%.*]] = add i32 [[FP_OFFSET]], 16 -// CHECK-NEXT: store i32 [[TMP2]], ptr [[FP_OFFSET_P]], align 4 +// CHECK-NEXT: [[TMP2:%.*]] = load double, ptr [[TMP1]], align 8 +// CHECK-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[TMP]], i32 8 +// CHECK-NEXT: store double [[TMP2]], ptr [[TMP3]], align 8 +// CHECK-NEXT: [[TMP4:%.*]] = add i32 [[FP_OFFSET]], 16 +// CHECK-NEXT: store i32 [[TMP4]], ptr [[FP_OFFSET_P]], align 4 // CHECK-NEXT: br label [[VAARG_END:%.*]] // CHECK: vaarg.in_mem: // CHECK-NEXT: [[OVERFLOW_ARG_AREA_P:%.*]] = getelementptr inbounds [[STRUCT___VA_LIST_TAG]], ptr [[ARRAYDECAY1]], i32 0, i32 2 @@ -51,14 +55,209 @@ typedef struct { // CHECK-NEXT: store ptr [[OVERFLOW_ARG_AREA_NEXT]], ptr [[OVERFLOW_ARG_AREA_P]], align 8 // CHECK-NEXT: br label [[VAARG_END]] // CHECK: vaarg.end: -// CHECK-NEXT: [[VAARG_ADDR:%.*]] = phi ptr [ [[TMP1]], [[VAARG_IN_REG]] ], [ [[OVERFLOW_ARG_AREA]], [[VAARG_IN_MEM]] ] +// CHECK-NEXT: [[VAARG_ADDR:%.*]] = phi ptr [ [[TMP]], [[VAARG_IN_REG]] ], [ [[OVERFLOW_ARG_AREA]], [[VAARG_IN_MEM]] ] // CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[RETVAL]], ptr align 8 [[VAARG_ADDR]], i64 16, i1 false) -// CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i8, ptr [[RETVAL]], i64 8 -// CHECK-NEXT: [[TMP4:%.*]] = load double, ptr [[TMP3]], align 8 -// CHECK-NEXT: ret double [[TMP4]] +// CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i8, ptr [[RETVAL]], i64 8 +// CHECK-NEXT: [[TMP6:%.*]] = load double, ptr [[TMP5]], align 8 +// CHECK-NEXT: ret double [[TMP6]] // s1 f(int z, ...) { __builtin_va_list list; __builtin_va_start(list, z); return __builtin_va_arg(list, s1); } + +typedef struct { + struct{} a[5]; + float b; + float c; +} s2; + +// CHECK-LABEL: @_Z2f2iz( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[RETVAL:%.*]] = alloca [[STRUCT_S2:%.*]], align 4 +// CHECK-NEXT: [[Z_ADDR:%.*]] = alloca i32, align 4 +// CHECK-NEXT: [[LIST:%.*]] = alloca [1 x %struct.__va_list_tag], align 16 +// CHECK-NEXT: [[TMP:%.*]] = alloca [[STRUCT_S2]], align 4 +// CHECK-NEXT: store i32 [[Z:%.*]], ptr [[Z_ADDR]], align 4 +// CHECK-NEXT: [[ARRAYDECAY:%.*]] = getelementptr inbounds [1 x %struct.__va_list_tag], ptr [[LIST]], i64 0, i64 0 +// CHECK-NEXT: call void @llvm.va_start.p0(ptr [[ARRAYDECAY]]) +// CHECK-NEXT: [[ARRAYDECAY1:%.*]] = getelementptr inbounds [1 x %struct.__va_list_tag], ptr [[LIST]], i64 0, i64 0 +// CHECK-NEXT: [[FP_OFFSET_P:%.*]] = getelementptr inbounds [[STRUCT___VA_LIST_TAG:%.*]], ptr [[ARRAYDECAY1]], i32 0, i32 1 +// CHECK-NEXT: [[FP_OFFSET:%.*]] = load i32, ptr [[FP_OFFSET_P]], align 4 +// CHECK-NEXT: [[FITS_IN_FP:%.*]] = icmp ule i32 [[FP_OFFSET]], 160 +// CHECK-NEXT: br i1 [[FITS_IN_FP]], label [[VAARG_IN_REG:%.*]], label [[VAARG_IN_MEM:%.*]] +// CHECK: vaarg.in_reg: +// CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT___VA_LIST_TAG]], ptr [[ARRAYDECAY1]], i32 0, i32 3 +// CHECK-NEXT: [[REG_SAVE_AREA:%.*]] = load ptr, ptr [[TMP0]], align 16 +// CHECK-NEXT: [[TMP1:%.*]] = getelementptr i8, ptr [[REG_SAVE_AREA]], i32 [[FP_OFFSET]] +// CHECK-NEXT: [[TMP2:%.*]] = load <2 x float>, ptr [[TMP1]], align 4 +// CHECK-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[TMP]], i32 8 +// CHECK-NEXT: store <2 x float> [[TMP2]], ptr [[TMP3]], align 4 +// CHECK-NEXT: [[TMP4:%.*]] = add i32 [[FP_OFFSET]], 16 +// CHECK-NEXT: store i32 [[TMP4]], ptr [[FP_OFFSET_P]], align 4 +// CHECK-NEXT: br label [[VAARG_END:%.*]] +// CHECK: vaarg.in_mem: +// CHECK-NEXT: [[OVERFLOW_ARG_AREA_P:%.*]] = getelementptr inbounds [[STRUCT___VA_LIST_TAG]], ptr [[ARRAYDECAY1]], i32 0, i32 2 +// CHECK-NEXT: [[OVERFLOW_ARG_AREA:%.*]] = load ptr, ptr [[OVERFLOW_ARG_AREA_P]], align 8 +// CHECK-NEXT: [[OVERFLOW_ARG_AREA_NEXT:%.*]] = getelementptr i8, ptr [[OVERFLOW_ARG_AREA]], i32 16 +// CHECK-NEXT: store ptr [[OVERFLOW_ARG_AREA_NEXT]], ptr [[OVERFLOW_ARG_AREA_P]], align 8 +// CHECK-NEXT: br label [[VAARG_END]] +// CHECK: vaarg.end: +// CHECK-NEXT: [[VAARG_ADDR:%.*]] = phi ptr [ [[TMP]], [[VAARG_IN_REG]] ], [ [[OVERFLOW_ARG_AREA]], [[VAARG_IN_MEM]] ] +// CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[RETVAL]], ptr align 4 [[VAARG_ADDR]], i64 16, i1 false) +// CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i8, ptr [[RETVAL]], i64 8 +// CHECK-NEXT: [[TMP6:%.*]] = load <2 x float>, ptr [[TMP5]], align 4 +// CHECK-NEXT: ret <2 x float> [[TMP6]] +// +s2 f2(int z, ...) { + __builtin_va_list list; + __builtin_va_start(list, z); + return __builtin_va_arg(list, s2); +} + +typedef struct { + struct{} a; + long long b; +} s3; + +// CHECK-LABEL: @_Z2f3iz( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[RETVAL:%.*]] = alloca [[STRUCT_S3:%.*]], align 8 +// CHECK-NEXT: [[Z_ADDR:%.*]] = alloca i32, align 4 +// CHECK-NEXT: [[LIST:%.*]] = alloca [1 x %struct.__va_list_tag], align 16 +// CHECK-NEXT: [[TMP:%.*]] = alloca [[STRUCT_S3]], align 8 +// CHECK-NEXT: store i32 [[Z:%.*]], ptr [[Z_ADDR]], align 4 +// CHECK-NEXT: [[ARRAYDECAY:%.*]] = getelementptr inbounds [1 x %struct.__va_list_tag], ptr [[LIST]], i64 0, i64 0 +// CHECK-NEXT: call void @llvm.va_start.p0(ptr [[ARRAYDECAY]]) +// CHECK-NEXT: [[ARRAYDECAY1:%.*]] = getelementptr inbounds [1 x %struct.__va_list_tag], ptr [[LIST]], i64 0, i64 0 +// CHECK-NEXT: [[GP_OFFSET_P:%.*]] = getelementptr inbounds [[STRUCT___VA_LIST_TAG:%.*]], ptr [[ARRAYDECAY1]], i32 0, i32 0 +// CHECK-NEXT: [[GP_OFFSET:%.*]] = load i32, ptr [[GP_OFFSET_P]], align 16 +// CHECK-NEXT: [[FITS_IN_GP:%.*]] = icmp ule i32 [[GP_OFFSET]], 40 +// CHECK-NEXT: br i1 [[FITS_IN_GP]], label [[VAARG_IN_REG:%.*]], label [[VAARG_IN_MEM:%.*]] +// CHECK: vaarg.in_reg: +// CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT___VA_LIST_TAG]], ptr [[ARRAYDECAY1]], i32 0, i32 3 +// CHECK-NEXT: [[REG_SAVE_AREA:%.*]] = load ptr, ptr [[TMP0]], align 16 +// CHECK-NEXT: [[TMP1:%.*]] = getelementptr i8, ptr [[REG_SAVE_AREA]], i32 [[GP_OFFSET]] +// CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr [[TMP1]], align 8 +// CHECK-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[TMP]], i32 8 +// CHECK-NEXT: store i64 [[TMP2]], ptr [[TMP3]], align 8 +// CHECK-NEXT: [[TMP4:%.*]] = add i32 [[GP_OFFSET]], 8 +// CHECK-NEXT: store i32 [[TMP4]], ptr [[GP_OFFSET_P]], align 16 +// CHECK-NEXT: br label [[VAARG_END:%.*]] +// CHECK: vaarg.in_mem: +// CHECK-NEXT: [[OVERFLOW_ARG_AREA_P:%.*]] = getelementptr inbounds [[STRUCT___VA_LIST_TAG]], ptr [[ARRAYDECAY1]], i32 0, i32 2 +// CHECK-NEXT: [[OVERFLOW_ARG_AREA:%.*]] = load ptr, ptr [[OVERFLOW_ARG_AREA_P]], align 8 +// CHECK-NEXT: [[OVERFLOW_ARG_AREA_NEXT:%.*]] = getelementptr i8, ptr [[OVERFLOW_ARG_AREA]], i32 16 +// CHECK-NEXT: store ptr [[OVERFLOW_ARG_AREA_NEXT]], ptr [[OVERFLOW_ARG_AREA_P]], align 8 +// CHECK-NEXT: br label [[VAARG_END]] +// CHECK: vaarg.end: +// CHECK-NEXT: [[VAARG_ADDR:%.*]] = phi ptr [ [[TMP]], [[VAARG_IN_REG]] ], [ [[OVERFLOW_ARG_AREA]], [[VAARG_IN_MEM]] ] +// CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[RETVAL]], ptr align 8 [[VAARG_ADDR]], i64 16, i1 false) +// CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i8, ptr [[RETVAL]], i64 8 +// CHECK-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP5]], align 8 +// CHECK-NEXT: ret i64 [[TMP6]] +// +s3 f3(int z, ...) { + __builtin_va_list list; + __builtin_va_start(list, z); + return __builtin_va_arg(list, s3); +} + +typedef struct { + struct{} a[7]; + short b; + int c; +} s4; + +// CHECK-LABEL: @_Z2f4iz( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[RETVAL:%.*]] = alloca [[STRUCT_S4:%.*]], align 4 +// CHECK-NEXT: [[Z_ADDR:%.*]] = alloca i32, align 4 +// CHECK-NEXT: [[LIST:%.*]] = alloca [1 x %struct.__va_list_tag], align 16 +// CHECK-NEXT: [[TMP:%.*]] = alloca [[STRUCT_S4]], align 4 +// CHECK-NEXT: store i32 [[Z:%.*]], ptr [[Z_ADDR]], align 4 +// CHECK-NEXT: [[ARRAYDECAY:%.*]] = getelementptr inbounds [1 x %struct.__va_list_tag], ptr [[LIST]], i64 0, i64 0 +// CHECK-NEXT: call void @llvm.va_start.p0(ptr [[ARRAYDECAY]]) +// CHECK-NEXT: [[ARRAYDECAY1:%.*]] = getelementptr inbounds [1 x %struct.__va_list_tag], ptr [[LIST]], i64 0, i64 0 +// CHECK-NEXT: [[GP_OFFSET_P:%.*]] = getelementptr inbounds [[STRUCT___VA_LIST_TAG:%.*]], ptr [[ARRAYDECAY1]], i32 0, i32 0 +// CHECK-NEXT: [[GP_OFFSET:%.*]] = load i32, ptr [[GP_OFFSET_P]], align 16 +// CHECK-NEXT: [[FITS_IN_GP:%.*]] = icmp ule i32 [[GP_OFFSET]], 40 +// CHECK-NEXT: br i1 [[FITS_IN_GP]], label [[VAARG_IN_REG:%.*]], label [[VAARG_IN_MEM:%.*]] +// CHECK: vaarg.in_reg: +// CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT___VA_LIST_TAG]], ptr [[ARRAYDECAY1]], i32 0, i32 3 +// CHECK-NEXT: [[REG_SAVE_AREA:%.*]] = load ptr, ptr [[TMP0]], align 16 +// CHECK-NEXT: [[TMP1:%.*]] = getelementptr i8, ptr [[REG_SAVE_AREA]], i32 [[GP_OFFSET]] +// CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr [[TMP1]], align 4 +// CHECK-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[TMP]], i32 8 +// CHECK-NEXT: store i64 [[TMP2]], ptr [[TMP3]], align 4 +// CHECK-NEXT: [[TMP4:%.*]] = add i32 [[GP_OFFSET]], 8 +// CHECK-NEXT: store i32 [[TMP4]], ptr [[GP_OFFSET_P]], align 16 +// CHECK-NEXT: br label [[VAARG_END:%.*]] +// CHECK: vaarg.in_mem: +// CHECK-NEXT: [[OVERFLOW_ARG_AREA_P:%.*]] = getelementptr inbounds [[STRUCT___VA_LIST_TAG]], ptr [[ARRAYDECAY1]], i32 0, i32 2 +// CHECK-NEXT: [[OVERFLOW_ARG_AREA:%.*]] = load ptr, ptr [[OVERFLOW_ARG_AREA_P]], align 8 +// CHECK-NEXT: [[OVERFLOW_ARG_AREA_NEXT:%.*]] = getelementptr i8, ptr [[OVERFLOW_ARG_AREA]], i32 16 +// CHECK-NEXT: store ptr [[OVERFLOW_ARG_AREA_NEXT]], ptr [[OVERFLOW_ARG_AREA_P]], align 8 +// CHECK-NEXT: br label [[VAARG_END]] +// CHECK: vaarg.end: +// CHECK-NEXT: [[VAARG_ADDR:%.*]] = phi ptr [ [[TMP]], [[VAARG_IN_REG]] ], [ [[OVERFLOW_ARG_AREA]], [[VAARG_IN_MEM]] ] +// CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[RETVAL]], ptr align 4 [[VAARG_ADDR]], i64 16, i1 false) +// CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i8, ptr [[RETVAL]], i64 8 +// CHECK-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP5]], align 4 +// CHECK-NEXT: ret i64 [[TMP6]] +// +s4 f4(int z, ...) { + __builtin_va_list list; + __builtin_va_start(list, z); + return __builtin_va_arg(list, s4); +} + +typedef struct { + struct{} a[5]; + float b; + int c; +} s5; + +// CHECK-LABEL: @_Z2f5iz( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[RETVAL:%.*]] = alloca [[STRUCT_S5:%.*]], align 4 +// CHECK-NEXT: [[Z_ADDR:%.*]] = alloca i32, align 4 +// CHECK-NEXT: [[LIST:%.*]] = alloca [1 x %struct.__va_list_tag], align 16 +// CHECK-NEXT: [[TMP:%.*]] = alloca [[STRUCT_S5]], align 4 +// CHECK-NEXT: store i32 [[Z:%.*]], ptr [[Z_ADDR]], align 4 +// CHECK-NEXT: [[ARRAYDECAY:%.*]] = getelementptr inbounds [1 x %struct.__va_list_tag], ptr [[LIST]], i64 0, i64 0 +// CHECK-NEXT: call void @llvm.va_start.p0(ptr [[ARRAYDECAY]]) +// CHECK-NEXT: [[ARRAYDECAY1:%.*]] = getelementptr inbounds [1 x %struct.__va_list_tag], ptr [[LIST]], i64 0, i64 0 +// CHECK-NEXT: [[GP_OFFSET_P:%.*]] = getelementptr inbounds [[STRUCT___VA_LIST_TAG:%.*]], ptr [[ARRAYDECAY1]], i32 0, i32 0 +// CHECK-NEXT: [[GP_OFFSET:%.*]] = load i32, ptr [[GP_OFFSET_P]], align 16 +// CHECK-NEXT: [[FITS_IN_GP:%.*]] = icmp ule i32 [[GP_OFFSET]], 40 +// CHECK-NEXT: br i1 [[FITS_IN_GP]], label [[VAARG_IN_REG:%.*]], label [[VAARG_IN_MEM:%.*]] +// CHECK: vaarg.in_reg: +// CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT___VA_LIST_TAG]], ptr [[ARRAYDECAY1]], i32 0, i32 3 +// CHECK-NEXT: [[REG_SAVE_AREA:%.*]] = load ptr, ptr [[TMP0]], align 16 +// CHECK-NEXT: [[TMP1:%.*]] = getelementptr i8, ptr [[REG_SAVE_AREA]], i32 [[GP_OFFSET]] +// CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr [[TMP1]], align 4 +// CHECK-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[TMP]], i32 8 +// CHECK-NEXT: store i64 [[TMP2]], ptr [[TMP3]], align 4 +// CHECK-NEXT: [[TMP4:%.*]] = add i32 [[GP_OFFSET]], 8 +// CHECK-NEXT: store i32 [[TMP4]], ptr [[GP_OFFSET_P]], align 16 +// CHECK-NEXT: br label [[VAARG_END:%.*]] +// CHECK: vaarg.in_mem: +// CHECK-NEXT: [[OVERFLOW_ARG_AREA_P:%.*]] = getelementptr inbounds [[STRUCT___VA_LIST_TAG]], ptr [[ARRAYDECAY1]], i32 0, i32 2 +// CHECK-NEXT: [[OVERFLOW_ARG_AREA:%.*]] = load ptr, ptr [[OVERFLOW_ARG_AREA_P]], align 8 +// CHECK-NEXT: [[OVERFLOW_ARG_AREA_NEXT:%.*]] = getelementptr i8, ptr [[OVERFLOW_ARG_AREA]], i32 16 +// CHECK-NEXT: store ptr [[OVERFLOW_ARG_AREA_NEXT]], ptr [[OVERFLOW_ARG_AREA_P]], align 8 +// CHECK-NEXT: br label [[VAARG_END]] +// CHECK: vaarg.end: +// CHECK-NEXT: [[VAARG_ADDR:%.*]] = phi ptr [ [[TMP]], [[VAARG_IN_REG]] ], [ [[OVERFLOW_ARG_AREA]], [[VAARG_IN_MEM]] ] +// CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[RETVAL]], ptr align 4 [[VAARG_ADDR]], i64 16, i1 false) +// CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i8, ptr [[RETVAL]], i64 8 +// CHECK-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP5]], align 4 +// CHECK-NEXT: ret i64 [[TMP6]] +// +s5 f5(int z, ...) { + __builtin_va_list list; + __builtin_va_start(list, z); + return __builtin_va_arg(list, s5); +} >From f43535f9fbf0cb9f54d1db81ed7fa5e41450a180 Mon Sep 17 00:00:00 2001 From: Longsheng Mou <moulongsh...@huawei.com> Date: Sat, 10 Aug 2024 10:47:07 +0800 Subject: [PATCH 2/2] [X86_64] Fix edge cases Fix edge cases that may reading past the end of the register save area. Such as: ``` struct { long long a; struct{} b; }; ``` and ``` struct { long long a; int b; }; ``` --- clang/lib/CodeGen/Targets/X86.cpp | 39 ++++++- clang/test/CodeGenCXX/x86_64-vaarg.cpp | 143 +++++++++++++++++++++++++ 2 files changed, 178 insertions(+), 4 deletions(-) diff --git a/clang/lib/CodeGen/Targets/X86.cpp b/clang/lib/CodeGen/Targets/X86.cpp index 0347bd01f82ef3..7df6619467d7ab 100644 --- a/clang/lib/CodeGen/Targets/X86.cpp +++ b/clang/lib/CodeGen/Targets/X86.cpp @@ -3129,16 +3129,47 @@ RValue X86_64ABIInfo::EmitVAArg(CodeGenFunction &CGF, Address VAListAddr, auto TInfo = getContext().getTypeInfoInChars(Ty); uint64_t TySize = TInfo.Width.getQuantity(); CharUnits TyAlign = TInfo.Align; + llvm::Type *CoTy = AI.getCoerceToType(); + uint64_t RegSize = 0; + // Obtain the actual register storage size. + if (CoTy->isStructTy()) { + auto ST = cast<llvm::StructType>(CoTy); + for (unsigned i = 0; i < ST->getNumElements(); ++i) { + auto elementType = ST->getElementType(i); + RegSize += CGF.CGM.getDataLayout().getTypeAllocSize(elementType); + } + } else { + RegSize = CGF.CGM.getDataLayout().getTypeAllocSize(CoTy); + } llvm::Value *GpOrFpOffset = neededInt ? gp_offset : fp_offset; uint64_t Alignment = neededInt ? 8 : 16; - if (auto Offset = AI.getDirectOffset()) { + // There are two cases require special handling: + // 1) + // ``` + // struct { + // struct {} a[8]; + // int b; + // }; + // ``` + // The lower 8 bytes of the structure are not stored, + // so an 8-byte offset is needed when accessing the structure. + // 2) + // ``` + // struct { + // long long a; + // int b; + // }; + // ``` + // The stored size of this structure is smaller than its actual size, + // which may lead to reading past the end of the register save area. + if (AI.isDirect() && RegSize < TySize) { Address Tmp = CGF.CreateMemTemp(Ty); - llvm::Type *TyHi = AI.getCoerceToType(); llvm::Value *Addr = CGF.Builder.CreateGEP(CGF.Int8Ty, RegSaveArea, GpOrFpOffset); - llvm::Value *Src = CGF.Builder.CreateAlignedLoad(TyHi, Addr, TyAlign); - llvm::Value *PtrOffset = llvm::ConstantInt::get(CGF.Int32Ty, Offset); + llvm::Value *Src = CGF.Builder.CreateAlignedLoad(CoTy, Addr, TyAlign); + llvm::Value *PtrOffset = + llvm::ConstantInt::get(CGF.Int32Ty, AI.getDirectOffset()); Address Dst = Address( CGF.Builder.CreateGEP(CGF.Int8Ty, Tmp.getBasePointer(), PtrOffset), LTy, TyAlign); diff --git a/clang/test/CodeGenCXX/x86_64-vaarg.cpp b/clang/test/CodeGenCXX/x86_64-vaarg.cpp index 1703b74ad10a2f..80ed12362a6871 100644 --- a/clang/test/CodeGenCXX/x86_64-vaarg.cpp +++ b/clang/test/CodeGenCXX/x86_64-vaarg.cpp @@ -261,3 +261,146 @@ s5 f5(int z, ...) { __builtin_va_start(list, z); return __builtin_va_arg(list, s5); } + +typedef struct { + double a; + struct{} b; +} s6; + +// CHECK-LABEL: @_Z2f6iz( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[RETVAL:%.*]] = alloca [[STRUCT_S6:%.*]], align 8 +// CHECK-NEXT: [[Z_ADDR:%.*]] = alloca i32, align 4 +// CHECK-NEXT: [[LIST:%.*]] = alloca [1 x %struct.__va_list_tag], align 16 +// CHECK-NEXT: [[TMP:%.*]] = alloca [[STRUCT_S6]], align 8 +// CHECK-NEXT: store i32 [[Z:%.*]], ptr [[Z_ADDR]], align 4 +// CHECK-NEXT: [[ARRAYDECAY:%.*]] = getelementptr inbounds [1 x %struct.__va_list_tag], ptr [[LIST]], i64 0, i64 0 +// CHECK-NEXT: call void @llvm.va_start.p0(ptr [[ARRAYDECAY]]) +// CHECK-NEXT: [[ARRAYDECAY1:%.*]] = getelementptr inbounds [1 x %struct.__va_list_tag], ptr [[LIST]], i64 0, i64 0 +// CHECK-NEXT: [[FP_OFFSET_P:%.*]] = getelementptr inbounds [[STRUCT___VA_LIST_TAG:%.*]], ptr [[ARRAYDECAY1]], i32 0, i32 1 +// CHECK-NEXT: [[FP_OFFSET:%.*]] = load i32, ptr [[FP_OFFSET_P]], align 4 +// CHECK-NEXT: [[FITS_IN_FP:%.*]] = icmp ule i32 [[FP_OFFSET]], 160 +// CHECK-NEXT: br i1 [[FITS_IN_FP]], label [[VAARG_IN_REG:%.*]], label [[VAARG_IN_MEM:%.*]] +// CHECK: vaarg.in_reg: +// CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT___VA_LIST_TAG]], ptr [[ARRAYDECAY1]], i32 0, i32 3 +// CHECK-NEXT: [[REG_SAVE_AREA:%.*]] = load ptr, ptr [[TMP0]], align 16 +// CHECK-NEXT: [[TMP1:%.*]] = getelementptr i8, ptr [[REG_SAVE_AREA]], i32 [[FP_OFFSET]] +// CHECK-NEXT: [[TMP2:%.*]] = load double, ptr [[TMP1]], align 8 +// CHECK-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[TMP]], i32 0 +// CHECK-NEXT: store double [[TMP2]], ptr [[TMP3]], align 8 +// CHECK-NEXT: [[TMP4:%.*]] = add i32 [[FP_OFFSET]], 16 +// CHECK-NEXT: store i32 [[TMP4]], ptr [[FP_OFFSET_P]], align 4 +// CHECK-NEXT: br label [[VAARG_END:%.*]] +// CHECK: vaarg.in_mem: +// CHECK-NEXT: [[OVERFLOW_ARG_AREA_P:%.*]] = getelementptr inbounds [[STRUCT___VA_LIST_TAG]], ptr [[ARRAYDECAY1]], i32 0, i32 2 +// CHECK-NEXT: [[OVERFLOW_ARG_AREA:%.*]] = load ptr, ptr [[OVERFLOW_ARG_AREA_P]], align 8 +// CHECK-NEXT: [[OVERFLOW_ARG_AREA_NEXT:%.*]] = getelementptr i8, ptr [[OVERFLOW_ARG_AREA]], i32 16 +// CHECK-NEXT: store ptr [[OVERFLOW_ARG_AREA_NEXT]], ptr [[OVERFLOW_ARG_AREA_P]], align 8 +// CHECK-NEXT: br label [[VAARG_END]] +// CHECK: vaarg.end: +// CHECK-NEXT: [[VAARG_ADDR:%.*]] = phi ptr [ [[TMP]], [[VAARG_IN_REG]] ], [ [[OVERFLOW_ARG_AREA]], [[VAARG_IN_MEM]] ] +// CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[RETVAL]], ptr align 8 [[VAARG_ADDR]], i64 16, i1 false) +// CHECK-NEXT: [[COERCE_DIVE:%.*]] = getelementptr inbounds [[STRUCT_S6]], ptr [[RETVAL]], i32 0, i32 0 +// CHECK-NEXT: [[TMP5:%.*]] = load double, ptr [[COERCE_DIVE]], align 8 +// CHECK-NEXT: ret double [[TMP5]] +// +s6 f6(int z, ...) { + __builtin_va_list list; + __builtin_va_start(list, z); + return __builtin_va_arg(list, s6); +} + +typedef struct { + long long a; + struct{} b; +} s7; + +// CHECK-LABEL: @_Z2f7iz( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[RETVAL:%.*]] = alloca [[STRUCT_S7:%.*]], align 8 +// CHECK-NEXT: [[Z_ADDR:%.*]] = alloca i32, align 4 +// CHECK-NEXT: [[LIST:%.*]] = alloca [1 x %struct.__va_list_tag], align 16 +// CHECK-NEXT: [[TMP:%.*]] = alloca [[STRUCT_S7]], align 8 +// CHECK-NEXT: store i32 [[Z:%.*]], ptr [[Z_ADDR]], align 4 +// CHECK-NEXT: [[ARRAYDECAY:%.*]] = getelementptr inbounds [1 x %struct.__va_list_tag], ptr [[LIST]], i64 0, i64 0 +// CHECK-NEXT: call void @llvm.va_start.p0(ptr [[ARRAYDECAY]]) +// CHECK-NEXT: [[ARRAYDECAY1:%.*]] = getelementptr inbounds [1 x %struct.__va_list_tag], ptr [[LIST]], i64 0, i64 0 +// CHECK-NEXT: [[GP_OFFSET_P:%.*]] = getelementptr inbounds [[STRUCT___VA_LIST_TAG:%.*]], ptr [[ARRAYDECAY1]], i32 0, i32 0 +// CHECK-NEXT: [[GP_OFFSET:%.*]] = load i32, ptr [[GP_OFFSET_P]], align 16 +// CHECK-NEXT: [[FITS_IN_GP:%.*]] = icmp ule i32 [[GP_OFFSET]], 40 +// CHECK-NEXT: br i1 [[FITS_IN_GP]], label [[VAARG_IN_REG:%.*]], label [[VAARG_IN_MEM:%.*]] +// CHECK: vaarg.in_reg: +// CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT___VA_LIST_TAG]], ptr [[ARRAYDECAY1]], i32 0, i32 3 +// CHECK-NEXT: [[REG_SAVE_AREA:%.*]] = load ptr, ptr [[TMP0]], align 16 +// CHECK-NEXT: [[TMP1:%.*]] = getelementptr i8, ptr [[REG_SAVE_AREA]], i32 [[GP_OFFSET]] +// CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr [[TMP1]], align 8 +// CHECK-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[TMP]], i32 0 +// CHECK-NEXT: store i64 [[TMP2]], ptr [[TMP3]], align 8 +// CHECK-NEXT: [[TMP4:%.*]] = add i32 [[GP_OFFSET]], 8 +// CHECK-NEXT: store i32 [[TMP4]], ptr [[GP_OFFSET_P]], align 16 +// CHECK-NEXT: br label [[VAARG_END:%.*]] +// CHECK: vaarg.in_mem: +// CHECK-NEXT: [[OVERFLOW_ARG_AREA_P:%.*]] = getelementptr inbounds [[STRUCT___VA_LIST_TAG]], ptr [[ARRAYDECAY1]], i32 0, i32 2 +// CHECK-NEXT: [[OVERFLOW_ARG_AREA:%.*]] = load ptr, ptr [[OVERFLOW_ARG_AREA_P]], align 8 +// CHECK-NEXT: [[OVERFLOW_ARG_AREA_NEXT:%.*]] = getelementptr i8, ptr [[OVERFLOW_ARG_AREA]], i32 16 +// CHECK-NEXT: store ptr [[OVERFLOW_ARG_AREA_NEXT]], ptr [[OVERFLOW_ARG_AREA_P]], align 8 +// CHECK-NEXT: br label [[VAARG_END]] +// CHECK: vaarg.end: +// CHECK-NEXT: [[VAARG_ADDR:%.*]] = phi ptr [ [[TMP]], [[VAARG_IN_REG]] ], [ [[OVERFLOW_ARG_AREA]], [[VAARG_IN_MEM]] ] +// CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[RETVAL]], ptr align 8 [[VAARG_ADDR]], i64 16, i1 false) +// CHECK-NEXT: [[COERCE_DIVE:%.*]] = getelementptr inbounds [[STRUCT_S7]], ptr [[RETVAL]], i32 0, i32 0 +// CHECK-NEXT: [[TMP5:%.*]] = load i64, ptr [[COERCE_DIVE]], align 8 +// CHECK-NEXT: ret i64 [[TMP5]] +// +s7 f7(int z, ...) { + __builtin_va_list list; + __builtin_va_start(list, z); + return __builtin_va_arg(list, s7); +} + +typedef struct { + long long a; + int b; +} s8; + +// CHECK-LABEL: @_Z2f8iz( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[RETVAL:%.*]] = alloca [[STRUCT_S8:%.*]], align 8 +// CHECK-NEXT: [[Z_ADDR:%.*]] = alloca i32, align 4 +// CHECK-NEXT: [[LIST:%.*]] = alloca [1 x %struct.__va_list_tag], align 16 +// CHECK-NEXT: [[TMP:%.*]] = alloca [[STRUCT_S8]], align 8 +// CHECK-NEXT: store i32 [[Z:%.*]], ptr [[Z_ADDR]], align 4 +// CHECK-NEXT: [[ARRAYDECAY:%.*]] = getelementptr inbounds [1 x %struct.__va_list_tag], ptr [[LIST]], i64 0, i64 0 +// CHECK-NEXT: call void @llvm.va_start.p0(ptr [[ARRAYDECAY]]) +// CHECK-NEXT: [[ARRAYDECAY1:%.*]] = getelementptr inbounds [1 x %struct.__va_list_tag], ptr [[LIST]], i64 0, i64 0 +// CHECK-NEXT: [[GP_OFFSET_P:%.*]] = getelementptr inbounds [[STRUCT___VA_LIST_TAG:%.*]], ptr [[ARRAYDECAY1]], i32 0, i32 0 +// CHECK-NEXT: [[GP_OFFSET:%.*]] = load i32, ptr [[GP_OFFSET_P]], align 16 +// CHECK-NEXT: [[FITS_IN_GP:%.*]] = icmp ule i32 [[GP_OFFSET]], 32 +// CHECK-NEXT: br i1 [[FITS_IN_GP]], label [[VAARG_IN_REG:%.*]], label [[VAARG_IN_MEM:%.*]] +// CHECK: vaarg.in_reg: +// CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT___VA_LIST_TAG]], ptr [[ARRAYDECAY1]], i32 0, i32 3 +// CHECK-NEXT: [[REG_SAVE_AREA:%.*]] = load ptr, ptr [[TMP0]], align 16 +// CHECK-NEXT: [[TMP1:%.*]] = getelementptr i8, ptr [[REG_SAVE_AREA]], i32 [[GP_OFFSET]] +// CHECK-NEXT: [[TMP2:%.*]] = load { i64, i32 }, ptr [[TMP1]], align 8 +// CHECK-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[TMP]], i32 0 +// CHECK-NEXT: store { i64, i32 } [[TMP2]], ptr [[TMP3]], align 8 +// CHECK-NEXT: [[TMP4:%.*]] = add i32 [[GP_OFFSET]], 16 +// CHECK-NEXT: store i32 [[TMP4]], ptr [[GP_OFFSET_P]], align 16 +// CHECK-NEXT: br label [[VAARG_END:%.*]] +// CHECK: vaarg.in_mem: +// CHECK-NEXT: [[OVERFLOW_ARG_AREA_P:%.*]] = getelementptr inbounds [[STRUCT___VA_LIST_TAG]], ptr [[ARRAYDECAY1]], i32 0, i32 2 +// CHECK-NEXT: [[OVERFLOW_ARG_AREA:%.*]] = load ptr, ptr [[OVERFLOW_ARG_AREA_P]], align 8 +// CHECK-NEXT: [[OVERFLOW_ARG_AREA_NEXT:%.*]] = getelementptr i8, ptr [[OVERFLOW_ARG_AREA]], i32 16 +// CHECK-NEXT: store ptr [[OVERFLOW_ARG_AREA_NEXT]], ptr [[OVERFLOW_ARG_AREA_P]], align 8 +// CHECK-NEXT: br label [[VAARG_END]] +// CHECK: vaarg.end: +// CHECK-NEXT: [[VAARG_ADDR:%.*]] = phi ptr [ [[TMP]], [[VAARG_IN_REG]] ], [ [[OVERFLOW_ARG_AREA]], [[VAARG_IN_MEM]] ] +// CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[RETVAL]], ptr align 8 [[VAARG_ADDR]], i64 16, i1 false) +// CHECK-NEXT: [[TMP5:%.*]] = load { i64, i32 }, ptr [[RETVAL]], align 8 +// CHECK-NEXT: ret { i64, i32 } [[TMP5]] +// +s8 f8(int z, ...) { + __builtin_va_list list; + __builtin_va_start(list, z); + return __builtin_va_arg(list, s8); +} _______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits