rnk created this revision. rnk added reviewers: majnemer, hans. rnk added a subscriber: cfe-commits.
Before this change, we would pass all non-HFA record arguments on Windows with byval. Byval often blocks optimizations and results in bad code generation. Windows now uses the existing workaround that other x86_32 platforms use. I also expanded the workaround handle C++ records with constructors on Windows. On non-Windows platforms, we probably have to keep generating the same LLVM IR prototypes if we want our bitcode to be ABI compatible. Otherwise we will encounter mismatch issues like PR21573. http://reviews.llvm.org/D19756 Files: lib/CodeGen/TargetInfo.cpp test/CodeGen/vectorcall.c test/CodeGen/windows-struct-abi.c test/CodeGen/x86_32-arguments-win32.c test/CodeGenCXX/microsoft-abi-cdecl-method-sret.cpp test/CodeGenCXX/microsoft-abi-sret-and-byval.cpp
Index: test/CodeGenCXX/microsoft-abi-sret-and-byval.cpp =================================================================== --- test/CodeGenCXX/microsoft-abi-sret-and-byval.cpp +++ test/CodeGenCXX/microsoft-abi-sret-and-byval.cpp @@ -22,6 +22,16 @@ int x; }; +struct Multibyte { + char a, b, c, d; +}; + +struct Packed { + short a; + int b; + short c; +}; + struct SmallWithDtor { SmallWithDtor(); ~SmallWithDtor(); @@ -102,19 +112,30 @@ void small_arg(Small s) {} // LINUX-LABEL: define void @_Z9small_arg5Small(i32 %s.0) -// WIN32: define void @"\01?small_arg@@YAXUSmall@@@Z"(%struct.Small* byval align 4 %s) +// WIN32: define void @"\01?small_arg@@YAXUSmall@@@Z"(i32 %s.0) // WIN64: define void @"\01?small_arg@@YAXUSmall@@@Z"(i32 %s.coerce) void medium_arg(Medium s) {} // LINUX-LABEL: define void @_Z10medium_arg6Medium(i32 %s.0, i32 %s.1) -// WIN32: define void @"\01?medium_arg@@YAXUMedium@@@Z"(%struct.Medium* byval align 4 %s) +// WIN32: define void @"\01?medium_arg@@YAXUMedium@@@Z"(i32 %s.0, i32 %s.1) // WIN64: define void @"\01?medium_arg@@YAXUMedium@@@Z"(i64 %s.coerce) void small_arg_with_ctor(SmallWithCtor s) {} // LINUX-LABEL: define void @_Z19small_arg_with_ctor13SmallWithCtor(%struct.SmallWithCtor* byval align 4 %s) -// WIN32: define void @"\01?small_arg_with_ctor@@YAXUSmallWithCtor@@@Z"(%struct.SmallWithCtor* byval align 4 %s) +// WIN32: define void @"\01?small_arg_with_ctor@@YAXUSmallWithCtor@@@Z"(i32 %s.0) // WIN64: define void @"\01?small_arg_with_ctor@@YAXUSmallWithCtor@@@Z"(i32 %s.coerce) +// FIXME: We could coerce to a series of i32s here if we wanted to. +void multibyte_arg(Multibyte s) {} +// LINUX-LABEL: define void @_Z13multibyte_arg9Multibyte(%struct.Multibyte* byval align 4 %s) +// WIN32: define void @"\01?multibyte_arg@@YAXUMultibyte@@@Z"(%struct.Multibyte* byval align 4 %s) +// WIN64: define void @"\01?multibyte_arg@@YAXUMultibyte@@@Z"(i32 %s.coerce) + +void packed_arg(Packed s) {} +// LINUX-LABEL: define void @_Z10packed_arg6Packed(%struct.Packed* byval align 4 %s) +// WIN32: define void @"\01?packed_arg@@YAXUPacked@@@Z"(%struct.Packed* byval align 4 %s) +// WIN64: define void @"\01?packed_arg@@YAXUPacked@@@Z"(%struct.Packed* %s) + // Test that dtors are invoked in the callee. void small_arg_with_dtor(SmallWithDtor s) {} // WIN32: define void @"\01?small_arg_with_dtor@@YAXUSmallWithDtor@@@Z"(<{ %struct.SmallWithDtor }>* inalloca) {{.*}} { @@ -230,12 +251,12 @@ void thiscall_method_arg(Small s) {} // LINUX: define {{.*}} void @_ZN5Class19thiscall_method_argE5Small(%class.Class* %this, i32 %s.0) - // WIN32: define {{.*}} void @"\01?thiscall_method_arg@Class@@QAEXUSmall@@@Z"(%class.Class* %this, %struct.Small* byval align 4 %s) + // WIN32: define {{.*}} void @"\01?thiscall_method_arg@Class@@QAEXUSmall@@@Z"(%class.Class* %this, i32 %s.0) // WIN64: define linkonce_odr void @"\01?thiscall_method_arg@Class@@QEAAXUSmall@@@Z"(%class.Class* %this, i32 %s.coerce) void thiscall_method_arg(SmallWithCtor s) {} // LINUX: define {{.*}} void @_ZN5Class19thiscall_method_argE13SmallWithCtor(%class.Class* %this, %struct.SmallWithCtor* byval align 4 %s) - // WIN32: define {{.*}} void @"\01?thiscall_method_arg@Class@@QAEXUSmallWithCtor@@@Z"(%class.Class* %this, %struct.SmallWithCtor* byval align 4 %s) + // WIN32: define {{.*}} void @"\01?thiscall_method_arg@Class@@QAEXUSmallWithCtor@@@Z"(%class.Class* %this, i32 %s.0) // WIN64: define linkonce_odr void @"\01?thiscall_method_arg@Class@@QEAAXUSmallWithCtor@@@Z"(%class.Class* %this, i32 %s.coerce) void thiscall_method_arg(Big s) {} Index: test/CodeGenCXX/microsoft-abi-cdecl-method-sret.cpp =================================================================== --- test/CodeGenCXX/microsoft-abi-cdecl-method-sret.cpp +++ test/CodeGenCXX/microsoft-abi-cdecl-method-sret.cpp @@ -2,10 +2,10 @@ // PR15768 -// A trivial 12 byte struct is returned indirectly. +// A trivial 20 byte struct is returned indirectly and taken as byval. struct S { S(); - int a, b, c; + int a, b, c, d, e; }; struct C { Index: test/CodeGen/x86_32-arguments-win32.c =================================================================== --- test/CodeGen/x86_32-arguments-win32.c +++ test/CodeGen/x86_32-arguments-win32.c @@ -1,7 +1,7 @@ // RUN: %clang_cc1 -w -triple i386-pc-win32 -emit-llvm -o - %s | FileCheck %s // CHECK-LABEL: define i64 @f1_1() -// CHECK-LABEL: define void @f1_2(%struct.s1* byval align 4 %a0) +// CHECK-LABEL: define void @f1_2(i32 %a0.0, i32 %a0.1) struct s1 { int a; int b; @@ -31,15 +31,15 @@ struct s4 f4_1(void) { while (1) {} } // CHECK-LABEL: define i64 @f5_1() -// CHECK-LABEL: define void @f5_2(%struct.s5* byval align 4) +// CHECK-LABEL: define void @f5_2(double %a0.0) struct s5 { double a; }; struct s5 f5_1(void) { while (1) {} } void f5_2(struct s5 a0) {} // CHECK-LABEL: define i32 @f6_1() -// CHECK-LABEL: define void @f6_2(%struct.s6* byval align 4 %a0) +// CHECK-LABEL: define void @f6_2(float %a0.0) struct s6 { float a; }; Index: test/CodeGen/windows-struct-abi.c =================================================================== --- test/CodeGen/windows-struct-abi.c +++ test/CodeGen/windows-struct-abi.c @@ -10,7 +10,7 @@ void receive_f1(struct f1 a0) { } -// CHECK: define void @receive_f1(%struct.f1* byval align 4 %a0) +// CHECK: define void @receive_f1(float %a0.0) struct f2 { float f; @@ -23,7 +23,7 @@ void receive_f2(struct f2 a0) { } -// CHECK: define void @receive_f2(%struct.f2* byval align 4 %a0) +// CHECK: define void @receive_f2(float %a0.0, float %a0.1) struct f4 { float f; @@ -38,5 +38,5 @@ void receive_f4(struct f4 a0) { } -// CHECK: define void @receive_f4(%struct.f4* byval align 4 %a0) +// CHECK: define void @receive_f4(float %a0.0, float %a0.1, float %a0.2, float %a0.3) Index: test/CodeGen/vectorcall.c =================================================================== --- test/CodeGen/vectorcall.c +++ test/CodeGen/vectorcall.c @@ -9,9 +9,9 @@ // CHECK: define x86_vectorcallcc void @"\01v2@@8"(i8 inreg signext %a, i8 inreg signext %b) // X64: define x86_vectorcallcc void @"\01v2@@16"(i8 %a, i8 %b) -struct Small { int a; }; +struct Small { int x; }; void __vectorcall v3(int a, struct Small b, int c) {} -// CHECK: define x86_vectorcallcc void @"\01v3@@12"(i32 inreg %a, %struct.Small* byval align 4 %b, i32 inreg %c) +// CHECK: define x86_vectorcallcc void @"\01v3@@12"(i32 inreg %a, i32 %b.0, i32 inreg %c) // X64: define x86_vectorcallcc void @"\01v3@@24"(i32 %a, i32 %b.coerce, i32 %c) struct Large { int a[5]; }; Index: lib/CodeGen/TargetInfo.cpp =================================================================== --- lib/CodeGen/TargetInfo.cpp +++ lib/CodeGen/TargetInfo.cpp @@ -503,72 +503,6 @@ return Found; } -static bool is32Or64BitBasicType(QualType Ty, ASTContext &Context) { - // Treat complex types as the element type. - if (const ComplexType *CTy = Ty->getAs<ComplexType>()) - Ty = CTy->getElementType(); - - // Check for a type which we know has a simple scalar argument-passing - // convention without any padding. (We're specifically looking for 32 - // and 64-bit integer and integer-equivalents, float, and double.) - if (!Ty->getAs<BuiltinType>() && !Ty->hasPointerRepresentation() && - !Ty->isEnumeralType() && !Ty->isBlockPointerType()) - return false; - - uint64_t Size = Context.getTypeSize(Ty); - return Size == 32 || Size == 64; -} - -/// canExpandIndirectArgument - Test whether an argument type which is to be -/// passed indirectly (on the stack) would have the equivalent layout if it was -/// expanded into separate arguments. If so, we prefer to do the latter to avoid -/// inhibiting optimizations. -/// -// FIXME: This predicate is missing many cases, currently it just follows -// llvm-gcc (checks that all fields are 32-bit or 64-bit primitive types). We -// should probably make this smarter, or better yet make the LLVM backend -// capable of handling it. -static bool canExpandIndirectArgument(QualType Ty, ASTContext &Context) { - // We can only expand structure types. - const RecordType *RT = Ty->getAs<RecordType>(); - if (!RT) - return false; - - // We can only expand (C) structures. - // - // FIXME: This needs to be generalized to handle classes as well. - const RecordDecl *RD = RT->getDecl(); - if (!RD->isStruct()) - return false; - - // We try to expand CLike CXXRecordDecl. - if (const CXXRecordDecl *CXXRD = dyn_cast<CXXRecordDecl>(RD)) { - if (!CXXRD->isCLike()) - return false; - } - - uint64_t Size = 0; - - for (const auto *FD : RD->fields()) { - if (!is32Or64BitBasicType(FD->getType(), Context)) - return false; - - // FIXME: Reject bit-fields wholesale; there are two problems, we don't know - // how to expand them yet, and the predicate for telling if a bitfield still - // counts as "basic" is more complicated than what we were doing previously. - if (FD->isBitField()) - return false; - - Size += Context.getTypeSize(FD->getType()); - } - - // Make sure there are not any holes in the struct. - if (Size != Context.getTypeSize(Ty)) - return false; - - return true; -} - namespace { Address EmitVAArgInstr(CodeGenFunction &CGF, Address VAListAddr, QualType Ty, const ABIArgInfo &AI) { @@ -959,6 +893,8 @@ bool &NeedsPadding) const; bool shouldPrimitiveUseInReg(QualType Ty, CCState &State) const; + bool canExpandIndirectArgument(QualType Ty) const; + /// \brief Rewrite the function info so that all memory arguments use /// inalloca. void rewriteWithInAlloca(CGFunctionInfo &FI) const; @@ -1179,6 +1115,69 @@ return true; } +static bool is32Or64BitBasicType(QualType Ty, ASTContext &Context) { + // Treat complex types as the element type. + if (const ComplexType *CTy = Ty->getAs<ComplexType>()) + Ty = CTy->getElementType(); + + // Check for a type which we know has a simple scalar argument-passing + // convention without any padding. (We're specifically looking for 32 + // and 64-bit integer and integer-equivalents, float, and double.) + if (!Ty->getAs<BuiltinType>() && !Ty->hasPointerRepresentation() && + !Ty->isEnumeralType() && !Ty->isBlockPointerType()) + return false; + + uint64_t Size = Context.getTypeSize(Ty); + return Size == 32 || Size == 64; +} + +/// canExpandIndirectArgument - Test whether an argument type which is to be +/// passed indirectly (on the stack) would have the equivalent layout if it was +/// expanded into separate arguments. If so, we prefer to do the latter to avoid +/// inhibiting optimizations. +bool X86_32ABIInfo::canExpandIndirectArgument(QualType Ty) const { + // We can only expand structure types. + const RecordType *RT = Ty->getAs<RecordType>(); + if (!RT) + return false; + const RecordDecl *RD = RT->getDecl(); + if (const CXXRecordDecl *CXXRD = dyn_cast<CXXRecordDecl>(RD)) { + if (!IsWin32StructABI ) { + // On non-Windows, we have to conservatively match our old bitcode + // prototypes in order to be ABI-compatible at the bitcode level. + if (!CXXRD->isCLike()) + return false; + } else { + // Don't do this for dynamic classes. + if (CXXRD->isDynamicClass()) + return false; + // Don't do this if there are any non-empty bases. + for (const CXXBaseSpecifier &Base : CXXRD->bases()) { + if (!isEmptyRecord(getContext(), Base.getType(), /*AllowArrays=*/true)) + return false; + } + } + } + + uint64_t Size = 0; + + for (const auto *FD : RD->fields()) { + if (!is32Or64BitBasicType(FD->getType(), getContext())) + return false; + + // FIXME: Reject bit-fields wholesale; there are two problems, we don't know + // how to expand them yet, and the predicate for telling if a bitfield still + // counts as "basic" is more complicated than what we were doing previously. + if (FD->isBitField()) + return false; + + Size += getContext().getTypeSize(FD->getType()); + } + + // We can do this if there was no alignment padding. + return Size == getContext().getTypeSize(Ty); +} + ABIArgInfo X86_32ABIInfo::getIndirectReturnResult(QualType RetTy, CCState &State) const { // If the return value is indirect, then the hidden argument is consuming one // integer register. @@ -1395,6 +1394,12 @@ bool X86_32ABIInfo::shouldAggregateUseDirect(QualType Ty, CCState &State, bool &InReg, bool &NeedsPadding) const { + // On Windows, aggregates other than HFAs are never passed in registers, and + // they do not consume register slots. HFAs have already been dealt with at + // this point. + if (IsWin32StructABI && isAggregateTypeForABI(Ty)) + return false; + NeedsPadding = false; InReg = !IsMCUABI; @@ -1468,23 +1473,19 @@ } if (isAggregateTypeForABI(Ty)) { - if (RT) { - // Structs are always byval on win32, regardless of what they contain. - if (IsWin32StructABI) - return getIndirectResult(Ty, true, State); - - // Structures with flexible arrays are always indirect. - if (RT->getDecl()->hasFlexibleArrayMember()) - return getIndirectResult(Ty, true, State); - } + // Structures with flexible arrays are always indirect. + // FIXME: This should not be byval! + if (RT && RT->getDecl()->hasFlexibleArrayMember()) + return getIndirectResult(Ty, true, State); - // Ignore empty structs/unions. - if (isEmptyRecord(getContext(), Ty, true)) + // Ignore empty structs/unions on non-Windows. + if (!IsWin32StructABI && isEmptyRecord(getContext(), Ty, true)) return ABIArgInfo::getIgnore(); llvm::LLVMContext &LLVMContext = getVMContext(); llvm::IntegerType *Int32 = llvm::Type::getInt32Ty(LLVMContext); - bool NeedsPadding, InReg; + bool NeedsPadding = false; + bool InReg; if (shouldAggregateUseDirect(Ty, State, InReg, NeedsPadding)) { unsigned SizeInRegs = (getContext().getTypeSize(Ty) + 31) / 32; SmallVector<llvm::Type*, 3> Elements(SizeInRegs, Int32); @@ -1502,9 +1503,8 @@ // optimizations. // Don't do this for the MCU if there are still free integer registers // (see X86_64 ABI for full explanation). - if (getContext().getTypeSize(Ty) <= 4*32 && - canExpandIndirectArgument(Ty, getContext()) && - (!IsMCUABI || State.FreeRegs == 0)) + if (getContext().getTypeSize(Ty) <= 4 * 32 && + (!IsMCUABI || State.FreeRegs == 0) && canExpandIndirectArgument(Ty)) return ABIArgInfo::getExpandWithPadding( State.CC == llvm::CallingConv::X86_FastCall || State.CC == llvm::CallingConv::X86_VectorCall, @@ -1624,11 +1624,14 @@ return false; case ABIArgInfo::Direct: case ABIArgInfo::Extend: - case ABIArgInfo::Expand: - case ABIArgInfo::CoerceAndExpand: if (Info.getInReg()) return false; return true; + case ABIArgInfo::Expand: + case ABIArgInfo::CoerceAndExpand: + // These are aggregate types which are never passed in registers when + // inalloca is involved. + return true; } llvm_unreachable("invalid enum"); }
_______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits