Author: miyuki Date: Tue Jun 18 07:34:27 2019 New Revision: 363687 URL: http://llvm.org/viewvc/llvm-project?rev=363687&view=rev Log: [CodeGen][ARM] Fix FP16 vector coercion
Summary: When a function argument or return type is a homogeneous aggregate which contains an FP16 vector but the target does not support FP16 operations natively, the type must be converted into an array of integer vectors by the front end (otherwise LLVM will handle FP16 vectors incorrectly by scalarizing them and promoting FP16 to float, see https://reviews.llvm.org/D50507). Currently the logic for checking whether or not a given homogeneous aggregate contains FP16 vectors is incorrect: it only looks at the type of the first vector. This patch fixes the issue by adding a new method ARMABIInfo::containsAnyFP16Vectors and using it. The traversal logic of this method is largely the same as in ABIInfo::isHomogeneousAggregate. Reviewers: eli.friedman, olista01, ostannard Reviewed By: ostannard Subscribers: ostannard, john.brawn, javed.absar, kristof.beyls, pbarrio, cfe-commits Tags: #clang Differential Revision: https://reviews.llvm.org/D63437 Added: cfe/trunk/test/CodeGen/arm-vfp16-arguments2.cpp Modified: cfe/trunk/lib/CodeGen/TargetInfo.cpp Modified: cfe/trunk/lib/CodeGen/TargetInfo.cpp URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/CodeGen/TargetInfo.cpp?rev=363687&r1=363686&r2=363687&view=diff ============================================================================== --- cfe/trunk/lib/CodeGen/TargetInfo.cpp (original) +++ cfe/trunk/lib/CodeGen/TargetInfo.cpp Tue Jun 18 07:34:27 2019 @@ -5607,6 +5607,7 @@ private: uint64_t Members) const; ABIArgInfo coerceIllegalVector(QualType Ty) const; bool isIllegalVectorType(QualType Ty) const; + bool containsAnyFP16Vectors(QualType Ty) const; bool isHomogeneousAggregateBaseType(QualType Ty) const override; bool isHomogeneousAggregateSmallEnough(const Type *Ty, @@ -5806,9 +5807,7 @@ ABIArgInfo ARMABIInfo::classifyHomogeneo // Base can be a floating-point or a vector. 
if (const VectorType *VT = Base->getAs<VectorType>()) { // FP16 vectors should be converted to integer vectors - if (!getTarget().hasLegalHalfType() && - (VT->getElementType()->isFloat16Type() || - VT->getElementType()->isHalfType())) { + if (!getTarget().hasLegalHalfType() && containsAnyFP16Vectors(Ty)) { uint64_t Size = getContext().getTypeSize(VT); llvm::Type *NewVecTy = llvm::VectorType::get( llvm::Type::getInt32Ty(getVMContext()), Size / 32); @@ -6169,6 +6168,37 @@ bool ARMABIInfo::isIllegalVectorType(Qua return false; } +/// Return true if a type contains any 16-bit floating point vectors +bool ARMABIInfo::containsAnyFP16Vectors(QualType Ty) const { + if (const ConstantArrayType *AT = getContext().getAsConstantArrayType(Ty)) { + uint64_t NElements = AT->getSize().getZExtValue(); + if (NElements == 0) + return false; + return containsAnyFP16Vectors(AT->getElementType()); + } else if (const RecordType *RT = Ty->getAs<RecordType>()) { + const RecordDecl *RD = RT->getDecl(); + + // If this is a C++ record, check the bases first. 
+ if (const CXXRecordDecl *CXXRD = dyn_cast<CXXRecordDecl>(RD)) + if (llvm::any_of(CXXRD->bases(), [this](const CXXBaseSpecifier &B) { + return containsAnyFP16Vectors(B.getType()); + })) + return true; + + if (llvm::any_of(RD->fields(), [this](FieldDecl *FD) { + return FD && containsAnyFP16Vectors(FD->getType()); + })) + return true; + + return false; + } else { + if (const VectorType *VT = Ty->getAs<VectorType>()) + return (VT->getElementType()->isFloat16Type() || + VT->getElementType()->isHalfType()); + return false; + } +} + bool ARMABIInfo::isLegalVectorTypeForSwift(CharUnits vectorSize, llvm::Type *eltTy, unsigned numElts) const { Added: cfe/trunk/test/CodeGen/arm-vfp16-arguments2.cpp URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/CodeGen/arm-vfp16-arguments2.cpp?rev=363687&view=auto ============================================================================== --- cfe/trunk/test/CodeGen/arm-vfp16-arguments2.cpp (added) +++ cfe/trunk/test/CodeGen/arm-vfp16-arguments2.cpp Tue Jun 18 07:34:27 2019 @@ -0,0 +1,63 @@ +// RUN: %clang_cc1 -triple armv7a--none-eabi -target-abi aapcs \ +// RUN: -mfloat-abi soft -target-feature +neon -emit-llvm -o - -O1 %s \ +// RUN: | FileCheck %s --check-prefix=CHECK-SOFT +// RUN: %clang_cc1 -triple armv7a--none-eabi -target-abi aapcs \ +// RUN: -mfloat-abi hard -target-feature +neon -emit-llvm -o - -O1 %s \ +// RUN: | FileCheck %s --check-prefix=CHECK-HARD +// RUN: %clang_cc1 -triple armv7a--none-eabi -target-abi aapcs \ +// RUN: -mfloat-abi hard -target-feature +neon -target-feature +fullfp16 \ +// RUN: -emit-llvm -o - -O1 %s \ +// RUN: | FileCheck %s --check-prefix=CHECK-FULL + +typedef float float32_t; +typedef __fp16 float16_t; +typedef __attribute__((neon_vector_type(2))) float32_t float32x2_t; +typedef __attribute__((neon_vector_type(4))) float16_t float16x4_t; + +struct S1 { + float32x2_t M1; + float16x4_t M2; +}; + +struct B1 { float32x2_t M; }; +struct B2 { float16x4_t M; }; + +struct S2 : B1, B2 {}; + +struct S3 : 
B1 { + float16x4_t M; +}; + +struct S4 : B1 { + B2 M[1]; +}; + +// S5 does not contain any FP16 vectors +struct S5 : B1 { + B1 M[1]; +}; + +// CHECK-SOFT: define void @_Z2f12S1(%struct.S1* noalias nocapture sret %agg.result, [2 x i64] %s1.coerce) +// CHECK-HARD: define arm_aapcs_vfpcc [2 x <2 x i32>] @_Z2f12S1([2 x <2 x i32>] returned %s1.coerce) +// CHECK-FULL: define arm_aapcs_vfpcc %struct.S1 @_Z2f12S1(%struct.S1 returned %s1.coerce) +struct S1 f1(struct S1 s1) { return s1; } + +// CHECK-SOFT: define void @_Z2f22S2(%struct.S2* noalias nocapture sret %agg.result, [4 x i32] %s2.coerce) +// CHECK-HARD: define arm_aapcs_vfpcc [2 x <2 x i32>] @_Z2f22S2([2 x <2 x i32>] returned %s2.coerce) +// CHECK-FULL: define arm_aapcs_vfpcc %struct.S2 @_Z2f22S2(%struct.S2 returned %s2.coerce) +struct S2 f2(struct S2 s2) { return s2; } + +// CHECK-SOFT: define void @_Z2f32S3(%struct.S3* noalias nocapture sret %agg.result, [2 x i64] %s3.coerce) +// CHECK-HARD: define arm_aapcs_vfpcc [2 x <2 x i32>] @_Z2f32S3([2 x <2 x i32>] returned %s3.coerce) +// CHECK-FULL: define arm_aapcs_vfpcc %struct.S3 @_Z2f32S3(%struct.S3 returned %s3.coerce) +struct S3 f3(struct S3 s3) { return s3; } + +// CHECK-SOFT: define void @_Z2f42S4(%struct.S4* noalias nocapture sret %agg.result, [2 x i64] %s4.coerce) +// CHECK-HARD: define arm_aapcs_vfpcc [2 x <2 x i32>] @_Z2f42S4([2 x <2 x i32>] returned %s4.coerce) +// CHECK-FULL: define arm_aapcs_vfpcc %struct.S4 @_Z2f42S4(%struct.S4 returned %s4.coerce) +struct S4 f4(struct S4 s4) { return s4; } + +// CHECK-SOFT: define void @_Z2f52S5(%struct.S5* noalias nocapture sret %agg.result, [2 x i64] %s5.coerce) +// CHECK-HARD: define arm_aapcs_vfpcc %struct.S5 @_Z2f52S5(%struct.S5 returned %s5.coerce) +// CHECK-FULL: define arm_aapcs_vfpcc %struct.S5 @_Z2f52S5(%struct.S5 returned %s5.coerce) +struct S5 f5(struct S5 s5) { return s5; } _______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org 
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits