zahiraam updated this revision to Diff 430115. zahiraam marked an inline comment as done. Herald added subscribers: llvm-commits, hiraditya. Herald added projects: LLVM, All.
CHANGES SINCE LAST ACTION https://reviews.llvm.org/D113107/new/ https://reviews.llvm.org/D113107 Files: clang/docs/LanguageExtensions.rst clang/docs/ReleaseNotes.rst clang/lib/Basic/Targets/X86.cpp clang/lib/CodeGen/CGExprComplex.cpp clang/lib/CodeGen/CGExprScalar.cpp clang/lib/CodeGen/CGStmt.cpp clang/lib/CodeGen/CodeGenFunction.h clang/test/CodeGen/X86/Float16-arithmetic.c clang/test/CodeGen/X86/Float16-complex.c clang/test/CodeGen/X86/avx512fp16-complex.c clang/test/Sema/Float16.c clang/test/Sema/conversion-target-dep.c clang/test/SemaCXX/Float16.cpp llvm/lib/CodeGen/TargetLoweringBase.cpp
Index: llvm/lib/CodeGen/TargetLoweringBase.cpp =================================================================== --- llvm/lib/CodeGen/TargetLoweringBase.cpp +++ llvm/lib/CodeGen/TargetLoweringBase.cpp @@ -188,8 +188,8 @@ } } } else { - setLibcallName(RTLIB::FPEXT_F16_F32, "__gnu_h2f_ieee"); - setLibcallName(RTLIB::FPROUND_F32_F16, "__gnu_f2h_ieee"); + setLibcallName(RTLIB::FPEXT_F16_F32, "__gnu_h2f_ieee"); + setLibcallName(RTLIB::FPROUND_F32_F16, "__gnu_f2h_ieee"); } if (TT.isGNUEnvironment() || TT.isOSFuchsia() || Index: clang/test/SemaCXX/Float16.cpp =================================================================== --- clang/test/SemaCXX/Float16.cpp +++ clang/test/SemaCXX/Float16.cpp @@ -1,18 +1,10 @@ // RUN: %clang_cc1 -fsyntax-only -verify -triple x86_64-linux-pc %s -// RUN: %clang_cc1 -fsyntax-only -verify -triple spir-unknown-unknown %s -DHAVE -// RUN: %clang_cc1 -fsyntax-only -verify -triple armv7a-linux-gnu %s -DHAVE -// RUN: %clang_cc1 -fsyntax-only -verify -triple aarch64-linux-gnu %s -DHAVE +// RUN: %clang_cc1 -fsyntax-only -verify -triple spir-unknown-unknown %s +// RUN: %clang_cc1 -fsyntax-only -verify -triple armv7a-linux-gnu %s +// RUN: %clang_cc1 -fsyntax-only -verify -triple aarch64-linux-gnu %s -#ifdef HAVE // expected-no-diagnostics -#endif // HAVE -#ifndef HAVE -// expected-error@+2{{_Float16 is not supported on this target}} -#endif // !HAVE _Float16 f; -#ifndef HAVE -// expected-error@+2{{invalid suffix 'F16' on floating constant}} -#endif // !HAVE const auto g = 1.1F16; Index: clang/test/Sema/conversion-target-dep.c =================================================================== --- clang/test/Sema/conversion-target-dep.c +++ clang/test/Sema/conversion-target-dep.c @@ -6,7 +6,7 @@ long double ld; double d; -_Float16 f16; // x86-error {{_Float16 is not supported on this target}} +_Float16 f16; int main(void) { ld = d; // x86-warning {{implicit conversion increases floating-point precision: 'double' to 'long double'}} Index: clang/test/Sema/Float16.c =================================================================== --- clang/test/Sema/Float16.c +++ clang/test/Sema/Float16.c @@ -1,18 +1,12 @@ // RUN: %clang_cc1 -fsyntax-only -verify -triple x86_64-linux-pc %s -// RUN: %clang_cc1 -fsyntax-only -verify -triple x86_64-linux-pc -target-feature +avx512fp16 %s -DHAVE -// RUN: %clang_cc1 -fsyntax-only -verify -triple spir-unknown-unknown %s -DHAVE -// RUN: %clang_cc1 -fsyntax-only -verify -triple armv7a-linux-gnu %s -DHAVE -// RUN: %clang_cc1 -fsyntax-only -verify -triple aarch64-linux-gnu %s -DHAVE +// RUN: %clang_cc1 -fsyntax-only -verify -triple x86_64-linux-pc -target-feature +avx512fp16 %s +// RUN: %clang_cc1 -fsyntax-only -verify -triple spir-unknown-unknown %s +// RUN: %clang_cc1 -fsyntax-only -verify -triple armv7a-linux-gnu %s +// RUN: %clang_cc1 -fsyntax-only -verify -triple aarch64-linux-gnu %s -#ifndef HAVE -// expected-error@+2{{_Float16 is not supported on this target}} -#endif // HAVE -_Float16 f; - -#ifdef HAVE _Complex _Float16 a; void builtin_complex(void) { _Float16 a = 0; (void)__builtin_complex(a, a); // expected-error {{'_Complex _Float16' is invalid}} } -#endif + Index: clang/test/CodeGen/X86/Float16-complex.c =================================================================== --- clang/test/CodeGen/X86/Float16-complex.c +++ clang/test/CodeGen/X86/Float16-complex.c @@ -1,4 +1,5 @@ // RUN: %clang_cc1 %s -O0 -emit-llvm -triple x86_64-unknown-unknown -target-feature +avx512fp16 -o - | FileCheck %s --check-prefix=X86 +// RUN: %clang_cc1 %s -O0 -emit-llvm -triple x86_64-unknown-unknown -o - | FileCheck %s --check-prefixes=X86,X86-FP16 _Float16 _Complex add_half_rr(_Float16 a, _Float16 b) { // X86-LABEL: @add_half_rr( @@ -119,8 +120,8 @@ } _Float16 _Complex div_half_rc(_Float16 a, _Float16 _Complex b) { // X86-LABEL: @div_half_rc( - // X86-NOT: fdiv - // X86: call {{.*}} @__divhc3( + // X86-FP16: fdiv + // X86-FP16: fdiv // X86: ret return a / b; } Index: clang/test/CodeGen/X86/Float16-arithmetic.c =================================================================== --- /dev/null +++ clang/test/CodeGen/X86/Float16-arithmetic.c @@ -0,0 +1,71 @@ +// RUN: %clang_cc1 -triple x86_64-unknown-unknown \ +// RUN: -emit-llvm -o - %s | FileCheck %s --check-prefixes=CHECK + +_Float16 add1(_Float16 a, _Float16 b) { + // CHECK-LABEL: define {{.*}} half @add1 + // CHECK: alloca half + // CHECK: alloca half + // CHECK: store half {{.*}}, ptr {{.*}} + // CHECK: store half {{.*}}, ptr {{.*}} + // CHECK: load half, ptr {{.*}} + // CHECK: fpext half {{.*}} to float + // CHECK: load half, ptr {{.*}} + // CHECK: fpext half {{.*}} to float + // CHECK: fadd float {{.*}}, {{.*}} + // CHECK: fptrunc float {{.*}} to half + // CHECK: ret half + return a + b; +} + +_Float16 add2(_Float16 a, _Float16 b, _Float16 c) { + // CHECK-LABEL: define dso_local half @add2 + // CHECK: alloca half + // CHECK: alloca half + // CHECK: alloca half + // CHECK: store half {{.*}}, ptr {{.*}} + // CHECK: store half {{.*}}, ptr {{.*}} + // CHECK: store half {{.*}}, ptr {{.*}} + // CHECK: load half, ptr {{.*}} + // CHECK: fpext half {{.*}} to float + // CHECK: load half, ptr {{.*}} + // CHECK: fpext half {{.*}} to float + // CHECK: fadd float {{.*}}, {{.*}} + // CHECK: load half, ptr {{.*}} + // CHECK: fpext half {{.*}} to float + // CHECK: fadd float {{.*}}, {{.*}} + // CHECK: fptrunc float {{.*}} to half + // CHECK: ret half + return a + b + c; +} + +_Float16 div(_Float16 a, _Float16 b) { + // CHECK-LABEL: define dso_local half @div + // CHECK: alloca half + // CHECK: alloca half + // CHECK: store half {{.*}}, ptr {{.*}} + // CHECK: store half {{.*}}, ptr {{.*}} + // CHECK: load half, ptr {{.*}} + // CHECK: fpext half {{.*}} to float + // CHECK: load half, ptr {{.*}} + // CHECK: fpext half {{.*}} to float + // CHECK: fdiv float {{.*}}, {{.*}} + // CHECK: fptrunc float {{.*}} to half + // CHECK: ret half + return a / b; +} + +_Float16 mul(_Float16 a, _Float16 b) { + // CHECK-LABEL: define dso_local half @mul + // CHECK: alloca half + // CHECK: alloca half + // CHECK: store half {{.*}}, ptr {{.*}} + // CHECK: store half {{.*}}, ptr {{.*}} + // CHECK: load half, ptr{{.*}} + // CHECK: fpext half {{.*}} to float + // CHECK: load half, ptr {{.*}} + // CHECK: fpext half {{.*}} to float + // CHECK: fmul float {{.*}}, {{.*}} + // CHECK: fptrunc float {{.*}} to half + // CHECK: ret half + return a * b; +} Index: clang/lib/CodeGen/CodeGenFunction.h =================================================================== --- clang/lib/CodeGen/CodeGenFunction.h +++ clang/lib/CodeGen/CodeGenFunction.h @@ -4391,6 +4391,8 @@ /// make sure it survives garbage collection until this point. void EmitExtendGCLifetime(llvm::Value *object); + llvm::Value *EmitFloat16IntoLValue(const Expr *E, LValue dest); + /// EmitComplexExpr - Emit the computation of the specified expression of /// complex type, returning the result. ComplexPairTy EmitComplexExpr(const Expr *E, Index: clang/lib/CodeGen/CGStmt.cpp =================================================================== --- clang/lib/CodeGen/CGStmt.cpp +++ clang/lib/CodeGen/CGStmt.cpp @@ -1326,7 +1326,13 @@ } else { switch (getEvaluationKind(RV->getType())) { case TEK_Scalar: - Builder.CreateStore(EmitScalarExpr(RV), ReturnValue); + if (RV->getType()->isFloat16Type() && !getTarget().hasLegalHalfType() && + getTarget().getTriple().isX86()) + Builder.CreateStore(EmitFloat16IntoLValue( + RV, MakeAddrLValue(ReturnValue, RV->getType())), + ReturnValue); + else + Builder.CreateStore(EmitScalarExpr(RV), ReturnValue); break; case TEK_Complex: EmitComplexExprIntoLValue(RV, MakeAddrLValue(ReturnValue, RV->getType()), Index: clang/lib/CodeGen/CGExprScalar.cpp =================================================================== --- clang/lib/CodeGen/CGExprScalar.cpp +++ clang/lib/CodeGen/CGExprScalar.cpp @@ -348,6 +348,10 @@ llvm::Type *SrcTy, llvm::Type *DstTy, ScalarConversionOpts Opts); Value * + EmitFloat16Conversion(Value *Src, QualType SrcType, QualType DstType, + SourceLocation Loc, + ScalarConversionOpts Opts = ScalarConversionOpts()); + Value * EmitScalarConversion(Value *Src, QualType SrcTy, QualType DstTy, SourceLocation Loc, ScalarConversionOpts Opts = ScalarConversionOpts()); @@ -867,6 +871,8 @@ } Value *VisitAsTypeExpr(AsTypeExpr *CE); Value *VisitAtomicExpr(AtomicExpr *AE); + + Value *ScalarExprEmitter::EmitStoreFloat16(Value *val, LValue dest); }; } // end anonymous namespace. @@ -1258,6 +1264,24 @@ return Builder.CreateFPExt(Src, DstTy, "conv"); } +Value *ScalarExprEmitter::EmitFloat16Conversion(Value *Src, QualType SrcType, + QualType DstType, + SourceLocation Loc, + ScalarConversionOpts Opts) { + llvm::Type *SrcTy = Src->getType(); + SrcType = CGF.getContext().getCanonicalType(SrcType); + DstType = CGF.getContext().getCanonicalType(DstType); + llvm::Type *DstTy = ConvertType(DstType); + Value *Res = nullptr; + DstTy = CGF.FloatTy; + if (SrcType->isFloat16Type() && !CGF.getTarget().hasLegalHalfType()) { + if (DstType->isFloat16Type()) { + Res = EmitScalarCast(Src, SrcType, DstType, SrcTy, DstTy, Opts); + } + } + return Res; +} + /// Emit a conversion from the specified type to the specified destination type, /// both of which are LLVM scalar types. Value *ScalarExprEmitter::EmitScalarConversion(Value *Src, QualType SrcType, @@ -1299,6 +1323,7 @@ SrcType = CGF.getContext().getCanonicalType(SrcType); DstType = CGF.getContext().getCanonicalType(DstType); + if (SrcType == DstType) return Src; if (DstType->isVoidType()) return nullptr; @@ -1313,6 +1338,14 @@ llvm::Type *DstTy = ConvertType(DstType); + if (SrcType->isFloat16Type() && !CGF.getTarget().hasLegalHalfType()) { + if (DstType->isFloat16Type()) { + Src = Builder.CreateFPExt(OrigSrc, CGF.CGM.FloatTy, "conv"); + SrcTy = OrigSrc->getType(); + DstTy = CGF.FloatTy; + } + } + // Cast from half through float if half isn't a native type. if (SrcType->isHalfType() && !CGF.getContext().getLangOpts().NativeHalfType) { // Cast to FP using the intrinsic if the half type itself isn't supported. @@ -2285,6 +2318,12 @@ case CK_LValueToRValue: assert(CGF.getContext().hasSameUnqualifiedType(E->getType(), DestTy)); assert(E->isGLValue() && "lvalue-to-rvalue applied to r-value!"); + + if (E->getType()->isFloat16Type() && !CGF.getTarget().hasLegalHalfType() && CGF.getTarget().getTriple().isX86()) { + return EmitFloat16Conversion(Visit(E), + E->getType(), DestTy, + CE->getExprLoc()); + } return Visit(const_cast<Expr*>(E)); case CK_IntegralToPointer: { @@ -4864,6 +4903,24 @@ // Entry Point into this File //===----------------------------------------------------------------------===// +Value *ScalarExprEmitter::EmitStoreFloat16(Value *val, LValue dest) { + QualType destTy = dest.getType(); + llvm::Type *valTy = val->getType(); + llvm::Type *DstTy = ConvertType(destTy); + unsigned srcsize = valTy->getPrimitiveSizeInBits(); + unsigned dstsize = DstTy->getPrimitiveSizeInBits(); + // // Truncate a float to a half. + if (dstsize < srcsize) + return Builder.CreateFPTrunc(val, DstTy, "conv"); + return val; +} + +Value *CodeGenFunction::EmitFloat16IntoLValue(const Expr *E, LValue dest) { + ScalarExprEmitter Emitter(*this, false); + Value *Val = Emitter.Visit(const_cast<Expr *>(E)); + return Emitter.EmitStoreFloat16(Val, dest); +} + /// Emit the computation of the specified expression of scalar type, ignoring /// the result. Value *CodeGenFunction::EmitScalarExpr(const Expr *E, bool IgnoreResultAssign) { Index: clang/lib/CodeGen/CGExprComplex.cpp =================================================================== --- clang/lib/CodeGen/CGExprComplex.cpp +++ clang/lib/CodeGen/CGExprComplex.cpp @@ -590,7 +590,18 @@ llvm::Value *ResR, *ResI; if (Op.LHS.first->getType()->isFloatingPointTy()) { - ResR = Builder.CreateFAdd(Op.LHS.first, Op.RHS.first, "add.r"); + if (!CGF.getTarget().hasLegalHalfType() && + CGF.getTarget().getTriple().isX86()) { + if (Op.LHS.first->getType() != Op.RHS.first->getType()) { + if (Op.LHS.first->getType()->isHalfTy()) + ResR = Builder.CreateFAdd(Op.LHS.first, Op.LHS.first, "add.r"); + else if (Op.RHS.first->getType()->isHalfTy()) + ResR = Builder.CreateFAdd(Op.RHS.first, Op.RHS.first, "add.r"); + } else + ResR = Builder.CreateFAdd(Op.LHS.first, Op.RHS.first, "add.r"); + } else { + ResR = Builder.CreateFAdd(Op.LHS.first, Op.RHS.first, "add.r"); + } if (Op.LHS.second && Op.RHS.second) ResI = Builder.CreateFAdd(Op.LHS.second, Op.RHS.second, "add.i"); else @@ -608,7 +619,16 @@ ComplexPairTy ComplexExprEmitter::EmitBinSub(const BinOpInfo &Op) { llvm::Value *ResR, *ResI; if (Op.LHS.first->getType()->isFloatingPointTy()) { - ResR = Builder.CreateFSub(Op.LHS.first, Op.RHS.first, "sub.r"); + if (Op.LHS.first->getType() != Op.RHS.first->getType()) { + if (Op.LHS.first->getType()->isHalfTy()) + ResR = Builder.CreateFSub(Op.LHS.first, Op.LHS.first, "sub.r"); + else if (Op.RHS.first->getType()->isHalfTy()) + ResR = Builder.CreateFSub(Op.RHS.first, Op.RHS.first, "sub.r"); + else + ResR = Builder.CreateFSub(Op.LHS.first, Op.RHS.first, "sub.r"); + } else { + ResR = Builder.CreateFSub(Op.LHS.first, Op.RHS.first, "sub.r"); + } if (Op.LHS.second && Op.RHS.second) ResI = Builder.CreateFSub(Op.LHS.second, Op.RHS.second, "sub.i"); else @@ -764,14 +784,38 @@ assert((Op.LHS.second || Op.RHS.second) && "At least one operand must be complex!"); - // If either of the operands is a real rather than a complex, the - // imaginary component is ignored when computing the real component of the - // result. - ResR = Builder.CreateFMul(Op.LHS.first, Op.RHS.first, "mul.rl"); - - ResI = Op.LHS.second - ? Builder.CreateFMul(Op.LHS.second, Op.RHS.first, "mul.il") - : Builder.CreateFMul(Op.LHS.first, Op.RHS.second, "mul.ir"); + if ((Op.LHS.first->getType()->isHalfTy() || + Op.RHS.first->getType()->isHalfTy()) && + !CGF.getTarget().hasLegalHalfType() && + CGF.getTarget().getTriple().isX86()) { + if (Op.LHS.first->getType() != Op.RHS.first->getType()) + if (Op.LHS.first->getType()->isHalfTy()) + ResR = Builder.CreateFMul(Op.LHS.first, Op.LHS.first, "mul.rl"); + else if (Op.RHS.first->getType()->isHalfTy()) + ResR = Builder.CreateFMul(Op.RHS.first, Op.RHS.first, "mul.rl"); + else + ResR = Builder.CreateFMul(Op.LHS.first, Op.RHS.first, "mul.rl"); + + if (Op.LHS.first->getType() != Op.RHS.first->getType()) + if (Op.LHS.first->getType()->isHalfTy()) + ResI = Builder.CreateFMul(Op.LHS.first, Op.LHS.first, "mul.il"); + else if (Op.RHS.first->getType()->isHalfTy()) + ResI = Builder.CreateFMul(Op.RHS.first, Op.RHS.first, "mul.il"); + else + ResI = + Op.LHS.second + ? Builder.CreateFMul(Op.LHS.second, Op.RHS.first, "mul.il") + : Builder.CreateFMul(Op.LHS.first, Op.RHS.second, "mul.ir"); + } else { + // If either of the operands is a real rather than a complex, the + // imaginary component is ignored when computing the real component of the + // result. + ResR = Builder.CreateFMul(Op.LHS.first, Op.RHS.first, "mul.rl"); + + ResI = Op.LHS.second + ? Builder.CreateFMul(Op.LHS.second, Op.RHS.first, "mul.il") + : Builder.CreateFMul(Op.LHS.first, Op.RHS.second, "mul.ir"); + } } else { assert(Op.LHS.second && Op.RHS.second && "Both operands of integer complex operators must be complex!"); @@ -794,6 +838,22 @@ llvm::Value *DSTr, *DSTi; if (LHSr->getType()->isFloatingPointTy()) { + if (!CGF.getTarget().hasLegalHalfType() && + CGF.getTarget().getTriple().isX86()) { + if (LHSr->getType() != RHSr->getType()) { + if (LHSr->getType()->isHalfTy()) { + DSTr = Builder.CreateFDiv(LHSr, LHSr); + DSTi = Builder.CreateFDiv(LHSi, LHSi); + } else if (RHSr->getType()->isHalfTy()) { + DSTr = Builder.CreateFDiv(RHSr, RHSr); + DSTi = Builder.CreateFDiv(RHSr, RHSr); + } else { + DSTr = Builder.CreateFDiv(LHSr, RHSr); + DSTi = Builder.CreateFDiv(LHSi, RHSr); + } + return ComplexPairTy(DSTr, DSTi); + } + } // If we have a complex operand on the RHS and FastMath is not allowed, we // delegate to a libcall to handle all of the complexities and minimize // underflow/overflow cases. When FastMath is allowed we construct the @@ -845,8 +905,21 @@ } else { assert(LHSi && "Can have at most one non-complex operand!"); - DSTr = Builder.CreateFDiv(LHSr, RHSr); - DSTi = Builder.CreateFDiv(LHSi, RHSr); + if ((LHSr->getType()->isHalfTy() || RHSr->getType()->isHalfTy()) && + !CGF.getTarget().hasLegalHalfType() && + CGF.getTarget().getTriple().isX86()) { + if (LHSr->getType() != RHSr->getType()) + if (LHSr->getType()->isHalfTy()) { + DSTr = Builder.CreateFDiv(LHSr, LHSr); + DSTi = Builder.CreateFDiv(LHSi, LHSi); + } else if (RHSr->getType()->isHalfTy()) { + DSTr = Builder.CreateFDiv(RHSr, RHSr); + DSTi = Builder.CreateFDiv(RHSr, RHSr); + } + } else { + DSTr = Builder.CreateFDiv(LHSr, RHSr); + DSTi = Builder.CreateFDiv(LHSi, RHSr); + } } } else { assert(Op.LHS.second && Op.RHS.second && Index: clang/lib/Basic/Targets/X86.cpp =================================================================== --- clang/lib/Basic/Targets/X86.cpp +++ clang/lib/Basic/Targets/X86.cpp @@ -240,6 +240,7 @@ } else if (Feature == "+avx512fp16") { HasAVX512FP16 = true; HasFloat16 = true; + HasLegalHalfType = true; } else if (Feature == "+avx512pf") { HasAVX512PF = true; } else if (Feature == "+avx512dq") { @@ -369,6 +370,8 @@ .Default(NoXOP); XOPLevel = std::max(XOPLevel, XLevel); } + // Turn on _float16 for x86 (feature sse2) + HasFloat16 = SSELevel >= SSE2; // LLVM doesn't have a separate switch for fpmath, so only accept it if it // matches the selected sse level. Index: clang/docs/ReleaseNotes.rst =================================================================== --- clang/docs/ReleaseNotes.rst +++ clang/docs/ReleaseNotes.rst @@ -421,6 +421,9 @@ DWARF Support in Clang ---------------------- +- Support for ``AVX512-FP16`` instructions has been added. +- Support for ``_Float16`` type has been added. + Arm and AArch64 Support in Clang -------------------------------- Index: clang/docs/LanguageExtensions.rst =================================================================== --- clang/docs/LanguageExtensions.rst +++ clang/docs/LanguageExtensions.rst @@ -743,7 +743,7 @@ * 64-bit ARM (AArch64) * AMDGPU * SPIR -* X86 (Only available under feature AVX512-FP16) +* X86 (Enabled with feature SSE2 and up) ``_Float16`` will be supported on more targets as they define ABIs for it.
_______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits