r324940 - [AArch64] Fixes for ARMv8.2-A FP16 scalar intrinsic - clang portion
Author: az Date: Mon Feb 12 13:26:06 2018 New Revision: 324940 URL: http://llvm.org/viewvc/llvm-project?rev=324940&view=rev Log: [AArch64] Fixes for ARMv8.2-A FP16 scalar intrinsic - clang portion https://reviews.llvm.org/D42993 Modified: cfe/trunk/lib/CodeGen/CGBuiltin.cpp cfe/trunk/test/CodeGen/aarch64-v8.2a-fp16-intrinsics.c Modified: cfe/trunk/lib/CodeGen/CGBuiltin.cpp URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/CodeGen/CGBuiltin.cpp?rev=324940&r1=324939&r2=324940&view=diff == --- cfe/trunk/lib/CodeGen/CGBuiltin.cpp (original) +++ cfe/trunk/lib/CodeGen/CGBuiltin.cpp Mon Feb 12 13:26:06 2018 @@ -4160,45 +4160,28 @@ static const NeonIntrinsicInfo AArch64SI NEONMAP1(vuqadds_s32, aarch64_neon_suqadd, Add1ArgType), // FP16 scalar intrinisics go here. NEONMAP1(vabdh_f16, aarch64_sisd_fabd, Add1ArgType), - NEONMAP1(vabsh_f16, aarch64_neon_abs, Add1ArgType), - NEONMAP1(vcageh_f16, aarch64_neon_facge, AddRetType | Add1ArgType), - NEONMAP1(vcagth_f16, aarch64_neon_facgt, AddRetType | Add1ArgType), - NEONMAP1(vcaleh_f16, aarch64_neon_facge, AddRetType | Add1ArgType), - NEONMAP1(vcalth_f16, aarch64_neon_facgt, AddRetType | Add1ArgType), - NEONMAP1(vcvtah_s16_f16, aarch64_neon_fcvtas, AddRetType | Add1ArgType), NEONMAP1(vcvtah_s32_f16, aarch64_neon_fcvtas, AddRetType | Add1ArgType), NEONMAP1(vcvtah_s64_f16, aarch64_neon_fcvtas, AddRetType | Add1ArgType), - NEONMAP1(vcvtah_u16_f16, aarch64_neon_fcvtau, AddRetType | Add1ArgType), NEONMAP1(vcvtah_u32_f16, aarch64_neon_fcvtau, AddRetType | Add1ArgType), NEONMAP1(vcvtah_u64_f16, aarch64_neon_fcvtau, AddRetType | Add1ArgType), - NEONMAP1(vcvth_n_f16_s16, aarch64_neon_vcvtfxs2fp, AddRetType | Add1ArgType), NEONMAP1(vcvth_n_f16_s32, aarch64_neon_vcvtfxs2fp, AddRetType | Add1ArgType), NEONMAP1(vcvth_n_f16_s64, aarch64_neon_vcvtfxs2fp, AddRetType | Add1ArgType), - NEONMAP1(vcvth_n_f16_u16, aarch64_neon_vcvtfxu2fp, AddRetType | Add1ArgType), NEONMAP1(vcvth_n_f16_u32, aarch64_neon_vcvtfxu2fp, AddRetType | Add1ArgType), NEONMAP1(vcvth_n_f16_u64, aarch64_neon_vcvtfxu2fp, AddRetType | Add1ArgType), - NEONMAP1(vcvth_n_s16_f16, aarch64_neon_vcvtfp2fxs, AddRetType | Add1ArgType), NEONMAP1(vcvth_n_s32_f16, aarch64_neon_vcvtfp2fxs, AddRetType | Add1ArgType), NEONMAP1(vcvth_n_s64_f16, aarch64_neon_vcvtfp2fxs, AddRetType | Add1ArgType), - NEONMAP1(vcvth_n_u16_f16, aarch64_neon_vcvtfp2fxu, AddRetType | Add1ArgType), NEONMAP1(vcvth_n_u32_f16, aarch64_neon_vcvtfp2fxu, AddRetType | Add1ArgType), NEONMAP1(vcvth_n_u64_f16, aarch64_neon_vcvtfp2fxu, AddRetType | Add1ArgType), - NEONMAP1(vcvtmh_s16_f16, aarch64_neon_fcvtms, AddRetType | Add1ArgType), NEONMAP1(vcvtmh_s32_f16, aarch64_neon_fcvtms, AddRetType | Add1ArgType), NEONMAP1(vcvtmh_s64_f16, aarch64_neon_fcvtms, AddRetType | Add1ArgType), - NEONMAP1(vcvtmh_u16_f16, aarch64_neon_fcvtmu, AddRetType | Add1ArgType), NEONMAP1(vcvtmh_u32_f16, aarch64_neon_fcvtmu, AddRetType | Add1ArgType), NEONMAP1(vcvtmh_u64_f16, aarch64_neon_fcvtmu, AddRetType | Add1ArgType), - NEONMAP1(vcvtnh_s16_f16, aarch64_neon_fcvtns, AddRetType | Add1ArgType), NEONMAP1(vcvtnh_s32_f16, aarch64_neon_fcvtns, AddRetType | Add1ArgType), NEONMAP1(vcvtnh_s64_f16, aarch64_neon_fcvtns, AddRetType | Add1ArgType), - NEONMAP1(vcvtnh_u16_f16, aarch64_neon_fcvtnu, AddRetType | Add1ArgType), NEONMAP1(vcvtnh_u32_f16, aarch64_neon_fcvtnu, AddRetType | Add1ArgType), NEONMAP1(vcvtnh_u64_f16, aarch64_neon_fcvtnu, AddRetType | Add1ArgType), - NEONMAP1(vcvtph_s16_f16, aarch64_neon_fcvtps, AddRetType | Add1ArgType), NEONMAP1(vcvtph_s32_f16, aarch64_neon_fcvtps, 
AddRetType | Add1ArgType), NEONMAP1(vcvtph_s64_f16, aarch64_neon_fcvtps, AddRetType | Add1ArgType), - NEONMAP1(vcvtph_u16_f16, aarch64_neon_fcvtpu, AddRetType | Add1ArgType), NEONMAP1(vcvtph_u32_f16, aarch64_neon_fcvtpu, AddRetType | Add1ArgType), NEONMAP1(vcvtph_u64_f16, aarch64_neon_fcvtpu, AddRetType | Add1ArgType), NEONMAP1(vmulxh_f16, aarch64_neon_fmulx, Add1ArgType), @@ -6188,6 +6171,9 @@ Value *CodeGenFunction::EmitAArch64Built // Handle non-overloaded intrinsics first. switch (BuiltinID) { default: break; + case NEON::BI__builtin_neon_vabsh_f16: +Ops.push_back(EmitScalarExpr(E->getArg(0))); +return EmitNeonCall(CGM.getIntrinsic(Intrinsic::fabs, HalfTy), Ops, "vabs"); case NEON::BI__builtin_neon_vldrq_p128: { llvm::Type *Int128Ty = llvm::Type::getIntNTy(getLLVMContext(), 128); llvm::Type *Int128PTy = llvm::PointerType::get(Int128Ty, 0); @@ -6282,6 +6268,101 @@ Value *CodeGenFunction::EmitAArch64Built return Builder.CreateFPToUI(Ops[0], Int64Ty); return Builder.CreateFPToSI(Ops[0], Int64Ty); } + case NEON::BI__builtin_neon_vcvtah_u16_f16: + case NEON::BI__builtin_neon_vcvtmh_u16_f16: + case NEON::BI__builtin_neon_vcvtnh_u16_f16: + case NEON::BI__builtin_neon_vcvtph_u16_f16: + case NE
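For reference, a minimal C sketch of how two of the scalar intrinsics touched by this patch are used from arm_fp16.h (example code is not part of the commit; assumes an AArch64 target with the FP16 extension, e.g. -march=armv8.2-a+fp16):

    #include <arm_fp16.h>

    float16_t abs_h(float16_t x) {
      return vabsh_f16(x);          /* now lowered through llvm.fabs on half, per the new case above */
    }

    int16_t cvt_nearest(float16_t x) {
      return vcvtah_s16_f16(x);     /* 16-bit-result conversion, now handled as a non-overloaded builtin */
    }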
r321301 - [AArch64] Enable fp16 data type for the Builtin for AArch64 only.
Author: az Date: Thu Dec 21 12:10:03 2017 New Revision: 321301 URL: http://llvm.org/viewvc/llvm-project?rev=321301&view=rev Log: [AArch64] Enable fp16 data type for the Builtin for AArch64 only. Differential Revision: https://reviews.llvm.org/D41360 Modified: cfe/trunk/lib/CodeGen/CGBuiltin.cpp cfe/trunk/lib/CodeGen/CodeGenFunction.h cfe/trunk/test/CodeGen/arm_neon_intrinsics.c Modified: cfe/trunk/lib/CodeGen/CGBuiltin.cpp URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/CodeGen/CGBuiltin.cpp?rev=321301&r1=321300&r2=321301&view=diff == --- cfe/trunk/lib/CodeGen/CGBuiltin.cpp (original) +++ cfe/trunk/lib/CodeGen/CGBuiltin.cpp Thu Dec 21 12:10:03 2017 @@ -3334,10 +3334,10 @@ static Value *EmitTargetArchBuiltinExpr( case llvm::Triple::armeb: case llvm::Triple::thumb: case llvm::Triple::thumbeb: -return CGF->EmitARMBuiltinExpr(BuiltinID, E); +return CGF->EmitARMBuiltinExpr(BuiltinID, E, Arch); case llvm::Triple::aarch64: case llvm::Triple::aarch64_be: -return CGF->EmitAArch64BuiltinExpr(BuiltinID, E); +return CGF->EmitAArch64BuiltinExpr(BuiltinID, E, Arch); case llvm::Triple::x86: case llvm::Triple::x86_64: return CGF->EmitX86BuiltinExpr(BuiltinID, E); @@ -3378,6 +3378,7 @@ Value *CodeGenFunction::EmitTargetBuilti static llvm::VectorType *GetNeonType(CodeGenFunction *CGF, NeonTypeFlags TypeFlags, + llvm::Triple::ArchType Arch, bool V1Ty=false) { int IsQuad = TypeFlags.isQuad(); switch (TypeFlags.getEltType()) { @@ -3388,7 +3389,12 @@ static llvm::VectorType *GetNeonType(Cod case NeonTypeFlags::Poly16: return llvm::VectorType::get(CGF->Int16Ty, V1Ty ? 1 : (4 << IsQuad)); case NeonTypeFlags::Float16: -return llvm::VectorType::get(CGF->HalfTy, V1Ty ? 1 : (4 << IsQuad)); +// FIXME: Only AArch64 backend can so far properly handle half types. +// Remove else part once ARM backend support for half is complete. +if (Arch == llvm::Triple::aarch64) + return llvm::VectorType::get(CGF->HalfTy, V1Ty ? 1 : (4 << IsQuad)); +else + return llvm::VectorType::get(CGF->Int16Ty, V1Ty ? 1 : (4 << IsQuad)); case NeonTypeFlags::Int32: return llvm::VectorType::get(CGF->Int32Ty, V1Ty ? 1 : (2 << IsQuad)); case NeonTypeFlags::Int64: @@ -4226,7 +4232,8 @@ static Value *EmitCommonNeonSISDBuiltinE Value *CodeGenFunction::EmitCommonNeonBuiltinExpr( unsigned BuiltinID, unsigned LLVMIntrinsic, unsigned AltLLVMIntrinsic, const char *NameHint, unsigned Modifier, const CallExpr *E, -SmallVectorImpl &Ops, Address PtrOp0, Address PtrOp1) { +SmallVectorImpl &Ops, Address PtrOp0, Address PtrOp1, +llvm::Triple::ArchType Arch) { // Get the last argument, which specifies the vector type. llvm::APSInt NeonTypeConst; const Expr *Arg = E->getArg(E->getNumArgs() - 1); @@ -4238,7 +4245,7 @@ Value *CodeGenFunction::EmitCommonNeonBu bool Usgn = Type.isUnsigned(); bool Quad = Type.isQuad(); - llvm::VectorType *VTy = GetNeonType(this, Type); + llvm::VectorType *VTy = GetNeonType(this, Type, Arch); llvm::Type *Ty = VTy; if (!Ty) return nullptr; @@ -4312,13 +4319,13 @@ Value *CodeGenFunction::EmitCommonNeonBu case NEON::BI__builtin_neon_vcvt_f32_v: case NEON::BI__builtin_neon_vcvtq_f32_v: Ops[0] = Builder.CreateBitCast(Ops[0], Ty); -Ty = GetNeonType(this, NeonTypeFlags(NeonTypeFlags::Float32, false, Quad)); +Ty = GetNeonType(this, NeonTypeFlags(NeonTypeFlags::Float32, false, Quad), Arch); return Usgn ?
Builder.CreateUIToFP(Ops[0], Ty, "vcvt") : Builder.CreateSIToFP(Ops[0], Ty, "vcvt"); case NEON::BI__builtin_neon_vcvt_f16_v: case NEON::BI__builtin_neon_vcvtq_f16_v: Ops[0] = Builder.CreateBitCast(Ops[0], Ty); -Ty = GetNeonType(this, NeonTypeFlags(NeonTypeFlags::Float16, false, Quad)); +Ty = GetNeonType(this, NeonTypeFlags(NeonTypeFlags::Float16, false, Quad), Arch); return Usgn ? Builder.CreateUIToFP(Ops[0], Ty, "vcvt") : Builder.CreateSIToFP(Ops[0], Ty, "vcvt"); case NEON::BI__builtin_neon_vcvt_n_f16_v: @@ -4887,7 +4894,8 @@ static bool HasExtraNeonArgument(unsigne } Value *CodeGenFunction::EmitARMBuiltinExpr(unsigned BuiltinID, - const CallExpr *E) { + const CallExpr *E, + llvm::Triple::ArchType Arch) { if (auto Hint = GetValueForARMHint(BuiltinID)) return Hint; @@ -5426,7 +5434,7 @@ Value *CodeGenFunction::EmitARMBuiltinEx bool usgn = Type.isUnsigned(); bool rightShift = false; - llvm::VectorType *VTy = GetNeonType(this, Type); + llvm::VectorType *VTy = GetNeonType(this, Type, Arch); llvm::Type *Ty = VTy; if (!Ty) return nullptr; @@ -5439,7 +5447,7 @@ Value
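A small C illustration of what this change affects (example code is not from the patch; vcvt_f32_f16 is a pre-existing NEON intrinsic and needs half-precision conversion support on 32-bit ARM, e.g. -mfpu=neon-fp16): a float16x4_t operand is now emitted as <4 x half> when targeting AArch64, while for ARM it is still passed as <4 x i16>, matching the FIXME in GetNeonType above.

    #include <arm_neon.h>

    /* On aarch64 the argument reaches the backend as <4 x half>;
       on arm it remains <4 x i16> until half support is complete. */
    float32x4_t widen_f16(float16x4_t v) {
      return vcvt_f32_f16(v);
    }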
r327189 - [ARM] Add ARMv8.2-A FP16 vector intrinsic
Author: az Date: Fri Mar 9 15:39:34 2018 New Revision: 327189 URL: http://llvm.org/viewvc/llvm-project?rev=327189&view=rev Log: [ARM] Add ARMv8.2-A FP16 vector intrinsic Add the fp16 neon vector intrinsic for ARM as described in the ARM ACLE document. Reviews in https://reviews.llvm.org/D43650 Added: cfe/trunk/test/CodeGen/arm-v8.2a-neon-intrinsics.c Modified: cfe/trunk/include/clang/Basic/arm_neon.td cfe/trunk/lib/Basic/Targets/ARM.cpp cfe/trunk/lib/Basic/Targets/ARM.h cfe/trunk/lib/CodeGen/CGBuiltin.cpp cfe/trunk/lib/CodeGen/CodeGenFunction.h cfe/trunk/test/CodeGen/arm_neon_intrinsics.c Modified: cfe/trunk/include/clang/Basic/arm_neon.td URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/include/clang/Basic/arm_neon.td?rev=327189&r1=327188&r2=327189&view=diff == --- cfe/trunk/include/clang/Basic/arm_neon.td (original) +++ cfe/trunk/include/clang/Basic/arm_neon.td Fri Mar 9 15:39:34 2018 @@ -1363,8 +1363,8 @@ def SCALAR_VDUP_LANE : IInst<"vdup_lane" def SCALAR_VDUP_LANEQ : IInst<"vdup_laneq", "sji", "ScSsSiSlSfSdSUcSUsSUiSUlSPcSPs">; } -// ARMv8.2-A FP16 intrinsics. -let ArchGuard = "defined(__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) && defined(__aarch64__)" in { +// ARMv8.2-A FP16 vector intrinsics for A32/A64. +let ArchGuard = "defined(__ARM_FEATURE_FP16_VECTOR_ARITHMETIC)" in { // ARMv8.2-A FP16 one-operand vector intrinsics. @@ -1395,14 +1395,12 @@ let ArchGuard = "defined(__ARM_FEATURE_F def FRINTPH : SInst<"vrndp", "dd", "hQh">; def FRINTMH : SInst<"vrndm", "dd", "hQh">; def FRINTXH : SInst<"vrndx", "dd", "hQh">; - def FRINTIH : SInst<"vrndi", "dd", "hQh">; // Misc. def VABSH: SInst<"vabs", "dd", "hQh">; def VNEGH: SOpInst<"vneg", "dd", "hQh", OP_NEG>; def VRECPEH : SInst<"vrecpe", "dd", "hQh">; def FRSQRTEH : SInst<"vrsqrte", "dd", "hQh">; - def FSQRTH : SInst<"vsqrt", "dd", "hQh">; // ARMv8.2-A FP16 two-operands vector intrinsics. @@ -1443,18 +1441,13 @@ let ArchGuard = "defined(__ARM_FEATURE_F // Multiplication/Division def VMULH : SOpInst<"vmul", "ddd", "hQh", OP_MUL>; - def MULXH : SInst<"vmulx", "ddd", "hQh">; - def FDIVH : IOpInst<"vdiv", "ddd", "hQh", OP_DIV>; // Pairwise addition - def VPADDH: SInst<"vpadd", "ddd", "hQh">; + def VPADDH: SInst<"vpadd", "ddd", "h">; // Pairwise Max/Min - def VPMAXH: SInst<"vpmax", "ddd", "hQh">; - def VPMINH: SInst<"vpmin", "ddd", "hQh">; - // Pairwise MaxNum/MinNum - def FMAXNMPH : SInst<"vpmaxnm", "ddd", "hQh">; - def FMINNMPH : SInst<"vpminnm", "ddd", "hQh">; + def VPMAXH: SInst<"vpmax", "ddd", "h">; + def VPMINH: SInst<"vpmin", "ddd", "h">; // Reciprocal/Sqrt def VRECPSH : SInst<"vrecps", "ddd", "hQh">; @@ -1468,6 +1461,63 @@ let ArchGuard = "defined(__ARM_FEATURE_F // ARMv8.2-A FP16 lane vector intrinsics. + // Mul lane + def VMUL_LANEH: IOpInst<"vmul_lane", "ddgi", "hQh", OP_MUL_LN>; + def VMUL_NH : IOpInst<"vmul_n", "dds", "hQh", OP_MUL_N>; + + // Data processing intrinsics - section 5 + + // Logical operations + let isHiddenLInst = 1 in + def VBSLH: SInst<"vbsl", "dudd", "hQh">; + + // Transposition operations + def VZIPH: WInst<"vzip", "2dd", "hQh">; + def VUZPH: WInst<"vuzp", "2dd", "hQh">; + def VTRNH: WInst<"vtrn", "2dd", "hQh">; + + + let ArchGuard = "!defined(__aarch64__)" in { +// Set all lanes to same value. +// Already implemented prior to ARMv8.2-A. 
+def VMOV_NH : WOpInst<"vmov_n", "ds", "hQh", OP_DUP>; +def VDUP_NH : WOpInst<"vdup_n", "ds", "hQh", OP_DUP>; +def VDUP_LANE1H : WOpInst<"vdup_lane", "dgi", "hQh", OP_DUP_LN>; + } + + // Vector Extract + def VEXTH : WInst<"vext", "dddi", "hQh">; + + // Reverse vector elements + def VREV64H: WOpInst<"vrev64", "dd", "hQh", OP_REV64>; +} + +// ARMv8.2-A FP16 vector intrinsics for A64 only. +let ArchGuard = "defined(__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) && defined(__aarch64__)" in { + + // Vector rounding + def FRINTIH : SInst<"vrndi", "dd", "hQh">; + + // Misc. + def FSQRTH : SInst<"vsqrt", "dd", "hQh">; + + // Multiplication/Division + def MULXH : SInst<"vmulx", "ddd", "hQh">; + def FDIVH : IOpInst<"vdiv", "ddd", "hQh", OP_DIV>; + + // Pairwise addition + def VPADDH1 : SInst<"vpadd", "ddd", "Qh">; + + // Pairwise Max/Min + def VPMAXH1 : SInst<"vpmax", "ddd", "Qh">; + def VPMINH1 : SInst<"vpmin", "ddd", "Qh">; + + // Pairwise MaxNum/MinNum + def FMAXNMPH : SInst<"vpmaxnm", "ddd", "hQh">; + def FMINNMPH : SInst<"vpminnm", "ddd", "hQh">; + + // ARMv8.2-A FP16 lane vector intrinsics. + // FMA lane def VFMA_LANEH : IInst<"vfma_lane", "dddgi", "hQh">; def VFMA_LANEQH : IInst<"vfma_laneq", "dddji", "hQh">; @@ -1488,
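A brief usage sketch of the common A32/A64 group defined above (example code is not part of the commit; assumes __ARM_FEATURE_FP16_VECTOR_ARITHMETIC, e.g. -march=armv8.2-a+fp16):

    #include <arm_neon.h>

    /* vabs and vmul are defined with "hQh", so both the 64-bit and
       128-bit forms are available on either A32 or A64. */
    float16x8_t abs_scale(float16x8_t v, float16x8_t s) {
      return vmulq_f16(vabsq_f16(v), s);
    }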
r328038 - [AArch64] Add vmulxh_lane fp16 vector intrinsic
Author: az Date: Tue Mar 20 13:37:31 2018 New Revision: 328038 URL: http://llvm.org/viewvc/llvm-project?rev=328038&view=rev Log: [AArch64] Add vmulxh_lane fp16 vector intrinsic https://reviews.llvm.org/D44591 Modified: cfe/trunk/include/clang/Basic/arm_neon.td cfe/trunk/lib/CodeGen/CGBuiltin.cpp cfe/trunk/test/CodeGen/aarch64-v8.2a-neon-intrinsics.c Modified: cfe/trunk/include/clang/Basic/arm_neon.td URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/include/clang/Basic/arm_neon.td?rev=328038&r1=328037&r2=328038&view=diff == --- cfe/trunk/include/clang/Basic/arm_neon.td (original) +++ cfe/trunk/include/clang/Basic/arm_neon.td Tue Mar 20 13:37:31 2018 @@ -1499,11 +1499,9 @@ let ArchGuard = "defined(__ARM_FEATURE_F def VMULX_LANEH : IOpInst<"vmulx_lane", "ddgi", "hQh", OP_MULX_LN>; def VMULX_LANEQH : IOpInst<"vmulx_laneq", "ddji", "hQh", OP_MULX_LN>; def VMULX_NH : IOpInst<"vmulx_n", "dds", "hQh", OP_MULX_N>; - // TODO: Scalar floating point multiply extended (scalar, by element) - // Below ones are commented out because they need vmulx_f16(float16_t, float16_t) - // which will be implemented later with fp16 scalar intrinsic (arm_fp16.h) - //def SCALAR_FMULX_LANEH : IOpInst<"vmulx_lane", "ssdi", "Sh", OP_SCALAR_MUL_LN>; - //def SCALAR_FMULX_LANEQH : IOpInst<"vmulx_laneq", "ssji", "Sh", OP_SCALAR_MUL_LN>; + // Scalar floating point mulx (scalar, by element) + def SCALAR_FMULX_LANEH : IInst<"vmulx_lane", "ssdi", "Sh">; + def SCALAR_FMULX_LANEQH : IInst<"vmulx_laneq", "ssji", "Sh">; // ARMv8.2-A FP16 reduction vector intrinsics. def VMAXVH : SInst<"vmaxv", "sd", "hQh">; Modified: cfe/trunk/lib/CodeGen/CGBuiltin.cpp URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/CodeGen/CGBuiltin.cpp?rev=328038&r1=328037&r2=328038&view=diff == --- cfe/trunk/lib/CodeGen/CGBuiltin.cpp (original) +++ cfe/trunk/lib/CodeGen/CGBuiltin.cpp Tue Mar 20 13:37:31 2018 @@ -7238,6 +7238,16 @@ Value *CodeGenFunction::EmitAArch64Built Int = Intrinsic::aarch64_neon_fmulx; return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vmulx"); } + case NEON::BI__builtin_neon_vmulxh_lane_f16: + case NEON::BI__builtin_neon_vmulxh_laneq_f16: { +// vmulx_lane should be mapped to Neon scalar mulx after +// extracting the scalar element +Ops.push_back(EmitScalarExpr(E->getArg(2))); +Ops[1] = Builder.CreateExtractElement(Ops[1], Ops[2], "extract"); +Ops.pop_back(); +Int = Intrinsic::aarch64_neon_fmulx; +return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vmulx"); + } case NEON::BI__builtin_neon_vmul_lane_v: case NEON::BI__builtin_neon_vmul_laneq_v: { // v1f64 vmul_lane should be mapped to Neon scalar mul lane Modified: cfe/trunk/test/CodeGen/aarch64-v8.2a-neon-intrinsics.c URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/CodeGen/aarch64-v8.2a-neon-intrinsics.c?rev=328038&r1=328037&r2=328038&view=diff == --- cfe/trunk/test/CodeGen/aarch64-v8.2a-neon-intrinsics.c (original) +++ cfe/trunk/test/CodeGen/aarch64-v8.2a-neon-intrinsics.c Tue Mar 20 13:37:31 2018 @@ -1223,27 +1223,25 @@ float16x8_t test_vmulxq_n_f16(float16x8_ return vmulxq_n_f16(a, b); } -/* TODO: Not implemented yet (needs scalar intrinsic from arm_fp16.h) -// CCHECK-LABEL: test_vmulxh_lane_f16 -// CCHECK: [[CONV0:%.*]] = fpext half %a to float -// CCHECK: [[CONV1:%.*]] = fpext half %{{.*}} to float -// CCHECK: [[MUL:%.*]] = fmul float [[CONV0:%.*]], [[CONV0:%.*]] -// CCHECK: [[CONV3:%.*]] = fptrunc float %mul to half -// CCHECK: ret half [[CONV3:%.*]] +// CHECK-LABEL: test_vmulxh_lane_f16 +// CHECK: [[TMP0:%.*]] = bitcast <4 x half> %b to <8 x i8> +// CHECK: 
[[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x half> +// CHECK: [[EXTR:%.*]] = extractelement <4 x half> [[TMP1]], i32 3 +// CHECK: [[MULX:%.*]] = call half @llvm.aarch64.neon.fmulx.f16(half %a, half [[EXTR]] +// CHECK: ret half [[MULX]] float16_t test_vmulxh_lane_f16(float16_t a, float16x4_t b) { return vmulxh_lane_f16(a, b, 3); } -// CCHECK-LABEL: test_vmulxh_laneq_f16 -// CCHECK: [[CONV0:%.*]] = fpext half %a to float -// CCHECK: [[CONV1:%.*]] = fpext half %{{.*}} to float -// CCHECK: [[MUL:%.*]] = fmul float [[CONV0:%.*]], [[CONV0:%.*]] -// CCHECK: [[CONV3:%.*]] = fptrunc float %mul to half -// CCHECK: ret half [[CONV3:%.*]] +// CHECK-LABEL: test_vmulxh_laneq_f16 +// CHECK: [[TMP0:%.*]] = bitcast <8 x half> %b to <16 x i8> +// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x half> +// CHECK: [[EXTR:%.*]] = extractelement <8 x half> [[TMP1]], i32 7 +// CHECK: [[MULX:%.*]] = call half @llvm.aarch64.neon.fmulx.f16(half %a, half [[EXTR]]) +// CHECK: ret half [[MULX]] float16_t test_vmulxh_laneq_f16(float16_t a, float16x8_t b) { return vmulxh_laneq_f16(a, b,
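The new lowering extracts the requested lane and then calls the scalar fmulx intrinsic, so the builtin behaves roughly like the hand-written equivalent below (a sketch, not the code the compiler emits verbatim; it assumes the usual ACLE spellings vget_lane_f16 and vmulxh_f16 are available on the target):

    #include <arm_neon.h>

    /* Same result as vmulxh_lane_f16(a, b, 3): pull out lane 3,
       then apply the scalar FP16 mulx. */
    float16_t mulx_lane3(float16_t a, float16x4_t b) {
      return vmulxh_f16(a, vget_lane_f16(b, 3));
    }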
r328277 - [ARM] Add ARMv8.2-A FP16 vector intrinsic
Author: az Date: Thu Mar 22 17:08:40 2018 New Revision: 328277 URL: http://llvm.org/viewvc/llvm-project?rev=328277&view=rev Log: [ARM] Add ARMv8.2-A FP16 vector intrinsic Putting back the code in commit r327189 that was reverted in r322737. The code is being committed in three stages and this one is the last stage: 1) r327455 fp16 feature flags, 2) r327836 pass half type or i16 based on FullFP16, and 3) the code here, which adds the front-end fp16 vector intrinsics for ARM. Differential revision https://reviews.llvm.org/D43650 Added: cfe/trunk/test/CodeGen/arm-v8.2a-neon-intrinsics.c Modified: cfe/trunk/include/clang/Basic/arm_neon.td cfe/trunk/lib/CodeGen/CGBuiltin.cpp Modified: cfe/trunk/include/clang/Basic/arm_neon.td URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/include/clang/Basic/arm_neon.td?rev=328277&r1=328276&r2=328277&view=diff == --- cfe/trunk/include/clang/Basic/arm_neon.td (original) +++ cfe/trunk/include/clang/Basic/arm_neon.td Thu Mar 22 17:08:40 2018 @@ -1363,8 +1363,8 @@ def SCALAR_VDUP_LANE : IInst<"vdup_lane" def SCALAR_VDUP_LANEQ : IInst<"vdup_laneq", "sji", "ScSsSiSlSfSdSUcSUsSUiSUlSPcSPs">; } -// ARMv8.2-A FP16 intrinsics. -let ArchGuard = "defined(__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) && defined(__aarch64__)" in { +// ARMv8.2-A FP16 vector intrinsics for A32/A64. +let ArchGuard = "defined(__ARM_FEATURE_FP16_VECTOR_ARITHMETIC)" in { // ARMv8.2-A FP16 one-operand vector intrinsics. @@ -1395,14 +1395,12 @@ let ArchGuard = "defined(__ARM_FEATURE_F def FRINTPH : SInst<"vrndp", "dd", "hQh">; def FRINTMH : SInst<"vrndm", "dd", "hQh">; def FRINTXH : SInst<"vrndx", "dd", "hQh">; - def FRINTIH : SInst<"vrndi", "dd", "hQh">; // Misc. def VABSH: SInst<"vabs", "dd", "hQh">; def VNEGH: SOpInst<"vneg", "dd", "hQh", OP_NEG>; def VRECPEH : SInst<"vrecpe", "dd", "hQh">; def FRSQRTEH : SInst<"vrsqrte", "dd", "hQh">; - def FSQRTH : SInst<"vsqrt", "dd", "hQh">; // ARMv8.2-A FP16 two-operands vector intrinsics. @@ -1443,18 +1441,13 @@ let ArchGuard = "defined(__ARM_FEATURE_F // Multiplication/Division def VMULH : SOpInst<"vmul", "ddd", "hQh", OP_MUL>; - def MULXH : SInst<"vmulx", "ddd", "hQh">; - def FDIVH : IOpInst<"vdiv", "ddd", "hQh", OP_DIV>; // Pairwise addition - def VPADDH: SInst<"vpadd", "ddd", "hQh">; + def VPADDH: SInst<"vpadd", "ddd", "h">; // Pairwise Max/Min - def VPMAXH: SInst<"vpmax", "ddd", "hQh">; - def VPMINH: SInst<"vpmin", "ddd", "hQh">; - // Pairwise MaxNum/MinNum - def FMAXNMPH : SInst<"vpmaxnm", "ddd", "hQh">; - def FMINNMPH : SInst<"vpminnm", "ddd", "hQh">; + def VPMAXH: SInst<"vpmax", "ddd", "h">; + def VPMINH: SInst<"vpmin", "ddd", "h">; // Reciprocal/Sqrt def VRECPSH : SInst<"vrecps", "ddd", "hQh">; @@ -1468,6 +1461,63 @@ let ArchGuard = "defined(__ARM_FEATURE_F // ARMv8.2-A FP16 lane vector intrinsics. + // Mul lane + def VMUL_LANEH: IOpInst<"vmul_lane", "ddgi", "hQh", OP_MUL_LN>; + def VMUL_NH : IOpInst<"vmul_n", "dds", "hQh", OP_MUL_N>; + + // Data processing intrinsics - section 5 + + // Logical operations + let isHiddenLInst = 1 in + def VBSLH: SInst<"vbsl", "dudd", "hQh">; + + // Transposition operations + def VZIPH: WInst<"vzip", "2dd", "hQh">; + def VUZPH: WInst<"vuzp", "2dd", "hQh">; + def VTRNH: WInst<"vtrn", "2dd", "hQh">; + + + let ArchGuard = "!defined(__aarch64__)" in { +// Set all lanes to same value. +// Already implemented prior to ARMv8.2-A. 
+def VMOV_NH : WOpInst<"vmov_n", "ds", "hQh", OP_DUP>; +def VDUP_NH : WOpInst<"vdup_n", "ds", "hQh", OP_DUP>; +def VDUP_LANE1H : WOpInst<"vdup_lane", "dgi", "hQh", OP_DUP_LN>; + } + + // Vector Extract + def VEXTH : WInst<"vext", "dddi", "hQh">; + + // Reverse vector elements + def VREV64H: WOpInst<"vrev64", "dd", "hQh", OP_REV64>; +} + +// ARMv8.2-A FP16 vector intrinsics for A64 only. +let ArchGuard = "defined(__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) && defined(__aarch64__)" in { + + // Vector rounding + def FRINTIH : SInst<"vrndi", "dd", "hQh">; + + // Misc. + def FSQRTH : SInst<"vsqrt", "dd", "hQh">; + + // Multiplication/Division + def MULXH : SInst<"vmulx", "ddd", "hQh">; + def FDIVH : IOpInst<"vdiv", "ddd", "hQh", OP_DIV>; + + // Pairwise addition + def VPADDH1 : SInst<"vpadd", "ddd", "Qh">; + + // Pairwise Max/Min + def VPMAXH1 : SInst<"vpmax", "ddd", "Qh">; + def VPMINH1 : SInst<"vpmin", "ddd", "Qh">; + + // Pairwise MaxNum/MinNum + def FMAXNMPH : SInst<"vpmaxnm", "ddd", "hQh">; + def FMINNMPH : SInst<"vpminnm", "ddd", "hQh">; + + // ARMv8.2-A FP16 lane vector intrinsics. + // FMA lane def VFMA_LANEH : IInst<"vfma_lane", "dddgi", "hQh">; def
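The practical effect of the split above, sketched in C (example code is not from the patch): the 64-bit pairwise forms live in the common A32/A64 group, while the 128-bit forms, along with vmulx, vdiv, vsqrt, and vrndi, are restricted to AArch64.

    #include <arm_neon.h>

    float16x4_t pairwise_add(float16x4_t a, float16x4_t b) {
      return vpadd_f16(a, b);        /* common group: VPADDH, "h" */
    }

    #ifdef __aarch64__
    float16x8_t pairwise_addq(float16x8_t a, float16x8_t b) {
      return vpaddq_f16(a, b);       /* A64-only group: VPADDH1, "Qh" */
    }
    #endif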
r323006 - [AArch64] Add ARMv8.2-A FP16 scalar intrinsics
Author: az Date: Fri Jan 19 15:11:18 2018 New Revision: 323006 URL: http://llvm.org/viewvc/llvm-project?rev=323006&view=rev Log: [AArch64] Add ARMv8.2-A FP16 scalar intrinsics https://reviews.llvm.org/D41792 Added: cfe/trunk/include/clang/Basic/arm_fp16.td cfe/trunk/include/clang/Basic/arm_neon_incl.td cfe/trunk/test/CodeGen/aarch64-v8.2a-fp16-intrinsics.c Modified: cfe/trunk/include/clang/Basic/BuiltinsNEON.def cfe/trunk/include/clang/Basic/CMakeLists.txt cfe/trunk/include/clang/Basic/arm_neon.td cfe/trunk/lib/Basic/Targets/AArch64.cpp cfe/trunk/lib/CodeGen/CGBuiltin.cpp cfe/trunk/lib/Headers/CMakeLists.txt cfe/trunk/lib/Headers/module.modulemap cfe/trunk/lib/Sema/SemaChecking.cpp cfe/trunk/utils/TableGen/NeonEmitter.cpp cfe/trunk/utils/TableGen/TableGen.cpp cfe/trunk/utils/TableGen/TableGenBackends.h Modified: cfe/trunk/include/clang/Basic/BuiltinsNEON.def URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/include/clang/Basic/BuiltinsNEON.def?rev=323006&r1=323005&r2=323006&view=diff == --- cfe/trunk/include/clang/Basic/BuiltinsNEON.def (original) +++ cfe/trunk/include/clang/Basic/BuiltinsNEON.def Fri Jan 19 15:11:18 2018 @@ -16,6 +16,7 @@ #define GET_NEON_BUILTINS #include "clang/Basic/arm_neon.inc" +#include "clang/Basic/arm_fp16.inc" #undef GET_NEON_BUILTINS #undef BUILTIN Modified: cfe/trunk/include/clang/Basic/CMakeLists.txt URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/include/clang/Basic/CMakeLists.txt?rev=323006&r1=323005&r2=323006&view=diff == --- cfe/trunk/include/clang/Basic/CMakeLists.txt (original) +++ cfe/trunk/include/clang/Basic/CMakeLists.txt Fri Jan 19 15:11:18 2018 @@ -46,3 +46,7 @@ clang_tablegen(arm_neon.inc -gen-arm-neo -I ${CMAKE_CURRENT_SOURCE_DIR}/../../ SOURCE arm_neon.td TARGET ClangARMNeon) +clang_tablegen(arm_fp16.inc -gen-arm-neon-sema + -I ${CMAKE_CURRENT_SOURCE_DIR}/../../ + SOURCE arm_fp16.td + TARGET ClangARMFP16) Added: cfe/trunk/include/clang/Basic/arm_fp16.td URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/include/clang/Basic/arm_fp16.td?rev=323006&view=auto == --- cfe/trunk/include/clang/Basic/arm_fp16.td (added) +++ cfe/trunk/include/clang/Basic/arm_fp16.td Fri Jan 19 15:11:18 2018 @@ -0,0 +1,131 @@ +//===--- arm_fp16.td - ARM FP16 compiler interface ===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===--===// +// +// This file defines the TableGen definitions from which the ARM FP16 header +// file will be generated. +// +//===--===// + +include "arm_neon_incl.td" + +// ARMv8.2-A FP16 intrinsics. 
+let ArchGuard = "defined(__ARM_FEATURE_FP16_SCALAR_ARITHMETIC) && defined(__aarch64__)" in { + + // Negate + def VNEGSH : SInst<"vneg", "ss", "Sh">; + + // Reciprocal/Sqrt + def SCALAR_FRECPSH : IInst<"vrecps", "sss", "Sh">; + def FSQRTSH : SInst<"vsqrt", "ss", "Sh">; + def SCALAR_FRSQRTSH : IInst<"vrsqrts", "sss", "Sh">; + + // Reciprocal Estimate + def SCALAR_FRECPEH : IInst<"vrecpe", "ss", "Sh">; + + // Reciprocal Exponent + def SCALAR_FRECPXH : IInst<"vrecpx", "ss", "Sh">; + + // Reciprocal Square Root Estimate + def SCALAR_FRSQRTEH : IInst<"vrsqrte", "ss", "Sh">; + + // Rounding + def FRINTZ_S64H : SInst<"vrnd", "ss", "Sh">; + def FRINTA_S64H : SInst<"vrnda", "ss", "Sh">; + def FRINTI_S64H : SInst<"vrndi", "ss", "Sh">; + def FRINTM_S64H : SInst<"vrndm", "ss", "Sh">; + def FRINTN_S64H : SInst<"vrndn", "ss", "Sh">; + def FRINTP_S64H : SInst<"vrndp", "ss", "Sh">; + def FRINTX_S64H : SInst<"vrndx", "ss", "Sh">; + + // Conversion + def SCALAR_SCVTFSH : SInst<"vcvth_f16", "Ys", "silUsUiUl">; + def SCALAR_FCVTZSH : SInst<"vcvt_s16", "$s", "Sh">; + def SCALAR_FCVTZSH1 : SInst<"vcvt_s32", "Is", "Sh">; + def SCALAR_FCVTZSH2 : SInst<"vcvt_s64", "Ls", "Sh">; + def SCALAR_FCVTZUH : SInst<"vcvt_u16", "bs", "Sh">; + def SCALAR_FCVTZUH1 : SInst<"vcvt_u32", "Us", "Sh">; + def SCALAR_FCVTZUH2 : SInst<"vcvt_u64", "Os", "Sh">; + def SCALAR_FCVTASH : SInst<"vcvta_s16", "$s", "Sh">; + def SCALAR_FCVTASH1 : SInst<"vcvta_s32", "Is", "Sh">; + def SCALAR_FCVTASH2 : SInst<"vcvta_s64", "Ls", "Sh">; + def SCALAR_FCVTAUH : SInst<"vcvta_u16", "bs", "Sh">; + def SCALAR_FCVTAUH1 : SInst<"vcvta_u32", "Us", "Sh">; + def SCALAR_FCVTAUH2 : SInst<"vcvta_u64", "Os", "Sh">; + def SCALAR_FCVTMSH : SInst<"vcvtm_s16", "$s", "Sh">; + def SCALAR_FCVTMSH1 : SInst<"vcvtm_s32", "Is", "Sh">; + def SCALAR_FCVTM