r324940 - [AArch64] Fixes for ARMv8.2-A FP16 scalar intrinsic - clang portion

2018-02-12 Thread Abderrazek Zaafrani via cfe-commits
Author: az
Date: Mon Feb 12 13:26:06 2018
New Revision: 324940

URL: http://llvm.org/viewvc/llvm-project?rev=324940&view=rev
Log:
[AArch64] Fixes for ARMv8.2-A FP16 scalar intrinsic - clang portion

https://reviews.llvm.org/D42993

Modified:
cfe/trunk/lib/CodeGen/CGBuiltin.cpp
cfe/trunk/test/CodeGen/aarch64-v8.2a-fp16-intrinsics.c

Modified: cfe/trunk/lib/CodeGen/CGBuiltin.cpp
URL: 
http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/CodeGen/CGBuiltin.cpp?rev=324940&r1=324939&r2=324940&view=diff
==
--- cfe/trunk/lib/CodeGen/CGBuiltin.cpp (original)
+++ cfe/trunk/lib/CodeGen/CGBuiltin.cpp Mon Feb 12 13:26:06 2018
@@ -4160,45 +4160,28 @@ static const NeonIntrinsicInfo AArch64SI
   NEONMAP1(vuqadds_s32, aarch64_neon_suqadd, Add1ArgType),
   // FP16 scalar intrinisics go here.
   NEONMAP1(vabdh_f16, aarch64_sisd_fabd, Add1ArgType),
-  NEONMAP1(vabsh_f16, aarch64_neon_abs, Add1ArgType),
-  NEONMAP1(vcageh_f16, aarch64_neon_facge, AddRetType | Add1ArgType),
-  NEONMAP1(vcagth_f16, aarch64_neon_facgt, AddRetType | Add1ArgType),
-  NEONMAP1(vcaleh_f16, aarch64_neon_facge, AddRetType | Add1ArgType),
-  NEONMAP1(vcalth_f16, aarch64_neon_facgt, AddRetType | Add1ArgType),
-  NEONMAP1(vcvtah_s16_f16, aarch64_neon_fcvtas, AddRetType | Add1ArgType),
   NEONMAP1(vcvtah_s32_f16, aarch64_neon_fcvtas, AddRetType | Add1ArgType),
   NEONMAP1(vcvtah_s64_f16, aarch64_neon_fcvtas, AddRetType | Add1ArgType),
-  NEONMAP1(vcvtah_u16_f16, aarch64_neon_fcvtau, AddRetType | Add1ArgType),
   NEONMAP1(vcvtah_u32_f16, aarch64_neon_fcvtau, AddRetType | Add1ArgType),
   NEONMAP1(vcvtah_u64_f16, aarch64_neon_fcvtau, AddRetType | Add1ArgType),
-  NEONMAP1(vcvth_n_f16_s16, aarch64_neon_vcvtfxs2fp, AddRetType | Add1ArgType),
   NEONMAP1(vcvth_n_f16_s32, aarch64_neon_vcvtfxs2fp, AddRetType | Add1ArgType),
   NEONMAP1(vcvth_n_f16_s64, aarch64_neon_vcvtfxs2fp, AddRetType | Add1ArgType),
-  NEONMAP1(vcvth_n_f16_u16, aarch64_neon_vcvtfxu2fp, AddRetType | Add1ArgType),
   NEONMAP1(vcvth_n_f16_u32, aarch64_neon_vcvtfxu2fp, AddRetType | Add1ArgType),
   NEONMAP1(vcvth_n_f16_u64, aarch64_neon_vcvtfxu2fp, AddRetType | Add1ArgType),
-  NEONMAP1(vcvth_n_s16_f16, aarch64_neon_vcvtfp2fxs, AddRetType | Add1ArgType),
   NEONMAP1(vcvth_n_s32_f16, aarch64_neon_vcvtfp2fxs, AddRetType | Add1ArgType),
   NEONMAP1(vcvth_n_s64_f16, aarch64_neon_vcvtfp2fxs, AddRetType | Add1ArgType),
-  NEONMAP1(vcvth_n_u16_f16, aarch64_neon_vcvtfp2fxu, AddRetType | Add1ArgType),
   NEONMAP1(vcvth_n_u32_f16, aarch64_neon_vcvtfp2fxu, AddRetType | Add1ArgType),
   NEONMAP1(vcvth_n_u64_f16, aarch64_neon_vcvtfp2fxu, AddRetType | Add1ArgType),
-  NEONMAP1(vcvtmh_s16_f16, aarch64_neon_fcvtms, AddRetType | Add1ArgType),
   NEONMAP1(vcvtmh_s32_f16, aarch64_neon_fcvtms, AddRetType | Add1ArgType),
   NEONMAP1(vcvtmh_s64_f16, aarch64_neon_fcvtms, AddRetType | Add1ArgType),
-  NEONMAP1(vcvtmh_u16_f16, aarch64_neon_fcvtmu, AddRetType | Add1ArgType),
   NEONMAP1(vcvtmh_u32_f16, aarch64_neon_fcvtmu, AddRetType | Add1ArgType),
   NEONMAP1(vcvtmh_u64_f16, aarch64_neon_fcvtmu, AddRetType | Add1ArgType),
-  NEONMAP1(vcvtnh_s16_f16, aarch64_neon_fcvtns, AddRetType | Add1ArgType),
   NEONMAP1(vcvtnh_s32_f16, aarch64_neon_fcvtns, AddRetType | Add1ArgType),
   NEONMAP1(vcvtnh_s64_f16, aarch64_neon_fcvtns, AddRetType | Add1ArgType),
-  NEONMAP1(vcvtnh_u16_f16, aarch64_neon_fcvtnu, AddRetType | Add1ArgType),
   NEONMAP1(vcvtnh_u32_f16, aarch64_neon_fcvtnu, AddRetType | Add1ArgType),
   NEONMAP1(vcvtnh_u64_f16, aarch64_neon_fcvtnu, AddRetType | Add1ArgType),
-  NEONMAP1(vcvtph_s16_f16, aarch64_neon_fcvtps, AddRetType | Add1ArgType),
   NEONMAP1(vcvtph_s32_f16, aarch64_neon_fcvtps, AddRetType | Add1ArgType),
   NEONMAP1(vcvtph_s64_f16, aarch64_neon_fcvtps, AddRetType | Add1ArgType),
-  NEONMAP1(vcvtph_u16_f16, aarch64_neon_fcvtpu, AddRetType | Add1ArgType),
   NEONMAP1(vcvtph_u32_f16, aarch64_neon_fcvtpu, AddRetType | Add1ArgType),
   NEONMAP1(vcvtph_u64_f16, aarch64_neon_fcvtpu, AddRetType | Add1ArgType),
   NEONMAP1(vmulxh_f16, aarch64_neon_fmulx, Add1ArgType),
@@ -6188,6 +6171,9 @@ Value *CodeGenFunction::EmitAArch64Built
   // Handle non-overloaded intrinsics first.
   switch (BuiltinID) {
   default: break;
+  case NEON::BI__builtin_neon_vabsh_f16:
+Ops.push_back(EmitScalarExpr(E->getArg(0)));
+return EmitNeonCall(CGM.getIntrinsic(Intrinsic::fabs, HalfTy), Ops, 
"vabs");
   case NEON::BI__builtin_neon_vldrq_p128: {
 llvm::Type *Int128Ty = llvm::Type::getIntNTy(getLLVMContext(), 128);
 llvm::Type *Int128PTy = llvm::PointerType::get(Int128Ty, 0);
@@ -6282,6 +6268,101 @@ Value *CodeGenFunction::EmitAArch64Built
   return Builder.CreateFPToUI(Ops[0], Int64Ty);
 return Builder.CreateFPToSI(Ops[0], Int64Ty);
   }
+  case NEON::BI__builtin_neon_vcvtah_u16_f16:
+  case NEON::BI__builtin_neon_vcvtmh_u16_f16:
+  case NEON::BI__builtin_neon_vcvtnh_u16_f16:
+  case NEON::BI__builtin_neon_vcvtph_u16_f16:
+  case NE

r321301 - [AArch64] Enable fp16 data type for the Builtin for AArch64 only.

2017-12-21 Thread Abderrazek Zaafrani via cfe-commits
Author: az
Date: Thu Dec 21 12:10:03 2017
New Revision: 321301

URL: http://llvm.org/viewvc/llvm-project?rev=321301&view=rev
Log:
[AArch64] Enable fp16 data type for the Builtin for AArch64 only.

Differential Revision: https://reviews.llvm.org/D41360

Modified:
cfe/trunk/lib/CodeGen/CGBuiltin.cpp
cfe/trunk/lib/CodeGen/CodeGenFunction.h
cfe/trunk/test/CodeGen/arm_neon_intrinsics.c

Modified: cfe/trunk/lib/CodeGen/CGBuiltin.cpp
URL: 
http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/CodeGen/CGBuiltin.cpp?rev=321301&r1=321300&r2=321301&view=diff
==
--- cfe/trunk/lib/CodeGen/CGBuiltin.cpp (original)
+++ cfe/trunk/lib/CodeGen/CGBuiltin.cpp Thu Dec 21 12:10:03 2017
@@ -3334,10 +3334,10 @@ static Value *EmitTargetArchBuiltinExpr(
   case llvm::Triple::armeb:
   case llvm::Triple::thumb:
   case llvm::Triple::thumbeb:
-return CGF->EmitARMBuiltinExpr(BuiltinID, E);
+return CGF->EmitARMBuiltinExpr(BuiltinID, E, Arch);
   case llvm::Triple::aarch64:
   case llvm::Triple::aarch64_be:
-return CGF->EmitAArch64BuiltinExpr(BuiltinID, E);
+return CGF->EmitAArch64BuiltinExpr(BuiltinID, E, Arch);
   case llvm::Triple::x86:
   case llvm::Triple::x86_64:
 return CGF->EmitX86BuiltinExpr(BuiltinID, E);
@@ -3378,6 +3378,7 @@ Value *CodeGenFunction::EmitTargetBuilti
 
 static llvm::VectorType *GetNeonType(CodeGenFunction *CGF,
  NeonTypeFlags TypeFlags,
+ llvm::Triple::ArchType Arch,
  bool V1Ty=false) {
   int IsQuad = TypeFlags.isQuad();
   switch (TypeFlags.getEltType()) {
@@ -3388,7 +3389,12 @@ static llvm::VectorType *GetNeonType(Cod
   case NeonTypeFlags::Poly16:
 return llvm::VectorType::get(CGF->Int16Ty, V1Ty ? 1 : (4 << IsQuad));
   case NeonTypeFlags::Float16:
-return llvm::VectorType::get(CGF->HalfTy, V1Ty ? 1 : (4 << IsQuad));
+// FIXME: Only AArch64 backend can so far properly handle half types.
+// Remove else part once ARM backend support for half is complete.
+if (Arch == llvm::Triple::aarch64)
+  return llvm::VectorType::get(CGF->HalfTy, V1Ty ? 1 : (4 << IsQuad));
+else
+  return llvm::VectorType::get(CGF->Int16Ty, V1Ty ? 1 : (4 << IsQuad));
   case NeonTypeFlags::Int32:
 return llvm::VectorType::get(CGF->Int32Ty, V1Ty ? 1 : (2 << IsQuad));
   case NeonTypeFlags::Int64:
@@ -4226,7 +4232,8 @@ static Value *EmitCommonNeonSISDBuiltinE
 Value *CodeGenFunction::EmitCommonNeonBuiltinExpr(
 unsigned BuiltinID, unsigned LLVMIntrinsic, unsigned AltLLVMIntrinsic,
 const char *NameHint, unsigned Modifier, const CallExpr *E,
-SmallVectorImpl &Ops, Address PtrOp0, Address PtrOp1) {
+SmallVectorImpl &Ops, Address PtrOp0, Address PtrOp1,
+llvm::Triple::ArchType Arch) {
   // Get the last argument, which specifies the vector type.
   llvm::APSInt NeonTypeConst;
   const Expr *Arg = E->getArg(E->getNumArgs() - 1);
@@ -4238,7 +4245,7 @@ Value *CodeGenFunction::EmitCommonNeonBu
   bool Usgn = Type.isUnsigned();
   bool Quad = Type.isQuad();
 
-  llvm::VectorType *VTy = GetNeonType(this, Type);
+  llvm::VectorType *VTy = GetNeonType(this, Type, Arch);
   llvm::Type *Ty = VTy;
   if (!Ty)
 return nullptr;
@@ -4312,13 +4319,13 @@ Value *CodeGenFunction::EmitCommonNeonBu
   case NEON::BI__builtin_neon_vcvt_f32_v:
   case NEON::BI__builtin_neon_vcvtq_f32_v:
 Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
-Ty = GetNeonType(this, NeonTypeFlags(NeonTypeFlags::Float32, false, Quad));
+Ty = GetNeonType(this, NeonTypeFlags(NeonTypeFlags::Float32, false, Quad), 
Arch);
 return Usgn ? Builder.CreateUIToFP(Ops[0], Ty, "vcvt")
 : Builder.CreateSIToFP(Ops[0], Ty, "vcvt");
   case NEON::BI__builtin_neon_vcvt_f16_v:
   case NEON::BI__builtin_neon_vcvtq_f16_v:
 Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
-Ty = GetNeonType(this, NeonTypeFlags(NeonTypeFlags::Float16, false, Quad));
+Ty = GetNeonType(this, NeonTypeFlags(NeonTypeFlags::Float16, false, Quad), 
Arch);
 return Usgn ? Builder.CreateUIToFP(Ops[0], Ty, "vcvt")
 : Builder.CreateSIToFP(Ops[0], Ty, "vcvt");
   case NEON::BI__builtin_neon_vcvt_n_f16_v:
@@ -4887,7 +4894,8 @@ static bool HasExtraNeonArgument(unsigne
 }
 
 Value *CodeGenFunction::EmitARMBuiltinExpr(unsigned BuiltinID,
-   const CallExpr *E) {
+   const CallExpr *E,
+   llvm::Triple::ArchType Arch) {
   if (auto Hint = GetValueForARMHint(BuiltinID))
 return Hint;
 
@@ -5426,7 +5434,7 @@ Value *CodeGenFunction::EmitARMBuiltinEx
   bool usgn = Type.isUnsigned();
   bool rightShift = false;
 
-  llvm::VectorType *VTy = GetNeonType(this, Type);
+  llvm::VectorType *VTy = GetNeonType(this, Type, Arch);
   llvm::Type *Ty = VTy;
   if (!Ty)
 return nullptr;
@@ -5439,7 +5447,7 @@ Value

r327189 - [ARM] Add ARMv8.2-A FP16 vector intrinsic

2018-03-09 Thread Abderrazek Zaafrani via cfe-commits
Author: az
Date: Fri Mar  9 15:39:34 2018
New Revision: 327189

URL: http://llvm.org/viewvc/llvm-project?rev=327189&view=rev
Log:
[ARM] Add ARMv8.2-A FP16 vector intrinsic

Add the fp16 neon vector intrinsic for ARM as described in the ARM ACLE 
document.

Reviews in https://reviews.llvm.org/D43650

Added:
cfe/trunk/test/CodeGen/arm-v8.2a-neon-intrinsics.c
Modified:
cfe/trunk/include/clang/Basic/arm_neon.td
cfe/trunk/lib/Basic/Targets/ARM.cpp
cfe/trunk/lib/Basic/Targets/ARM.h
cfe/trunk/lib/CodeGen/CGBuiltin.cpp
cfe/trunk/lib/CodeGen/CodeGenFunction.h
cfe/trunk/test/CodeGen/arm_neon_intrinsics.c

Modified: cfe/trunk/include/clang/Basic/arm_neon.td
URL: 
http://llvm.org/viewvc/llvm-project/cfe/trunk/include/clang/Basic/arm_neon.td?rev=327189&r1=327188&r2=327189&view=diff
==
--- cfe/trunk/include/clang/Basic/arm_neon.td (original)
+++ cfe/trunk/include/clang/Basic/arm_neon.td Fri Mar  9 15:39:34 2018
@@ -1363,8 +1363,8 @@ def SCALAR_VDUP_LANE : IInst<"vdup_lane"
 def SCALAR_VDUP_LANEQ : IInst<"vdup_laneq", "sji", 
"ScSsSiSlSfSdSUcSUsSUiSUlSPcSPs">;
 }
 
-// ARMv8.2-A FP16 intrinsics.
-let ArchGuard = "defined(__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) && 
defined(__aarch64__)" in {
+// ARMv8.2-A FP16 vector intrinsics for A32/A64.
+let ArchGuard = "defined(__ARM_FEATURE_FP16_VECTOR_ARITHMETIC)" in {
 
   // ARMv8.2-A FP16 one-operand vector intrinsics.
 
@@ -1395,14 +1395,12 @@ let ArchGuard = "defined(__ARM_FEATURE_F
   def FRINTPH  : SInst<"vrndp", "dd", "hQh">;
   def FRINTMH  : SInst<"vrndm", "dd", "hQh">;
   def FRINTXH  : SInst<"vrndx", "dd", "hQh">;
-  def FRINTIH  : SInst<"vrndi", "dd", "hQh">;
 
   // Misc.
   def VABSH: SInst<"vabs", "dd", "hQh">;
   def VNEGH: SOpInst<"vneg", "dd", "hQh", OP_NEG>;
   def VRECPEH  : SInst<"vrecpe", "dd", "hQh">;
   def FRSQRTEH : SInst<"vrsqrte", "dd", "hQh">;
-  def FSQRTH   : SInst<"vsqrt", "dd", "hQh">;
 
   // ARMv8.2-A FP16 two-operands vector intrinsics.
 
@@ -1443,18 +1441,13 @@ let ArchGuard = "defined(__ARM_FEATURE_F
 
   // Multiplication/Division
   def VMULH : SOpInst<"vmul", "ddd", "hQh", OP_MUL>;
-  def MULXH : SInst<"vmulx", "ddd", "hQh">;
-  def FDIVH : IOpInst<"vdiv", "ddd",  "hQh", OP_DIV>;
 
   // Pairwise addition
-  def VPADDH: SInst<"vpadd", "ddd", "hQh">;
+  def VPADDH: SInst<"vpadd", "ddd", "h">;
 
   // Pairwise Max/Min
-  def VPMAXH: SInst<"vpmax", "ddd", "hQh">;
-  def VPMINH: SInst<"vpmin", "ddd", "hQh">;
-  // Pairwise MaxNum/MinNum
-  def FMAXNMPH  : SInst<"vpmaxnm", "ddd", "hQh">;
-  def FMINNMPH  : SInst<"vpminnm", "ddd", "hQh">;
+  def VPMAXH: SInst<"vpmax", "ddd", "h">;
+  def VPMINH: SInst<"vpmin", "ddd", "h">;
 
   // Reciprocal/Sqrt
   def VRECPSH   : SInst<"vrecps", "ddd", "hQh">;
@@ -1468,6 +1461,63 @@ let ArchGuard = "defined(__ARM_FEATURE_F
 
   // ARMv8.2-A FP16 lane vector intrinsics.
 
+  // Mul lane
+  def VMUL_LANEH: IOpInst<"vmul_lane", "ddgi", "hQh", OP_MUL_LN>;
+  def VMUL_NH   : IOpInst<"vmul_n", "dds", "hQh", OP_MUL_N>;
+
+  // Data processing intrinsics - section 5
+
+  // Logical operations
+  let isHiddenLInst = 1 in
+  def VBSLH: SInst<"vbsl", "dudd", "hQh">;
+
+  // Transposition operations
+  def VZIPH: WInst<"vzip", "2dd", "hQh">;
+  def VUZPH: WInst<"vuzp", "2dd", "hQh">;
+  def VTRNH: WInst<"vtrn", "2dd", "hQh">;
+
+
+  let ArchGuard = "!defined(__aarch64__)" in {
+// Set all lanes to same value.
+// Already implemented prior to ARMv8.2-A.
+def VMOV_NH  : WOpInst<"vmov_n", "ds", "hQh", OP_DUP>;
+def VDUP_NH  : WOpInst<"vdup_n", "ds", "hQh", OP_DUP>;
+def VDUP_LANE1H : WOpInst<"vdup_lane", "dgi", "hQh", OP_DUP_LN>;
+  }
+
+  // Vector Extract
+  def VEXTH  : WInst<"vext", "dddi", "hQh">;
+
+  // Reverse vector elements
+  def VREV64H: WOpInst<"vrev64", "dd", "hQh", OP_REV64>;
+}
+
+// ARMv8.2-A FP16 vector intrinsics for A64 only.
+let ArchGuard = "defined(__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) && 
defined(__aarch64__)" in {
+
+  // Vector rounding
+  def FRINTIH  : SInst<"vrndi", "dd", "hQh">;
+
+  // Misc.
+  def FSQRTH   : SInst<"vsqrt", "dd", "hQh">;
+
+  // Multiplication/Division
+  def MULXH : SInst<"vmulx", "ddd", "hQh">;
+  def FDIVH : IOpInst<"vdiv", "ddd",  "hQh", OP_DIV>;
+
+  // Pairwise addition
+  def VPADDH1   : SInst<"vpadd", "ddd", "Qh">;
+
+  // Pairwise Max/Min
+  def VPMAXH1   : SInst<"vpmax", "ddd", "Qh">;
+  def VPMINH1   : SInst<"vpmin", "ddd", "Qh">;
+
+  // Pairwise MaxNum/MinNum
+  def FMAXNMPH  : SInst<"vpmaxnm", "ddd", "hQh">;
+  def FMINNMPH  : SInst<"vpminnm", "ddd", "hQh">;
+
+  // ARMv8.2-A FP16 lane vector intrinsics.
+
   // FMA lane
   def VFMA_LANEH   : IInst<"vfma_lane", "dddgi", "hQh">;
   def VFMA_LANEQH  : IInst<"vfma_laneq", "dddji", "hQh">;
@@ -1488,

r328038 - [AArch64] Add vmulxh_lane fp16 vector intrinsic

2018-03-20 Thread Abderrazek Zaafrani via cfe-commits
Author: az
Date: Tue Mar 20 13:37:31 2018
New Revision: 328038

URL: http://llvm.org/viewvc/llvm-project?rev=328038&view=rev
Log:
[AArch64] Add vmulxh_lane fp16 vector intrinsic

https://reviews.llvm.org/D44591

Modified:
cfe/trunk/include/clang/Basic/arm_neon.td
cfe/trunk/lib/CodeGen/CGBuiltin.cpp
cfe/trunk/test/CodeGen/aarch64-v8.2a-neon-intrinsics.c

Modified: cfe/trunk/include/clang/Basic/arm_neon.td
URL: 
http://llvm.org/viewvc/llvm-project/cfe/trunk/include/clang/Basic/arm_neon.td?rev=328038&r1=328037&r2=328038&view=diff
==
--- cfe/trunk/include/clang/Basic/arm_neon.td (original)
+++ cfe/trunk/include/clang/Basic/arm_neon.td Tue Mar 20 13:37:31 2018
@@ -1499,11 +1499,9 @@ let ArchGuard = "defined(__ARM_FEATURE_F
   def VMULX_LANEH   : IOpInst<"vmulx_lane", "ddgi", "hQh", OP_MULX_LN>;
   def VMULX_LANEQH  : IOpInst<"vmulx_laneq", "ddji", "hQh", OP_MULX_LN>;
   def VMULX_NH  : IOpInst<"vmulx_n", "dds", "hQh", OP_MULX_N>;
-  // TODO: Scalar floating point multiply extended (scalar, by element)
-  // Below ones are commented out because they need vmulx_f16(float16_t, 
float16_t)
-  // which will be implemented later with fp16 scalar intrinsic (arm_fp16.h)
-  //def SCALAR_FMULX_LANEH : IOpInst<"vmulx_lane", "ssdi", "Sh", 
OP_SCALAR_MUL_LN>;
-  //def SCALAR_FMULX_LANEQH : IOpInst<"vmulx_laneq", "ssji", "Sh", 
OP_SCALAR_MUL_LN>;
+  // Scalar floating point  mulx (scalar, by element)
+  def SCALAR_FMULX_LANEH : IInst<"vmulx_lane", "ssdi", "Sh">;
+  def SCALAR_FMULX_LANEQH : IInst<"vmulx_laneq", "ssji", "Sh">;
 
   // ARMv8.2-A FP16 reduction vector intrinsics.
   def VMAXVH   : SInst<"vmaxv", "sd", "hQh">;

Modified: cfe/trunk/lib/CodeGen/CGBuiltin.cpp
URL: 
http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/CodeGen/CGBuiltin.cpp?rev=328038&r1=328037&r2=328038&view=diff
==
--- cfe/trunk/lib/CodeGen/CGBuiltin.cpp (original)
+++ cfe/trunk/lib/CodeGen/CGBuiltin.cpp Tue Mar 20 13:37:31 2018
@@ -7238,6 +7238,16 @@ Value *CodeGenFunction::EmitAArch64Built
 Int = Intrinsic::aarch64_neon_fmulx;
 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vmulx");
   }
+  case NEON::BI__builtin_neon_vmulxh_lane_f16:
+  case NEON::BI__builtin_neon_vmulxh_laneq_f16: {
+// vmulx_lane should be mapped to Neon scalar mulx after
+// extracting the scalar element
+Ops.push_back(EmitScalarExpr(E->getArg(2)));
+Ops[1] = Builder.CreateExtractElement(Ops[1], Ops[2], "extract");
+Ops.pop_back();
+Int = Intrinsic::aarch64_neon_fmulx;
+return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vmulx");
+  }
   case NEON::BI__builtin_neon_vmul_lane_v:
   case NEON::BI__builtin_neon_vmul_laneq_v: {
 // v1f64 vmul_lane should be mapped to Neon scalar mul lane

Modified: cfe/trunk/test/CodeGen/aarch64-v8.2a-neon-intrinsics.c
URL: 
http://llvm.org/viewvc/llvm-project/cfe/trunk/test/CodeGen/aarch64-v8.2a-neon-intrinsics.c?rev=328038&r1=328037&r2=328038&view=diff
==
--- cfe/trunk/test/CodeGen/aarch64-v8.2a-neon-intrinsics.c (original)
+++ cfe/trunk/test/CodeGen/aarch64-v8.2a-neon-intrinsics.c Tue Mar 20 13:37:31 
2018
@@ -1223,27 +1223,25 @@ float16x8_t test_vmulxq_n_f16(float16x8_
   return vmulxq_n_f16(a, b);
 }
 
-/* TODO: Not implemented yet (needs scalar intrinsic from arm_fp16.h)
-// CCHECK-LABEL: test_vmulxh_lane_f16
-// CCHECK: [[CONV0:%.*]] = fpext half %a to float
-// CCHECK: [[CONV1:%.*]] = fpext half %{{.*}} to float
-// CCHECK: [[MUL:%.*]]   = fmul float [[CONV0:%.*]], [[CONV0:%.*]]
-// CCHECK: [[CONV3:%.*]] = fptrunc float %mul to half
-// CCHECK: ret half [[CONV3:%.*]]
+// CHECK-LABEL: test_vmulxh_lane_f16
+// CHECK: [[TMP0:%.*]] = bitcast <4 x half> %b to <8 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x half>
+// CHECK: [[EXTR:%.*]] = extractelement <4 x half> [[TMP1]], i32 3
+// CHECK: [[MULX:%.*]] = call half @llvm.aarch64.neon.fmulx.f16(half %a, half 
[[EXTR]]
+// CHECK: ret half [[MULX]]
 float16_t test_vmulxh_lane_f16(float16_t a, float16x4_t b) {
   return vmulxh_lane_f16(a, b, 3);
 }
 
-// CCHECK-LABEL: test_vmulxh_laneq_f16
-// CCHECK: [[CONV0:%.*]] = fpext half %a to float
-// CCHECK: [[CONV1:%.*]] = fpext half %{{.*}} to float
-// CCHECK: [[MUL:%.*]]   = fmul float [[CONV0:%.*]], [[CONV0:%.*]]
-// CCHECK: [[CONV3:%.*]] = fptrunc float %mul to half
-// CCHECK: ret half [[CONV3:%.*]]
+// CHECK-LABEL: test_vmulxh_laneq_f16
+// CHECK: [[TMP0:%.*]] = bitcast <8 x half> %b to <16 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x half>
+// CHECK: [[EXTR:%.*]] = extractelement <8 x half> [[TMP1]], i32 7
+// CHECK: [[MULX:%.*]] = call half @llvm.aarch64.neon.fmulx.f16(half %a, half 
[[EXTR]])
+// CHECK: ret half [[MULX]]
 float16_t test_vmulxh_laneq_f16(float16_t a, float16x8_t b) {
   return vmulxh_laneq_f16(a, b, 

r328277 - [ARM] Add ARMv8.2-A FP16 vector intrinsic

2018-03-22 Thread Abderrazek Zaafrani via cfe-commits
Author: az
Date: Thu Mar 22 17:08:40 2018
New Revision: 328277

URL: http://llvm.org/viewvc/llvm-project?rev=328277&view=rev
Log:
[ARM] Add ARMv8.2-A FP16 vector intrinsic

Putting back the code in commit r327189 that was reverted in r322737. The code 
is being committed in three stages and this one is the last stage: 1) r327455 
fp16 feature flags, 2) r327836 pass half type or i16 based on FullFP16, and 3) 
the code here, which adds the front-end fp16 vector intrinsics for ARM.

Differential revision https://reviews.llvm.org/D43650

Added:
cfe/trunk/test/CodeGen/arm-v8.2a-neon-intrinsics.c
Modified:
cfe/trunk/include/clang/Basic/arm_neon.td
cfe/trunk/lib/CodeGen/CGBuiltin.cpp

Modified: cfe/trunk/include/clang/Basic/arm_neon.td
URL: 
http://llvm.org/viewvc/llvm-project/cfe/trunk/include/clang/Basic/arm_neon.td?rev=328277&r1=328276&r2=328277&view=diff
==
--- cfe/trunk/include/clang/Basic/arm_neon.td (original)
+++ cfe/trunk/include/clang/Basic/arm_neon.td Thu Mar 22 17:08:40 2018
@@ -1363,8 +1363,8 @@ def SCALAR_VDUP_LANE : IInst<"vdup_lane"
 def SCALAR_VDUP_LANEQ : IInst<"vdup_laneq", "sji", 
"ScSsSiSlSfSdSUcSUsSUiSUlSPcSPs">;
 }
 
-// ARMv8.2-A FP16 intrinsics.
-let ArchGuard = "defined(__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) && 
defined(__aarch64__)" in {
+// ARMv8.2-A FP16 vector intrinsics for A32/A64.
+let ArchGuard = "defined(__ARM_FEATURE_FP16_VECTOR_ARITHMETIC)" in {
 
   // ARMv8.2-A FP16 one-operand vector intrinsics.
 
@@ -1395,14 +1395,12 @@ let ArchGuard = "defined(__ARM_FEATURE_F
   def FRINTPH  : SInst<"vrndp", "dd", "hQh">;
   def FRINTMH  : SInst<"vrndm", "dd", "hQh">;
   def FRINTXH  : SInst<"vrndx", "dd", "hQh">;
-  def FRINTIH  : SInst<"vrndi", "dd", "hQh">;
 
   // Misc.
   def VABSH: SInst<"vabs", "dd", "hQh">;
   def VNEGH: SOpInst<"vneg", "dd", "hQh", OP_NEG>;
   def VRECPEH  : SInst<"vrecpe", "dd", "hQh">;
   def FRSQRTEH : SInst<"vrsqrte", "dd", "hQh">;
-  def FSQRTH   : SInst<"vsqrt", "dd", "hQh">;
 
   // ARMv8.2-A FP16 two-operands vector intrinsics.
 
@@ -1443,18 +1441,13 @@ let ArchGuard = "defined(__ARM_FEATURE_F
 
   // Multiplication/Division
   def VMULH : SOpInst<"vmul", "ddd", "hQh", OP_MUL>;
-  def MULXH : SInst<"vmulx", "ddd", "hQh">;
-  def FDIVH : IOpInst<"vdiv", "ddd",  "hQh", OP_DIV>;
 
   // Pairwise addition
-  def VPADDH: SInst<"vpadd", "ddd", "hQh">;
+  def VPADDH: SInst<"vpadd", "ddd", "h">;
 
   // Pairwise Max/Min
-  def VPMAXH: SInst<"vpmax", "ddd", "hQh">;
-  def VPMINH: SInst<"vpmin", "ddd", "hQh">;
-  // Pairwise MaxNum/MinNum
-  def FMAXNMPH  : SInst<"vpmaxnm", "ddd", "hQh">;
-  def FMINNMPH  : SInst<"vpminnm", "ddd", "hQh">;
+  def VPMAXH: SInst<"vpmax", "ddd", "h">;
+  def VPMINH: SInst<"vpmin", "ddd", "h">;
 
   // Reciprocal/Sqrt
   def VRECPSH   : SInst<"vrecps", "ddd", "hQh">;
@@ -1468,6 +1461,63 @@ let ArchGuard = "defined(__ARM_FEATURE_F
 
   // ARMv8.2-A FP16 lane vector intrinsics.
 
+  // Mul lane
+  def VMUL_LANEH: IOpInst<"vmul_lane", "ddgi", "hQh", OP_MUL_LN>;
+  def VMUL_NH   : IOpInst<"vmul_n", "dds", "hQh", OP_MUL_N>;
+
+  // Data processing intrinsics - section 5
+
+  // Logical operations
+  let isHiddenLInst = 1 in
+  def VBSLH: SInst<"vbsl", "dudd", "hQh">;
+
+  // Transposition operations
+  def VZIPH: WInst<"vzip", "2dd", "hQh">;
+  def VUZPH: WInst<"vuzp", "2dd", "hQh">;
+  def VTRNH: WInst<"vtrn", "2dd", "hQh">;
+
+
+  let ArchGuard = "!defined(__aarch64__)" in {
+// Set all lanes to same value.
+// Already implemented prior to ARMv8.2-A.
+def VMOV_NH  : WOpInst<"vmov_n", "ds", "hQh", OP_DUP>;
+def VDUP_NH  : WOpInst<"vdup_n", "ds", "hQh", OP_DUP>;
+def VDUP_LANE1H : WOpInst<"vdup_lane", "dgi", "hQh", OP_DUP_LN>;
+  }
+
+  // Vector Extract
+  def VEXTH  : WInst<"vext", "dddi", "hQh">;
+
+  // Reverse vector elements
+  def VREV64H: WOpInst<"vrev64", "dd", "hQh", OP_REV64>;
+}
+
+// ARMv8.2-A FP16 vector intrinsics for A64 only.
+let ArchGuard = "defined(__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) && 
defined(__aarch64__)" in {
+
+  // Vector rounding
+  def FRINTIH  : SInst<"vrndi", "dd", "hQh">;
+
+  // Misc.
+  def FSQRTH   : SInst<"vsqrt", "dd", "hQh">;
+
+  // Multiplication/Division
+  def MULXH : SInst<"vmulx", "ddd", "hQh">;
+  def FDIVH : IOpInst<"vdiv", "ddd",  "hQh", OP_DIV>;
+
+  // Pairwise addition
+  def VPADDH1   : SInst<"vpadd", "ddd", "Qh">;
+
+  // Pairwise Max/Min
+  def VPMAXH1   : SInst<"vpmax", "ddd", "Qh">;
+  def VPMINH1   : SInst<"vpmin", "ddd", "Qh">;
+
+  // Pairwise MaxNum/MinNum
+  def FMAXNMPH  : SInst<"vpmaxnm", "ddd", "hQh">;
+  def FMINNMPH  : SInst<"vpminnm", "ddd", "hQh">;
+
+  // ARMv8.2-A FP16 lane vector intrinsics.
+
   // FMA lane
   def VFMA_LANEH   : IInst<"vfma_lane", "dddgi", "hQh">;
   def 

r323006 - [AArch64] Add ARMv8.2-A FP16 scalar intrinsics

2018-01-19 Thread Abderrazek Zaafrani via cfe-commits
Author: az
Date: Fri Jan 19 15:11:18 2018
New Revision: 323006

URL: http://llvm.org/viewvc/llvm-project?rev=323006&view=rev
Log:
[AArch64] Add ARMv8.2-A FP16 scalar intrinsics

https://reviews.llvm.org/D41792

Added:
cfe/trunk/include/clang/Basic/arm_fp16.td
cfe/trunk/include/clang/Basic/arm_neon_incl.td
cfe/trunk/test/CodeGen/aarch64-v8.2a-fp16-intrinsics.c
Modified:
cfe/trunk/include/clang/Basic/BuiltinsNEON.def
cfe/trunk/include/clang/Basic/CMakeLists.txt
cfe/trunk/include/clang/Basic/arm_neon.td
cfe/trunk/lib/Basic/Targets/AArch64.cpp
cfe/trunk/lib/CodeGen/CGBuiltin.cpp
cfe/trunk/lib/Headers/CMakeLists.txt
cfe/trunk/lib/Headers/module.modulemap
cfe/trunk/lib/Sema/SemaChecking.cpp
cfe/trunk/utils/TableGen/NeonEmitter.cpp
cfe/trunk/utils/TableGen/TableGen.cpp
cfe/trunk/utils/TableGen/TableGenBackends.h

Modified: cfe/trunk/include/clang/Basic/BuiltinsNEON.def
URL: 
http://llvm.org/viewvc/llvm-project/cfe/trunk/include/clang/Basic/BuiltinsNEON.def?rev=323006&r1=323005&r2=323006&view=diff
==
--- cfe/trunk/include/clang/Basic/BuiltinsNEON.def (original)
+++ cfe/trunk/include/clang/Basic/BuiltinsNEON.def Fri Jan 19 15:11:18 2018
@@ -16,6 +16,7 @@
 
 #define GET_NEON_BUILTINS
 #include "clang/Basic/arm_neon.inc"
+#include "clang/Basic/arm_fp16.inc"
 #undef GET_NEON_BUILTINS
 
 #undef BUILTIN

Modified: cfe/trunk/include/clang/Basic/CMakeLists.txt
URL: 
http://llvm.org/viewvc/llvm-project/cfe/trunk/include/clang/Basic/CMakeLists.txt?rev=323006&r1=323005&r2=323006&view=diff
==
--- cfe/trunk/include/clang/Basic/CMakeLists.txt (original)
+++ cfe/trunk/include/clang/Basic/CMakeLists.txt Fri Jan 19 15:11:18 2018
@@ -46,3 +46,7 @@ clang_tablegen(arm_neon.inc -gen-arm-neo
   -I ${CMAKE_CURRENT_SOURCE_DIR}/../../
   SOURCE arm_neon.td
   TARGET ClangARMNeon)
+clang_tablegen(arm_fp16.inc -gen-arm-neon-sema
+  -I ${CMAKE_CURRENT_SOURCE_DIR}/../../
+  SOURCE arm_fp16.td
+  TARGET ClangARMFP16)

Added: cfe/trunk/include/clang/Basic/arm_fp16.td
URL: 
http://llvm.org/viewvc/llvm-project/cfe/trunk/include/clang/Basic/arm_fp16.td?rev=323006&view=auto
==
--- cfe/trunk/include/clang/Basic/arm_fp16.td (added)
+++ cfe/trunk/include/clang/Basic/arm_fp16.td Fri Jan 19 15:11:18 2018
@@ -0,0 +1,131 @@
+//===--- arm_fp16.td - ARM FP16 compiler interface 
===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===--===//
+//
+//  This file defines the TableGen definitions from which the ARM FP16 header
+//  file will be generated.
+//
+//===--===//
+
+include "arm_neon_incl.td"
+
+// ARMv8.2-A FP16 intrinsics.
+let ArchGuard = "defined(__ARM_FEATURE_FP16_SCALAR_ARITHMETIC) && 
defined(__aarch64__)" in {
+
+  // Negate
+  def VNEGSH  : SInst<"vneg", "ss", "Sh">;
+
+  // Reciprocal/Sqrt
+  def SCALAR_FRECPSH  : IInst<"vrecps", "sss", "Sh">;
+  def FSQRTSH : SInst<"vsqrt", "ss", "Sh">;
+  def SCALAR_FRSQRTSH : IInst<"vrsqrts", "sss", "Sh">;
+
+  // Reciprocal Estimate
+  def SCALAR_FRECPEH  : IInst<"vrecpe", "ss", "Sh">;
+
+  // Reciprocal Exponent
+  def SCALAR_FRECPXH  : IInst<"vrecpx", "ss", "Sh">;
+
+  // Reciprocal Square Root Estimate
+  def SCALAR_FRSQRTEH : IInst<"vrsqrte", "ss", "Sh">;
+
+  // Rounding
+  def FRINTZ_S64H : SInst<"vrnd", "ss", "Sh">;
+  def FRINTA_S64H : SInst<"vrnda", "ss", "Sh">;
+  def FRINTI_S64H : SInst<"vrndi", "ss", "Sh">;
+  def FRINTM_S64H : SInst<"vrndm", "ss", "Sh">;
+  def FRINTN_S64H : SInst<"vrndn", "ss", "Sh">;
+  def FRINTP_S64H : SInst<"vrndp", "ss", "Sh">;
+  def FRINTX_S64H : SInst<"vrndx", "ss", "Sh">;
+
+  // Conversion
+  def SCALAR_SCVTFSH  : SInst<"vcvth_f16", "Ys", "silUsUiUl">;
+  def SCALAR_FCVTZSH  : SInst<"vcvt_s16", "$s", "Sh">;
+  def SCALAR_FCVTZSH1 : SInst<"vcvt_s32", "Is", "Sh">;
+  def SCALAR_FCVTZSH2 : SInst<"vcvt_s64", "Ls", "Sh">;
+  def SCALAR_FCVTZUH  : SInst<"vcvt_u16", "bs", "Sh">;
+  def SCALAR_FCVTZUH1 : SInst<"vcvt_u32", "Us", "Sh">;
+  def SCALAR_FCVTZUH2 : SInst<"vcvt_u64", "Os", "Sh">;
+  def SCALAR_FCVTASH  : SInst<"vcvta_s16", "$s", "Sh">;
+  def SCALAR_FCVTASH1 : SInst<"vcvta_s32", "Is", "Sh">;
+  def SCALAR_FCVTASH2 : SInst<"vcvta_s64", "Ls", "Sh">;
+  def SCALAR_FCVTAUH  : SInst<"vcvta_u16", "bs", "Sh">;
+  def SCALAR_FCVTAUH1 : SInst<"vcvta_u32", "Us", "Sh">;
+  def SCALAR_FCVTAUH2 : SInst<"vcvta_u64", "Os", "Sh">;
+  def SCALAR_FCVTMSH  : SInst<"vcvtm_s16", "$s", "Sh">;
+  def SCALAR_FCVTMSH1 : SInst<"vcvtm_s32", "Is", "Sh">;
+  def SCALAR_FCVTM