[PATCH] D158626: [AArch64] Add missing vrnd intrinsics

Max Iyengar via Phabricator via cfe-commits Wed, 23 Aug 2023 08:23:31 -0700

miyengar created this revision.
miyengar added a reviewer: vhscampos.
Herald added subscribers: hiraditya, kristof.beyls.
Herald added a project: All.
miyengar requested review of this revision.
Herald added projects: clang, LLVM.
Herald added subscribers: llvm-commits, cfe-commits.


This patch adds 8 missing intrinsics as specified in the Arm ACLE document,
section 2.12.1.1:
https://arm-software.github.io/acle/neon_intrinsics/advsimd.html#rounding-3

The intrinsics implemented are:

- vrnd32z_f64
- vrnd32zq_f64
- vrnd64z_f64
- vrnd64zq_f64
- vrnd32x_f64
- vrnd32xq_f64
- vrnd64x_f64
- vrnd64xq_f64


Repository:
  rG LLVM Github Monorepo

https://reviews.llvm.org/D158626

Files:
  clang/include/clang/Basic/arm_neon.td
  clang/lib/CodeGen/CGBuiltin.cpp
  clang/test/CodeGen/aarch64-v8.5a-neon-frint3264-intrinsic.c
  llvm/lib/Target/AArch64/AArch64InstrFormats.td
  llvm/test/CodeGen/AArch64/v8.5a-neon-frint3264-intrinsic.ll

Index: llvm/test/CodeGen/AArch64/v8.5a-neon-frint3264-intrinsic.ll
===================================================================
--- llvm/test/CodeGen/AArch64/v8.5a-neon-frint3264-intrinsic.ll
+++ llvm/test/CodeGen/AArch64/v8.5a-neon-frint3264-intrinsic.ll
@@ -81,3 +81,85 @@
   %val = tail call <4 x float> @llvm.aarch64.neon.frint64z.v4f32(<4 x float> %a)
   ret <4 x float> %val
 }
+
+declare <1 x double> @llvm.aarch64.neon.frint32x.v1f64(<1 x double>)
+declare <2 x double> @llvm.aarch64.neon.frint32x.v2f64(<2 x double>)
+declare <1 x double> @llvm.aarch64.neon.frint32z.v1f64(<1 x double>)
+declare <2 x double> @llvm.aarch64.neon.frint32z.v2f64(<2 x double>)
+
+define dso_local <1 x double> @t_vrnd32x_f64(<1 x double> %a) {
+; CHECK-LABEL: t_vrnd32x_f64:
+; CHECK:         frint32x d0, d0
+; CHECK-NEXT:    ret
+entry:
+  %val = tail call <1 x double> @llvm.aarch64.neon.frint32x.v1f64(<1 x double> %a)
+  ret <1 x double> %val
+}
+
+define dso_local <2 x double> @t_vrnd32xq_f64(<2 x double> %a) {
+; CHECK-LABEL: t_vrnd32xq_f64:
+; CHECK:         frint32x v0.2d, v0.2d
+; CHECK-NEXT:    ret
+entry:
+  %val = tail call <2 x double> @llvm.aarch64.neon.frint32x.v2f64(<2 x double> %a)
+  ret <2 x double> %val
+}
+
+define dso_local <1 x double> @t_vrnd32z_f64(<1 x double> %a) {
+; CHECK-LABEL: t_vrnd32z_f64:
+; CHECK:         frint32z d0, d0
+; CHECK-NEXT:    ret
+entry:
+  %val = tail call <1 x double> @llvm.aarch64.neon.frint32z.v1f64(<1 x double> %a)
+  ret <1 x double> %val
+}
+
+define dso_local <2 x double> @t_vrnd32zq_f64(<2 x double> %a) {
+; CHECK-LABEL: t_vrnd32zq_f64:
+; CHECK:         frint32z v0.2d, v0.2d
+; CHECK-NEXT:    ret
+entry:
+  %val = tail call <2 x double> @llvm.aarch64.neon.frint32z.v2f64(<2 x double> %a)
+  ret <2 x double> %val
+}
+
+declare <1 x double> @llvm.aarch64.neon.frint64x.v1f64(<1 x double>)
+declare <2 x double> @llvm.aarch64.neon.frint64x.v2f64(<2 x double>)
+declare <1 x double> @llvm.aarch64.neon.frint64z.v1f64(<1 x double>)
+declare <2 x double> @llvm.aarch64.neon.frint64z.v2f64(<2 x double>)
+
+define dso_local <1 x double> @t_vrnd64x_f64(<1 x double> %a) {
+; CHECK-LABEL: t_vrnd64x_f64:
+; CHECK:         frint64x d0, d0
+; CHECK-NEXT:    ret
+entry:
+  %val = tail call <1 x double> @llvm.aarch64.neon.frint64x.v1f64(<1 x double> %a)
+  ret <1 x double> %val
+}
+
+define dso_local <2 x double> @t_vrnd64xq_f64(<2 x double> %a) {
+; CHECK-LABEL: t_vrnd64xq_f64:
+; CHECK:         frint64x v0.2d, v0.2d
+; CHECK-NEXT:    ret
+entry:
+  %val = tail call <2 x double> @llvm.aarch64.neon.frint64x.v2f64(<2 x double> %a)
+  ret <2 x double> %val
+}
+
+define dso_local <1 x double> @t_vrnd64z_f64(<1 x double> %a) {
+; CHECK-LABEL: t_vrnd64z_f64:
+; CHECK:         frint64z d0, d0
+; CHECK-NEXT:    ret
+entry:
+  %val = tail call <1 x double> @llvm.aarch64.neon.frint64z.v1f64(<1 x double> %a)
+  ret <1 x double> %val
+}
+
+define dso_local <2 x double> @t_vrnd64zq_f64(<2 x double> %a) {
+; CHECK-LABEL: t_vrnd64zq_f64:
+; CHECK:         frint64z v0.2d, v0.2d
+; CHECK-NEXT:    ret
+entry:
+  %val = tail call <2 x double> @llvm.aarch64.neon.frint64z.v2f64(<2 x double> %a)
+  ret <2 x double> %val
+}
Index: llvm/lib/Target/AArch64/AArch64InstrFormats.td
===================================================================
--- llvm/lib/Target/AArch64/AArch64InstrFormats.td
+++ llvm/lib/Target/AArch64/AArch64InstrFormats.td
@@ -6282,24 +6282,30 @@
     : SIMDTwoVectorFP<U, S, opc, asm, OpNode, 0>;
 
 // Supports only S and D element sizes
-let mayRaiseFPException = 1, Uses = [FPCR] in
-multiclass SIMDTwoVectorSD<bit U, bits<5> opc, string asm,
+multiclass SIMDTwoVectorSD<bit U, bit opc, string asm,
                            SDPatternOperator OpNode = null_frag> {
-
-  def v2f32 : BaseSIMDTwoSameVector<0, U, 00, opc, 0b00, V64,
+  let mayRaiseFPException = 1, Uses = [FPCR] in {
+    def v2f32 : BaseSIMDTwoSameVector<0, U, 00, {0b1111, opc}, 0b00, V64,
                                 asm, ".2s", ".2s",
                           [(set (v2f32 V64:$Rd), (OpNode (v2f32 V64:$Rn)))]>;
-  def v4f32 : BaseSIMDTwoSameVector<1, U, 00, opc, 0b00, V128,
+    def v4f32 : BaseSIMDTwoSameVector<1, U, 00, {0b1111, opc}, 0b00, V128,
                                 asm, ".4s", ".4s",
                           [(set (v4f32 V128:$Rd), (OpNode (v4f32 V128:$Rn)))]>;
-  def v2f64 : BaseSIMDTwoSameVector<1, U, 01, opc, 0b00, V128,
+    def v2f64 : BaseSIMDTwoSameVector<1, U, 01, {0b1111, opc}, 0b00, V128,
                                 asm, ".2d", ".2d",
                           [(set (v2f64 V128:$Rd), (OpNode (v2f64 V128:$Rn)))]>;
+    def f64 : BaseSingleOperandFPData<{0b0100, U, opc},
+                                FPR64, f64, asm, null_frag>;
+  }
+
+  def : Pat<(v1f64 (OpNode (v1f64 FPR64:$Rn))),
+           (!cast<Instruction>(NAME # f64) FPR64:$Rn)>;
+
 }
 
 multiclass FRIntNNTVector<bit U, bit op, string asm,
                           SDPatternOperator OpNode = null_frag> :
-           SIMDTwoVectorSD<U, {0b1111,op}, asm, OpNode>;
+           SIMDTwoVectorSD<U, op, asm, OpNode>;
 
 // Supports only S element size.
 multiclass SIMDTwoVectorS<bit U, bit S, bits<5> opc, string asm,
Index: clang/test/CodeGen/aarch64-v8.5a-neon-frint3264-intrinsic.c
===================================================================
--- clang/test/CodeGen/aarch64-v8.5a-neon-frint3264-intrinsic.c
+++ clang/test/CodeGen/aarch64-v8.5a-neon-frint3264-intrinsic.c
@@ -62,3 +62,59 @@
 float32x4_t test_vrnd64zq_f32(float32x4_t a) {
   return vrnd64zq_f32(a);
 }
+
+// CHECK-LABEL: test_vrnd32x_f64
+// CHECK:  [[RND:%.*]] =  call <1 x double> @llvm.aarch64.neon.frint32x.v1f64(<1 x double> %a)
+// CHECK:  ret <1 x double> [[RND]]
+float64x1_t test_vrnd32x_f64(float64x1_t a) {
+  return vrnd32x_f64(a);
+}
+
+// CHECK-LABEL: test_vrnd32xq_f64
+// CHECK:  [[RND:%.*]] =  call <2 x double> @llvm.aarch64.neon.frint32x.v2f64(<2 x double> %a)
+// CHECK:  ret <2 x double> [[RND]]
+float64x2_t test_vrnd32xq_f64(float64x2_t a) {
+  return vrnd32xq_f64(a);
+}
+
+// CHECK-LABEL: test_vrnd32z_f64
+// CHECK:  [[RND:%.*]] =  call <1 x double> @llvm.aarch64.neon.frint32z.v1f64(<1 x double> %a)
+// CHECK:  ret <1 x double> [[RND]]
+float64x1_t test_vrnd32z_f64(float64x1_t a) {
+  return vrnd32z_f64(a);
+}
+
+// CHECK-LABEL: test_vrnd32zq_f64
+// CHECK:  [[RND:%.*]] =  call <2 x double> @llvm.aarch64.neon.frint32z.v2f64(<2 x double> %a)
+// CHECK:  ret <2 x double> [[RND]]
+float64x2_t test_vrnd32zq_f64(float64x2_t a) {
+  return vrnd32zq_f64(a);
+}
+
+// CHECK-LABEL: test_vrnd64x_f64
+// CHECK:  [[RND:%.*]] =  call <1 x double> @llvm.aarch64.neon.frint64x.v1f64(<1 x double> %a)
+// CHECK:  ret <1 x double> [[RND]]
+float64x1_t test_vrnd64x_f64(float64x1_t a) {
+  return vrnd64x_f64(a);
+}
+
+// CHECK-LABEL: test_vrnd64xq_f64
+// CHECK:  [[RND:%.*]] =  call <2 x double> @llvm.aarch64.neon.frint64x.v2f64(<2 x double> %a)
+// CHECK:  ret <2 x double> [[RND]]
+float64x2_t test_vrnd64xq_f64(float64x2_t a) {
+  return vrnd64xq_f64(a);
+}
+
+// CHECK-LABEL: test_vrnd64z_f64
+// CHECK:  [[RND:%.*]] =  call <1 x double> @llvm.aarch64.neon.frint64z.v1f64(<1 x double> %a)
+// CHECK:  ret <1 x double> [[RND]]
+float64x1_t test_vrnd64z_f64(float64x1_t a) {
+  return vrnd64z_f64(a);
+}
+
+// CHECK-LABEL: test_vrnd64zq_f64
+// CHECK:  [[RND:%.*]] =  call <2 x double> @llvm.aarch64.neon.frint64z.v2f64(<2 x double> %a)
+// CHECK:  ret <2 x double> [[RND]]
+float64x2_t test_vrnd64zq_f64(float64x2_t a) {
+  return vrnd64zq_f64(a);
+}
Index: clang/lib/CodeGen/CGBuiltin.cpp
===================================================================
--- clang/lib/CodeGen/CGBuiltin.cpp
+++ clang/lib/CodeGen/CGBuiltin.cpp
@@ -6371,13 +6371,21 @@
   NEONMAP2(vrhadd_v, aarch64_neon_urhadd, aarch64_neon_srhadd, Add1ArgType | UnsignedAlts),
   NEONMAP2(vrhaddq_v, aarch64_neon_urhadd, aarch64_neon_srhadd, Add1ArgType | UnsignedAlts),
   NEONMAP1(vrnd32x_f32, aarch64_neon_frint32x, Add1ArgType),
+  NEONMAP1(vrnd32x_f64, aarch64_neon_frint32x, Add1ArgType),
   NEONMAP1(vrnd32xq_f32, aarch64_neon_frint32x, Add1ArgType),
+  NEONMAP1(vrnd32xq_f64, aarch64_neon_frint32x, Add1ArgType),
   NEONMAP1(vrnd32z_f32, aarch64_neon_frint32z, Add1ArgType),
+  NEONMAP1(vrnd32z_f64, aarch64_neon_frint32z, Add1ArgType),
   NEONMAP1(vrnd32zq_f32, aarch64_neon_frint32z, Add1ArgType),
+  NEONMAP1(vrnd32zq_f64, aarch64_neon_frint32z, Add1ArgType),
   NEONMAP1(vrnd64x_f32, aarch64_neon_frint64x, Add1ArgType),
+  NEONMAP1(vrnd64x_f64, aarch64_neon_frint64x, Add1ArgType),
   NEONMAP1(vrnd64xq_f32, aarch64_neon_frint64x, Add1ArgType),
+  NEONMAP1(vrnd64xq_f64, aarch64_neon_frint64x, Add1ArgType),
   NEONMAP1(vrnd64z_f32, aarch64_neon_frint64z, Add1ArgType),
+  NEONMAP1(vrnd64z_f64, aarch64_neon_frint64z, Add1ArgType),
   NEONMAP1(vrnd64zq_f32, aarch64_neon_frint64z, Add1ArgType),
+  NEONMAP1(vrnd64zq_f64, aarch64_neon_frint64z, Add1ArgType),
   NEONMAP0(vrndi_v),
   NEONMAP0(vrndiq_v),
   NEONMAP2(vrshl_v, aarch64_neon_urshl, aarch64_neon_srshl, Add1ArgType | UnsignedAlts),
@@ -11739,25 +11747,33 @@
     return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vrndz");
   }
   case NEON::BI__builtin_neon_vrnd32x_f32:
-  case NEON::BI__builtin_neon_vrnd32xq_f32: {
+  case NEON::BI__builtin_neon_vrnd32xq_f32:
+  case NEON::BI__builtin_neon_vrnd32x_f64:
+  case NEON::BI__builtin_neon_vrnd32xq_f64: {
     Ops.push_back(EmitScalarExpr(E->getArg(0)));
     Int = Intrinsic::aarch64_neon_frint32x;
     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrnd32x");
   }
   case NEON::BI__builtin_neon_vrnd32z_f32:
-  case NEON::BI__builtin_neon_vrnd32zq_f32: {
+  case NEON::BI__builtin_neon_vrnd32zq_f32:
+  case NEON::BI__builtin_neon_vrnd32z_f64:
+  case NEON::BI__builtin_neon_vrnd32zq_f64: {
     Ops.push_back(EmitScalarExpr(E->getArg(0)));
     Int = Intrinsic::aarch64_neon_frint32z;
     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrnd32z");
   }
   case NEON::BI__builtin_neon_vrnd64x_f32:
-  case NEON::BI__builtin_neon_vrnd64xq_f32: {
+  case NEON::BI__builtin_neon_vrnd64xq_f32:
+  case NEON::BI__builtin_neon_vrnd64x_f64:
+  case NEON::BI__builtin_neon_vrnd64xq_f64: {
     Ops.push_back(EmitScalarExpr(E->getArg(0)));
     Int = Intrinsic::aarch64_neon_frint64x;
     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrnd64x");
   }
   case NEON::BI__builtin_neon_vrnd64z_f32:
-  case NEON::BI__builtin_neon_vrnd64zq_f32: {
+  case NEON::BI__builtin_neon_vrnd64zq_f32:
+  case NEON::BI__builtin_neon_vrnd64z_f64:
+  case NEON::BI__builtin_neon_vrnd64zq_f64: {
     Ops.push_back(EmitScalarExpr(E->getArg(0)));
     Int = Intrinsic::aarch64_neon_frint64z;
     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrnd64z");
Index: clang/include/clang/Basic/arm_neon.td
===================================================================
--- clang/include/clang/Basic/arm_neon.td
+++ clang/include/clang/Basic/arm_neon.td
@@ -1232,6 +1232,11 @@
 def FRINT32Z_S32 : SInst<"vrnd32z", "..", "fQf">;
 def FRINT64X_S32 : SInst<"vrnd64x", "..", "fQf">;
 def FRINT64Z_S32 : SInst<"vrnd64z", "..", "fQf">;
+
+def FRINT32X_S64 : SInst<"vrnd32x", "..", "dQd">;
+def FRINT32Z_S64 : SInst<"vrnd32z", "..", "dQd">;
+def FRINT64X_S64 : SInst<"vrnd64x", "..", "dQd">;
+def FRINT64Z_S64 : SInst<"vrnd64z", "..", "dQd">;
 }
 
 ////////////////////////////////////////////////////////////////////////////////

_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

[PATCH] D158626: [AArch64] Add missing vrnd intrinsics

Reply via email to