[PATCH] D100425: [WebAssembly] Codegen for f64x2.convert_low_i32x4_{s,u}

Thomas Lively via Phabricator via cfe-commits Tue, 13 Apr 2021 16:23:54 -0700

tlively created this revision.
tlively added reviewers: aheejin, dschuff.
Herald added subscribers: wingo, ecnelises, sunfish, hiraditya, 
jgravelle-google, sbc100.
tlively requested review of this revision.
Herald added projects: clang, LLVM.
Herald added subscribers: llvm-commits, cfe-commits.


Add a custom DAG combine and a pair of ISD opcodes (CONVERT_LOW_S and
CONVERT_LOW_U) for detecting patterns like

  ({s,u}int_to_fp (extract_subvector ... 0))

before the extract_subvector is expanded, to ensure that they will ultimately
lower to f64x2.convert_low_i32x4_{s,u} instructions. Since these instructions
are no longer prototypes and can now be produced via standard IR, this commit
also removes the target intrinsics and builtins that had been used to prototype
the instructions.


Repository:
  rG LLVM Github Monorepo

https://reviews.llvm.org/D100425

Files:
  clang/include/clang/Basic/BuiltinsWebAssembly.def
  clang/lib/CodeGen/CGBuiltin.cpp
  clang/test/CodeGen/builtins-wasm.c
  llvm/include/llvm/IR/IntrinsicsWebAssembly.td
  llvm/lib/Target/WebAssembly/WebAssemblyISD.def
  llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp
  llvm/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td
  llvm/test/CodeGen/WebAssembly/simd-conversions.ll
  llvm/test/CodeGen/WebAssembly/simd-intrinsics.ll

Index: llvm/test/CodeGen/WebAssembly/simd-intrinsics.ll
===================================================================
--- llvm/test/CodeGen/WebAssembly/simd-intrinsics.ll
+++ llvm/test/CodeGen/WebAssembly/simd-intrinsics.ll
@@ -843,26 +843,6 @@
   ret <2 x double> %v
 }
 
-; CHECK-LABEL: convert_low_signed_v2f64:
-; CHECK-NEXT: .functype convert_low_signed_v2f64 (v128) -> (v128){{$}}
-; CHECK-NEXT: f64x2.convert_low_i32x4_s $push[[R:[0-9]+]]=, $0{{$}}
-; CHECK-NEXT: return $pop[[R]]{{$}}
-declare <2 x double> @llvm.wasm.convert.low.signed(<4 x i32>)
-define <2 x double> @convert_low_signed_v2f64(<4 x i32> %a) {
-  %v = call <2 x double> @llvm.wasm.convert.low.signed(<4 x i32> %a)
-  ret <2 x double> %v
-}
-
-; CHECK-LABEL: convert_low_unsigned_v2f64:
-; CHECK-NEXT: .functype convert_low_unsigned_v2f64 (v128) -> (v128){{$}}
-; CHECK-NEXT: f64x2.convert_low_i32x4_u $push[[R:[0-9]+]]=, $0{{$}}
-; CHECK-NEXT: return $pop[[R]]{{$}}
-declare <2 x double> @llvm.wasm.convert.low.unsigned(<4 x i32>)
-define <2 x double> @convert_low_unsigned_v2f64(<4 x i32> %a) {
-  %v = call <2 x double> @llvm.wasm.convert.low.unsigned(<4 x i32> %a)
-  ret <2 x double> %v
-}
-
 ; CHECK-LABEL: promote_low_v2f64:
 ; CHECK-NEXT: .functype promote_low_v2f64 (v128) -> (v128){{$}}
 ; CHECK-NEXT: f64x2.promote_low_f32x4 $push[[R:[0-9]+]]=, $0{{$}}
Index: llvm/test/CodeGen/WebAssembly/simd-conversions.ll
===================================================================
--- llvm/test/CodeGen/WebAssembly/simd-conversions.ll
+++ llvm/test/CodeGen/WebAssembly/simd-conversions.ll
@@ -81,3 +81,25 @@
   %a = fptoui <2 x double> %x to <2 x i64>
   ret <2 x i64> %a
 }
+
+; CHECK-LABEL: convert_low_s_v2f64:
+; NO-SIMD128-NOT: f64x2
+; SIMD128-NEXT: .functype convert_low_s_v2f64 (v128) -> (v128){{$}}
+; SIMD128-NEXT: f64x2.convert_low_i32x4_s $push[[R:[0-9]+]]=, $0
+; SIMD128-NEXT: return $pop[[R]]
+define <2 x double> @convert_low_s_v2f64(<4 x i32> %x) {
+  %v = shufflevector <4 x i32> %x, <4 x i32> undef, <2 x i32> <i32 0, i32 1>
+  %a = sitofp <2 x i32> %v to <2 x double>
+  ret <2 x double> %a
+}
+
+; CHECK-LABEL: convert_low_u_v2f64:
+; NO-SIMD128-NOT: f64x2
+; SIMD128-NEXT: .functype convert_low_u_v2f64 (v128) -> (v128){{$}}
+; SIMD128-NEXT: f64x2.convert_low_i32x4_u $push[[R:[0-9]+]]=, $0
+; SIMD128-NEXT: return $pop[[R]]
+define <2 x double> @convert_low_u_v2f64(<4 x i32> %x) {
+  %v = shufflevector <4 x i32> %x, <4 x i32> undef, <2 x i32> <i32 0, i32 1>
+  %a = uitofp <2 x i32> %v to <2 x double>
+  ret <2 x double> %a
+}
Index: llvm/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td
===================================================================
--- llvm/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td
+++ llvm/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td
@@ -1091,16 +1091,21 @@
 defm "" : SIMDConvert<I32x4, F32x4, fp_to_sint, "trunc_sat_f32x4_s", 248>;
 defm "" : SIMDConvert<I32x4, F32x4, fp_to_uint, "trunc_sat_f32x4_u", 249>;
 
-// Integer to floating point: convert
-defm "" : SIMDConvert<F32x4, I32x4, sint_to_fp, "convert_i32x4_s", 250>;
-defm "" : SIMDConvert<F32x4, I32x4, uint_to_fp, "convert_i32x4_u", 251>;
-
 // Lower llvm.wasm.trunc.sat.* to saturating instructions
 def : Pat<(v4i32 (int_wasm_trunc_saturate_signed (v4f32 V128:$src))),
           (fp_to_sint_I32x4 $src)>;
 def : Pat<(v4i32 (int_wasm_trunc_saturate_unsigned (v4f32 V128:$src))),
           (fp_to_uint_I32x4 $src)>;
 
+// Integer to floating point: convert
+def convert_low_t : SDTypeProfile<1, 1, [SDTCisVec<0>, SDTCisVec<1>]>;
+def convert_low_s : SDNode<"WebAssemblyISD::CONVERT_LOW_S", convert_low_t>;
+def convert_low_u : SDNode<"WebAssemblyISD::CONVERT_LOW_U", convert_low_t>;
+defm "" : SIMDConvert<F32x4, I32x4, sint_to_fp, "convert_i32x4_s", 250>;
+defm "" : SIMDConvert<F32x4, I32x4, uint_to_fp, "convert_i32x4_u", 251>;
+defm "" : SIMDConvert<F64x2, I32x4, convert_low_s, "convert_low_i32x4_s", 0xfe>;
+defm "" : SIMDConvert<F64x2, I32x4, convert_low_u, "convert_low_i32x4_u", 0xff>;
+
 // Extending operations
 def extend_t : SDTypeProfile<1, 1, [SDTCisVec<0>, SDTCisVec<1>]>;
 def extend_low_s : SDNode<"WebAssemblyISD::EXTEND_LOW_S", extend_t>;
@@ -1255,10 +1260,6 @@
                       "trunc_sat_zero_f64x2_s", 0xfc>;
 defm "" : SIMDConvert<I32x4, F64x2, int_wasm_trunc_sat_zero_unsigned,
                       "trunc_sat_zero_f64x2_u", 0xfd>;
-defm "" : SIMDConvert<F64x2, I32x4, int_wasm_convert_low_signed,
-                      "convert_low_i32x4_s", 0xfe>;
-defm "" : SIMDConvert<F64x2, I32x4, int_wasm_convert_low_unsigned,
-                      "convert_low_i32x4_u", 0xff>;
 
 //===----------------------------------------------------------------------===//
 // Saturating Rounding Q-Format Multiplication
Index: llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp
===================================================================
--- llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp
+++ llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp
@@ -130,6 +130,10 @@
     setTargetDAGCombine(ISD::SIGN_EXTEND);
     setTargetDAGCombine(ISD::ZERO_EXTEND);
 
+    // Combine {s,u}int_to_fp of extract_subvectors into conversion ops
+    setTargetDAGCombine(ISD::SINT_TO_FP);
+    setTargetDAGCombine(ISD::UINT_TO_FP);
+
     // Support saturating add for i8x16 and i16x8
     for (auto Op : {ISD::SADDSAT, ISD::UADDSAT})
       for (auto T : {MVT::v16i8, MVT::v8i16})
@@ -2021,6 +2025,40 @@
   return DAG.getNode(Op, SDLoc(N), ResVT, Source);
 }
 
+static SDValue
+performVectorConvertLowCombine(SDNode *N,
+                               TargetLowering::DAGCombinerInfo &DCI) {
+  auto &DAG = DCI.DAG;
+  assert(N->getOpcode() == ISD::SINT_TO_FP ||
+         N->getOpcode() == ISD::UINT_TO_FP);
+
+  // Combine ({s,u}int_to_fp (extract_subvector ... 0)) to an
+  // f64x2.convert_low_i32x4_{s,u} SDNode.
+  auto Extract = N->getOperand(0);
+  if (Extract.getOpcode() != ISD::EXTRACT_SUBVECTOR)
+    return SDValue();
+  auto Source = Extract.getOperand(0);
+  auto *IndexNode = dyn_cast<ConstantSDNode>(Extract.getOperand(1));
+  if (IndexNode == nullptr)
+    return SDValue();
+  auto Index = IndexNode->getZExtValue();
+
+  // The types must be correct.
+  EVT ResVT = N->getValueType(0);
+  if (ResVT != MVT::v2f64 || Extract.getValueType() != MVT::v2i32)
+    return SDValue();
+
+  // The extracted vector must be the low half.
+  if (Index != 0)
+    return SDValue();
+
+  unsigned Op = N->getOpcode() == ISD::SINT_TO_FP
+                    ? WebAssemblyISD::CONVERT_LOW_S
+                    : WebAssemblyISD::CONVERT_LOW_U;
+
+  return DAG.getNode(Op, SDLoc(N), ResVT, Source);
+}
+
 SDValue
 WebAssemblyTargetLowering::PerformDAGCombine(SDNode *N,
                                              DAGCombinerInfo &DCI) const {
@@ -2032,5 +2070,8 @@
   case ISD::SIGN_EXTEND:
   case ISD::ZERO_EXTEND:
     return performVectorExtendCombine(N, DCI);
+  case ISD::SINT_TO_FP:
+  case ISD::UINT_TO_FP:
+    return performVectorConvertLowCombine(N, DCI);
   }
 }
Index: llvm/lib/Target/WebAssembly/WebAssemblyISD.def
===================================================================
--- llvm/lib/Target/WebAssembly/WebAssemblyISD.def
+++ llvm/lib/Target/WebAssembly/WebAssemblyISD.def
@@ -33,6 +33,8 @@
 HANDLE_NODETYPE(EXTEND_LOW_U)
 HANDLE_NODETYPE(EXTEND_HIGH_S)
 HANDLE_NODETYPE(EXTEND_HIGH_U)
+HANDLE_NODETYPE(CONVERT_LOW_S)
+HANDLE_NODETYPE(CONVERT_LOW_U)
 HANDLE_NODETYPE(THROW)
 HANDLE_NODETYPE(CATCH)
 HANDLE_NODETYPE(MEMORY_COPY)
Index: llvm/include/llvm/IR/IntrinsicsWebAssembly.td
===================================================================
--- llvm/include/llvm/IR/IntrinsicsWebAssembly.td
+++ llvm/include/llvm/IR/IntrinsicsWebAssembly.td
@@ -295,12 +295,6 @@
             [IntrNoMem, IntrSpeculatable]>;
 
 // TODO: Remove these if possible if they are merged to the spec.
-def int_wasm_convert_low_signed :
-  Intrinsic<[llvm_v2f64_ty], [llvm_v4i32_ty],
-            [IntrNoMem, IntrSpeculatable]>;
-def int_wasm_convert_low_unsigned :
-  Intrinsic<[llvm_v2f64_ty], [llvm_v4i32_ty],
-            [IntrNoMem, IntrSpeculatable]>;
 def int_wasm_trunc_sat_zero_signed :
   Intrinsic<[llvm_v4i32_ty], [llvm_v2f64_ty],
             [IntrNoMem, IntrSpeculatable]>;
Index: clang/test/CodeGen/builtins-wasm.c
===================================================================
--- clang/test/CodeGen/builtins-wasm.c
+++ clang/test/CodeGen/builtins-wasm.c
@@ -914,18 +914,6 @@
   // WEBASSEMBLY: ret
 }
 
-f64x2 convert_low_s_i32x4_f64x2(i32x4 x) {
-  return __builtin_wasm_convert_low_s_i32x4_f64x2(x);
-  // WEBASSEMBLY: call <2 x double> @llvm.wasm.convert.low.signed(<4 x i32> %x)
-  // WEBASSEMBLY: ret
-}
-
-f64x2 convert_low_u_i32x4_f64x2(u32x4 x) {
-  return __builtin_wasm_convert_low_u_i32x4_f64x2(x);
-  // WEBASSEMBLY: call <2 x double> @llvm.wasm.convert.low.unsigned(<4 x i32> %x)
-  // WEBASSEMBLY: ret
-}
-
 i32x4 trunc_sat_zero_s_f64x2_i32x4(f64x2 x) {
   return __builtin_wasm_trunc_sat_zero_s_f64x2_i32x4(x);
   // WEBASSEMBLY: call <4 x i32> @llvm.wasm.trunc.sat.zero.signed(<2 x double> %x)
Index: clang/lib/CodeGen/CGBuiltin.cpp
===================================================================
--- clang/lib/CodeGen/CGBuiltin.cpp
+++ clang/lib/CodeGen/CGBuiltin.cpp
@@ -17500,23 +17500,6 @@
     Function *Callee = CGM.getIntrinsic(IntNo);
     return Builder.CreateCall(Callee, Vec);
   }
-  case WebAssembly::BI__builtin_wasm_convert_low_s_i32x4_f64x2:
-  case WebAssembly::BI__builtin_wasm_convert_low_u_i32x4_f64x2: {
-    Value *Vec = EmitScalarExpr(E->getArg(0));
-    unsigned IntNo;
-    switch (BuiltinID) {
-    case WebAssembly::BI__builtin_wasm_convert_low_s_i32x4_f64x2:
-      IntNo = Intrinsic::wasm_convert_low_signed;
-      break;
-    case WebAssembly::BI__builtin_wasm_convert_low_u_i32x4_f64x2:
-      IntNo = Intrinsic::wasm_convert_low_unsigned;
-      break;
-    default:
-      llvm_unreachable("unexpected builtin ID");
-    }
-    Function *Callee = CGM.getIntrinsic(IntNo);
-    return Builder.CreateCall(Callee, Vec);
-  }
   case WebAssembly::BI__builtin_wasm_trunc_sat_zero_s_f64x2_i32x4:
   case WebAssembly::BI__builtin_wasm_trunc_sat_zero_u_f64x2_i32x4: {
     Value *Vec = EmitScalarExpr(E->getArg(0));
Index: clang/include/clang/Basic/BuiltinsWebAssembly.def
===================================================================
--- clang/include/clang/Basic/BuiltinsWebAssembly.def
+++ clang/include/clang/Basic/BuiltinsWebAssembly.def
@@ -196,8 +196,6 @@
 TARGET_BUILTIN(__builtin_wasm_extend_low_u_i32x4_i64x2, "V2LLUiV4Ui", "nc", "simd128")
 TARGET_BUILTIN(__builtin_wasm_extend_high_u_i32x4_i64x2, "V2LLUiV4Ui", "nc", "simd128")
 
-TARGET_BUILTIN(__builtin_wasm_convert_low_s_i32x4_f64x2, "V2dV4i", "nc", "simd128")
-TARGET_BUILTIN(__builtin_wasm_convert_low_u_i32x4_f64x2, "V2dV4Ui", "nc", "simd128")
 TARGET_BUILTIN(__builtin_wasm_trunc_sat_zero_s_f64x2_i32x4, "V4iV2d", "nc", "simd128")
 TARGET_BUILTIN(__builtin_wasm_trunc_sat_zero_u_f64x2_i32x4, "V4UiV2d", "nc", "simd128")
 TARGET_BUILTIN(__builtin_wasm_demote_zero_f64x2_f32x4, "V4fV2d", "nc", "simd128")

_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

[PATCH] D100425: [WebAssembly] Codegen for f64x2.convert_low_i32x4_{s,u}

Reply via email to