Author: Thomas Lively
Date: 2021-04-14T10:42:45-07:00
New Revision: af7925b4dd6519ebf0126ac8a18d791eb26968c9
URL: https://github.com/llvm/llvm-project/commit/af7925b4dd6519ebf0126ac8a18d791eb26968c9 DIFF: https://github.com/llvm/llvm-project/commit/af7925b4dd6519ebf0126ac8a18d791eb26968c9.diff LOG: [WebAssembly] Codegen for f64x2.convert_low_i32x4_{s,u} Add a custom DAG combine and ISD opcode for detecting patterns like (uint_to_fp (extract_subvector ...)) before the extract_subvector is expanded to ensure that they will ultimately lower to f64x2.convert_low_i32x4_{s,u} instructions. Since these instructions are no longer prototypes and can now be produced via standard IR, this commit also removes the target intrinsics and builtins that had been used to prototype the instructions. Differential Revision: https://reviews.llvm.org/D100425 Added: Modified: clang/include/clang/Basic/BuiltinsWebAssembly.def clang/lib/CodeGen/CGBuiltin.cpp clang/test/CodeGen/builtins-wasm.c llvm/include/llvm/IR/IntrinsicsWebAssembly.td llvm/lib/Target/WebAssembly/WebAssemblyISD.def llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp llvm/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td llvm/test/CodeGen/WebAssembly/simd-conversions.ll llvm/test/CodeGen/WebAssembly/simd-intrinsics.ll Removed: ################################################################################ diff --git a/clang/include/clang/Basic/BuiltinsWebAssembly.def b/clang/include/clang/Basic/BuiltinsWebAssembly.def index 3f8b050aabfd1..db8ec8ebeb302 100644 --- a/clang/include/clang/Basic/BuiltinsWebAssembly.def +++ b/clang/include/clang/Basic/BuiltinsWebAssembly.def @@ -196,8 +196,6 @@ TARGET_BUILTIN(__builtin_wasm_extend_high_s_i32x4_i64x2, "V2LLiV4i", "nc", "simd TARGET_BUILTIN(__builtin_wasm_extend_low_u_i32x4_i64x2, "V2LLUiV4Ui", "nc", "simd128") TARGET_BUILTIN(__builtin_wasm_extend_high_u_i32x4_i64x2, "V2LLUiV4Ui", "nc", "simd128") -TARGET_BUILTIN(__builtin_wasm_convert_low_s_i32x4_f64x2, "V2dV4i", "nc", "simd128") -TARGET_BUILTIN(__builtin_wasm_convert_low_u_i32x4_f64x2, "V2dV4Ui", "nc", "simd128") 
TARGET_BUILTIN(__builtin_wasm_trunc_sat_zero_s_f64x2_i32x4, "V4iV2d", "nc", "simd128") TARGET_BUILTIN(__builtin_wasm_trunc_sat_zero_u_f64x2_i32x4, "V4UiV2d", "nc", "simd128") TARGET_BUILTIN(__builtin_wasm_demote_zero_f64x2_f32x4, "V4fV2d", "nc", "simd128") diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp index b9e3110da8345..7871dfd65d53e 100644 --- a/clang/lib/CodeGen/CGBuiltin.cpp +++ b/clang/lib/CodeGen/CGBuiltin.cpp @@ -17500,23 +17500,6 @@ Value *CodeGenFunction::EmitWebAssemblyBuiltinExpr(unsigned BuiltinID, Function *Callee = CGM.getIntrinsic(IntNo); return Builder.CreateCall(Callee, Vec); } - case WebAssembly::BI__builtin_wasm_convert_low_s_i32x4_f64x2: - case WebAssembly::BI__builtin_wasm_convert_low_u_i32x4_f64x2: { - Value *Vec = EmitScalarExpr(E->getArg(0)); - unsigned IntNo; - switch (BuiltinID) { - case WebAssembly::BI__builtin_wasm_convert_low_s_i32x4_f64x2: - IntNo = Intrinsic::wasm_convert_low_signed; - break; - case WebAssembly::BI__builtin_wasm_convert_low_u_i32x4_f64x2: - IntNo = Intrinsic::wasm_convert_low_unsigned; - break; - default: - llvm_unreachable("unexpected builtin ID"); - } - Function *Callee = CGM.getIntrinsic(IntNo); - return Builder.CreateCall(Callee, Vec); - } case WebAssembly::BI__builtin_wasm_trunc_sat_zero_s_f64x2_i32x4: case WebAssembly::BI__builtin_wasm_trunc_sat_zero_u_f64x2_i32x4: { Value *Vec = EmitScalarExpr(E->getArg(0)); diff --git a/clang/test/CodeGen/builtins-wasm.c b/clang/test/CodeGen/builtins-wasm.c index 7b7965c026e1a..a5c6f4423c3b4 100644 --- a/clang/test/CodeGen/builtins-wasm.c +++ b/clang/test/CodeGen/builtins-wasm.c @@ -914,18 +914,6 @@ u64x2 extend_high_u_i32x4_i64x2(u32x4 x) { // WEBASSEMBLY: ret } -f64x2 convert_low_s_i32x4_f64x2(i32x4 x) { - return __builtin_wasm_convert_low_s_i32x4_f64x2(x); - // WEBASSEMBLY: call <2 x double> @llvm.wasm.convert.low.signed(<4 x i32> %x) - // WEBASSEMBLY: ret -} - -f64x2 convert_low_u_i32x4_f64x2(u32x4 x) { - return 
__builtin_wasm_convert_low_u_i32x4_f64x2(x); - // WEBASSEMBLY: call <2 x double> @llvm.wasm.convert.low.unsigned(<4 x i32> %x) - // WEBASSEMBLY: ret -} - i32x4 trunc_sat_zero_s_f64x2_i32x4(f64x2 x) { return __builtin_wasm_trunc_sat_zero_s_f64x2_i32x4(x); // WEBASSEMBLY: call <4 x i32> @llvm.wasm.trunc.sat.zero.signed(<2 x double> %x) diff --git a/llvm/include/llvm/IR/IntrinsicsWebAssembly.td b/llvm/include/llvm/IR/IntrinsicsWebAssembly.td index f4bdd07b81082..977647db92adf 100644 --- a/llvm/include/llvm/IR/IntrinsicsWebAssembly.td +++ b/llvm/include/llvm/IR/IntrinsicsWebAssembly.td @@ -275,12 +275,6 @@ def int_wasm_extadd_pairwise_unsigned : [IntrNoMem, IntrSpeculatable]>; // TODO: Remove these if possible if they are merged to the spec. -def int_wasm_convert_low_signed : - Intrinsic<[llvm_v2f64_ty], [llvm_v4i32_ty], - [IntrNoMem, IntrSpeculatable]>; -def int_wasm_convert_low_unsigned : - Intrinsic<[llvm_v2f64_ty], [llvm_v4i32_ty], - [IntrNoMem, IntrSpeculatable]>; def int_wasm_trunc_sat_zero_signed : Intrinsic<[llvm_v4i32_ty], [llvm_v2f64_ty], [IntrNoMem, IntrSpeculatable]>; diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyISD.def b/llvm/lib/Target/WebAssembly/WebAssemblyISD.def index 3a82dd45a5f65..c73ce43057f85 100644 --- a/llvm/lib/Target/WebAssembly/WebAssemblyISD.def +++ b/llvm/lib/Target/WebAssembly/WebAssemblyISD.def @@ -33,6 +33,8 @@ HANDLE_NODETYPE(EXTEND_LOW_S) HANDLE_NODETYPE(EXTEND_LOW_U) HANDLE_NODETYPE(EXTEND_HIGH_S) HANDLE_NODETYPE(EXTEND_HIGH_U) +HANDLE_NODETYPE(CONVERT_LOW_S) +HANDLE_NODETYPE(CONVERT_LOW_U) HANDLE_NODETYPE(THROW) HANDLE_NODETYPE(CATCH) HANDLE_NODETYPE(MEMORY_COPY) diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp index d8d049c6dc817..a2f0d1164b6a2 100644 --- a/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp +++ b/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp @@ -130,6 +130,10 @@ WebAssemblyTargetLowering::WebAssemblyTargetLowering( 
setTargetDAGCombine(ISD::SIGN_EXTEND); setTargetDAGCombine(ISD::ZERO_EXTEND); + // Combine {s,u}int_to_fp of extract_vectors into conversion ops + setTargetDAGCombine(ISD::SINT_TO_FP); + setTargetDAGCombine(ISD::UINT_TO_FP); + // Support saturating add for i8x16 and i16x8 for (auto Op : {ISD::SADDSAT, ISD::UADDSAT}) for (auto T : {MVT::v16i8, MVT::v8i16}) @@ -2020,6 +2024,40 @@ performVectorExtendCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI) { return DAG.getNode(Op, SDLoc(N), ResVT, Source); } +static SDValue +performVectorConvertLowCombine(SDNode *N, + TargetLowering::DAGCombinerInfo &DCI) { + auto &DAG = DCI.DAG; + assert(N->getOpcode() == ISD::SINT_TO_FP || + N->getOpcode() == ISD::UINT_TO_FP); + + // Combine ({s,u}int_to_fp (extract_subvector ... 0)) to an + // f64x2.convert_low_i32x4_{s,u} SDNode. + auto Extract = N->getOperand(0); + if (Extract.getOpcode() != ISD::EXTRACT_SUBVECTOR) + return SDValue(); + auto Source = Extract.getOperand(0); + auto *IndexNode = dyn_cast<ConstantSDNode>(Extract.getOperand(1)); + if (IndexNode == nullptr) + return SDValue(); + auto Index = IndexNode->getZExtValue(); + + // The types must be correct. + EVT ResVT = N->getValueType(0); + if (ResVT != MVT::v2f64 || Extract.getValueType() != MVT::v2i32) + return SDValue(); + + // The extracted vector must be the low half. + if (Index != 0) + return SDValue(); + + unsigned Op = N->getOpcode() == ISD::SINT_TO_FP + ? 
WebAssemblyISD::CONVERT_LOW_S + : WebAssemblyISD::CONVERT_LOW_U; + + return DAG.getNode(Op, SDLoc(N), ResVT, Source); +} + SDValue WebAssemblyTargetLowering::PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const { @@ -2031,5 +2069,8 @@ WebAssemblyTargetLowering::PerformDAGCombine(SDNode *N, case ISD::SIGN_EXTEND: case ISD::ZERO_EXTEND: return performVectorExtendCombine(N, DCI); + case ISD::SINT_TO_FP: + case ISD::UINT_TO_FP: + return performVectorConvertLowCombine(N, DCI); } } diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td b/llvm/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td index c40714c376cd7..93f7c15d14446 100644 --- a/llvm/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td +++ b/llvm/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td @@ -1091,16 +1091,21 @@ multiclass SIMDConvert<Vec vec, Vec arg, SDPatternOperator op, string name, defm "" : SIMDConvert<I32x4, F32x4, fp_to_sint, "trunc_sat_f32x4_s", 248>; defm "" : SIMDConvert<I32x4, F32x4, fp_to_uint, "trunc_sat_f32x4_u", 249>; -// Integer to floating point: convert -defm "" : SIMDConvert<F32x4, I32x4, sint_to_fp, "convert_i32x4_s", 250>; -defm "" : SIMDConvert<F32x4, I32x4, uint_to_fp, "convert_i32x4_u", 251>; - // Lower llvm.wasm.trunc.sat.* to saturating instructions def : Pat<(v4i32 (int_wasm_trunc_saturate_signed (v4f32 V128:$src))), (fp_to_sint_I32x4 $src)>; def : Pat<(v4i32 (int_wasm_trunc_saturate_unsigned (v4f32 V128:$src))), (fp_to_uint_I32x4 $src)>; +// Integer to floating point: convert +def convert_low_t : SDTypeProfile<1, 1, [SDTCisVec<0>, SDTCisVec<1>]>; +def convert_low_s : SDNode<"WebAssemblyISD::CONVERT_LOW_S", convert_low_t>; +def convert_low_u : SDNode<"WebAssemblyISD::CONVERT_LOW_U", convert_low_t>; +defm "" : SIMDConvert<F32x4, I32x4, sint_to_fp, "convert_i32x4_s", 250>; +defm "" : SIMDConvert<F32x4, I32x4, uint_to_fp, "convert_i32x4_u", 251>; +defm "" : SIMDConvert<F64x2, I32x4, convert_low_s, "convert_low_i32x4_s", 0xfe>; +defm "" : SIMDConvert<F64x2, I32x4, convert_low_u, 
"convert_low_i32x4_u", 0xff>; + // Extending operations def extend_t : SDTypeProfile<1, 1, [SDTCisVec<0>, SDTCisVec<1>]>; def extend_low_s : SDNode<"WebAssemblyISD::EXTEND_LOW_S", extend_t>; @@ -1255,10 +1260,6 @@ defm "" : SIMDConvert<I32x4, F64x2, int_wasm_trunc_sat_zero_signed, "trunc_sat_zero_f64x2_s", 0xfc>; defm "" : SIMDConvert<I32x4, F64x2, int_wasm_trunc_sat_zero_unsigned, "trunc_sat_zero_f64x2_u", 0xfd>; -defm "" : SIMDConvert<F64x2, I32x4, int_wasm_convert_low_signed, - "convert_low_i32x4_s", 0xfe>; -defm "" : SIMDConvert<F64x2, I32x4, int_wasm_convert_low_unsigned, - "convert_low_i32x4_u", 0xff>; //===----------------------------------------------------------------------===// // Saturating Rounding Q-Format Multiplication diff --git a/llvm/test/CodeGen/WebAssembly/simd-conversions.ll b/llvm/test/CodeGen/WebAssembly/simd-conversions.ll index 36856336e65e5..431d559220409 100644 --- a/llvm/test/CodeGen/WebAssembly/simd-conversions.ll +++ b/llvm/test/CodeGen/WebAssembly/simd-conversions.ll @@ -81,3 +81,25 @@ define <2 x i64> @trunc_sat_u_v2i64(<2 x double> %x) { %a = fptoui <2 x double> %x to <2 x i64> ret <2 x i64> %a } + +; CHECK-LABEL: convert_low_s_v2f64: +; NO-SIMD128-NOT: f64x2 +; SIMD128-NEXT: .functype convert_low_s_v2f64 (v128) -> (v128){{$}} +; SIMD128-NEXT: f64x2.convert_low_i32x4_s $push[[R:[0-9]+]]=, $0 +; SIMD128-NEXT: return $pop[[R]] +define <2 x double> @convert_low_s_v2f64(<4 x i32> %x) { + %v = shufflevector <4 x i32> %x, <4 x i32> undef, <2 x i32> <i32 0, i32 1> + %a = sitofp <2 x i32> %v to <2 x double> + ret <2 x double> %a +} + +; CHECK-LABEL: convert_low_u_v2f64: +; NO-SIMD128-NOT: f64x2 +; SIMD128-NEXT: .functype convert_low_u_v2f64 (v128) -> (v128){{$}} +; SIMD128-NEXT: f64x2.convert_low_i32x4_u $push[[R:[0-9]+]]=, $0 +; SIMD128-NEXT: return $pop[[R]] +define <2 x double> @convert_low_u_v2f64(<4 x i32> %x) { + %v = shufflevector <4 x i32> %x, <4 x i32> undef, <2 x i32> <i32 0, i32 1> + %a = uitofp <2 x i32> %v to <2 x double> + ret 
<2 x double> %a +} diff --git a/llvm/test/CodeGen/WebAssembly/simd-intrinsics.ll b/llvm/test/CodeGen/WebAssembly/simd-intrinsics.ll index f28eb0b242a52..5df5ae9a21bde 100644 --- a/llvm/test/CodeGen/WebAssembly/simd-intrinsics.ll +++ b/llvm/test/CodeGen/WebAssembly/simd-intrinsics.ll @@ -843,26 +843,6 @@ define <2 x double> @nearest_v2f64(<2 x double> %a) { ret <2 x double> %v } -; CHECK-LABEL: convert_low_signed_v2f64: -; CHECK-NEXT: .functype convert_low_signed_v2f64 (v128) -> (v128){{$}} -; CHECK-NEXT: f64x2.convert_low_i32x4_s $push[[R:[0-9]+]]=, $0{{$}} -; CHECK-NEXT: return $pop[[R]]{{$}} -declare <2 x double> @llvm.wasm.convert.low.signed(<4 x i32>) -define <2 x double> @convert_low_signed_v2f64(<4 x i32> %a) { - %v = call <2 x double> @llvm.wasm.convert.low.signed(<4 x i32> %a) - ret <2 x double> %v -} - -; CHECK-LABEL: convert_low_unsigned_v2f64: -; CHECK-NEXT: .functype convert_low_unsigned_v2f64 (v128) -> (v128){{$}} -; CHECK-NEXT: f64x2.convert_low_i32x4_u $push[[R:[0-9]+]]=, $0{{$}} -; CHECK-NEXT: return $pop[[R]]{{$}} -declare <2 x double> @llvm.wasm.convert.low.unsigned(<4 x i32>) -define <2 x double> @convert_low_unsigned_v2f64(<4 x i32> %a) { - %v = call <2 x double> @llvm.wasm.convert.low.unsigned(<4 x i32> %a) - ret <2 x double> %v -} - ; CHECK-LABEL: promote_low_v2f64: ; CHECK-NEXT: .functype promote_low_v2f64 (v128) -> (v128){{$}} ; CHECK-NEXT: f64x2.promote_low_f32x4 $push[[R:[0-9]+]]=, $0{{$}} _______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits