[PATCH] D79742: [WebAssembly] Implement pseudo-min/max SIMD instructions

Thomas Lively via Phabricator via cfe-commits Mon, 11 May 2020 13:29:47 -0700

tlively created this revision.
tlively added a reviewer: aheejin.
Herald added subscribers: llvm-commits, cfe-commits, sunfish, hiraditya, 
jgravelle-google, sbc100, dschuff.
Herald added projects: clang, LLVM.


As proposed in https://github.com/WebAssembly/simd/pull/122. Since
these instructions are not yet merged to the SIMD spec proposal, this
patch makes them entirely opt-in by surfacing them only through LLVM
intrinsics and clang builtins. If these instructions are made
official, these intrinsics and builtins should be replaced with simple
instruction patterns.


Repository:
  rG LLVM Github Monorepo

https://reviews.llvm.org/D79742

Files:
  clang/include/clang/Basic/BuiltinsWebAssembly.def
  clang/lib/CodeGen/CGBuiltin.cpp
  clang/lib/Headers/wasm_simd128.h
  clang/test/CodeGen/builtins-wasm.c
  llvm/include/llvm/IR/IntrinsicsWebAssembly.td
  llvm/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td
  llvm/test/CodeGen/WebAssembly/simd-intrinsics.ll
  llvm/test/MC/WebAssembly/simd-encodings.s

Index: llvm/test/MC/WebAssembly/simd-encodings.s
===================================================================
--- llvm/test/MC/WebAssembly/simd-encodings.s
+++ llvm/test/MC/WebAssembly/simd-encodings.s
@@ -535,6 +535,12 @@
     # CHECK: f32x4.max # encoding: [0xfd,0xe9,0x01]
     f32x4.max
 
+    # CHECK: f32x4.pmin # encoding: [0xfd,0xea,0x01]
+    f32x4.pmin
+
+    # CHECK: f32x4.pmax # encoding: [0xfd,0xeb,0x01]
+    f32x4.pmax
+
     # CHECK: f64x2.abs # encoding: [0xfd,0xec,0x01]
     f64x2.abs
 
@@ -562,6 +568,12 @@
     # CHECK: f64x2.max # encoding: [0xfd,0xf5,0x01]
     f64x2.max
 
+    # CHECK: f64x2.pmin # encoding: [0xfd,0xf6,0x01]
+    f64x2.pmin
+
+    # CHECK: f64x2.pmax # encoding: [0xfd,0xf7,0x01]
+    f64x2.pmax
+
     # CHECK: i32x4.trunc_sat_f32x4_s # encoding: [0xfd,0xf8,0x01]
     i32x4.trunc_sat_f32x4_s
 
Index: llvm/test/CodeGen/WebAssembly/simd-intrinsics.ll
===================================================================
--- llvm/test/CodeGen/WebAssembly/simd-intrinsics.ll
+++ llvm/test/CodeGen/WebAssembly/simd-intrinsics.ll
@@ -501,6 +501,26 @@
   ret <4 x float> %a
 }
 
+; CHECK-LABEL: pmin_v4f32:
+; SIMD128-NEXT: .functype pmin_v4f32 (v128, v128) -> (v128){{$}}
+; SIMD128-NEXT: f32x4.pmin $push[[R:[0-9]+]]=, $0, $1{{$}}
+; SIMD128-NEXT: return $pop[[R]]{{$}}
+declare <4 x float> @llvm.wasm.pmin.v4f32(<4 x float>, <4 x float>)
+define <4 x float> @pmin_v4f32(<4 x float> %a, <4 x float> %b) {
+  %v = call <4 x float> @llvm.wasm.pmin.v4f32(<4 x float> %a, <4 x float> %b)
+  ret <4 x float> %v
+}
+
+; CHECK-LABEL: pmax_v4f32:
+; SIMD128-NEXT: .functype pmax_v4f32 (v128, v128) -> (v128){{$}}
+; SIMD128-NEXT: f32x4.pmax $push[[R:[0-9]+]]=, $0, $1{{$}}
+; SIMD128-NEXT: return $pop[[R]]{{$}}
+declare <4 x float> @llvm.wasm.pmax.v4f32(<4 x float>, <4 x float>)
+define <4 x float> @pmax_v4f32(<4 x float> %a, <4 x float> %b) {
+  %v = call <4 x float> @llvm.wasm.pmax.v4f32(<4 x float> %a, <4 x float> %b)
+  ret <4 x float> %v
+}
+
 ; CHECK-LABEL: qfma_v4f32:
 ; SIMD128-NEXT: .functype qfma_v4f32 (v128, v128, v128) -> (v128){{$}}
 ; SIMD128-NEXT: f32x4.qfma $push[[R:[0-9]+]]=, $0, $1, $2{{$}}
@@ -540,6 +560,26 @@
   ret <2 x double> %a
 }
 
+; CHECK-LABEL: pmin_v2f64:
+; SIMD128-NEXT: .functype pmin_v2f64 (v128, v128) -> (v128){{$}}
+; SIMD128-NEXT: f64x2.pmin $push[[R:[0-9]+]]=, $0, $1{{$}}
+; SIMD128-NEXT: return $pop[[R]]{{$}}
+declare <2 x double> @llvm.wasm.pmin.v2f64(<2 x double>, <2 x double>)
+define <2 x double> @pmin_v2f64(<2 x double> %a, <2 x double> %b) {
+  %v = call <2 x double> @llvm.wasm.pmin.v2f64(<2 x double> %a, <2 x double> %b)
+  ret <2 x double> %v
+}
+
+; CHECK-LABEL: pmax_v2f64:
+; SIMD128-NEXT: .functype pmax_v2f64 (v128, v128) -> (v128){{$}}
+; SIMD128-NEXT: f64x2.pmax $push[[R:[0-9]+]]=, $0, $1{{$}}
+; SIMD128-NEXT: return $pop[[R]]{{$}}
+declare <2 x double> @llvm.wasm.pmax.v2f64(<2 x double>, <2 x double>)
+define <2 x double> @pmax_v2f64(<2 x double> %a, <2 x double> %b) {
+  %v = call <2 x double> @llvm.wasm.pmax.v2f64(<2 x double> %a, <2 x double> %b)
+  ret <2 x double> %v
+}
+
 ; CHECK-LABEL: qfma_v2f64:
 ; SIMD128-NEXT: .functype qfma_v2f64 (v128, v128, v128) -> (v128){{$}}
 ; SIMD128-NEXT: f64x2.qfma $push[[R:[0-9]+]]=, $0, $1, $2{{$}}
Index: llvm/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td
===================================================================
--- llvm/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td
+++ llvm/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td
@@ -794,6 +794,12 @@
 // NaN-propagating maximum: max
 defm MAX : SIMDBinaryFP<fmaximum, "max", 233>;
 
+// Pseudo-minimum: pmin
+defm PMIN : SIMDBinaryFP<int_wasm_pmin, "pmin", 234>;
+
+// Pseudo-maximum: pmax
+defm PMAX : SIMDBinaryFP<int_wasm_pmax, "pmax", 235>;
+
 //===----------------------------------------------------------------------===//
 // Conversions
 //===----------------------------------------------------------------------===//
Index: llvm/include/llvm/IR/IntrinsicsWebAssembly.td
===================================================================
--- llvm/include/llvm/IR/IntrinsicsWebAssembly.td
+++ llvm/include/llvm/IR/IntrinsicsWebAssembly.td
@@ -176,6 +176,17 @@
             [llvm_anyvector_ty],
             [IntrNoMem, IntrSpeculatable]>;
 
+// TODO: Replace these intrinsics with normal ISel patterns once the
+// pmin/pmax instructions are merged to the spec proposal.
+def int_wasm_pmin :
+  Intrinsic<[llvm_anyvector_ty],
+            [LLVMMatchType<0>, LLVMMatchType<0>],
+            [IntrNoMem, IntrSpeculatable]>;
+def int_wasm_pmax :
+  Intrinsic<[llvm_anyvector_ty],
+            [LLVMMatchType<0>, LLVMMatchType<0>],
+            [IntrNoMem, IntrSpeculatable]>;
+
 //===----------------------------------------------------------------------===//
 // Bulk memory intrinsics
 //===----------------------------------------------------------------------===//
Index: clang/test/CodeGen/builtins-wasm.c
===================================================================
--- clang/test/CodeGen/builtins-wasm.c
+++ clang/test/CodeGen/builtins-wasm.c
@@ -579,6 +579,20 @@
   // WEBASSEMBLY-NEXT: ret
 }
 
+f32x4 pmin_f32x4(f32x4 x, f32x4 y) {
+  return __builtin_wasm_pmin_f32x4(x, y);
+  // WEBASSEMBLY: call <4 x float> @llvm.wasm.pmin.v4f32(
+  // WEBASSEMBLY-SAME: <4 x float> %x, <4 x float> %y)
+  // WEBASSEMBLY-NEXT: ret
+}
+
+f32x4 pmax_f32x4(f32x4 x, f32x4 y) {
+  return __builtin_wasm_pmax_f32x4(x, y);
+  // WEBASSEMBLY: call <4 x float> @llvm.wasm.pmax.v4f32(
+  // WEBASSEMBLY-SAME: <4 x float> %x, <4 x float> %y)
+  // WEBASSEMBLY-NEXT: ret
+}
+
 f64x2 min_f64x2(f64x2 x, f64x2 y) {
   return __builtin_wasm_min_f64x2(x, y);
   // WEBASSEMBLY: call <2 x double> @llvm.minimum.v2f64(
@@ -593,6 +607,20 @@
   // WEBASSEMBLY-NEXT: ret
 }
 
+f64x2 pmin_f64x2(f64x2 x, f64x2 y) {
+  return __builtin_wasm_pmin_f64x2(x, y);
+  // WEBASSEMBLY: call <2 x double> @llvm.wasm.pmin.v2f64(
+  // WEBASSEMBLY-SAME: <2 x double> %x, <2 x double> %y)
+  // WEBASSEMBLY-NEXT: ret
+}
+
+f64x2 pmax_f64x2(f64x2 x, f64x2 y) {
+  return __builtin_wasm_pmax_f64x2(x, y);
+  // WEBASSEMBLY: call <2 x double> @llvm.wasm.pmax.v2f64(
+  // WEBASSEMBLY-SAME: <2 x double> %x, <2 x double> %y)
+  // WEBASSEMBLY-NEXT: ret
+}
+
 f32x4 sqrt_f32x4(f32x4 x) {
   return __builtin_wasm_sqrt_f32x4(x);
   // WEBASSEMBLY: call <4 x float> @llvm.sqrt.v4f32(<4 x float> %x)
Index: clang/lib/Headers/wasm_simd128.h
===================================================================
--- clang/lib/Headers/wasm_simd128.h
+++ clang/lib/Headers/wasm_simd128.h
@@ -937,6 +937,16 @@
   return (v128_t)__builtin_wasm_max_f32x4((__f32x4)__a, (__f32x4)__b);
 }
 
+static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_f32x4_pmin(v128_t __a,
+                                                            v128_t __b) {
+  return (v128_t)__builtin_wasm_pmin_f32x4((__f32x4)__a, (__f32x4)__b);
+}
+
+static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_f32x4_pmax(v128_t __a,
+                                                            v128_t __b) {
+  return (v128_t)__builtin_wasm_pmax_f32x4((__f32x4)__a, (__f32x4)__b);
+}
+
 static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_f64x2_abs(v128_t __a) {
   return (v128_t)__builtin_wasm_abs_f64x2((__f64x2)__a);
 }
@@ -997,6 +1007,16 @@
   return (v128_t)__builtin_wasm_max_f64x2((__f64x2)__a, (__f64x2)__b);
 }
 
+static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_f64x2_pmin(v128_t __a,
+                                                            v128_t __b) {
+  return (v128_t)__builtin_wasm_pmin_f64x2((__f64x2)__a, (__f64x2)__b);
+}
+
+static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_f64x2_pmax(v128_t __a,
+                                                            v128_t __b) {
+  return (v128_t)__builtin_wasm_pmax_f64x2((__f64x2)__a, (__f64x2)__b);
+}
+
 static __inline__ v128_t __DEFAULT_FN_ATTRS
 wasm_i32x4_trunc_saturate_f32x4(v128_t __a) {
   return (v128_t)__builtin_wasm_trunc_saturate_s_i32x4_f32x4((__f32x4)__a);
Index: clang/lib/CodeGen/CGBuiltin.cpp
===================================================================
--- clang/lib/CodeGen/CGBuiltin.cpp
+++ clang/lib/CodeGen/CGBuiltin.cpp
@@ -15744,6 +15744,22 @@
                                      ConvertType(E->getType()));
     return Builder.CreateCall(Callee, {LHS, RHS});
   }
+  case WebAssembly::BI__builtin_wasm_pmin_f32x4:
+  case WebAssembly::BI__builtin_wasm_pmin_f64x2: {
+    Value *LHS = EmitScalarExpr(E->getArg(0));
+    Value *RHS = EmitScalarExpr(E->getArg(1));
+    Function *Callee =
+        CGM.getIntrinsic(Intrinsic::wasm_pmin, ConvertType(E->getType()));
+    return Builder.CreateCall(Callee, {LHS, RHS});
+  }
+  case WebAssembly::BI__builtin_wasm_pmax_f32x4:
+  case WebAssembly::BI__builtin_wasm_pmax_f64x2: {
+    Value *LHS = EmitScalarExpr(E->getArg(0));
+    Value *RHS = EmitScalarExpr(E->getArg(1));
+    Function *Callee =
+        CGM.getIntrinsic(Intrinsic::wasm_pmax, ConvertType(E->getType()));
+    return Builder.CreateCall(Callee, {LHS, RHS});
+  }
   case WebAssembly::BI__builtin_wasm_swizzle_v8x16: {
     Value *Src = EmitScalarExpr(E->getArg(0));
     Value *Indices = EmitScalarExpr(E->getArg(1));
Index: clang/include/clang/Basic/BuiltinsWebAssembly.def
===================================================================
--- clang/include/clang/Basic/BuiltinsWebAssembly.def
+++ clang/include/clang/Basic/BuiltinsWebAssembly.def
@@ -139,8 +139,12 @@
 
 TARGET_BUILTIN(__builtin_wasm_min_f32x4, "V4fV4fV4f", "nc", "simd128")
 TARGET_BUILTIN(__builtin_wasm_max_f32x4, "V4fV4fV4f", "nc", "simd128")
+TARGET_BUILTIN(__builtin_wasm_pmin_f32x4, "V4fV4fV4f", "nc", "simd128")
+TARGET_BUILTIN(__builtin_wasm_pmax_f32x4, "V4fV4fV4f", "nc", "simd128")
 TARGET_BUILTIN(__builtin_wasm_min_f64x2, "V2dV2dV2d", "nc", "simd128")
 TARGET_BUILTIN(__builtin_wasm_max_f64x2, "V2dV2dV2d", "nc", "simd128")
+TARGET_BUILTIN(__builtin_wasm_pmin_f64x2, "V2dV2dV2d", "nc", "simd128")
+TARGET_BUILTIN(__builtin_wasm_pmax_f64x2, "V2dV2dV2d", "nc", "simd128")
 
 TARGET_BUILTIN(__builtin_wasm_dot_s_i32x4_i16x8, "V4iV8sV8s", "nc", "simd128")

_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

[PATCH] D79742: [WebAssembly] Implement pseudo-min/max SIMD instructions

Reply via email to