tlively created this revision. tlively added a reviewer: aheejin. Herald added subscribers: llvm-commits, cfe-commits, sunfish, hiraditya, jgravelle-google, sbc100, dschuff. Herald added projects: clang, LLVM.
This patch adds the pseudo-minimum and pseudo-maximum SIMD instructions (f32x4.pmin, f32x4.pmax, f64x2.pmin, f64x2.pmax) as proposed in https://github.com/WebAssembly/simd/pull/122. Since these instructions are not yet merged into the SIMD spec proposal, this patch makes them entirely opt-in by surfacing them only through LLVM intrinsics and clang builtins. If these instructions are made official, these intrinsics and builtins should be replaced with simple instruction patterns. Repository: rG LLVM Github Monorepo https://reviews.llvm.org/D79742 Files: clang/include/clang/Basic/BuiltinsWebAssembly.def clang/lib/CodeGen/CGBuiltin.cpp clang/lib/Headers/wasm_simd128.h clang/test/CodeGen/builtins-wasm.c llvm/include/llvm/IR/IntrinsicsWebAssembly.td llvm/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td llvm/test/CodeGen/WebAssembly/simd-intrinsics.ll llvm/test/MC/WebAssembly/simd-encodings.s
Index: llvm/test/MC/WebAssembly/simd-encodings.s =================================================================== --- llvm/test/MC/WebAssembly/simd-encodings.s +++ llvm/test/MC/WebAssembly/simd-encodings.s @@ -535,6 +535,12 @@ # CHECK: f32x4.max # encoding: [0xfd,0xe9,0x01] f32x4.max + # CHECK: f32x4.pmin # encoding: [0xfd,0xea,0x01] + f32x4.pmin + + # CHECK: f32x4.pmax # encoding: [0xfd,0xeb,0x01] + f32x4.pmax + # CHECK: f64x2.abs # encoding: [0xfd,0xec,0x01] f64x2.abs @@ -562,6 +568,12 @@ # CHECK: f64x2.max # encoding: [0xfd,0xf5,0x01] f64x2.max + # CHECK: f64x2.pmin # encoding: [0xfd,0xf6,0x01] + f64x2.pmin + + # CHECK: f64x2.pmax # encoding: [0xfd,0xf7,0x01] + f64x2.pmax + # CHECK: i32x4.trunc_sat_f32x4_s # encoding: [0xfd,0xf8,0x01] i32x4.trunc_sat_f32x4_s Index: llvm/test/CodeGen/WebAssembly/simd-intrinsics.ll =================================================================== --- llvm/test/CodeGen/WebAssembly/simd-intrinsics.ll +++ llvm/test/CodeGen/WebAssembly/simd-intrinsics.ll @@ -501,6 +501,26 @@ ret <4 x float> %a } +; CHECK-LABEL: pmin_v4f32: +; SIMD128-NEXT: .functype pmin_v4f32 (v128, v128) -> (v128){{$}} +; SIMD128-NEXT: f32x4.pmin $push[[R:[0-9]+]]=, $0, $1{{$}} +; SIMD128-NEXT: return $pop[[R]]{{$}} +declare <4 x float> @llvm.wasm.pmin.v4f32(<4 x float>, <4 x float>) +define <4 x float> @pmin_v4f32(<4 x float> %a, <4 x float> %b) { + %v = call <4 x float> @llvm.wasm.pmin.v4f32(<4 x float> %a, <4 x float> %b) + ret <4 x float> %v +} + +; CHECK-LABEL: pmax_v4f32: +; SIMD128-NEXT: .functype pmax_v4f32 (v128, v128) -> (v128){{$}} +; SIMD128-NEXT: f32x4.pmax $push[[R:[0-9]+]]=, $0, $1{{$}} +; SIMD128-NEXT: return $pop[[R]]{{$}} +declare <4 x float> @llvm.wasm.pmax.v4f32(<4 x float>, <4 x float>) +define <4 x float> @pmax_v4f32(<4 x float> %a, <4 x float> %b) { + %v = call <4 x float> @llvm.wasm.pmax.v4f32(<4 x float> %a, <4 x float> %b) + ret <4 x float> %v +} + ; CHECK-LABEL: qfma_v4f32: ; SIMD128-NEXT: .functype qfma_v4f32 (v128, v128, v128) -> 
(v128){{$}} ; SIMD128-NEXT: f32x4.qfma $push[[R:[0-9]+]]=, $0, $1, $2{{$}} @@ -540,6 +560,26 @@ ret <2 x double> %a } +; CHECK-LABEL: pmin_v2f64: +; SIMD128-NEXT: .functype pmin_v2f64 (v128, v128) -> (v128){{$}} +; SIMD128-NEXT: f64x2.pmin $push[[R:[0-9]+]]=, $0, $1{{$}} +; SIMD128-NEXT: return $pop[[R]]{{$}} +declare <2 x double> @llvm.wasm.pmin.v2f64(<2 x double>, <2 x double>) +define <2 x double> @pmin_v2f64(<2 x double> %a, <2 x double> %b) { + %v = call <2 x double> @llvm.wasm.pmin.v2f64(<2 x double> %a, <2 x double> %b) + ret <2 x double> %v +} + +; CHECK-LABEL: pmax_v2f64: +; SIMD128-NEXT: .functype pmax_v2f64 (v128, v128) -> (v128){{$}} +; SIMD128-NEXT: f64x2.pmax $push[[R:[0-9]+]]=, $0, $1{{$}} +; SIMD128-NEXT: return $pop[[R]]{{$}} +declare <2 x double> @llvm.wasm.pmax.v2f64(<2 x double>, <2 x double>) +define <2 x double> @pmax_v2f64(<2 x double> %a, <2 x double> %b) { + %v = call <2 x double> @llvm.wasm.pmax.v2f64(<2 x double> %a, <2 x double> %b) + ret <2 x double> %v +} + ; CHECK-LABEL: qfma_v2f64: ; SIMD128-NEXT: .functype qfma_v2f64 (v128, v128, v128) -> (v128){{$}} ; SIMD128-NEXT: f64x2.qfma $push[[R:[0-9]+]]=, $0, $1, $2{{$}} Index: llvm/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td =================================================================== --- llvm/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td +++ llvm/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td @@ -794,6 +794,12 @@ // NaN-propagating maximum: max defm MAX : SIMDBinaryFP<fmaximum, "max", 233>; +// Pseudo-minimum: pmin +defm PMIN : SIMDBinaryFP<int_wasm_pmin, "pmin", 234>; + +// Pseudo-maximum: pmax +defm PMAX : SIMDBinaryFP<int_wasm_pmax, "pmax", 235>; + //===----------------------------------------------------------------------===// // Conversions //===----------------------------------------------------------------------===// Index: llvm/include/llvm/IR/IntrinsicsWebAssembly.td =================================================================== --- 
llvm/include/llvm/IR/IntrinsicsWebAssembly.td +++ llvm/include/llvm/IR/IntrinsicsWebAssembly.td @@ -176,6 +176,17 @@ [llvm_anyvector_ty], [IntrNoMem, IntrSpeculatable]>; +// TODO: Replace these intrinsics with normal ISel patterns once the +// pmin/pmax instructions are merged to the spec proposal. +def int_wasm_pmin : + Intrinsic<[llvm_anyvector_ty], + [LLVMMatchType<0>, LLVMMatchType<0>], + [IntrNoMem, IntrSpeculatable]>; +def int_wasm_pmax : + Intrinsic<[llvm_anyvector_ty], + [LLVMMatchType<0>, LLVMMatchType<0>], + [IntrNoMem, IntrSpeculatable]>; + //===----------------------------------------------------------------------===// // Bulk memory intrinsics //===----------------------------------------------------------------------===// Index: clang/test/CodeGen/builtins-wasm.c =================================================================== --- clang/test/CodeGen/builtins-wasm.c +++ clang/test/CodeGen/builtins-wasm.c @@ -579,6 +579,20 @@ // WEBASSEMBLY-NEXT: ret } +f32x4 pmin_f32x4(f32x4 x, f32x4 y) { + return __builtin_wasm_pmin_f32x4(x, y); + // WEBASSEMBLY: call <4 x float> @llvm.wasm.pmin.v4f32( + // WEBASSEMBLY-SAME: <4 x float> %x, <4 x float> %y) + // WEBASSEMBLY-NEXT: ret +} + +f32x4 pmax_f32x4(f32x4 x, f32x4 y) { + return __builtin_wasm_pmax_f32x4(x, y); + // WEBASSEMBLY: call <4 x float> @llvm.wasm.pmax.v4f32( + // WEBASSEMBLY-SAME: <4 x float> %x, <4 x float> %y) + // WEBASSEMBLY-NEXT: ret +} + f64x2 min_f64x2(f64x2 x, f64x2 y) { return __builtin_wasm_min_f64x2(x, y); // WEBASSEMBLY: call <2 x double> @llvm.minimum.v2f64( @@ -593,6 +607,20 @@ // WEBASSEMBLY-NEXT: ret } +f64x2 pmin_f64x2(f64x2 x, f64x2 y) { + return __builtin_wasm_pmin_f64x2(x, y); + // WEBASSEMBLY: call <2 x double> @llvm.wasm.pmin.v2f64( + // WEBASSEMBLY-SAME: <2 x double> %x, <2 x double> %y) + // WEBASSEMBLY-NEXT: ret +} + +f64x2 pmax_f64x2(f64x2 x, f64x2 y) { + return __builtin_wasm_pmax_f64x2(x, y); + // WEBASSEMBLY: call <2 x double> @llvm.wasm.pmax.v2f64( + // WEBASSEMBLY-SAME: 
<2 x double> %x, <2 x double> %y) + // WEBASSEMBLY-NEXT: ret +} + f32x4 sqrt_f32x4(f32x4 x) { return __builtin_wasm_sqrt_f32x4(x); // WEBASSEMBLY: call <4 x float> @llvm.sqrt.v4f32(<4 x float> %x) Index: clang/lib/Headers/wasm_simd128.h =================================================================== --- clang/lib/Headers/wasm_simd128.h +++ clang/lib/Headers/wasm_simd128.h @@ -937,6 +937,16 @@ return (v128_t)__builtin_wasm_max_f32x4((__f32x4)__a, (__f32x4)__b); } +static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_f32x4_pmin(v128_t __a, + v128_t __b) { + return (v128_t)__builtin_wasm_pmin_f32x4((__f32x4)__a, (__f32x4)__b); +} + +static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_f32x4_pmax(v128_t __a, + v128_t __b) { + return (v128_t)__builtin_wasm_pmax_f32x4((__f32x4)__a, (__f32x4)__b); +} + static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_f64x2_abs(v128_t __a) { return (v128_t)__builtin_wasm_abs_f64x2((__f64x2)__a); } @@ -997,6 +1007,16 @@ return (v128_t)__builtin_wasm_max_f64x2((__f64x2)__a, (__f64x2)__b); } +static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_f64x2_pmin(v128_t __a, + v128_t __b) { + return (v128_t)__builtin_wasm_pmin_f64x2((__f64x2)__a, (__f64x2)__b); +} + +static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_f64x2_pmax(v128_t __a, + v128_t __b) { + return (v128_t)__builtin_wasm_pmax_f64x2((__f64x2)__a, (__f64x2)__b); +} + static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_i32x4_trunc_saturate_f32x4(v128_t __a) { return (v128_t)__builtin_wasm_trunc_saturate_s_i32x4_f32x4((__f32x4)__a); Index: clang/lib/CodeGen/CGBuiltin.cpp =================================================================== --- clang/lib/CodeGen/CGBuiltin.cpp +++ clang/lib/CodeGen/CGBuiltin.cpp @@ -15744,6 +15744,22 @@ ConvertType(E->getType())); return Builder.CreateCall(Callee, {LHS, RHS}); } + case WebAssembly::BI__builtin_wasm_pmin_f32x4: + case WebAssembly::BI__builtin_wasm_pmin_f64x2: { + Value *LHS = EmitScalarExpr(E->getArg(0)); + Value *RHS = EmitScalarExpr(E->getArg(1)); + 
Function *Callee = + CGM.getIntrinsic(Intrinsic::wasm_pmin, ConvertType(E->getType())); + return Builder.CreateCall(Callee, {LHS, RHS}); + } + case WebAssembly::BI__builtin_wasm_pmax_f32x4: + case WebAssembly::BI__builtin_wasm_pmax_f64x2: { + Value *LHS = EmitScalarExpr(E->getArg(0)); + Value *RHS = EmitScalarExpr(E->getArg(1)); + Function *Callee = + CGM.getIntrinsic(Intrinsic::wasm_pmax, ConvertType(E->getType())); + return Builder.CreateCall(Callee, {LHS, RHS}); + } case WebAssembly::BI__builtin_wasm_swizzle_v8x16: { Value *Src = EmitScalarExpr(E->getArg(0)); Value *Indices = EmitScalarExpr(E->getArg(1)); Index: clang/include/clang/Basic/BuiltinsWebAssembly.def =================================================================== --- clang/include/clang/Basic/BuiltinsWebAssembly.def +++ clang/include/clang/Basic/BuiltinsWebAssembly.def @@ -139,8 +139,12 @@ TARGET_BUILTIN(__builtin_wasm_min_f32x4, "V4fV4fV4f", "nc", "simd128") TARGET_BUILTIN(__builtin_wasm_max_f32x4, "V4fV4fV4f", "nc", "simd128") +TARGET_BUILTIN(__builtin_wasm_pmin_f32x4, "V4fV4fV4f", "nc", "simd128") +TARGET_BUILTIN(__builtin_wasm_pmax_f32x4, "V4fV4fV4f", "nc", "simd128") TARGET_BUILTIN(__builtin_wasm_min_f64x2, "V2dV2dV2d", "nc", "simd128") TARGET_BUILTIN(__builtin_wasm_max_f64x2, "V2dV2dV2d", "nc", "simd128") +TARGET_BUILTIN(__builtin_wasm_pmin_f64x2, "V2dV2dV2d", "nc", "simd128") +TARGET_BUILTIN(__builtin_wasm_pmax_f64x2, "V2dV2dV2d", "nc", "simd128") TARGET_BUILTIN(__builtin_wasm_dot_s_i32x4_i16x8, "V4iV8sV8s", "nc", "simd128")
_______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits