https://github.com/RKSimon updated https://github.com/llvm/llvm-project/pull/109405
>From 657f1c0369a2befecdca26235f231bfd9ab55e9b Mon Sep 17 00:00:00 2001 From: Simon Pilgrim <llvm-...@redking.me.uk> Date: Fri, 20 Sep 2024 12:36:27 +0100 Subject: [PATCH 1/2] [clang][wasm] Replace the target integer sub saturate intrinsics with the equivalent generic `__builtin_elementwise_sub_sat` intrinsics Remove the Intrinsic::wasm_sub_sat_signed/wasm_sub_sat_unsigned entries and just use sub_sat_s/sub_sat_u directly --- .../clang/Basic/BuiltinsWebAssembly.def | 5 ---- clang/lib/CodeGen/CGBuiltin.cpp | 22 -------------- clang/lib/Headers/wasm_simd128.h | 8 ++--- clang/test/CodeGen/builtins-wasm.c | 30 +------------------ llvm/include/llvm/IR/IntrinsicsWebAssembly.td | 8 ----- .../WebAssembly/WebAssemblyISelLowering.cpp | 4 +-- .../WebAssembly/WebAssemblyInstrSIMD.td | 6 ++-- .../CodeGen/WebAssembly/simd-intrinsics.ll | 16 +++++----- 8 files changed, 17 insertions(+), 82 deletions(-) diff --git a/clang/include/clang/Basic/BuiltinsWebAssembly.def b/clang/include/clang/Basic/BuiltinsWebAssembly.def index 90441a5d500120..ab480369b3820e 100644 --- a/clang/include/clang/Basic/BuiltinsWebAssembly.def +++ b/clang/include/clang/Basic/BuiltinsWebAssembly.def @@ -68,11 +68,6 @@ TARGET_BUILTIN(__builtin_wasm_trunc_saturate_u_i64_f64, "LLid", "nc", "nontrappi // SIMD builtins TARGET_BUILTIN(__builtin_wasm_swizzle_i8x16, "V16ScV16ScV16Sc", "nc", "simd128") -TARGET_BUILTIN(__builtin_wasm_sub_sat_s_i8x16, "V16ScV16ScV16Sc", "nc", "simd128") -TARGET_BUILTIN(__builtin_wasm_sub_sat_u_i8x16, "V16UcV16UcV16Uc", "nc", "simd128") -TARGET_BUILTIN(__builtin_wasm_sub_sat_s_i16x8, "V8sV8sV8s", "nc", "simd128") -TARGET_BUILTIN(__builtin_wasm_sub_sat_u_i16x8, "V8UsV8UsV8Us", "nc", "simd128") - TARGET_BUILTIN(__builtin_wasm_abs_i8x16, "V16ScV16Sc", "nc", "simd128") TARGET_BUILTIN(__builtin_wasm_abs_i16x8, "V8sV8s", "nc", "simd128") TARGET_BUILTIN(__builtin_wasm_abs_i32x4, "V4iV4i", "nc", "simd128") diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp index 3d1138b7773853..78b432474ba3b7 100644 --- a/clang/lib/CodeGen/CGBuiltin.cpp +++ b/clang/lib/CodeGen/CGBuiltin.cpp @@ -21443,28 +21443,6 @@ Value *CodeGenFunction::EmitWebAssemblyBuiltinExpr(unsigned BuiltinID, Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_swizzle); return Builder.CreateCall(Callee, {Src, Indices}); } - case WebAssembly::BI__builtin_wasm_sub_sat_s_i8x16: - case WebAssembly::BI__builtin_wasm_sub_sat_u_i8x16: - case WebAssembly::BI__builtin_wasm_sub_sat_s_i16x8: - case WebAssembly::BI__builtin_wasm_sub_sat_u_i16x8: { - unsigned IntNo; - switch (BuiltinID) { - case WebAssembly::BI__builtin_wasm_sub_sat_s_i8x16: - case WebAssembly::BI__builtin_wasm_sub_sat_s_i16x8: - IntNo = Intrinsic::wasm_sub_sat_signed; - break; - case WebAssembly::BI__builtin_wasm_sub_sat_u_i8x16: - case WebAssembly::BI__builtin_wasm_sub_sat_u_i16x8: - IntNo = Intrinsic::wasm_sub_sat_unsigned; - break; - default: - llvm_unreachable("unexpected builtin ID"); - } - Value *LHS = EmitScalarExpr(E->getArg(0)); - Value *RHS = EmitScalarExpr(E->getArg(1)); - Function *Callee = CGM.getIntrinsic(IntNo, ConvertType(E->getType())); - return Builder.CreateCall(Callee, {LHS, RHS}); - } case WebAssembly::BI__builtin_wasm_abs_i8x16: case WebAssembly::BI__builtin_wasm_abs_i16x8: case WebAssembly::BI__builtin_wasm_abs_i32x4: diff --git a/clang/lib/Headers/wasm_simd128.h b/clang/lib/Headers/wasm_simd128.h index b1bef7097800b9..08e39bf1a79b4f 100644 --- a/clang/lib/Headers/wasm_simd128.h +++ b/clang/lib/Headers/wasm_simd128.h @@ -997,12 +997,12 @@ static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_i8x16_sub(v128_t __a, static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_i8x16_sub_sat(v128_t __a, v128_t __b) { - return (v128_t)__builtin_wasm_sub_sat_s_i8x16((__i8x16)__a, (__i8x16)__b); + return (v128_t)__builtin_elementwise_sub_sat((__i8x16)__a, (__i8x16)__b); } static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_u8x16_sub_sat(v128_t __a, v128_t __b) { - return (v128_t)__builtin_wasm_sub_sat_u_i8x16((__u8x16)__a, (__u8x16)__b); + return (v128_t)__builtin_elementwise_sub_sat((__u8x16)__a, (__u8x16)__b); } static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_i8x16_min(v128_t __a, @@ -1083,12 +1083,12 @@ static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_i16x8_sub(v128_t __a, static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_i16x8_sub_sat(v128_t __a, v128_t __b) { - return (v128_t)__builtin_wasm_sub_sat_s_i16x8((__i16x8)__a, (__i16x8)__b); + return (v128_t)__builtin_elementwise_sub_sat((__i16x8)__a, (__i16x8)__b); } static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_u16x8_sub_sat(v128_t __a, v128_t __b) { - return (v128_t)__builtin_wasm_sub_sat_u_i16x8((__u16x8)__a, (__u16x8)__b); + return (v128_t)__builtin_elementwise_sub_sat((__u16x8)__a, (__u16x8)__b); } static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_i16x8_mul(v128_t __a, diff --git a/clang/test/CodeGen/builtins-wasm.c b/clang/test/CodeGen/builtins-wasm.c index ff3fe27a29345a..7ee2ac5de3eac7 100644 --- a/clang/test/CodeGen/builtins-wasm.c +++ b/clang/test/CodeGen/builtins-wasm.c @@ -190,23 +190,9 @@ double max_f64(double x, double y) { // WEBASSEMBLY-NEXT: ret } -i8x16 sub_sat_s_i8x16(i8x16 x, i8x16 y) { - return __builtin_wasm_sub_sat_s_i8x16(x, y); - // MISSING-SIMD: error: '__builtin_wasm_sub_sat_s_i8x16' needs target feature simd128 - // WEBASSEMBLY: call <16 x i8> @llvm.wasm.sub.sat.signed.v16i8( - // WEBASSEMBLY-SAME: <16 x i8> %x, <16 x i8> %y) - // WEBASSEMBLY-NEXT: ret -} - -u8x16 sub_sat_u_i8x16(u8x16 x, u8x16 y) { - return __builtin_wasm_sub_sat_u_i8x16(x, y); - // WEBASSEMBLY: call <16 x i8> @llvm.wasm.sub.sat.unsigned.v16i8( - // WEBASSEMBLY-SAME: <16 x i8> %x, <16 x i8> %y) - // WEBASSEMBLY-NEXT: ret -} - i8x16 abs_i8x16(i8x16 v) { return __builtin_wasm_abs_i8x16(v); + // MISSING-SIMD: error: '__builtin_wasm_abs_i8x16' needs target feature simd128 // WEBASSEMBLY: call <16 x i8> @llvm.abs.v16i8(<16 x i8> %v, i1 false) // WEBASSEMBLY-NEXT: ret } @@ -229,20 +215,6 @@ i64x2 abs_i64x2(i64x2 v) { // WEBASSEMBLY-NEXT: ret } -i16x8 sub_sat_s_i16x8(i16x8 x, i16x8 y) { - return __builtin_wasm_sub_sat_s_i16x8(x, y); - // WEBASSEMBLY: call <8 x i16> @llvm.wasm.sub.sat.signed.v8i16( - // WEBASSEMBLY-SAME: <8 x i16> %x, <8 x i16> %y) - // WEBASSEMBLY-NEXT: ret -} - -u16x8 sub_sat_u_i16x8(u16x8 x, u16x8 y) { - return __builtin_wasm_sub_sat_u_i16x8(x, y); - // WEBASSEMBLY: call <8 x i16> @llvm.wasm.sub.sat.unsigned.v8i16( - // WEBASSEMBLY-SAME: <8 x i16> %x, <8 x i16> %y) - // WEBASSEMBLY-NEXT: ret -} - u8x16 avgr_u_i8x16(u8x16 x, u8x16 y) { return __builtin_wasm_avgr_u_i8x16(x, y); // WEBASSEMBLY: call <16 x i8> @llvm.wasm.avgr.unsigned.v16i8( diff --git a/llvm/include/llvm/IR/IntrinsicsWebAssembly.td b/llvm/include/llvm/IR/IntrinsicsWebAssembly.td index d03e532964f287..f592ff287a0e30 100644 --- a/llvm/include/llvm/IR/IntrinsicsWebAssembly.td +++ b/llvm/include/llvm/IR/IntrinsicsWebAssembly.td @@ -202,14 +202,6 @@ def int_wasm_shuffle : ImmArg<ArgIndex<12>>, ImmArg<ArgIndex<13>>, ImmArg<ArgIndex<14>>, ImmArg<ArgIndex<15>>, ImmArg<ArgIndex<16>>, ImmArg<ArgIndex<17>>]>; -def int_wasm_sub_sat_signed : - DefaultAttrsIntrinsic<[llvm_anyvector_ty], - [LLVMMatchType<0>, LLVMMatchType<0>], - [IntrNoMem, IntrSpeculatable]>; -def int_wasm_sub_sat_unsigned : - DefaultAttrsIntrinsic<[llvm_anyvector_ty], - [LLVMMatchType<0>, LLVMMatchType<0>], - [IntrNoMem, IntrSpeculatable]>; def int_wasm_avgr_unsigned : DefaultAttrsIntrinsic<[llvm_anyvector_ty], [LLVMMatchType<0>, LLVMMatchType<0>], diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp index 1875a8fd4c4404..fa78bf38f426cd 100644 --- a/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp +++ b/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp @@ -198,8 +198,8 @@ WebAssemblyTargetLowering::WebAssemblyTargetLowering( setTargetDAGCombine(ISD::TRUNCATE); - // Support saturating add for i8x16 and i16x8 - for (auto Op : {ISD::SADDSAT, ISD::UADDSAT}) + // Support saturating add/sub for i8x16 and i16x8 + for (auto Op : {ISD::SADDSAT, ISD::UADDSAT, ISD::SSUBSAT, ISD::USUBSAT}) for (auto T : {MVT::v16i8, MVT::v8i16}) setOperationAction(Op, T, Legal); diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td b/llvm/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td index b652ee98cef107..60b3294b5f0bd0 100644 --- a/llvm/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td +++ b/llvm/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td @@ -1112,10 +1112,8 @@ defm ADD_SAT_U : SIMDBinaryIntSmall<uaddsat, "add_sat_u", 112>; // Integer subtraction: sub / sub_sat_s / sub_sat_u defm SUB : SIMDBinaryInt<sub, "sub", 113>; -defm SUB_SAT_S : - SIMDBinaryIntSmall<int_wasm_sub_sat_signed, "sub_sat_s", 114>; -defm SUB_SAT_U : - SIMDBinaryIntSmall<int_wasm_sub_sat_unsigned, "sub_sat_u", 115>; +defm SUB_SAT_S : SIMDBinaryIntSmall<ssubsat, "sub_sat_s", 114>; +defm SUB_SAT_U : SIMDBinaryIntSmall<usubsat, "sub_sat_u", 115>; // Integer multiplication: mul let isCommutable = 1 in diff --git a/llvm/test/CodeGen/WebAssembly/simd-intrinsics.ll b/llvm/test/CodeGen/WebAssembly/simd-intrinsics.ll index a033d2de79c877..feaea1c29e5a70 100644 --- a/llvm/test/CodeGen/WebAssembly/simd-intrinsics.ll +++ b/llvm/test/CodeGen/WebAssembly/simd-intrinsics.ll @@ -44,9 +44,9 @@ define <16 x i8> @add_sat_u_v16i8(<16 x i8> %x, <16 x i8> %y) { ; CHECK-NEXT: .functype sub_sat_s_v16i8 (v128, v128) -> (v128){{$}} ; CHECK-NEXT: i8x16.sub_sat_s $push[[R:[0-9]+]]=, $0, $1{{$}} ; CHECK-NEXT: return $pop[[R]]{{$}} -declare <16 x i8> @llvm.wasm.sub.sat.signed.v16i8(<16 x i8>, <16 x i8>) +declare <16 x i8> @llvm.ssub.sat.v16i8(<16 x i8>, <16 x i8>) define <16 x i8> @sub_sat_s_v16i8(<16 x i8> %x, <16 x i8> %y) { - %a = call <16 x i8> @llvm.wasm.sub.sat.signed.v16i8( + %a = call <16 x i8> @llvm.ssub.sat.v16i8( <16 x i8> %x, <16 x i8> %y ) ret <16 x i8> %a @@ -56,9 +56,9 @@ define <16 x i8> @sub_sat_s_v16i8(<16 x i8> %x, <16 x i8> %y) { ; CHECK-NEXT: .functype sub_sat_u_v16i8 (v128, v128) -> (v128){{$}} ; CHECK-NEXT: i8x16.sub_sat_u $push[[R:[0-9]+]]=, $0, $1{{$}} ; CHECK-NEXT: return $pop[[R]]{{$}} -declare <16 x i8> @llvm.wasm.sub.sat.unsigned.v16i8(<16 x i8>, <16 x i8>) +declare <16 x i8> @llvm.usub.sat.v16i8(<16 x i8>, <16 x i8>) define <16 x i8> @sub_sat_u_v16i8(<16 x i8> %x, <16 x i8> %y) { - %a = call <16 x i8> @llvm.wasm.sub.sat.unsigned.v16i8( + %a = call <16 x i8> @llvm.usub.sat.v16i8( <16 x i8> %x, <16 x i8> %y ) ret <16 x i8> %a @@ -216,9 +216,9 @@ define <8 x i16> @add_sat_u_v8i16(<8 x i16> %x, <8 x i16> %y) { ; CHECK-NEXT: .functype sub_sat_s_v8i16 (v128, v128) -> (v128){{$}} ; CHECK-NEXT: i16x8.sub_sat_s $push[[R:[0-9]+]]=, $0, $1{{$}} ; CHECK-NEXT: return $pop[[R]]{{$}} -declare <8 x i16> @llvm.wasm.sub.sat.signed.v8i16(<8 x i16>, <8 x i16>) +declare <8 x i16> @llvm.ssub.sat.v8i16(<8 x i16>, <8 x i16>) define <8 x i16> @sub_sat_s_v8i16(<8 x i16> %x, <8 x i16> %y) { - %a = call <8 x i16> @llvm.wasm.sub.sat.signed.v8i16( + %a = call <8 x i16> @llvm.ssub.sat.v8i16( <8 x i16> %x, <8 x i16> %y ) ret <8 x i16> %a @@ -228,9 +228,9 @@ define <8 x i16> @sub_sat_s_v8i16(<8 x i16> %x, <8 x i16> %y) { ; CHECK-NEXT: .functype sub_sat_u_v8i16 (v128, v128) -> (v128){{$}} ; CHECK-NEXT: i16x8.sub_sat_u $push[[R:[0-9]+]]=, $0, $1{{$}} ; CHECK-NEXT: return $pop[[R]]{{$}} -declare <8 x i16> @llvm.wasm.sub.sat.unsigned.v8i16(<8 x i16>, <8 x i16>) +declare <8 x i16> @llvm.usub.sat.v8i16(<8 x i16>, <8 x i16>) define <8 x i16> @sub_sat_u_v8i16(<8 x i16> %x, <8 x i16> %y) { - %a = call <8 x i16> @llvm.wasm.sub.sat.unsigned.v8i16( + %a = call <8 x i16> @llvm.usub.sat.v8i16( <8 x i16> %x, <8 x i16> %y ) ret <8 x i16> %a >From d8126272ce77732ee1e806c450501b8c75dcbe78 Mon Sep 17 00:00:00 2001 From: Simon Pilgrim <llvm-...@redking.me.uk> Date: Sat, 21 Sep 2024 09:20:14 +0100 Subject: [PATCH 2/2] Fix header tests --- clang/test/Headers/wasm.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/clang/test/Headers/wasm.c b/clang/test/Headers/wasm.c index 0fae8557a066dd..8da1d97fe13280 100644 --- a/clang/test/Headers/wasm.c +++ b/clang/test/Headers/wasm.c @@ -1677,7 +1677,7 @@ v128_t test_i8x16_sub(v128_t a, v128_t b) { // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <16 x i8> // CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <16 x i8> -// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.wasm.sub.sat.signed.v16i8(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) +// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.ssub.sat.v16i8(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) // CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to <4 x i32> // CHECK-NEXT: ret <4 x i32> [[TMP3]] // @@ -1689,7 +1689,7 @@ v128_t test_i8x16_sub_sat(v128_t a, v128_t b) { // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <16 x i8> // CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <16 x i8> -// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.wasm.sub.sat.unsigned.v16i8(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) +// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.usub.sat.v16i8(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) // CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to <4 x i32> // CHECK-NEXT: ret <4 x i32> [[TMP3]] // @@ -1897,7 +1897,7 @@ v128_t test_i16x8_sub(v128_t a, v128_t b) { // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <8 x i16> // CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <8 x i16> -// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.wasm.sub.sat.signed.v8i16(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) +// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.ssub.sat.v8i16(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) // CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to <4 x i32> // CHECK-NEXT: ret <4 x i32> [[TMP3]] // @@ -1909,7 +1909,7 @@ v128_t test_i16x8_sub_sat(v128_t a, v128_t b) { // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <8 x i16> // CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <8 x i16> -// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.wasm.sub.sat.unsigned.v8i16(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) +// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.usub.sat.v8i16(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) // CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to <4 x i32> // CHECK-NEXT: ret <4 x i32> [[TMP3]] // _______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits