https://github.com/brendandahl created https://github.com/llvm/llvm-project/pull/99388
Use a builtin and intrinsic until half types are better supported for instruction selection. >From a6d65f276fba7487fdecf2e31edef457f74fbafe Mon Sep 17 00:00:00 2001 From: Brendan Dahl <brendan.d...@gmail.com> Date: Wed, 17 Jul 2024 20:10:20 +0000 Subject: [PATCH] [WebAssembly] Implement f16x8.replace_lane instruction. Use a builtin and intrinsic until half types are better supported for instruction selection. --- clang/include/clang/Basic/BuiltinsWebAssembly.def | 1 + clang/lib/CodeGen/CGBuiltin.cpp | 7 +++++++ clang/test/CodeGen/builtins-wasm.c | 6 ++++++ llvm/include/llvm/IR/IntrinsicsWebAssembly.td | 4 ++++ llvm/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td | 13 +++++++++++++ llvm/test/CodeGen/WebAssembly/half-precision.ll | 8 ++++++++ llvm/test/MC/WebAssembly/simd-encodings.s | 3 +++ 7 files changed, 42 insertions(+) diff --git a/clang/include/clang/Basic/BuiltinsWebAssembly.def b/clang/include/clang/Basic/BuiltinsWebAssembly.def index 2a45f8a6582a2..df304a71e475e 100644 --- a/clang/include/clang/Basic/BuiltinsWebAssembly.def +++ b/clang/include/clang/Basic/BuiltinsWebAssembly.def @@ -201,6 +201,7 @@ TARGET_BUILTIN(__builtin_wasm_loadf16_f32, "fh*", "nU", "half-precision") TARGET_BUILTIN(__builtin_wasm_storef16_f32, "vfh*", "n", "half-precision") TARGET_BUILTIN(__builtin_wasm_splat_f16x8, "V8hf", "nc", "half-precision") TARGET_BUILTIN(__builtin_wasm_extract_lane_f16x8, "fV8hi", "nc", "half-precision") +TARGET_BUILTIN(__builtin_wasm_replace_lane_f16x8, "V8hV8hif", "nc", "half-precision") // Reference Types builtins // Some builtins are custom type-checked - see 't' as part of the third argument, diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp index 67027f8aa93f3..402b7a7b20e61 100644 --- a/clang/lib/CodeGen/CGBuiltin.cpp +++ b/clang/lib/CodeGen/CGBuiltin.cpp @@ -21386,6 +21386,13 @@ Value *CodeGenFunction::EmitWebAssemblyBuiltinExpr(unsigned BuiltinID, Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_extract_lane_f16x8); return 
Builder.CreateCall(Callee, {Vector, Index}); } + case WebAssembly::BI__builtin_wasm_replace_lane_f16x8: { + Value *Vector = EmitScalarExpr(E->getArg(0)); + Value *Index = EmitScalarExpr(E->getArg(1)); + Value *Val = EmitScalarExpr(E->getArg(2)); + Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_replace_lane_f16x8); + return Builder.CreateCall(Callee, {Vector, Index, Val}); + } case WebAssembly::BI__builtin_wasm_table_get: { assert(E->getArg(0)->getType()->isArrayType()); Value *Table = EmitArrayToPointerDecay(E->getArg(0)).emitRawPointer(*this); diff --git a/clang/test/CodeGen/builtins-wasm.c b/clang/test/CodeGen/builtins-wasm.c index 75861b1b4bd6d..f494aeada0157 100644 --- a/clang/test/CodeGen/builtins-wasm.c +++ b/clang/test/CodeGen/builtins-wasm.c @@ -840,6 +840,12 @@ float extract_lane_f16x8(f16x8 a, int i) { return __builtin_wasm_extract_lane_f16x8(a, i); } +f16x8 replace_lane_f16x8(f16x8 a, int i, float v) { + // WEBASSEMBLY: %0 = tail call <8 x half> @llvm.wasm.replace.lane.f16x8(<8 x half> %a, i32 %i, float %v) + // WEBASSEMBLY-NEXT: ret <8 x half> %0 + return __builtin_wasm_replace_lane_f16x8(a, i, v); +} + f16x8 min_f16x8(f16x8 a, f16x8 b) { // WEBASSEMBLY: %0 = tail call <8 x half> @llvm.minimum.v8f16(<8 x half> %a, <8 x half> %b) // WEBASSEMBLY-NEXT: ret <8 x half> %0 diff --git a/llvm/include/llvm/IR/IntrinsicsWebAssembly.td b/llvm/include/llvm/IR/IntrinsicsWebAssembly.td index 47aab196a6d4f..4d2df1c44ebce 100644 --- a/llvm/include/llvm/IR/IntrinsicsWebAssembly.td +++ b/llvm/include/llvm/IR/IntrinsicsWebAssembly.td @@ -363,6 +363,10 @@ def int_wasm_extract_lane_f16x8: DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_v8f16_ty, llvm_i32_ty], [IntrNoMem, IntrSpeculatable]>; +def int_wasm_replace_lane_f16x8: + DefaultAttrsIntrinsic<[llvm_v8f16_ty], + [llvm_v8f16_ty, llvm_i32_ty, llvm_float_ty], + [IntrNoMem, IntrSpeculatable]>; //===----------------------------------------------------------------------===// diff --git 
a/llvm/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td b/llvm/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td index 2ee430c88169d..f11fe12c6ecb8 100644 --- a/llvm/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td +++ b/llvm/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td @@ -702,6 +702,19 @@ defm "" : ReplaceLane<I64x2, 30>; defm "" : ReplaceLane<F32x4, 32>; defm "" : ReplaceLane<F64x2, 34>; +// For now use an intrinsic for f16x8.replace_lane instead of ReplaceLane above +// since LL generated with half type arguments is not well supported and creates +// conversions from f16->f32. +defm REPLACE_LANE_F16x8 : + HALF_PRECISION_I<(outs V128:$dst), (ins V128:$vec, vec_i8imm_op:$idx, F32:$x), + (outs), (ins vec_i8imm_op:$idx), + [(set (v8f16 V128:$dst), (int_wasm_replace_lane_f16x8 + (v8f16 V128:$vec), + (i32 LaneIdx8:$idx), + (f32 F32:$x)))], + "f16x8.replace_lane\t$dst, $vec, $idx, $x", + "f16x8.replace_lane\t$idx", 0x122>; + // Lower undef lane indices to zero def : Pat<(vector_insert (v16i8 V128:$vec), I32:$x, undef), (REPLACE_LANE_I8x16 $vec, 0, $x)>; diff --git a/llvm/test/CodeGen/WebAssembly/half-precision.ll b/llvm/test/CodeGen/WebAssembly/half-precision.ll index fa78f5f9591d6..dba4138ad59cc 100644 --- a/llvm/test/CodeGen/WebAssembly/half-precision.ll +++ b/llvm/test/CodeGen/WebAssembly/half-precision.ll @@ -36,6 +36,14 @@ define float @extract_lane_v8f16(<8 x half> %v) { ret float %r } +; CHECK-LABEL: replace_lane_v8f16: +; CHECK: f16x8.replace_lane $push0=, $0, 1, $1 +; CHECK-NEXT: return $pop0 +define <8 x half> @replace_lane_v8f16(<8 x half> %v, float %f) { + %r = call <8 x half> @llvm.wasm.replace.lane.f16x8(<8 x half> %v, i32 1, float %f) + ret <8 x half> %r +} + ; CHECK-LABEL: add_v8f16: ; CHECK: f16x8.add $push0=, $0, $1 ; CHECK-NEXT: return $pop0 diff --git a/llvm/test/MC/WebAssembly/simd-encodings.s b/llvm/test/MC/WebAssembly/simd-encodings.s index 8c3483bfaad7a..7ae4d47d888cf 100644 --- a/llvm/test/MC/WebAssembly/simd-encodings.s +++ 
b/llvm/test/MC/WebAssembly/simd-encodings.s @@ -851,6 +851,9 @@ main: # CHECK: f16x8.extract_lane 1 # encoding: [0xfd,0xa1,0x02,0x01] f16x8.extract_lane 1 + # CHECK: f16x8.replace_lane 1 # encoding: [0xfd,0xa2,0x02,0x01] + f16x8.replace_lane 1 + # CHECK: f16x8.add # encoding: [0xfd,0xb4,0x02] f16x8.add _______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits